toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_static.c"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_defer            (PL_parser->lex_defer)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_preambled            (PL_parser->preambled)
  68 #define PL_sublex_info          (PL_parser->sublex_info)
  69 #define PL_linestr              (PL_parser->linestr)
  70 #define PL_expect               (PL_parser->expect)
  71 #define PL_copline              (PL_parser->copline)
  72 #define PL_bufptr               (PL_parser->bufptr)
  73 #define PL_oldbufptr            (PL_parser->oldbufptr)
  74 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  75 #define PL_linestart            (PL_parser->linestart)
  76 #define PL_bufend               (PL_parser->bufend)
  77 #define PL_last_uni             (PL_parser->last_uni)
  78 #define PL_last_lop             (PL_parser->last_lop)
  79 #define PL_last_lop_op          (PL_parser->last_lop_op)
  80 #define PL_lex_state            (PL_parser->lex_state)
  81 #define PL_rsfp                 (PL_parser->rsfp)
  82 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  83 #define PL_in_my                (PL_parser->in_my)
  84 #define PL_in_my_stash          (PL_parser->in_my_stash)
  85 #define PL_tokenbuf             (PL_parser->tokenbuf)
  86 #define PL_multi_end            (PL_parser->multi_end)
  87 #define PL_error_count          (PL_parser->error_count)
  88
  89 #  define PL_nexttoke           (PL_parser->nexttoke)
  90 #  define PL_nexttype           (PL_parser->nexttype)
  91 #  define PL_nextval            (PL_parser->nextval)
  92
  93 static const char* const ident_too_long = "Identifier too long";
  94
  95 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  96
  97 #define XENUMMASK  0x3f
  98 #define XFAKEEOF   0x40
  99 #define XFAKEBRACK 0x80
 100
 101 #ifdef USE_UTF8_SCRIPTS
 102 #   define UTF cBOOL(!IN_BYTES)
 103 #else
 104 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 105 #endif
 106
 107 /* The maximum number of characters preceding the unrecognized one to display */
 108 #define UNRECOGNIZED_PRECEDE_COUNT 10
 109
 110 /* In variables named $^X, these are the legal values for X.
 111  * 1999-02-27 mjd-perl-patch@plover.com */
 112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 113
 114 #define SPACE_OR_TAB(c) isBLANK_A(c)
 115
 116 #define HEXFP_PEEK(s)     \
 117     (((s[0] == '.') && \
 118       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 119      isALPHA_FOLD_EQ(s[0], 'p'))
 120
 121 /* LEX_* are values for PL_lex_state, the state of the lexer.
 122  * They are arranged oddly so that the guard on the switch statement
 123  * can get by with a single comparison (if the compiler is smart enough).
 124  *
 125  * These values refer to the various states within a sublex parse,
 126  * i.e. within a double quotish string
 127  */
 128
 129 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 130
 131 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 132 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 133 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 134 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 135 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 136
 137                                    /* at end of code, eg "$x" followed by:  */
 138 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 139 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 140
 141 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 142                                         string or after \E, $foo, etc       */
 143 #define LEX_INTERPCONST          2 /* NOT USED */
 144 #define LEX_FORMLINE             1 /* expecting a format line               */
 145 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 146
 147
 148 #ifdef DEBUGGING
 149 static const char* const lex_state_names[] = {
 150     "KNOWNEXT",
 151     "FORMLINE",
 152     "INTERPCONST",
 153     "INTERPCONCAT",
 154     "INTERPENDMAYBE",
 155     "INTERPEND",
 156     "INTERPSTART",
 157     "INTERPPUSH",
 158     "INTERPCASEMOD",
 159     "INTERPNORMAL",
 160     "NORMAL"
 161 };
 162 #endif
 163
 164 #include "keywords.h"
 165
 166 /* CLINE is a macro that ensures PL_copline has a sane value */
 167
 168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 169
 170 /*
 171  * Convenience functions to return different tokens and prime the
 172  * lexer for the next token.  They all take an argument.
 173  *
 174  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 175  * OPERATOR     : generic operator
 176  * AOPERATOR    : assignment operator
 177  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 178  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 179  * PREREF       : *EXPR where EXPR is not a simple identifier
 180  * TERM         : expression term
 181  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 182  * LOOPX        : loop exiting command (goto, last, dump, etc)
 183  * FTST         : file test operator
 184  * FUN0         : zero-argument function
 185  * FUN0OP       : zero-argument function, with its op created in this file
 186  * FUN1         : not used, except for not, which isn't a UNIOP
 187  * BOop         : bitwise or or xor
 188  * BAop         : bitwise and
 189  * BCop         : bitwise complement
 190  * SHop         : shift operator
 191  * PWop         : power operator
 192  * PMop         : pattern-matching operator
 193  * Aop          : addition-level operator
 194  * AopNOASSIGN  : addition-level operator that is never part of .=
 195  * Mop          : multiplication-level operator
 196  * Eop          : equality-testing operator
 197  * Rop          : relational operator <= != gt
 198  *
 199  * Also see LOP and lop() below.
 200  */
 201
 202 #ifdef DEBUGGING /* Serve -DT. */
 203 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 204 #else
 205 #   define REPORT(retval) (retval)
 206 #endif
 207
 208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
 217                          pl_yylval.ival=f, \
 218                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 219                          REPORT((int)LOOPEX))
 220 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 221 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 222 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 223 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 224 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 225 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 227                        REPORT('~')
 228 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 229 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 230 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 231 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 233 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 234 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 235 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 236
 237 /* This bit of chicanery makes a unary function followed by
 238  * a parenthesis into a function with one argument, highest precedence.
 239  * The UNIDOR macro is for unary functions that can be followed by the //
 240  * operator (such as C<shift // 0>).
 241  */
 242 #define UNI3(f,x,have_x) { \
 243         pl_yylval.ival = f; \
 244         if (have_x) PL_expect = x; \
 245         PL_bufptr = s; \
 246         PL_last_uni = PL_oldbufptr; \
 247         PL_last_lop_op = f; \
 248         if (*s == '(') \
 249             return REPORT( (int)FUNC1 ); \
 250         s = skipspace(s); \
 251         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 252         }
 253 #define UNI(f)    UNI3(f,XTERM,1)
 254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 255 #define UNIPROTO(f,optional) { \
 256         if (optional) PL_last_uni = PL_oldbufptr; \
 257         OPERATOR(f); \
 258         }
 259
 260 #define UNIBRACK(f) UNI3(f,0,0)
 261
 262 /* grandfather return to old style */
 263 #define OLDLOP(f) \
 264         do { \
 265             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 266                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 267             pl_yylval.ival = (f); \
 268             PL_expect = XTERM; \
 269             PL_bufptr = s; \
 270             return (int)LSTOP; \
 271         } while(0)
 272
 273 #define COPLINE_INC_WITH_HERELINES                  \
 274     STMT_START {                                     \
 275         CopLINE_inc(PL_curcop);                       \
 276         if (PL_parser->herelines)                      \
 277             CopLINE(PL_curcop) += PL_parser->herelines, \
 278             PL_parser->herelines = 0;                    \
 279     } STMT_END
 280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 281  * is no sublex_push to follow. */
 282 #define COPLINE_SET_FROM_MULTI_END            \
 283     STMT_START {                               \
 284         CopLINE_set(PL_curcop, PL_multi_end);   \
 285         if (PL_multi_end != PL_multi_start)      \
 286             PL_parser->herelines = 0;             \
 287     } STMT_END
 288
 289
 290 #ifdef DEBUGGING
 291
 292 /* how to interpret the pl_yylval associated with the token */
 293 enum token_type {
 294     TOKENTYPE_NONE,
 295     TOKENTYPE_IVAL,
 296     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 297     TOKENTYPE_PVAL,
 298     TOKENTYPE_OPVAL
 299 };
 300
 301 static struct debug_tokens {
 302     const int token;
 303     enum token_type type;
 304     const char *name;
 305 } const debug_tokens[] =
 306 {
 307     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 308     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 309     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 310     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 311     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 312     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 313     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 314     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 315     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 316     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 317     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 318     { DO,               TOKENTYPE_NONE,         "DO" },
 319     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 320     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 321     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 322     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 323     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 324     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 325     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 326     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 327     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 328     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 329     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 330     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 331     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 332     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 333     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 334     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 335     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 336     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 337     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 338     { IF,               TOKENTYPE_IVAL,         "IF" },
 339     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 340     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 341     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 342     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 343     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 344     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 345     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 346     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 347     { MY,               TOKENTYPE_IVAL,         "MY" },
 348     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 349     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 350     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 351     { OROR,             TOKENTYPE_NONE,         "OROR" },
 352     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 353     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 354     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 355     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 356     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 357     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 358     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 359     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 360     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 361     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 362     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 363     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 364     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 365     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 366     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 367     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 368     { SUB,              TOKENTYPE_NONE,         "SUB" },
 369     { THING,            TOKENTYPE_OPVAL,        "THING" },
 370     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 371     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 372     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 373     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 374     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 375     { USE,              TOKENTYPE_IVAL,         "USE" },
 376     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 377     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 378     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 379     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 380     { 0,                TOKENTYPE_NONE,         NULL }
 381 };
 382
 383 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 384
 385 STATIC int
 386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 387 {
 388     PERL_ARGS_ASSERT_TOKEREPORT;
 389
 390     if (DEBUG_T_TEST) {
 391         const char *name = NULL;
 392         enum token_type type = TOKENTYPE_NONE;
 393         const struct debug_tokens *p;
 394         SV* const report = newSVpvs("<== ");
 395
 396         for (p = debug_tokens; p->token; p++) {
 397             if (p->token == (int)rv) {
 398                 name = p->name;
 399                 type = p->type;
 400                 break;
 401             }
 402         }
 403         if (name)
 404             Perl_sv_catpv(aTHX_ report, name);
 405         else if (isGRAPH(rv))
 406         {
 407             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 408             if ((char)rv == 'p')
 409                 sv_catpvs(report, " (pending identifier)");
 410         }
 411         else if (!rv)
 412             sv_catpvs(report, "EOF");
 413         else
 414             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 415         switch (type) {
 416         case TOKENTYPE_NONE:
 417             break;
 418         case TOKENTYPE_IVAL:
 419             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 420             break;
 421         case TOKENTYPE_OPNUM:
 422             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 423                                     PL_op_name[lvalp->ival]);
 424             break;
 425         case TOKENTYPE_PVAL:
 426             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 427             break;
 428         case TOKENTYPE_OPVAL:
 429             if (lvalp->opval) {
 430                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 431                                     PL_op_name[lvalp->opval->op_type]);
 432                 if (lvalp->opval->op_type == OP_CONST) {
 433                     Perl_sv_catpvf(aTHX_ report, " %s",
 434                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 435                 }
 436
 437             }
 438             else
 439                 sv_catpvs(report, "(opval=null)");
 440             break;
 441         }
 442         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 443     };
 444     return (int)rv;
 445 }
 446
 447
 448 /* print the buffer with suitable escapes */
 449
 450 STATIC void
 451 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 452 {
 453     SV* const tmp = newSVpvs("");
 454
 455     PERL_ARGS_ASSERT_PRINTBUF;
 456
 457     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 458     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 459     GCC_DIAG_RESTORE;
 460     SvREFCNT_dec(tmp);
 461 }
 462
 463 #endif
 464
 465 static int
 466 S_deprecate_commaless_var_list(pTHX) {
 467     PL_expect = XTERM;
 468     deprecate("comma-less variable list");
 469     return REPORT(','); /* grandfather non-comma-format format */
 470 }
 471
 472 /*
 473  * S_ao
 474  *
 475  * This subroutine looks for an '=' next to the operator that has just been
 476  * parsed and turns it into an ASSIGNOP if it finds one.
 477  */
 478
 479 STATIC int
 480 S_ao(pTHX_ int toketype)
 481 {
 482     if (*PL_bufptr == '=') {
 483         PL_bufptr++;
 484         if (toketype == ANDAND)
 485             pl_yylval.ival = OP_ANDASSIGN;
 486         else if (toketype == OROR)
 487             pl_yylval.ival = OP_ORASSIGN;
 488         else if (toketype == DORDOR)
 489             pl_yylval.ival = OP_DORASSIGN;
 490         toketype = ASSIGNOP;
 491     }
 492     return REPORT(toketype);
 493 }
 494
 495 /*
 496  * S_no_op
 497  * When Perl expects an operator and finds something else, no_op
 498  * prints the warning.  It always prints "<something> found where
 499  * operator expected.  It prints "Missing semicolon on previous line?"
 500  * if the surprise occurs at the start of the line.  "do you need to
 501  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 502  * where the compiler doesn't know if foo is a method call or a function.
 503  * It prints "Missing operator before end of line" if there's nothing
 504  * after the missing operator, or "... before <...>" if there is something
 505  * after the missing operator.
 506  */
 507
 508 STATIC void
 509 S_no_op(pTHX_ const char *const what, char *s)
 510 {
 511     char * const oldbp = PL_bufptr;
 512     const bool is_first = (PL_oldbufptr == PL_linestart);
 513
 514     PERL_ARGS_ASSERT_NO_OP;
 515
 516     if (!s)
 517         s = oldbp;
 518     else
 519         PL_bufptr = s;
 520     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 521     if (ckWARN_d(WARN_SYNTAX)) {
 522         if (is_first)
 523             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 524                     "\t(Missing semicolon on previous line?)\n");
 525         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 526             const char *t;
 527             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 528                                                             t += UTF ? UTF8SKIP(t) : 1)
 529                 NOOP;
 530             if (t < PL_bufptr && isSPACE(*t))
 531                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 532                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 533                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 534         }
 535         else {
 536             assert(s >= oldbp);
 537             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 538                     "\t(Missing operator before %"UTF8f"?)\n",
 539                      UTF8fARG(UTF, s - oldbp, oldbp));
 540         }
 541     }
 542     PL_bufptr = oldbp;
 543 }
 544
 545 /*
 546  * S_missingterm
 547  * Complain about missing quote/regexp/heredoc terminator.
 548  * If it's called with NULL then it cauterizes the line buffer.
 549  * If we're in a delimited string and the delimiter is a control
 550  * character, it's reformatted into a two-char sequence like ^C.
 551  * This is fatal.
 552  */
 553
 554 STATIC void
 555 S_missingterm(pTHX_ char *s)
 556 {
 557     char tmpbuf[3];
 558     char q;
 559     if (s) {
 560         char * const nl = strrchr(s,'\n');
 561         if (nl)
 562             *nl = '\0';
 563     }
 564     else if ((U8) PL_multi_close < 32) {
 565         *tmpbuf = '^';
 566         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 567         tmpbuf[2] = '\0';
 568         s = tmpbuf;
 569     }
 570     else {
 571         *tmpbuf = (char)PL_multi_close;
 572         tmpbuf[1] = '\0';
 573         s = tmpbuf;
 574     }
 575     q = strchr(s,'"') ? '\'' : '"';
 576     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 577 }
 578
 579 #include "feature.h"
 580
 581 /*
 582  * Check whether the named feature is enabled.
 583  */
 584 bool
 585 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 586 {
 587     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 588
 589     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 590
 591     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 592
 593     if (namelen > MAX_FEATURE_LEN)
 594         return FALSE;
 595     memcpy(&he_name[8], name, namelen);
 596
 597     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 598                                      REFCOUNTED_HE_EXISTS));
 599 }
 600
 601 /*
 602  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 603  * utf16-to-utf8-reversed.
 604  */
 605
 606 #ifdef PERL_CR_FILTER
 607 static void
 608 strip_return(SV *sv)
 609 {
 610     const char *s = SvPVX_const(sv);
 611     const char * const e = s + SvCUR(sv);
 612
 613     PERL_ARGS_ASSERT_STRIP_RETURN;
 614
 615     /* outer loop optimized to do nothing if there are no CR-LFs */
 616     while (s < e) {
 617         if (*s++ == '\r' && *s == '\n') {
 618             /* hit a CR-LF, need to copy the rest */
 619             char *d = s - 1;
 620             *d++ = *s++;
 621             while (s < e) {
 622                 if (*s == '\r' && s[1] == '\n')
 623                     s++;
 624                 *d++ = *s++;
 625             }
 626             SvCUR(sv) -= s - d;
 627             return;
 628         }
 629     }
 630 }
 631
 632 STATIC I32
 633 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 634 {
 635     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 636     if (count > 0 && !maxlen)
 637         strip_return(sv);
 638     return count;
 639 }
 640 #endif
 641
 642 /*
 643 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 644
 645 Creates and initialises a new lexer/parser state object, supplying
 646 a context in which to lex and parse from a new source of Perl code.
 647 A pointer to the new state object is placed in L</PL_parser>.  An entry
 648 is made on the save stack so that upon unwinding the new state object
 649 will be destroyed and the former value of L</PL_parser> will be restored.
 650 Nothing else need be done to clean up the parsing context.
 651
 652 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 653 non-null, provides a string (in SV form) containing code to be parsed.
 654 A copy of the string is made, so subsequent modification of I<line>
 655 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 656 from which code will be read to be parsed.  If both are non-null, the
 657 code in I<line> comes first and must consist of complete lines of input,
 658 and I<rsfp> supplies the remainder of the source.
 659
 660 The I<flags> parameter is reserved for future use.  Currently it is only
 661 used by perl internally, so extensions should always pass zero.
 662
 663 =cut
 664 */
 665
 666 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 667    can share filters with the current parser.
 668    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 669    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 670    destruction. This is used to handle the case of defaulting to reading the
 671    script from the standard input because no filename was given on the command
 672    line (without getting confused by situation where STDIN has been closed, so
 673    the script handle is opened on fd 0)  */
 674
 675 void
 676 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 677 {
 678     const char *s = NULL;
 679     yy_parser *parser, *oparser;
 680     if (flags && flags & ~LEX_START_FLAGS)
 681         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 682
 683     /* create and initialise a parser */
 684
 685     Newxz(parser, 1, yy_parser);
 686     parser->old_parser = oparser = PL_parser;
 687     PL_parser = parser;
 688
 689     parser->stack = NULL;
 690     parser->ps = NULL;
 691     parser->stack_size = 0;
 692
 693     /* on scope exit, free this parser and restore any outer one */
 694     SAVEPARSER(parser);
 695     parser->saved_curcop = PL_curcop;
 696
 697     /* initialise lexer state */
 698
 699     parser->nexttoke = 0;
 700     parser->error_count = oparser ? oparser->error_count : 0;
 701     parser->copline = parser->preambling = NOLINE;
 702     parser->lex_state = LEX_NORMAL;
 703     parser->expect = XSTATE;
 704     parser->rsfp = rsfp;
 705     parser->rsfp_filters =
 706       !(flags & LEX_START_SAME_FILTER) || !oparser
 707         ? NULL
 708         : MUTABLE_AV(SvREFCNT_inc(
 709             oparser->rsfp_filters
 710              ? oparser->rsfp_filters
 711              : (oparser->rsfp_filters = newAV())
 712           ));
 713
 714     Newx(parser->lex_brackstack, 120, char);
 715     Newx(parser->lex_casestack, 12, char);
 716     *parser->lex_casestack = '\0';
 717     Newxz(parser->lex_shared, 1, LEXSHARED);
 718
 719     if (line) {
 720         STRLEN len;
 721         s = SvPV_const(line, len);
 722         parser->linestr = flags & LEX_START_COPIED
 723                             ? SvREFCNT_inc_simple_NN(line)
 724                             : newSVpvn_flags(s, len, SvUTF8(line));
 725         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 726     } else {
 727         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 728     }
 729     parser->oldoldbufptr =
 730         parser->oldbufptr =
 731         parser->bufptr =
 732         parser->linestart = SvPVX(parser->linestr);
 733     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 734     parser->last_lop = parser->last_uni = NULL;
 735
 736     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 737                                                         |LEX_DONT_CLOSE_RSFP));
 738     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 739                                                         |LEX_DONT_CLOSE_RSFP));
 740
 741     parser->in_pod = parser->filtered = 0;
 742 }
 743
 744
 745 /* delete a parser object */
 746
 747 void
 748 Perl_parser_free(pTHX_  const yy_parser *parser)
 749 {
 750     PERL_ARGS_ASSERT_PARSER_FREE;
 751
 752     PL_curcop = parser->saved_curcop;
 753     SvREFCNT_dec(parser->linestr);
 754
 755     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 756         PerlIO_clearerr(parser->rsfp);
 757     else if (parser->rsfp && (!parser->old_parser ||
 758                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 759         PerlIO_close(parser->rsfp);
 760     SvREFCNT_dec(parser->rsfp_filters);
 761     SvREFCNT_dec(parser->lex_stuff);
 762     SvREFCNT_dec(parser->sublex_info.repl);
 763
 764     Safefree(parser->lex_brackstack);
 765     Safefree(parser->lex_casestack);
 766     Safefree(parser->lex_shared);
 767     PL_parser = parser->old_parser;
 768     Safefree(parser);
 769 }
 770
 771 void
 772 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 773 {
 774     I32 nexttoke = parser->nexttoke;
 775     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 776     while (nexttoke--) {
 777         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 778          && parser->nextval[nexttoke].opval
 779          && parser->nextval[nexttoke].opval->op_slabbed
 780          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 781             op_free(parser->nextval[nexttoke].opval);
 782             parser->nextval[nexttoke].opval = NULL;
 783         }
 784     }
 785 }
 786
 787
 788 /*
 789 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 790
 791 Buffer scalar containing the chunk currently under consideration of the
 792 text currently being lexed.  This is always a plain string scalar (for
 793 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 794 normal scalar means; instead refer to the buffer directly by the pointer
 795 variables described below.
 796
 797 The lexer maintains various C<char*> pointers to things in the
 798 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 799 reallocated, all of these pointers must be updated.  Don't attempt to
 800 do this manually, but rather use L</lex_grow_linestr> if you need to
 801 reallocate the buffer.
 802
 803 The content of the text chunk in the buffer is commonly exactly one
 804 complete line of input, up to and including a newline terminator,
 805 but there are situations where it is otherwise.  The octets of the
 806 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 807 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 808 flag on this scalar, which may disagree with it.
 809
 810 For direct examination of the buffer, the variable
 811 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 812 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 813 of these pointers is usually preferable to examination of the scalar
 814 through normal scalar means.
 815
 816 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 817
 818 Direct pointer to the end of the chunk of text currently being lexed, the
 819 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 820 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 821 always located at the end of the buffer, and does not count as part of
 822 the buffer's contents.
 823
 824 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 825
 826 Points to the current position of lexing inside the lexer buffer.
 827 Characters around this point may be freely examined, within
 828 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 829 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 830 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 831
 832 Lexing code (whether in the Perl core or not) moves this pointer past
 833 the characters that it consumes.  It is also expected to perform some
 834 bookkeeping whenever a newline character is consumed.  This movement
 835 can be more conveniently performed by the function L</lex_read_to>,
 836 which handles newlines appropriately.
 837
 838 Interpretation of the buffer's octets can be abstracted out by
 839 using the slightly higher-level functions L</lex_peek_unichar> and
 840 L</lex_read_unichar>.
 841
 842 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 843
 844 Points to the start of the current line inside the lexer buffer.
 845 This is useful for indicating at which column an error occurred, and
 846 not much else.  This must be updated by any lexing code that consumes
 847 a newline; the function L</lex_read_to> handles this detail.
 848
 849 =cut
 850 */
 851
 852 /*
 853 =for apidoc Amx|bool|lex_bufutf8
 854
 855 Indicates whether the octets in the lexer buffer
 856 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 857 of Unicode characters.  If not, they should be interpreted as Latin-1
 858 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 859
 860 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 861 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 862 encoding.
 863
 864 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 865 is significant, but not the whole story regarding the input character
 866 encoding.  Normally, when a file is being read, the scalar contains octets
 867 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 868 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 869 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 870 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 871 is in effect.  This logic may change in the future; use this function
 872 instead of implementing the logic yourself.
 873
 874 =cut
 875 */
 876
 877 bool
 878 Perl_lex_bufutf8(pTHX)
 879 {
 880     return UTF;
 881 }
 882
 883 /*
 884 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 885
 886 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 887 at least I<len> octets (including terminating C<NUL>).  Returns a
 888 pointer to the reallocated buffer.  This is necessary before making
 889 any direct modification of the buffer that would increase its length.
 890 L</lex_stuff_pvn> provides a more convenient way to insert text into
 891 the buffer.
 892
 893 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 894 this function updates all of the lexer's variables that point directly
 895 into the buffer.
 896
 897 =cut
 898 */
 899
 900 char *
 901 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 902 {
 903     SV *linestr;
 904     char *buf;
 905     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 906     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 907     linestr = PL_parser->linestr;
 908     buf = SvPVX(linestr);
 909     if (len <= SvLEN(linestr))
 910         return buf;
 911     bufend_pos = PL_parser->bufend - buf;
 912     bufptr_pos = PL_parser->bufptr - buf;
 913     oldbufptr_pos = PL_parser->oldbufptr - buf;
 914     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 915     linestart_pos = PL_parser->linestart - buf;
 916     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 917     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 918     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 919                             PL_parser->lex_shared->re_eval_start - buf : 0;
 920
 921     buf = sv_grow(linestr, len);
 922
 923     PL_parser->bufend = buf + bufend_pos;
 924     PL_parser->bufptr = buf + bufptr_pos;
 925     PL_parser->oldbufptr = buf + oldbufptr_pos;
 926     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 927     PL_parser->linestart = buf + linestart_pos;
 928     if (PL_parser->last_uni)
 929         PL_parser->last_uni = buf + last_uni_pos;
 930     if (PL_parser->last_lop)
 931         PL_parser->last_lop = buf + last_lop_pos;
 932     if (PL_parser->lex_shared->re_eval_start)
 933         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 934     return buf;
 935 }
 936
 937 /*
 938 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 939
 940 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 941 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 942 reallocating the buffer if necessary.  This means that lexing code that
 943 runs later will see the characters as if they had appeared in the input.
 944 It is not recommended to do this as part of normal parsing, and most
 945 uses of this facility run the risk of the inserted characters being
 946 interpreted in an unintended manner.
 947
 948 The string to be inserted is represented by I<len> octets starting
 949 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 950 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 951 The characters are recoded for the lexer buffer, according to how the
 952 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 953 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 954 function is more convenient.
 955
 956 =cut
 957 */
 958
 959 void
 960 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 961 {
 962     dVAR;
 963     char *bufptr;
 964     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 965     if (flags & ~(LEX_STUFF_UTF8))
 966         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 967     if (UTF) {
 968         if (flags & LEX_STUFF_UTF8) {
 969             goto plain_copy;
 970         } else {
 971             STRLEN highhalf = 0;    /* Count of variants */
 972             const char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 if (! UTF8_IS_INVARIANT(*p)) {
 975                     highhalf++;
 976                 }
 977             }
 978             if (!highhalf)
 979                 goto plain_copy;
 980             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 981             bufptr = PL_parser->bufptr;
 982             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 983             SvCUR_set(PL_parser->linestr,
 984                 SvCUR(PL_parser->linestr) + len+highhalf);
 985             PL_parser->bufend += len+highhalf;
 986             for (p = pv; p != e; p++) {
 987                 U8 c = (U8)*p;
 988                 if (! UTF8_IS_INVARIANT(c)) {
 989                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
 990                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
 991                 } else {
 992                     *bufptr++ = (char)c;
 993                 }
 994             }
 995         }
 996     } else {
 997         if (flags & LEX_STUFF_UTF8) {
 998             STRLEN highhalf = 0;
 999             const char *p, *e = pv+len;
1000             for (p = pv; p != e; p++) {
1001                 U8 c = (U8)*p;
1002                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1003                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1004                                 "non-Latin-1 character into Latin-1 input");
1005                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1006                     p++;
1007                     highhalf++;
1008                 } else if (! UTF8_IS_INVARIANT(c)) {
1009                     /* malformed UTF-8 */
1010                     ENTER;
1011                     SAVESPTR(PL_warnhook);
1012                     PL_warnhook = PERL_WARNHOOK_FATAL;
1013                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1014                     LEAVE;
1015                 }
1016             }
1017             if (!highhalf)
1018                 goto plain_copy;
1019             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1020             bufptr = PL_parser->bufptr;
1021             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1022             SvCUR_set(PL_parser->linestr,
1023                 SvCUR(PL_parser->linestr) + len-highhalf);
1024             PL_parser->bufend += len-highhalf;
1025             p = pv;
1026             while (p < e) {
1027                 if (UTF8_IS_INVARIANT(*p)) {
1028                     *bufptr++ = *p;
1029                     p++;
1030                 }
1031                 else {
1032                     assert(p < e -1 );
1033                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1034                     p += 2;
1035                 }
1036             }
1037         } else {
1038           plain_copy:
1039             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1040             bufptr = PL_parser->bufptr;
1041             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1042             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1043             PL_parser->bufend += len;
1044             Copy(pv, bufptr, len, char);
1045         }
1046     }
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1051
1052 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1053 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1054 reallocating the buffer if necessary.  This means that lexing code that
1055 runs later will see the characters as if they had appeared in the input.
1056 It is not recommended to do this as part of normal parsing, and most
1057 uses of this facility run the risk of the inserted characters being
1058 interpreted in an unintended manner.
1059
1060 The string to be inserted is represented by octets starting at I<pv>
1061 and continuing to the first nul.  These octets are interpreted as either
1062 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1063 in I<flags>.  The characters are recoded for the lexer buffer, according
1064 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1065 If it is not convenient to nul-terminate a string to be inserted, the
1066 L</lex_stuff_pvn> function is more appropriate.
1067
1068 =cut
1069 */
1070
1071 void
1072 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1073 {
1074     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1075     lex_stuff_pvn(pv, strlen(pv), flags);
1076 }
1077
1078 /*
1079 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1080
1081 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1082 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1083 reallocating the buffer if necessary.  This means that lexing code that
1084 runs later will see the characters as if they had appeared in the input.
1085 It is not recommended to do this as part of normal parsing, and most
1086 uses of this facility run the risk of the inserted characters being
1087 interpreted in an unintended manner.
1088
1089 The string to be inserted is the string value of I<sv>.  The characters
1090 are recoded for the lexer buffer, according to how the buffer is currently
1091 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1092 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1093 need to construct a scalar.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1100 {
1101     char *pv;
1102     STRLEN len;
1103     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1104     if (flags)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1106     pv = SvPV(sv, len);
1107     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_unstuff|char *ptr
1112
1113 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1114 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1115 This hides the discarded text from any lexing code that runs later,
1116 as if the text had never appeared.
1117
1118 This is not the normal way to consume lexed text.  For that, use
1119 L</lex_read_to>.
1120
1121 =cut
1122 */
1123
1124 void
1125 Perl_lex_unstuff(pTHX_ char *ptr)
1126 {
1127     char *buf, *bufend;
1128     STRLEN unstuff_len;
1129     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1130     buf = PL_parser->bufptr;
1131     if (ptr < buf)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1133     if (ptr == buf)
1134         return;
1135     bufend = PL_parser->bufend;
1136     if (ptr > bufend)
1137         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138     unstuff_len = ptr - buf;
1139     Move(ptr, buf, bufend+1-ptr, char);
1140     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1141     PL_parser->bufend = bufend - unstuff_len;
1142 }
1143
1144 /*
1145 =for apidoc Amx|void|lex_read_to|char *ptr
1146
1147 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1148 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1149 performing the correct bookkeeping whenever a newline character is passed.
1150 This is the normal way to consume lexed text.
1151
1152 Interpretation of the buffer's octets can be abstracted out by
1153 using the slightly higher-level functions L</lex_peek_unichar> and
1154 L</lex_read_unichar>.
1155
1156 =cut
1157 */
1158
1159 void
1160 Perl_lex_read_to(pTHX_ char *ptr)
1161 {
1162     char *s;
1163     PERL_ARGS_ASSERT_LEX_READ_TO;
1164     s = PL_parser->bufptr;
1165     if (ptr < s || ptr > PL_parser->bufend)
1166         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1167     for (; s != ptr; s++)
1168         if (*s == '\n') {
1169             COPLINE_INC_WITH_HERELINES;
1170             PL_parser->linestart = s+1;
1171         }
1172     PL_parser->bufptr = ptr;
1173 }
1174
1175 /*
1176 =for apidoc Amx|void|lex_discard_to|char *ptr
1177
1178 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1179 up to I<ptr>.  The remaining content of the buffer will be moved, and
1180 all pointers into the buffer updated appropriately.  I<ptr> must not
1181 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1182 it is not permitted to discard text that has yet to be lexed.
1183
1184 Normally it is not necessarily to do this directly, because it suffices to
1185 use the implicit discarding behaviour of L</lex_next_chunk> and things
1186 based on it.  However, if a token stretches across multiple lines,
1187 and the lexing code has kept multiple lines of text in the buffer for
1188 that purpose, then after completion of the token it would be wise to
1189 explicitly discard the now-unneeded earlier lines, to avoid future
1190 multi-line tokens growing the buffer without bound.
1191
1192 =cut
1193 */
1194
1195 void
1196 Perl_lex_discard_to(pTHX_ char *ptr)
1197 {
1198     char *buf;
1199     STRLEN discard_len;
1200     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1201     buf = SvPVX(PL_parser->linestr);
1202     if (ptr < buf)
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1204     if (ptr == buf)
1205         return;
1206     if (ptr > PL_parser->bufptr)
1207         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208     discard_len = ptr - buf;
1209     if (PL_parser->oldbufptr < ptr)
1210         PL_parser->oldbufptr = ptr;
1211     if (PL_parser->oldoldbufptr < ptr)
1212         PL_parser->oldoldbufptr = ptr;
1213     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1214         PL_parser->last_uni = NULL;
1215     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1216         PL_parser->last_lop = NULL;
1217     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1218     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1219     PL_parser->bufend -= discard_len;
1220     PL_parser->bufptr -= discard_len;
1221     PL_parser->oldbufptr -= discard_len;
1222     PL_parser->oldoldbufptr -= discard_len;
1223     if (PL_parser->last_uni)
1224         PL_parser->last_uni -= discard_len;
1225     if (PL_parser->last_lop)
1226         PL_parser->last_lop -= discard_len;
1227 }
1228
1229 /*
1230 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1231
1232 Reads in the next chunk of text to be lexed, appending it to
1233 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1234 looked to the end of the current chunk and wants to know more.  It is
1235 usual, but not necessary, for lexing to have consumed the entirety of
1236 the current chunk at this time.
1237
1238 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1239 chunk (i.e., the current chunk has been entirely consumed), normally the
1240 current chunk will be discarded at the same time that the new chunk is
1241 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1242 will not be discarded.  If the current chunk has not been entirely
1243 consumed, then it will not be discarded regardless of the flag.
1244
1245 Returns true if some new text was added to the buffer, or false if the
1246 buffer has reached the end of the input text.
1247
1248 =cut
1249 */
1250
1251 #define LEX_FAKE_EOF 0x80000000
1252 #define LEX_NO_TERM  0x40000000
1253
1254 bool
1255 Perl_lex_next_chunk(pTHX_ U32 flags)
1256 {
1257     SV *linestr;
1258     char *buf;
1259     STRLEN old_bufend_pos, new_bufend_pos;
1260     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1261     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1262     bool got_some_for_debugger = 0;
1263     bool got_some;
1264     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1265         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1266     linestr = PL_parser->linestr;
1267     buf = SvPVX(linestr);
1268     if (!(flags & LEX_KEEP_PREVIOUS) &&
1269             PL_parser->bufptr == PL_parser->bufend) {
1270         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1271         linestart_pos = 0;
1272         if (PL_parser->last_uni != PL_parser->bufend)
1273             PL_parser->last_uni = NULL;
1274         if (PL_parser->last_lop != PL_parser->bufend)
1275             PL_parser->last_lop = NULL;
1276         last_uni_pos = last_lop_pos = 0;
1277         *buf = 0;
1278         SvCUR(linestr) = 0;
1279     } else {
1280         old_bufend_pos = PL_parser->bufend - buf;
1281         bufptr_pos = PL_parser->bufptr - buf;
1282         oldbufptr_pos = PL_parser->oldbufptr - buf;
1283         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1284         linestart_pos = PL_parser->linestart - buf;
1285         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1286         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1287     }
1288     if (flags & LEX_FAKE_EOF) {
1289         goto eof;
1290     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1291         got_some = 0;
1292     } else if (filter_gets(linestr, old_bufend_pos)) {
1293         got_some = 1;
1294         got_some_for_debugger = 1;
1295     } else if (flags & LEX_NO_TERM) {
1296         got_some = 0;
1297     } else {
1298         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1299             sv_setpvs(linestr, "");
1300         eof:
1301         /* End of real input.  Close filehandle (unless it was STDIN),
1302          * then add implicit termination.
1303          */
1304         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1305             PerlIO_clearerr(PL_parser->rsfp);
1306         else if (PL_parser->rsfp)
1307             (void)PerlIO_close(PL_parser->rsfp);
1308         PL_parser->rsfp = NULL;
1309         PL_parser->in_pod = PL_parser->filtered = 0;
1310         if (!PL_in_eval && PL_minus_p) {
1311             sv_catpvs(linestr,
1312                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1313             PL_minus_n = PL_minus_p = 0;
1314         } else if (!PL_in_eval && PL_minus_n) {
1315             sv_catpvs(linestr, /*{*/";}");
1316             PL_minus_n = 0;
1317         } else
1318             sv_catpvs(linestr, ";");
1319         got_some = 1;
1320     }
1321     buf = SvPVX(linestr);
1322     new_bufend_pos = SvCUR(linestr);
1323     PL_parser->bufend = buf + new_bufend_pos;
1324     PL_parser->bufptr = buf + bufptr_pos;
1325     PL_parser->oldbufptr = buf + oldbufptr_pos;
1326     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1327     PL_parser->linestart = buf + linestart_pos;
1328     if (PL_parser->last_uni)
1329         PL_parser->last_uni = buf + last_uni_pos;
1330     if (PL_parser->last_lop)
1331         PL_parser->last_lop = buf + last_lop_pos;
1332     if (PL_parser->preambling != NOLINE) {
1333         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1334         PL_parser->preambling = NOLINE;
1335     }
1336     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1337             PL_curstash != PL_debstash) {
1338         /* debugger active and we're not compiling the debugger code,
1339          * so store the line into the debugger's array of lines
1340          */
1341         update_debugger_info(NULL, buf+old_bufend_pos,
1342             new_bufend_pos-old_bufend_pos);
1343     }
1344     return got_some;
1345 }
1346
1347 /*
1348 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1349
1350 Looks ahead one (Unicode) character in the text currently being lexed.
1351 Returns the codepoint (unsigned integer value) of the next character,
1352 or -1 if lexing has reached the end of the input text.  To consume the
1353 peeked character, use L</lex_read_unichar>.
1354
1355 If the next character is in (or extends into) the next chunk of input
1356 text, the next chunk will be read in.  Normally the current chunk will be
1357 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1358 then the current chunk will not be discarded.
1359
1360 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1361 is encountered, an exception is generated.
1362
1363 =cut
1364 */
1365
1366 I32
1367 Perl_lex_peek_unichar(pTHX_ U32 flags)
1368 {
1369     dVAR;
1370     char *s, *bufend;
1371     if (flags & ~(LEX_KEEP_PREVIOUS))
1372         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1373     s = PL_parser->bufptr;
1374     bufend = PL_parser->bufend;
1375     if (UTF) {
1376         U8 head;
1377         I32 unichar;
1378         STRLEN len, retlen;
1379         if (s == bufend) {
1380             if (!lex_next_chunk(flags))
1381                 return -1;
1382             s = PL_parser->bufptr;
1383             bufend = PL_parser->bufend;
1384         }
1385         head = (U8)*s;
1386         if (UTF8_IS_INVARIANT(head))
1387             return head;
1388         if (UTF8_IS_START(head)) {
1389             len = UTF8SKIP(&head);
1390             while ((STRLEN)(bufend-s) < len) {
1391                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1392                     break;
1393                 s = PL_parser->bufptr;
1394                 bufend = PL_parser->bufend;
1395             }
1396         }
1397         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1398         if (retlen == (STRLEN)-1) {
1399             /* malformed UTF-8 */
1400             ENTER;
1401             SAVESPTR(PL_warnhook);
1402             PL_warnhook = PERL_WARNHOOK_FATAL;
1403             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1404             LEAVE;
1405         }
1406         return unichar;
1407     } else {
1408         if (s == bufend) {
1409             if (!lex_next_chunk(flags))
1410                 return -1;
1411             s = PL_parser->bufptr;
1412         }
1413         return (U8)*s;
1414     }
1415 }
1416
1417 /*
1418 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1419
1420 Reads the next (Unicode) character in the text currently being lexed.
1421 Returns the codepoint (unsigned integer value) of the character read,
1422 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1423 if lexing has reached the end of the input text.  To non-destructively
1424 examine the next character, use L</lex_peek_unichar> instead.
1425
1426 If the next character is in (or extends into) the next chunk of input
1427 text, the next chunk will be read in.  Normally the current chunk will be
1428 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1429 then the current chunk will not be discarded.
1430
1431 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1432 is encountered, an exception is generated.
1433
1434 =cut
1435 */
1436
1437 I32
1438 Perl_lex_read_unichar(pTHX_ U32 flags)
1439 {
1440     I32 c;
1441     if (flags & ~(LEX_KEEP_PREVIOUS))
1442         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1443     c = lex_peek_unichar(flags);
1444     if (c != -1) {
1445         if (c == '\n')
1446             COPLINE_INC_WITH_HERELINES;
1447         if (UTF)
1448             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1449         else
1450             ++(PL_parser->bufptr);
1451     }
1452     return c;
1453 }
1454
1455 /*
1456 =for apidoc Amx|void|lex_read_space|U32 flags
1457
1458 Reads optional spaces, in Perl style, in the text currently being
1459 lexed.  The spaces may include ordinary whitespace characters and
1460 Perl-style comments.  C<#line> directives are processed if encountered.
1461 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1462 at a non-space character (or the end of the input text).
1463
1464 If spaces extend into the next chunk of input text, the next chunk will
1465 be read in.  Normally the current chunk will be discarded at the same
1466 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1467 chunk will not be discarded.
1468
1469 =cut
1470 */
1471
1472 #define LEX_NO_INCLINE    0x40000000
1473 #define LEX_NO_NEXT_CHUNK 0x80000000
1474
1475 void
1476 Perl_lex_read_space(pTHX_ U32 flags)
1477 {
1478     char *s, *bufend;
1479     const bool can_incline = !(flags & LEX_NO_INCLINE);
1480     bool need_incline = 0;
1481     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1482         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1483     s = PL_parser->bufptr;
1484     bufend = PL_parser->bufend;
1485     while (1) {
1486         char c = *s;
1487         if (c == '#') {
1488             do {
1489                 c = *++s;
1490             } while (!(c == '\n' || (c == 0 && s == bufend)));
1491         } else if (c == '\n') {
1492             s++;
1493             if (can_incline) {
1494                 PL_parser->linestart = s;
1495                 if (s == bufend)
1496                     need_incline = 1;
1497                 else
1498                     incline(s);
1499             }
1500         } else if (isSPACE(c)) {
1501             s++;
1502         } else if (c == 0 && s == bufend) {
1503             bool got_more;
1504             line_t l;
1505             if (flags & LEX_NO_NEXT_CHUNK)
1506                 break;
1507             PL_parser->bufptr = s;
1508             l = CopLINE(PL_curcop);
1509             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1510             got_more = lex_next_chunk(flags);
1511             CopLINE_set(PL_curcop, l);
1512             s = PL_parser->bufptr;
1513             bufend = PL_parser->bufend;
1514             if (!got_more)
1515                 break;
1516             if (can_incline && need_incline && PL_parser->rsfp) {
1517                 incline(s);
1518                 need_incline = 0;
1519             }
1520         } else {
1521             break;
1522         }
1523     }
1524     PL_parser->bufptr = s;
1525 }
1526
1527 /*
1528
1529 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1530
1531 This function performs syntax checking on a prototype, C<proto>.
1532 If C<warn> is true, any illegal characters or mismatched brackets
1533 will trigger illegalproto warnings, declaring that they were
1534 detected in the prototype for C<name>.
1535
1536 The return value is C<true> if this is a valid prototype, and
1537 C<false> if it is not, regardless of whether C<warn> was C<true> or
1538 C<false>.
1539
1540 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1541
1542 =cut
1543
1544  */
1545
1546 bool
1547 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1548 {
1549     STRLEN len, origlen;
1550     char *p = proto ? SvPV(proto, len) : NULL;
1551     bool bad_proto = FALSE;
1552     bool in_brackets = FALSE;
1553     bool after_slash = FALSE;
1554     char greedy_proto = ' ';
1555     bool proto_after_greedy_proto = FALSE;
1556     bool must_be_last = FALSE;
1557     bool underscore = FALSE;
1558     bool bad_proto_after_underscore = FALSE;
1559
1560     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1561
1562     if (!proto)
1563         return TRUE;
1564
1565     origlen = len;
1566     for (; len--; p++) {
1567         if (!isSPACE(*p)) {
1568             if (must_be_last)
1569                 proto_after_greedy_proto = TRUE;
1570             if (underscore) {
1571                 if (!strchr(";@%", *p))
1572                     bad_proto_after_underscore = TRUE;
1573                 underscore = FALSE;
1574             }
1575             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1576                 bad_proto = TRUE;
1577             }
1578             else {
1579                 if (*p == '[')
1580                     in_brackets = TRUE;
1581                 else if (*p == ']')
1582                     in_brackets = FALSE;
1583                 else if ((*p == '@' || *p == '%') &&
1584                     !after_slash &&
1585                     !in_brackets ) {
1586                     must_be_last = TRUE;
1587                     greedy_proto = *p;
1588                 }
1589                 else if (*p == '_')
1590                     underscore = TRUE;
1591             }
1592             if (*p == '\\')
1593                 after_slash = TRUE;
1594             else
1595                 after_slash = FALSE;
1596         }
1597     }
1598
1599     if (warn) {
1600         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1601         p -= origlen;
1602         p = SvUTF8(proto)
1603             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1604                              origlen, UNI_DISPLAY_ISPRINT)
1605             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1606
1607         if (proto_after_greedy_proto)
1608             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1609                         "Prototype after '%c' for %"SVf" : %s",
1610                         greedy_proto, SVfARG(name), p);
1611         if (in_brackets)
1612             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1613                         "Missing ']' in prototype for %"SVf" : %s",
1614                         SVfARG(name), p);
1615         if (bad_proto)
1616             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1617                         "Illegal character in prototype for %"SVf" : %s",
1618                         SVfARG(name), p);
1619         if (bad_proto_after_underscore)
1620             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1621                         "Illegal character after '_' in prototype for %"SVf" : %s",
1622                         SVfARG(name), p);
1623     }
1624
1625     return (! (proto_after_greedy_proto || bad_proto) );
1626 }
1627
1628 /*
1629  * S_incline
1630  * This subroutine has nothing to do with tilting, whether at windmills
1631  * or pinball tables.  Its name is short for "increment line".  It
1632  * increments the current line number in CopLINE(PL_curcop) and checks
1633  * to see whether the line starts with a comment of the form
1634  *    # line 500 "foo.pm"
1635  * If so, it sets the current line number and file to the values in the comment.
1636  */
1637
1638 STATIC void
1639 S_incline(pTHX_ const char *s)
1640 {
1641     const char *t;
1642     const char *n;
1643     const char *e;
1644     line_t line_num;
1645
1646     PERL_ARGS_ASSERT_INCLINE;
1647
1648     COPLINE_INC_WITH_HERELINES;
1649     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1650      && s+1 == PL_bufend && *s == ';') {
1651         /* fake newline in string eval */
1652         CopLINE_dec(PL_curcop);
1653         return;
1654     }
1655     if (*s++ != '#')
1656         return;
1657     while (SPACE_OR_TAB(*s))
1658         s++;
1659     if (strnEQ(s, "line", 4))
1660         s += 4;
1661     else
1662         return;
1663     if (SPACE_OR_TAB(*s))
1664         s++;
1665     else
1666         return;
1667     while (SPACE_OR_TAB(*s))
1668         s++;
1669     if (!isDIGIT(*s))
1670         return;
1671
1672     n = s;
1673     while (isDIGIT(*s))
1674         s++;
1675     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1676         return;
1677     while (SPACE_OR_TAB(*s))
1678         s++;
1679     if (*s == '"' && (t = strchr(s+1, '"'))) {
1680         s++;
1681         e = t + 1;
1682     }
1683     else {
1684         t = s;
1685         while (!isSPACE(*t))
1686             t++;
1687         e = t;
1688     }
1689     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1690         e++;
1691     if (*e != '\n' && *e != '\0')
1692         return;         /* false alarm */
1693
1694     line_num = grok_atou(n, &e) - 1;
1695
1696     if (t - s > 0) {
1697         const STRLEN len = t - s;
1698
1699         if (!PL_rsfp && !PL_parser->filtered) {
1700             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1701              * to *{"::_<newfilename"} */
1702             /* However, the long form of evals is only turned on by the
1703                debugger - usually they're "(eval %lu)" */
1704             GV * const cfgv = CopFILEGV(PL_curcop);
1705             if (cfgv) {
1706                 char smallbuf[128];
1707                 STRLEN tmplen2 = len;
1708                 char *tmpbuf2;
1709                 GV *gv2;
1710
1711                 if (tmplen2 + 2 <= sizeof smallbuf)
1712                     tmpbuf2 = smallbuf;
1713                 else
1714                     Newx(tmpbuf2, tmplen2 + 2, char);
1715
1716                 tmpbuf2[0] = '_';
1717                 tmpbuf2[1] = '<';
1718
1719                 memcpy(tmpbuf2 + 2, s, tmplen2);
1720                 tmplen2 += 2;
1721
1722                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1723                 if (!isGV(gv2)) {
1724                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1725                     /* adjust ${"::_<newfilename"} to store the new file name */
1726                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1727                     /* The line number may differ. If that is the case,
1728                        alias the saved lines that are in the array.
1729                        Otherwise alias the whole array. */
1730                     if (CopLINE(PL_curcop) == line_num) {
1731                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1732                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1733                     }
1734                     else if (GvAV(cfgv)) {
1735                         AV * const av = GvAV(cfgv);
1736                         const I32 start = CopLINE(PL_curcop)+1;
1737                         I32 items = AvFILLp(av) - start;
1738                         if (items > 0) {
1739                             AV * const av2 = GvAVn(gv2);
1740                             SV **svp = AvARRAY(av) + start;
1741                             I32 l = (I32)line_num+1;
1742                             while (items--)
1743                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1744                         }
1745                     }
1746                 }
1747
1748                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1749             }
1750         }
1751         CopFILE_free(PL_curcop);
1752         CopFILE_setn(PL_curcop, s, len);
1753     }
1754     CopLINE_set(PL_curcop, line_num);
1755 }
1756
1757 #define skipspace(s) skipspace_flags(s, 0)
1758
1759
1760 STATIC void
1761 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1762 {
1763     AV *av = CopFILEAVx(PL_curcop);
1764     if (av) {
1765         SV * sv;
1766         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1767         else {
1768             sv = *av_fetch(av, 0, 1);
1769             SvUPGRADE(sv, SVt_PVMG);
1770         }
1771         if (!SvPOK(sv)) sv_setpvs(sv,"");
1772         if (orig_sv)
1773             sv_catsv(sv, orig_sv);
1774         else
1775             sv_catpvn(sv, buf, len);
1776         if (!SvIOK(sv)) {
1777             (void)SvIOK_on(sv);
1778             SvIV_set(sv, 0);
1779         }
1780         if (PL_parser->preambling == NOLINE)
1781             av_store(av, CopLINE(PL_curcop), sv);
1782     }
1783 }
1784
1785 /*
1786  * S_skipspace
1787  * Called to gobble the appropriate amount and type of whitespace.
1788  * Skips comments as well.
1789  */
1790
1791 STATIC char *
1792 S_skipspace_flags(pTHX_ char *s, U32 flags)
1793 {
1794     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1795     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1796         while (s < PL_bufend && SPACE_OR_TAB(*s))
1797             s++;
1798     } else {
1799         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1800         PL_bufptr = s;
1801         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1802                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1803                     LEX_NO_NEXT_CHUNK : 0));
1804         s = PL_bufptr;
1805         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1806         if (PL_linestart > PL_bufptr)
1807             PL_bufptr = PL_linestart;
1808         return s;
1809     }
1810     return s;
1811 }
1812
1813 /*
1814  * S_check_uni
1815  * Check the unary operators to ensure there's no ambiguity in how they're
1816  * used.  An ambiguous piece of code would be:
1817  *     rand + 5
1818  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1819  * the +5 is its argument.
1820  */
1821
1822 STATIC void
1823 S_check_uni(pTHX)
1824 {
1825     const char *s;
1826     const char *t;
1827
1828     if (PL_oldoldbufptr != PL_last_uni)
1829         return;
1830     while (isSPACE(*PL_last_uni))
1831         PL_last_uni++;
1832     s = PL_last_uni;
1833     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1834         s++;
1835     if ((t = strchr(s, '(')) && t < PL_bufptr)
1836         return;
1837
1838     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1839                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1840                      (int)(s - PL_last_uni), PL_last_uni);
1841 }
1842
1843 /*
1844  * LOP : macro to build a list operator.  Its behaviour has been replaced
1845  * with a subroutine, S_lop() for which LOP is just another name.
1846  */
1847
1848 #define LOP(f,x) return lop(f,x,s)
1849
1850 /*
1851  * S_lop
1852  * Build a list operator (or something that might be one).  The rules:
1853  *  - if we have a next token, then it's a list operator (no parens) for
1854  *    which the next token has already been parsed; e.g.,
1855  *       sort foo @args
1856  *       sort foo (@args)
1857  *  - if the next thing is an opening paren, then it's a function
1858  *  - else it's a list operator
1859  */
1860
1861 STATIC I32
1862 S_lop(pTHX_ I32 f, int x, char *s)
1863 {
1864     PERL_ARGS_ASSERT_LOP;
1865
1866     pl_yylval.ival = f;
1867     CLINE;
1868     PL_bufptr = s;
1869     PL_last_lop = PL_oldbufptr;
1870     PL_last_lop_op = (OPCODE)f;
1871     if (PL_nexttoke)
1872         goto lstop;
1873     PL_expect = x;
1874     if (*s == '(')
1875         return REPORT(FUNC);
1876     s = skipspace(s);
1877     if (*s == '(')
1878         return REPORT(FUNC);
1879     else {
1880         lstop:
1881         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1882             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1883         return REPORT(LSTOP);
1884     }
1885 }
1886
1887 /*
1888  * S_force_next
1889  * When the lexer realizes it knows the next token (for instance,
1890  * it is reordering tokens for the parser) then it can call S_force_next
1891  * to know what token to return the next time the lexer is called.  Caller
1892  * will need to set PL_nextval[] and possibly PL_expect to ensure
1893  * the lexer handles the token correctly.
1894  */
1895
1896 STATIC void
1897 S_force_next(pTHX_ I32 type)
1898 {
1899 #ifdef DEBUGGING
1900     if (DEBUG_T_TEST) {
1901         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1902         tokereport(type, &NEXTVAL_NEXTTOKE);
1903     }
1904 #endif
1905     PL_nexttype[PL_nexttoke] = type;
1906     PL_nexttoke++;
1907     if (PL_lex_state != LEX_KNOWNEXT) {
1908         PL_lex_defer = PL_lex_state;
1909         PL_lex_state = LEX_KNOWNEXT;
1910     }
1911 }
1912
1913 /*
1914  * S_postderef
1915  *
1916  * This subroutine handles postfix deref syntax after the arrow has already
1917  * been emitted.  @* $* etc. are emitted as two separate token right here.
1918  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1919  * only the first, leaving yylex to find the next.
1920  */
1921
1922 static int
1923 S_postderef(pTHX_ int const funny, char const next)
1924 {
1925     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1926     assert(strchr("*[{", next));
1927     if (next == '*') {
1928         PL_expect = XOPERATOR;
1929         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1930             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1931             PL_lex_state = LEX_INTERPEND;
1932             force_next(POSTJOIN);
1933         }
1934         force_next(next);
1935         PL_bufptr+=2;
1936     }
1937     else {
1938         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1939          && !PL_lex_brackets)
1940             PL_lex_dojoin = 2;
1941         PL_expect = XOPERATOR;
1942         PL_bufptr++;
1943     }
1944     return funny;
1945 }
1946
1947 void
1948 Perl_yyunlex(pTHX)
1949 {
1950     int yyc = PL_parser->yychar;
1951     if (yyc != YYEMPTY) {
1952         if (yyc) {
1953             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1954             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1955                 PL_lex_allbrackets--;
1956                 PL_lex_brackets--;
1957                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1958             } else if (yyc == '('/*)*/) {
1959                 PL_lex_allbrackets--;
1960                 yyc |= (2<<24);
1961             }
1962             force_next(yyc);
1963         }
1964         PL_parser->yychar = YYEMPTY;
1965     }
1966 }
1967
1968 STATIC SV *
1969 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1970 {
1971     SV * const sv = newSVpvn_utf8(start, len,
1972                                   !IN_BYTES
1973                                   && UTF
1974                                   && !is_invariant_string((const U8*)start, len)
1975                                   && is_utf8_string((const U8*)start, len));
1976     return sv;
1977 }
1978
1979 /*
1980  * S_force_word
1981  * When the lexer knows the next thing is a word (for instance, it has
1982  * just seen -> and it knows that the next char is a word char, then
1983  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1984  * lookahead.
1985  *
1986  * Arguments:
1987  *   char *start : buffer position (must be within PL_linestr)
1988  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1989  *   int check_keyword : if true, Perl checks to make sure the word isn't
1990  *       a keyword (do this if the word is a label, e.g. goto FOO)
1991  *   int allow_pack : if true, : characters will also be allowed (require,
1992  *       use, etc. do this)
1993  */
1994
1995 STATIC char *
1996 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
1997 {
1998     char *s;
1999     STRLEN len;
2000
2001     PERL_ARGS_ASSERT_FORCE_WORD;
2002
2003     start = skipspace(start);
2004     s = start;
2005     if (isIDFIRST_lazy_if(s,UTF) ||
2006         (allow_pack && *s == ':') )
2007     {
2008         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2009         if (check_keyword) {
2010           char *s2 = PL_tokenbuf;
2011           STRLEN len2 = len;
2012           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2013             s2 += 6, len2 -= 6;
2014           if (keyword(s2, len2, 0))
2015             return start;
2016         }
2017         if (token == METHOD) {
2018             s = skipspace(s);
2019             if (*s == '(')
2020                 PL_expect = XTERM;
2021             else {
2022                 PL_expect = XOPERATOR;
2023             }
2024         }
2025         NEXTVAL_NEXTTOKE.opval
2026             = (OP*)newSVOP(OP_CONST,0,
2027                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2028         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2029         force_next(token);
2030     }
2031     return s;
2032 }
2033
2034 /*
2035  * S_force_ident
2036  * Called when the lexer wants $foo *foo &foo etc, but the program
2037  * text only contains the "foo" portion.  The first argument is a pointer
2038  * to the "foo", and the second argument is the type symbol to prefix.
2039  * Forces the next token to be a "WORD".
2040  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2041  */
2042
2043 STATIC void
2044 S_force_ident(pTHX_ const char *s, int kind)
2045 {
2046     PERL_ARGS_ASSERT_FORCE_IDENT;
2047
2048     if (s[0]) {
2049         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2050         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2051                                                                 UTF ? SVf_UTF8 : 0));
2052         NEXTVAL_NEXTTOKE.opval = o;
2053         force_next(WORD);
2054         if (kind) {
2055             o->op_private = OPpCONST_ENTERED;
2056             /* XXX see note in pp_entereval() for why we forgo typo
2057                warnings if the symbol must be introduced in an eval.
2058                GSAR 96-10-12 */
2059             gv_fetchpvn_flags(s, len,
2060                               (PL_in_eval ? GV_ADDMULTI
2061                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2062                               kind == '$' ? SVt_PV :
2063                               kind == '@' ? SVt_PVAV :
2064                               kind == '%' ? SVt_PVHV :
2065                               SVt_PVGV
2066                               );
2067         }
2068     }
2069 }
2070
2071 static void
2072 S_force_ident_maybe_lex(pTHX_ char pit)
2073 {
2074     NEXTVAL_NEXTTOKE.ival = pit;
2075     force_next('p');
2076 }
2077
2078 NV
2079 Perl_str_to_version(pTHX_ SV *sv)
2080 {
2081     NV retval = 0.0;
2082     NV nshift = 1.0;
2083     STRLEN len;
2084     const char *start = SvPV_const(sv,len);
2085     const char * const end = start + len;
2086     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2087
2088     PERL_ARGS_ASSERT_STR_TO_VERSION;
2089
2090     while (start < end) {
2091         STRLEN skip;
2092         UV n;
2093         if (utf)
2094             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2095         else {
2096             n = *(U8*)start;
2097             skip = 1;
2098         }
2099         retval += ((NV)n)/nshift;
2100         start += skip;
2101         nshift *= 1000;
2102     }
2103     return retval;
2104 }
2105
2106 /*
2107  * S_force_version
2108  * Forces the next token to be a version number.
2109  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2110  * and if "guessing" is TRUE, then no new token is created (and the caller
2111  * must use an alternative parsing method).
2112  */
2113
2114 STATIC char *
2115 S_force_version(pTHX_ char *s, int guessing)
2116 {
2117     OP *version = NULL;
2118     char *d;
2119
2120     PERL_ARGS_ASSERT_FORCE_VERSION;
2121
2122     s = skipspace(s);
2123
2124     d = s;
2125     if (*d == 'v')
2126         d++;
2127     if (isDIGIT(*d)) {
2128         while (isDIGIT(*d) || *d == '_' || *d == '.')
2129             d++;
2130         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2131             SV *ver;
2132             s = scan_num(s, &pl_yylval);
2133             version = pl_yylval.opval;
2134             ver = cSVOPx(version)->op_sv;
2135             if (SvPOK(ver) && !SvNIOK(ver)) {
2136                 SvUPGRADE(ver, SVt_PVNV);
2137                 SvNV_set(ver, str_to_version(ver));
2138                 SvNOK_on(ver);          /* hint that it is a version */
2139             }
2140         }
2141         else if (guessing) {
2142             return s;
2143         }
2144     }
2145
2146     /* NOTE: The parser sees the package name and the VERSION swapped */
2147     NEXTVAL_NEXTTOKE.opval = version;
2148     force_next(WORD);
2149
2150     return s;
2151 }
2152
2153 /*
2154  * S_force_strict_version
2155  * Forces the next token to be a version number using strict syntax rules.
2156  */
2157
2158 STATIC char *
2159 S_force_strict_version(pTHX_ char *s)
2160 {
2161     OP *version = NULL;
2162     const char *errstr = NULL;
2163
2164     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2165
2166     while (isSPACE(*s)) /* leading whitespace */
2167         s++;
2168
2169     if (is_STRICT_VERSION(s,&errstr)) {
2170         SV *ver = newSV(0);
2171         s = (char *)scan_version(s, ver, 0);
2172         version = newSVOP(OP_CONST, 0, ver);
2173     }
2174     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2175             (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2176     {
2177         PL_bufptr = s;
2178         if (errstr)
2179             yyerror(errstr); /* version required */
2180         return s;
2181     }
2182
2183     /* NOTE: The parser sees the package name and the VERSION swapped */
2184     NEXTVAL_NEXTTOKE.opval = version;
2185     force_next(WORD);
2186
2187     return s;
2188 }
2189
2190 /*
2191  * S_tokeq
2192  * Tokenize a quoted string passed in as an SV.  It finds the next
2193  * chunk, up to end of string or a backslash.  It may make a new
2194  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2195  * turns \\ into \.
2196  */
2197
2198 STATIC SV *
2199 S_tokeq(pTHX_ SV *sv)
2200 {
2201     char *s;
2202     char *send;
2203     char *d;
2204     SV *pv = sv;
2205
2206     PERL_ARGS_ASSERT_TOKEQ;
2207
2208     assert (SvPOK(sv));
2209     assert (SvLEN(sv));
2210     assert (!SvIsCOW(sv));
2211     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2212         goto finish;
2213     s = SvPVX(sv);
2214     send = SvEND(sv);
2215     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2216     while (s < send && !(*s == '\\' && s[1] == '\\'))
2217         s++;
2218     if (s == send)
2219         goto finish;
2220     d = s;
2221     if ( PL_hints & HINT_NEW_STRING ) {
2222         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2223                             SVs_TEMP | SvUTF8(sv));
2224     }
2225     while (s < send) {
2226         if (*s == '\\') {
2227             if (s + 1 < send && (s[1] == '\\'))
2228                 s++;            /* all that, just for this */
2229         }
2230         *d++ = *s++;
2231     }
2232     *d = '\0';
2233     SvCUR_set(sv, d - SvPVX_const(sv));
2234   finish:
2235     if ( PL_hints & HINT_NEW_STRING )
2236        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2237     return sv;
2238 }
2239
2240 /*
2241  * Now come three functions related to double-quote context,
2242  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2243  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2244  * interact with PL_lex_state, and create fake ( ... ) argument lists
2245  * to handle functions and concatenation.
2246  * For example,
2247  *   "foo\lbar"
2248  * is tokenised as
2249  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2250  */
2251
2252 /*
2253  * S_sublex_start
2254  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2255  *
2256  * Pattern matching will set PL_lex_op to the pattern-matching op to
2257  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2258  *
2259  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2260  *
2261  * Everything else becomes a FUNC.
2262  *
2263  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2264  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2265  * call to S_sublex_push().
2266  */
2267
2268 STATIC I32
2269 S_sublex_start(pTHX)
2270 {
2271     const I32 op_type = pl_yylval.ival;
2272
2273     if (op_type == OP_NULL) {
2274         pl_yylval.opval = PL_lex_op;
2275         PL_lex_op = NULL;
2276         return THING;
2277     }
2278     if (op_type == OP_CONST) {
2279         SV *sv = tokeq(PL_lex_stuff);
2280
2281         if (SvTYPE(sv) == SVt_PVIV) {
2282             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2283             STRLEN len;
2284             const char * const p = SvPV_const(sv, len);
2285             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2286             SvREFCNT_dec(sv);
2287             sv = nsv;
2288         }
2289         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2290         PL_lex_stuff = NULL;
2291         return THING;
2292     }
2293
2294     PL_sublex_info.super_state = PL_lex_state;
2295     PL_sublex_info.sub_inwhat = (U16)op_type;
2296     PL_sublex_info.sub_op = PL_lex_op;
2297     PL_lex_state = LEX_INTERPPUSH;
2298
2299     PL_expect = XTERM;
2300     if (PL_lex_op) {
2301         pl_yylval.opval = PL_lex_op;
2302         PL_lex_op = NULL;
2303         return PMFUNC;
2304     }
2305     else
2306         return FUNC;
2307 }
2308
2309 /*
2310  * S_sublex_push
2311  * Create a new scope to save the lexing state.  The scope will be
2312  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2313  * to the uc, lc, etc. found before.
2314  * Sets PL_lex_state to LEX_INTERPCONCAT.
2315  */
2316
2317 STATIC I32
2318 S_sublex_push(pTHX)
2319 {
2320     LEXSHARED *shared;
2321     const bool is_heredoc = PL_multi_close == '<';
2322     ENTER;
2323
2324     PL_lex_state = PL_sublex_info.super_state;
2325     SAVEI8(PL_lex_dojoin);
2326     SAVEI32(PL_lex_brackets);
2327     SAVEI32(PL_lex_allbrackets);
2328     SAVEI32(PL_lex_formbrack);
2329     SAVEI8(PL_lex_fakeeof);
2330     SAVEI32(PL_lex_casemods);
2331     SAVEI32(PL_lex_starts);
2332     SAVEI8(PL_lex_state);
2333     SAVESPTR(PL_lex_repl);
2334     SAVEVPTR(PL_lex_inpat);
2335     SAVEI16(PL_lex_inwhat);
2336     if (is_heredoc)
2337     {
2338         SAVECOPLINE(PL_curcop);
2339         SAVEI32(PL_multi_end);
2340         SAVEI32(PL_parser->herelines);
2341         PL_parser->herelines = 0;
2342     }
2343     SAVEI8(PL_multi_close);
2344     SAVEPPTR(PL_bufptr);
2345     SAVEPPTR(PL_bufend);
2346     SAVEPPTR(PL_oldbufptr);
2347     SAVEPPTR(PL_oldoldbufptr);
2348     SAVEPPTR(PL_last_lop);
2349     SAVEPPTR(PL_last_uni);
2350     SAVEPPTR(PL_linestart);
2351     SAVESPTR(PL_linestr);
2352     SAVEGENERICPV(PL_lex_brackstack);
2353     SAVEGENERICPV(PL_lex_casestack);
2354     SAVEGENERICPV(PL_parser->lex_shared);
2355     SAVEBOOL(PL_parser->lex_re_reparsing);
2356     SAVEI32(PL_copline);
2357
2358     /* The here-doc parser needs to be able to peek into outer lexing
2359        scopes to find the body of the here-doc.  So we put PL_linestr and
2360        PL_bufptr into lex_shared, to ‘share’ those values.
2361      */
2362     PL_parser->lex_shared->ls_linestr = PL_linestr;
2363     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2364
2365     PL_linestr = PL_lex_stuff;
2366     PL_lex_repl = PL_sublex_info.repl;
2367     PL_lex_stuff = NULL;
2368     PL_sublex_info.repl = NULL;
2369
2370     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2371         = SvPVX(PL_linestr);
2372     PL_bufend += SvCUR(PL_linestr);
2373     PL_last_lop = PL_last_uni = NULL;
2374     SAVEFREESV(PL_linestr);
2375     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2376
2377     PL_lex_dojoin = FALSE;
2378     PL_lex_brackets = PL_lex_formbrack = 0;
2379     PL_lex_allbrackets = 0;
2380     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2381     Newx(PL_lex_brackstack, 120, char);
2382     Newx(PL_lex_casestack, 12, char);
2383     PL_lex_casemods = 0;
2384     *PL_lex_casestack = '\0';
2385     PL_lex_starts = 0;
2386     PL_lex_state = LEX_INTERPCONCAT;
2387     if (is_heredoc)
2388         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2389     PL_copline = NOLINE;
2390
2391     Newxz(shared, 1, LEXSHARED);
2392     shared->ls_prev = PL_parser->lex_shared;
2393     PL_parser->lex_shared = shared;
2394
2395     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2396     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2397     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2398         PL_lex_inpat = PL_sublex_info.sub_op;
2399     else
2400         PL_lex_inpat = NULL;
2401
2402     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2403     PL_in_eval &= ~EVAL_RE_REPARSING;
2404
2405     return '(';
2406 }
2407
2408 /*
2409  * S_sublex_done
2410  * Restores lexer state after a S_sublex_push.
2411  */
2412
2413 STATIC I32
2414 S_sublex_done(pTHX)
2415 {
2416     if (!PL_lex_starts++) {
2417         SV * const sv = newSVpvs("");
2418         if (SvUTF8(PL_linestr))
2419             SvUTF8_on(sv);
2420         PL_expect = XOPERATOR;
2421         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2422         return THING;
2423     }
2424
2425     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2426         PL_lex_state = LEX_INTERPCASEMOD;
2427         return yylex();
2428     }
2429
2430     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2431     assert(PL_lex_inwhat != OP_TRANSR);
2432     if (PL_lex_repl) {
2433         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2434         PL_linestr = PL_lex_repl;
2435         PL_lex_inpat = 0;
2436         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2437         PL_bufend += SvCUR(PL_linestr);
2438         PL_last_lop = PL_last_uni = NULL;
2439         PL_lex_dojoin = FALSE;
2440         PL_lex_brackets = 0;
2441         PL_lex_allbrackets = 0;
2442         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2443         PL_lex_casemods = 0;
2444         *PL_lex_casestack = '\0';
2445         PL_lex_starts = 0;
2446         if (SvEVALED(PL_lex_repl)) {
2447             PL_lex_state = LEX_INTERPNORMAL;
2448             PL_lex_starts++;
2449             /*  we don't clear PL_lex_repl here, so that we can check later
2450                 whether this is an evalled subst; that means we rely on the
2451                 logic to ensure sublex_done() is called again only via the
2452                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2453         }
2454         else {
2455             PL_lex_state = LEX_INTERPCONCAT;
2456             PL_lex_repl = NULL;
2457         }
2458         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2459             CopLINE(PL_curcop) +=
2460                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2461                  + PL_parser->herelines;
2462             PL_parser->herelines = 0;
2463         }
2464         return '/';
2465     }
2466     else {
2467         const line_t l = CopLINE(PL_curcop);
2468         LEAVE;
2469         if (PL_multi_close == '<')
2470             PL_parser->herelines += l - PL_multi_end;
2471         PL_bufend = SvPVX(PL_linestr);
2472         PL_bufend += SvCUR(PL_linestr);
2473         PL_expect = XOPERATOR;
2474         PL_sublex_info.sub_inwhat = 0;
2475         return ')';
2476     }
2477 }
2478
2479 PERL_STATIC_INLINE SV*
2480 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2481 {
2482     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2483      * interior, hence to the "}".  Finds what the name resolves to, returning
2484      * an SV* containing it; NULL if no valid one found */
2485
2486     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2487
2488     HV * table;
2489     SV **cvp;
2490     SV *cv;
2491     SV *rv;
2492     HV *stash;
2493     const U8* first_bad_char_loc;
2494     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2495
2496     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2497
2498     if (!SvCUR(res))
2499         return res;
2500
2501     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2502                                      e - backslash_ptr,
2503                                      &first_bad_char_loc))
2504     {
2505         /* If warnings are on, this will print a more detailed analysis of what
2506          * is wrong than the error message below */
2507         utf8n_to_uvchr(first_bad_char_loc,
2508                        e - ((char *) first_bad_char_loc),
2509                        NULL, 0);
2510
2511         /* We deliberately don't try to print the malformed character, which
2512          * might not print very well; it also may be just the first of many
2513          * malformations, so don't print what comes after it */
2514         yyerror(Perl_form(aTHX_
2515             "Malformed UTF-8 character immediately after '%.*s'",
2516             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2517         return NULL;
2518     }
2519
2520     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2521                         /* include the <}> */
2522                         e - backslash_ptr + 1);
2523     if (! SvPOK(res)) {
2524         SvREFCNT_dec_NN(res);
2525         return NULL;
2526     }
2527
2528     /* See if the charnames handler is the Perl core's, and if so, we can skip
2529      * the validation needed for a user-supplied one, as Perl's does its own
2530      * validation. */
2531     table = GvHV(PL_hintgv);             /* ^H */
2532     cvp = hv_fetchs(table, "charnames", FALSE);
2533     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2534         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2535     {
2536         const char * const name = HvNAME(stash);
2537         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2538          && strEQ(name, "_charnames")) {
2539            return res;
2540        }
2541     }
2542
2543     /* Here, it isn't Perl's charname handler.  We can't rely on a
2544      * user-supplied handler to validate the input name.  For non-ut8 input,
2545      * look to see that the first character is legal.  Then loop through the
2546      * rest checking that each is a continuation */
2547
2548     /* This code makes the reasonable assumption that the only Latin1-range
2549      * characters that begin a character name alias are alphabetic, otherwise
2550      * would have to create a isCHARNAME_BEGIN macro */
2551
2552     if (! UTF) {
2553         if (! isALPHAU(*s)) {
2554             goto bad_charname;
2555         }
2556         s++;
2557         while (s < e) {
2558             if (! isCHARNAME_CONT(*s)) {
2559                 goto bad_charname;
2560             }
2561             if (*s == ' ' && *(s-1) == ' ') {
2562                 goto multi_spaces;
2563             }
2564             if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2565                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2566                            "NO-BREAK SPACE in a charnames "
2567                            "alias definition is deprecated");
2568             }
2569             s++;
2570         }
2571     }
2572     else {
2573         /* Similarly for utf8.  For invariants can check directly; for other
2574          * Latin1, can calculate their code point and check; otherwise  use a
2575          * swash */
2576         if (UTF8_IS_INVARIANT(*s)) {
2577             if (! isALPHAU(*s)) {
2578                 goto bad_charname;
2579             }
2580             s++;
2581         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2582             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2583                 goto bad_charname;
2584             }
2585             s += 2;
2586         }
2587         else {
2588             if (! PL_utf8_charname_begin) {
2589                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2590                 PL_utf8_charname_begin = _core_swash_init("utf8",
2591                                                         "_Perl_Charname_Begin",
2592                                                         &PL_sv_undef,
2593                                                         1, 0, NULL, &flags);
2594             }
2595             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2596                 goto bad_charname;
2597             }
2598             s += UTF8SKIP(s);
2599         }
2600
2601         while (s < e) {
2602             if (UTF8_IS_INVARIANT(*s)) {
2603                 if (! isCHARNAME_CONT(*s)) {
2604                     goto bad_charname;
2605                 }
2606                 if (*s == ' ' && *(s-1) == ' ') {
2607                     goto multi_spaces;
2608                 }
2609                 s++;
2610             }
2611             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2612                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2613                 {
2614                     goto bad_charname;
2615                 }
2616                 if (*s == *NBSP_UTF8
2617                     && *(s+1) == *(NBSP_UTF8+1)
2618                     && ckWARN_d(WARN_DEPRECATED))
2619                 {
2620                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2621                                 "NO-BREAK SPACE in a charnames "
2622                                 "alias definition is deprecated");
2623                 }
2624                 s += 2;
2625             }
2626             else {
2627                 if (! PL_utf8_charname_continue) {
2628                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2629                     PL_utf8_charname_continue = _core_swash_init("utf8",
2630                                                 "_Perl_Charname_Continue",
2631                                                 &PL_sv_undef,
2632                                                 1, 0, NULL, &flags);
2633                 }
2634                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2635                     goto bad_charname;
2636                 }
2637                 s += UTF8SKIP(s);
2638             }
2639         }
2640     }
2641     if (*(s-1) == ' ') {
2642         yyerror_pv(
2643             Perl_form(aTHX_
2644             "charnames alias definitions may not contain trailing "
2645             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2646             (int)(s - backslash_ptr + 1), backslash_ptr,
2647             (int)(e - s + 1), s + 1
2648             ),
2649         UTF ? SVf_UTF8 : 0);
2650         return NULL;
2651     }
2652
2653     if (SvUTF8(res)) { /* Don't accept malformed input */
2654         const U8* first_bad_char_loc;
2655         STRLEN len;
2656         const char* const str = SvPV_const(res, len);
2657         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2658             /* If warnings are on, this will print a more detailed analysis of
2659              * what is wrong than the error message below */
2660             utf8n_to_uvchr(first_bad_char_loc,
2661                            (char *) first_bad_char_loc - str,
2662                            NULL, 0);
2663
2664             /* We deliberately don't try to print the malformed character,
2665              * which might not print very well; it also may be just the first
2666              * of many malformations, so don't print what comes after it */
2667             yyerror_pv(
2668               Perl_form(aTHX_
2669                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2670                  (int) (e - backslash_ptr + 1), backslash_ptr,
2671                  (int) ((char *) first_bad_char_loc - str), str
2672               ),
2673               SVf_UTF8);
2674             return NULL;
2675         }
2676     }
2677
2678     return res;
2679
2680   bad_charname: {
2681
2682         /* The final %.*s makes sure that should the trailing NUL be missing
2683          * that this print won't run off the end of the string */
2684         yyerror_pv(
2685           Perl_form(aTHX_
2686             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2687             (int)(s - backslash_ptr + 1), backslash_ptr,
2688             (int)(e - s + 1), s + 1
2689           ),
2690           UTF ? SVf_UTF8 : 0);
2691         return NULL;
2692     }
2693
2694   multi_spaces:
2695         yyerror_pv(
2696           Perl_form(aTHX_
2697             "charnames alias definitions may not contain a sequence of "
2698             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2699             (int)(s - backslash_ptr + 1), backslash_ptr,
2700             (int)(e - s + 1), s + 1
2701           ),
2702           UTF ? SVf_UTF8 : 0);
2703         return NULL;
2704 }
2705
2706 /*
2707   scan_const
2708
2709   Extracts the next constant part of a pattern, double-quoted string,
2710   or transliteration.  This is terrifying code.
2711
2712   For example, in parsing the double-quoted string "ab\x63$d", it would
2713   stop at the '$' and return an OP_CONST containing 'abc'.
2714
2715   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2716   processing a pattern (PL_lex_inpat is true), a transliteration
2717   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2718
2719   Returns a pointer to the character scanned up to. If this is
2720   advanced from the start pointer supplied (i.e. if anything was
2721   successfully parsed), will leave an OP_CONST for the substring scanned
2722   in pl_yylval. Caller must intuit reason for not parsing further
2723   by looking at the next characters herself.
2724
2725   In patterns:
2726     expand:
2727       \N{FOO}  => \N{U+hex_for_character_FOO}
2728       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2729
2730     pass through:
2731         all other \-char, including \N and \N{ apart from \N{ABC}
2732
2733     stops on:
2734         @ and $ where it appears to be a var, but not for $ as tail anchor
2735         \l \L \u \U \Q \E
2736         (?{  or  (??{
2737
2738
2739   In transliterations:
2740     characters are VERY literal, except for - not at the start or end
2741     of the string, which indicates a range. If the range is in bytes,
2742     scan_const expands the range to the full set of intermediate
2743     characters. If the range is in utf8, the hyphen is replaced with
2744     a certain range mark which will be handled by pmtrans() in op.c.
2745
2746   In double-quoted strings:
2747     backslashes:
2748       double-quoted style: \r and \n
2749       constants: \x31, etc.
2750       deprecated backrefs: \1 (in substitution replacements)
2751       case and quoting: \U \Q \E
2752     stops on @ and $
2753
2754   scan_const does *not* construct ops to handle interpolated strings.
2755   It stops processing as soon as it finds an embedded $ or @ variable
2756   and leaves it to the caller to work out what's going on.
2757
2758   embedded arrays (whether in pattern or not) could be:
2759       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2760
2761   $ in double-quoted strings must be the symbol of an embedded scalar.
2762
2763   $ in pattern could be $foo or could be tail anchor.  Assumption:
2764   it's a tail anchor if $ is the last thing in the string, or if it's
2765   followed by one of "()| \r\n\t"
2766
2767   \1 (backreferences) are turned into $1 in substitutions
2768
2769   The structure of the code is
2770       while (there's a character to process) {
2771           handle transliteration ranges
2772           skip regexp comments /(?#comment)/ and codes /(?{code})/
2773           skip #-initiated comments in //x patterns
2774           check for embedded arrays
2775           check for embedded scalars
2776           if (backslash) {
2777               deprecate \1 in substitution replacements
2778               handle string-changing backslashes \l \U \Q \E, etc.
2779               switch (what was escaped) {
2780                   handle \- in a transliteration (becomes a literal -)
2781                   if a pattern and not \N{, go treat as regular character
2782                   handle \132 (octal characters)
2783                   handle \x15 and \x{1234} (hex characters)
2784                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2785                   handle \cV (control characters)
2786                   handle printf-style backslashes (\f, \r, \n, etc)
2787               } (end switch)
2788               continue
2789           } (end if backslash)
2790           handle regular character
2791     } (end while character to read)
2792
2793 */
2794
2795 STATIC char *
2796 S_scan_const(pTHX_ char *start)
2797 {
2798     char *send = PL_bufend;             /* end of the constant */
2799     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2800                                            on sizing. */
2801     char *s = start;                    /* start of the constant */
2802     char *d = SvPVX(sv);                /* destination for copies */
2803     bool dorange = FALSE;               /* are we in a translit range? */
2804     bool didrange = FALSE;              /* did we just finish a range? */
2805     bool in_charclass = FALSE;          /* within /[...]/ */
2806     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2807     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2808                                            UTF8?  But, this can show as true
2809                                            when the source isn't utf8, as for
2810                                            example when it is entirely composed
2811                                            of hex constants */
2812     SV *res;                            /* result from charnames */
2813
2814     /* Note on sizing:  The scanned constant is placed into sv, which is
2815      * initialized by newSV() assuming one byte of output for every byte of
2816      * input.  This routine expects newSV() to allocate an extra byte for a
2817      * trailing NUL, which this routine will append if it gets to the end of
2818      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2819      * CAPITAL LETTER A}), or more output than input if the constant ends up
2820      * recoded to utf8, but each time a construct is found that might increase
2821      * the needed size, SvGROW() is called.  Its size parameter each time is
2822      * based on the best guess estimate at the time, namely the length used so
2823      * far, plus the length the current construct will occupy, plus room for
2824      * the trailing NUL, plus one byte for every input byte still unscanned */
2825
2826     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2827                        before set */
2828 #ifdef EBCDIC
2829     UV literal_endpoint = 0;
2830     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2831 #endif
2832
2833     PERL_ARGS_ASSERT_SCAN_CONST;
2834
2835     assert(PL_lex_inwhat != OP_TRANSR);
2836     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2837         /* If we are doing a trans and we know we want UTF8 set expectation */
2838         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2839         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2840     }
2841
2842     /* Protect sv from errors and fatal warnings. */
2843     ENTER_with_name("scan_const");
2844     SAVEFREESV(sv);
2845
2846     while (s < send || dorange) {
2847
2848         /* get transliterations out of the way (they're most literal) */
2849         if (PL_lex_inwhat == OP_TRANS) {
2850             /* expand a range A-Z to the full set of characters.  AIE! */
2851             if (dorange) {
2852                 I32 i;                          /* current expanded character */
2853                 I32 min;                        /* first character in range */
2854                 I32 max;                        /* last character in range */
2855
2856 #ifdef EBCDIC
2857                 UV uvmax = 0;
2858 #endif
2859
2860                 if (has_utf8
2861 #ifdef EBCDIC
2862                     && !native_range
2863 #endif
2864                 ) {
2865                     char * const c = (char*)utf8_hop((U8*)d, -1);
2866                     char *e = d++;
2867                     while (e-- > c)
2868                         *(e + 1) = *e;
2869                     *c = (char) ILLEGAL_UTF8_BYTE;
2870                     /* mark the range as done, and continue */
2871                     dorange = FALSE;
2872                     didrange = TRUE;
2873                     continue;
2874                 }
2875
2876                 i = d - SvPVX_const(sv);                /* remember current offset */
2877 #ifdef EBCDIC
2878                 SvGROW(sv,
2879                        SvLEN(sv) + ((has_utf8)
2880                                     ?  (512 - UTF_CONTINUATION_MARK
2881                                         + UNISKIP(0x100))
2882                                     : 256));
2883                 /* How many two-byte within 0..255: 128 in UTF-8,
2884                  * 96 in UTF-8-mod. */
2885 #else
2886                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2887 #endif
2888                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2889 #ifdef EBCDIC
2890                 if (has_utf8) {
2891                     int j;
2892                     for (j = 0; j <= 1; j++) {
2893                         char * const c = (char*)utf8_hop((U8*)d, -1);
2894                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2895                         if (j)
2896                             min = (U8)uv;
2897                         else if (uv < 256)
2898                             max = (U8)uv;
2899                         else {
2900                             max = (U8)0xff; /* only to \xff */
2901                             uvmax = uv; /* \x{100} to uvmax */
2902                         }
2903                         d = c; /* eat endpoint chars */
2904                      }
2905                 }
2906                else {
2907 #endif
2908                    d -= 2;              /* eat the first char and the - */
2909                    min = (U8)*d;        /* first char in range */
2910                    max = (U8)d[1];      /* last char in range  */
2911 #ifdef EBCDIC
2912                }
2913 #endif
2914
2915                 if (min > max) {
2916                     Perl_croak(aTHX_
2917                                "Invalid range \"%c-%c\" in transliteration operator",
2918                                (char)min, (char)max);
2919                 }
2920
2921 #ifdef EBCDIC
2922                 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2923                  * any subsets of these ranges into individual characters */
2924                 if (literal_endpoint == 2 &&
2925                     ((isLOWER_A(min) && isLOWER_A(max)) ||
2926                      (isUPPER_A(min) && isUPPER_A(max))))
2927                 {
2928                     for (i = min; i <= max; i++) {
2929                         if (isALPHA_A(i))
2930                             *d++ = i;
2931                     }
2932                 }
2933                 else
2934 #endif
2935                     for (i = min; i <= max; i++)
2936 #ifdef EBCDIC
2937                         if (has_utf8) {
2938                             append_utf8_from_native_byte(i, &d);
2939                         }
2940                         else
2941 #endif
2942                             *d++ = (char)i;
2943
2944 #ifdef EBCDIC
2945                 if (uvmax) {
2946                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2947                     if (uvmax > 0x101)
2948                         *d++ = (char) ILLEGAL_UTF8_BYTE;
2949                     if (uvmax > 0x100)
2950                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2951                 }
2952 #endif
2953
2954                 /* mark the range as done, and continue */
2955                 dorange = FALSE;
2956                 didrange = TRUE;
2957 #ifdef EBCDIC
2958                 literal_endpoint = 0;
2959 #endif
2960                 continue;
2961             }
2962
2963             /* range begins (ignore - as first or last char) */
2964             else if (*s == '-' && s+1 < send  && s != start) {
2965                 if (didrange) {
2966                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2967                 }
2968                 if (has_utf8
2969 #ifdef EBCDIC
2970                     && !native_range
2971 #endif
2972                     ) {
2973                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
2974                     s++;
2975                     continue;
2976                 }
2977                 dorange = TRUE;
2978                 s++;
2979             }
2980             else {
2981                 didrange = FALSE;
2982 #ifdef EBCDIC
2983                 literal_endpoint = 0;
2984                 native_range = TRUE;
2985 #endif
2986             }
2987         }
2988
2989         /* if we get here, we're not doing a transliteration */
2990
2991         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2992             char *s1 = s-1;
2993             int esc = 0;
2994             while (s1 >= start && *s1-- == '\\')
2995                 esc = !esc;
2996             if (!esc)
2997                 in_charclass = TRUE;
2998         }
2999
3000         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3001             char *s1 = s-1;
3002             int esc = 0;
3003             while (s1 >= start && *s1-- == '\\')
3004                 esc = !esc;
3005             if (!esc)
3006                 in_charclass = FALSE;
3007         }
3008
3009         /* skip for regexp comments /(?#comment)/, except for the last
3010          * char, which will be done separately.
3011          * Stop on (?{..}) and friends */
3012
3013         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3014             if (s[2] == '#') {
3015                 while (s+1 < send && *s != ')')
3016                     *d++ = *s++;
3017             }
3018             else if (!PL_lex_casemods &&
3019                      (    s[2] == '{' /* This should match regcomp.c */
3020                       || (s[2] == '?' && s[3] == '{')))
3021             {
3022                 break;
3023             }
3024         }
3025
3026         /* likewise skip #-initiated comments in //x patterns */
3027         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3028           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3029             while (s+1 < send && *s != '\n')
3030                 *d++ = *s++;
3031         }
3032
3033         /* no further processing of single-quoted regex */
3034         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3035             goto default_action;
3036
3037         /* check for embedded arrays
3038            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3039            */
3040         else if (*s == '@' && s[1]) {
3041             if (isWORDCHAR_lazy_if(s+1,UTF))
3042                 break;
3043             if (strchr(":'{$", s[1]))
3044                 break;
3045             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3046                 break; /* in regexp, neither @+ nor @- are interpolated */
3047         }
3048
3049         /* check for embedded scalars.  only stop if we're sure it's a
3050            variable.
3051         */
3052         else if (*s == '$') {
3053             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3054                 break;
3055             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3056                 if (s[1] == '\\') {
3057                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3058                                    "Possible unintended interpolation of $\\ in regex");
3059                 }
3060                 break;          /* in regexp, $ might be tail anchor */
3061             }
3062         }
3063
3064         /* End of else if chain - OP_TRANS rejoin rest */
3065
3066         /* backslashes */
3067         if (*s == '\\' && s+1 < send) {
3068             char* e;    /* Can be used for ending '}', etc. */
3069
3070             s++;
3071
3072             /* warn on \1 - \9 in substitution replacements, but note that \11
3073              * is an octal; and \19 is \1 followed by '9' */
3074             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3075                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3076             {
3077                 /* diag_listed_as: \%d better written as $%d */
3078                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3079                 *--s = '$';
3080                 break;
3081             }
3082
3083             /* string-change backslash escapes */
3084             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3085                 --s;
3086                 break;
3087             }
3088             /* In a pattern, process \N, but skip any other backslash escapes.
3089              * This is because we don't want to translate an escape sequence
3090              * into a meta symbol and have the regex compiler use the meta
3091              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3092              * in spite of this, we do have to process \N here while the proper
3093              * charnames handler is in scope.  See bugs #56444 and #62056.
3094              *
3095              * There is a complication because \N in a pattern may also stand
3096              * for 'match a non-nl', and not mean a charname, in which case its
3097              * processing should be deferred to the regex compiler.  To be a
3098              * charname it must be followed immediately by a '{', and not look
3099              * like \N followed by a curly quantifier, i.e., not something like
3100              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3101              * quantifier */
3102             else if (PL_lex_inpat
3103                     && (*s != 'N'
3104                         || s[1] != '{'
3105                         || regcurly(s + 1)))
3106             {
3107                 *d++ = '\\';
3108                 goto default_action;
3109             }
3110
3111             switch (*s) {
3112
3113             /* quoted - in transliterations */
3114             case '-':
3115                 if (PL_lex_inwhat == OP_TRANS) {
3116                     *d++ = *s++;
3117                     continue;
3118                 }
3119                 /* FALLTHROUGH */
3120             default:
3121                 {
3122                     if ((isALPHANUMERIC(*s)))
3123                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3124                                        "Unrecognized escape \\%c passed through",
3125                                        *s);
3126                     /* default action is to copy the quoted character */
3127                     goto default_action;
3128                 }
3129
3130             /* eg. \132 indicates the octal constant 0132 */
3131             case '0': case '1': case '2': case '3':
3132             case '4': case '5': case '6': case '7':
3133                 {
3134                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3135                     STRLEN len = 3;
3136                     uv = grok_oct(s, &len, &flags, NULL);
3137                     s += len;
3138                     if (len < 3 && s < send && isDIGIT(*s)
3139                         && ckWARN(WARN_MISC))
3140                     {
3141                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3142                                     "%s", form_short_octal_warning(s, len));
3143                     }
3144                 }
3145                 goto NUM_ESCAPE_INSERT;
3146
3147             /* eg. \o{24} indicates the octal constant \024 */
3148             case 'o':
3149                 {
3150                     const char* error;
3151
3152                     bool valid = grok_bslash_o(&s, &uv, &error,
3153                                                TRUE, /* Output warning */
3154                                                FALSE, /* Not strict */
3155                                                TRUE, /* Output warnings for
3156                                                          non-portables */
3157                                                UTF);
3158                     if (! valid) {
3159                         yyerror(error);
3160                         continue;
3161                     }
3162                     goto NUM_ESCAPE_INSERT;
3163                 }
3164
3165             /* eg. \x24 indicates the hex constant 0x24 */
3166             case 'x':
3167                 {
3168                     const char* error;
3169
3170                     bool valid = grok_bslash_x(&s, &uv, &error,
3171                                                TRUE, /* Output warning */
3172                                                FALSE, /* Not strict */
3173                                                TRUE,  /* Output warnings for
3174                                                          non-portables */
3175                                                UTF);
3176                     if (! valid) {
3177                         yyerror(error);
3178                         continue;
3179                     }
3180                 }
3181
3182               NUM_ESCAPE_INSERT:
3183                 /* Insert oct or hex escaped character.  There will always be
3184                  * enough room in sv since such escapes will be longer than any
3185                  * UTF-8 sequence they can end up as, except if they force us
3186                  * to recode the rest of the string into utf8 */
3187
3188                 /* Here uv is the ordinal of the next character being added */
3189                 if (!UVCHR_IS_INVARIANT(uv)) {
3190                     if (!has_utf8 && uv > 255) {
3191                         /* Might need to recode whatever we have accumulated so
3192                          * far if it contains any chars variant in utf8 or
3193                          * utf-ebcdic. */
3194
3195                         SvCUR_set(sv, d - SvPVX_const(sv));
3196                         SvPOK_on(sv);
3197                         *d = '\0';
3198                         /* See Note on sizing above.  */
3199                         sv_utf8_upgrade_flags_grow(
3200                                          sv,
3201                                          SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3202                                                   /* Above-latin1 in string
3203                                                    * implies no encoding */
3204                                                   |SV_UTF8_NO_ENCODING,
3205                                          UNISKIP(uv) + (STRLEN)(send - s) + 1);
3206                         d = SvPVX(sv) + SvCUR(sv);
3207                         has_utf8 = TRUE;
3208                     }
3209
3210                     if (has_utf8) {
3211                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3212                         if (PL_lex_inwhat == OP_TRANS &&
3213                             PL_sublex_info.sub_op) {
3214                             PL_sublex_info.sub_op->op_private |=
3215                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3216                                              : OPpTRANS_TO_UTF);
3217                         }
3218 #ifdef EBCDIC
3219                         if (uv > 255 && !dorange)
3220                             native_range = FALSE;
3221 #endif
3222                     }
3223                     else {
3224                         *d++ = (char)uv;
3225                     }
3226                 }
3227                 else {
3228                     *d++ = (char) uv;
3229                 }
3230                 continue;
3231
3232             case 'N':
3233                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3234                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3235                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3236                  * GRAVE}.  For convenience all three forms are referred to as
3237                  * "named characters" below.
3238                  *
3239                  * For patterns, \N also can mean to match a non-newline.  Code
3240                  * before this 'switch' statement should already have handled
3241                  * this situation, and hence this code only has to deal with
3242                  * the named character cases.
3243                  *
3244                  * For non-patterns, the named characters are converted to
3245                  * their string equivalents.  In patterns, named characters are
3246                  * not converted to their ultimate forms for the same reasons
3247                  * that other escapes aren't.  Instead, they are converted to
3248                  * the \N{U+...} form to get the value from the charnames that
3249                  * is in effect right now, while preserving the fact that it
3250                  * was a named character, so that the regex compiler knows
3251                  * this.
3252                  *
3253                  * The structure of this section of code (besides checking for
3254                  * errors and upgrading to utf8) is:
3255                  *  If the named character is of the form \N{U+...}, pass it
3256                  *      through if a pattern; otherwise convert the code point
3257                  *      to utf8
3258                  *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
3259                  *      if a pattern; otherwise convert to utf8
3260                  *
3261                  * If the regex compiler should ever need to differentiate
3262                  * between the \N{U+...} and \N{name} forms, that could easily
3263                  * be done here by stripping any leading zeros from the
3264                  * \N{U+...} case, and adding them to the other one. */
3265
3266                 /* Here, 's' points to the 'N'; the test below is guaranteed to
3267                  * succeed if we are being called on a pattern, as we already
3268                  * know from a test above that the next character is a '{'.  A
3269                  * non-pattern \N must mean 'named character', which requires
3270                  * braces */
3271                 s++;
3272                 if (*s != '{') {
3273                     yyerror("Missing braces on \\N{}");
3274                     continue;
3275                 }
3276                 s++;
3277
3278                 /* If there is no matching '}', it is an error. */
3279                 if (! (e = strchr(s, '}'))) {
3280                     if (! PL_lex_inpat) {
3281                         yyerror("Missing right brace on \\N{}");
3282                     } else {
3283                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3284                     }
3285                     continue;
3286                 }
3287
3288                 /* Here it looks like a named character */
3289
3290                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3291                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3292                                 | PERL_SCAN_SILENT_ILLDIGIT
3293                                 | PERL_SCAN_DISALLOW_PREFIX;
3294                     STRLEN len;
3295
3296                     s += 2;         /* Skip to next char after the 'U+' */
3297                     len = e - s;
3298                     uv = grok_hex(s, &len, &flags, NULL);
3299                     if (len == 0
3300                      || (  len != (STRLEN)(e - s) && s[len] != '.'
3301                         && PL_lex_inpat))
3302                     {
3303                       bad_NU:
3304                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3305                         s = e + 1;
3306                         continue;
3307                     }
3308
3309                     if (PL_lex_inpat) {
3310 #ifdef EBCDIC
3311                         s -= 5;     /* Include the '\N{U+' */
3312                         /* On EBCDIC platforms, in \N{U+...}, the '...' is a
3313                          * Unicode value, so convert to native so downstream
3314                          * code can continue to assume it's native */
3315                         /* XXX This should be in the regexp parser,
3316                                because doing it here makes /\N{U+41}/ and
3317                                =~ '\N{U+41}' do different things.  */
3318                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the '}'
3319                                                                and the \0 */
3320                                          "\\N{U+%X",
3321                                          (unsigned int) UNI_TO_NATIVE(uv));
3322                         s += 5 + len;
3323                         while (*s == '.') {
3324                             s++;
3325                             len = e - s;
3326                             uv = grok_hex(s, &len, &flags, NULL);
3327                             if (!len
3328                              || (len != (STRLEN)(e - s) && s[len] != '.'))
3329                                 goto bad_NU;
3330                             s--;
3331                             d += my_snprintf(
3332                                      d, e - s + 1 + 1, ".%X",
3333                                      (unsigned int)UNI_TO_NATIVE(uv)
3334                                  );
3335                             s += len + 1;
3336                         }
3337                         *(d++) = '}';
3338 #else
3339                         /* On non-EBCDIC platforms, pass it through unchanged.
3340                          * The reason we evaluate the numbers is to make
3341                          * sure there wasn't a syntax error. */
3342                         const char * const orig_s = s - 5;
3343                         while (*s == '.') {
3344                             s++;
3345                             len = e - s;
3346                             uv = grok_hex(s, &len, &flags, NULL);
3347                             if (!len
3348                              || (len != (STRLEN)(e - s) && s[len] != '.'))
3349                                 goto bad_NU;
3350                         }
3351                         /* +1 is for the '}' */
3352                         Copy(orig_s, d, e - orig_s + 1, char);
3353                         d += e - orig_s + 1;
3354 #endif
3355                     }
3356                     else {  /* Not a pattern: convert the hex to string */
3357
3358                          /* If the destination is not in utf8, unconditionally
3359                           * recode it to be so.  This is because \N{} implies
3360                           * Unicode semantics, and scalars have to be in utf8
3361                           * to guarantee those semantics */
3362                         if (! has_utf8) {
3363                             SvCUR_set(sv, d - SvPVX_const(sv));
3364                             SvPOK_on(sv);
3365                             *d = '\0';
3366                             /* See Note on sizing above.  */
3367                             sv_utf8_upgrade_flags_grow(
3368                                         sv,
3369                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3370                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3371                             d = SvPVX(sv) + SvCUR(sv);
3372                             has_utf8 = TRUE;
3373                         }
3374
3375                         /* Add the (Unicode) code point to the output. */
3376                         if (UNI_IS_INVARIANT(uv)) {
3377                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3378                         }
3379                         else {
3380                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3381                         }
3382                     }
3383                 }
3384                 else /* Here is \N{NAME} but not \N{U+...}. */
3385                      if ((res = get_and_check_backslash_N_name(s, e)))
3386                 {
3387                     STRLEN len;
3388                     const char *str = SvPV_const(res, len);
3389                     if (PL_lex_inpat) {
3390
3391                         if (! len) { /* The name resolved to an empty string */
3392                             Copy("\\N{}", d, 4, char);
3393                             d += 4;
3394                         }
3395                         else {
3396                             /* In order to not lose information for the regex
3397                             * compiler, pass the result in the specially made
3398                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3399                             * the code points in hex of each character
3400                             * returned by charnames */
3401
3402                             const char *str_end = str + len;
3403                             const STRLEN off = d - SvPVX_const(sv);
3404
3405                             if (! SvUTF8(res)) {
3406                                 /* For the non-UTF-8 case, we can determine the
3407                                  * exact length needed without having to parse
3408                                  * through the string.  Each character takes up
3409                                  * 2 hex digits plus either a trailing dot or
3410                                  * the "}" */
3411                                 const char initial_text[] = "\\N{U+";
3412                                 const STRLEN initial_len = sizeof(initial_text)
3413                                                            - 1;
3414                                 d = off + SvGROW(sv, off
3415                                                     + 3 * len
3416
3417                                                     /* +1 for trailing NUL */
3418                                                     + initial_len + 1
3419
3420                                                     + (STRLEN)(send - e));
3421                                 Copy(initial_text, d, initial_len, char);
3422                                 d += initial_len;
3423                                 while (str < str_end) {
3424                                     char hex_string[4];
3425                                     int len =
3426                                         my_snprintf(hex_string,
3427                                                     sizeof(hex_string),
3428                                                     "%02X.", (U8) *str);
3429                                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3430                                     Copy(hex_string, d, 3, char);
3431                                     d += 3;
3432                                     str++;
3433                                 }
3434                                 d--;    /* Below, we will overwrite the final
3435                                            dot with a right brace */
3436                             }
3437                             else {
3438                                 STRLEN char_length; /* cur char's byte length */
3439
3440                                 /* and the number of bytes after this is
3441                                  * translated into hex digits */
3442                                 STRLEN output_length;
3443
3444                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3445                                  * for max('U+', '.'); and 1 for NUL */
3446                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3447
3448                                 /* Get the first character of the result. */
3449                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3450                                                         len,
3451                                                         &char_length,
3452                                                         UTF8_ALLOW_ANYUV);
3453                                 /* Convert first code point to hex, including
3454                                  * the boiler plate before it. */
3455                                 output_length =
3456                                     my_snprintf(hex_string, sizeof(hex_string),
3457                                                 "\\N{U+%X",
3458                                                 (unsigned int) uv);
3459
3460                                 /* Make sure there is enough space to hold it */
3461                                 d = off + SvGROW(sv, off
3462                                                     + output_length
3463                                                     + (STRLEN)(send - e)
3464                                                     + 2);       /* '}' + NUL */
3465                                 /* And output it */
3466                                 Copy(hex_string, d, output_length, char);
3467                                 d += output_length;
3468
3469                                 /* For each subsequent character, append dot and
3470                                 * its ordinal in hex */
3471                                 while ((str += char_length) < str_end) {
3472                                     const STRLEN off = d - SvPVX_const(sv);
3473                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3474                                                             str_end - str,
3475                                                             &char_length,
3476                                                             UTF8_ALLOW_ANYUV);
3477                                     output_length =
3478                                         my_snprintf(hex_string,
3479                                                     sizeof(hex_string),
3480                                                     ".%X",
3481                                                     (unsigned int) uv);
3482
3483                                     d = off + SvGROW(sv, off
3484                                                         + output_length
3485                                                         + (STRLEN)(send - e)
3486                                                         + 2);   /* '}' +  NUL */
3487                                     Copy(hex_string, d, output_length, char);
3488                                     d += output_length;
3489                                 }
3490                             }
3491
3492                             *d++ = '}'; /* Done.  Add the trailing brace */
3493                         }
3494                     }
3495                     else { /* Here, not in a pattern.  Convert the name to a
3496                             * string. */
3497
3498                          /* If destination is not in utf8, unconditionally
3499                           * recode it to be so.  This is because \N{} implies
3500                           * Unicode semantics, and scalars have to be in utf8
3501                           * to guarantee those semantics */
3502                         if (! has_utf8) {
3503                             SvCUR_set(sv, d - SvPVX_const(sv));
3504                             SvPOK_on(sv);
3505                             *d = '\0';
3506                             /* See Note on sizing above.  */
3507                             sv_utf8_upgrade_flags_grow(sv,
3508                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3509                                                 len + (STRLEN)(send - s) + 1);
3510                             d = SvPVX(sv) + SvCUR(sv);
3511                             has_utf8 = TRUE;
3512                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3513
3514                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3515                              * set correctly here). */
3516                             const STRLEN off = d - SvPVX_const(sv);
3517                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3518                         }
3519                         if (! SvUTF8(res)) {    /* Make sure \N{} return is UTF-8 */
3520                             sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3521                             str = SvPV_const(res, len);
3522                         }
3523                         Copy(str, d, len, char);
3524                         d += len;
3525                     }
3526
3527                     SvREFCNT_dec(res);
3528
3529                 } /* End \N{NAME} */
3530 #ifdef EBCDIC
3531                 if (!dorange)
3532                     native_range = FALSE; /* \N{} is defined to be Unicode */
3533 #endif
3534                 s = e + 1;  /* Point to just after the '}' */
3535                 continue;
3536
3537             /* \c is a control character */
3538             case 'c':
3539                 s++;
3540                 if (s < send) {
3541                     *d++ = grok_bslash_c(*s++, 1);
3542                 }
3543                 else {
3544                     yyerror("Missing control char name in \\c");
3545                 }
3546                 continue;
3547
3548             /* printf-style backslashes, formfeeds, newlines, etc */
3549             case 'b':
3550                 *d++ = '\b';
3551                 break;
3552             case 'n':
3553                 *d++ = '\n';
3554                 break;
3555             case 'r':
3556                 *d++ = '\r';
3557                 break;
3558             case 'f':
3559                 *d++ = '\f';
3560                 break;
3561             case 't':
3562                 *d++ = '\t';
3563                 break;
3564             case 'e':
3565                 *d++ = ESC_NATIVE;
3566                 break;
3567             case 'a':
3568                 *d++ = '\a';
3569                 break;
3570             } /* end switch */
3571
3572             s++;
3573             continue;
3574         } /* end if (backslash) */
3575 #ifdef EBCDIC
3576         else
3577             literal_endpoint++;
3578 #endif
3579
3580     default_action:
3581         /* If we started with encoded form, or already know we want it,
3582            then encode the next character */
3583         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3584             STRLEN len  = 1;
3585
3586
3587             /* One might think that it is wasted effort in the case of the
3588              * source being utf8 (this_utf8 == TRUE) to take the next character
3589              * in the source, convert it to an unsigned value, and then convert
3590              * it back again.  But the source has not been validated here.  The
3591              * routine that does the conversion checks for errors like
3592              * malformed utf8 */
3593
3594             const UV nextuv   = (this_utf8)
3595                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3596                                 : (UV) ((U8) *s);
3597             const STRLEN need = UNISKIP(nextuv);
3598             if (!has_utf8) {
3599                 SvCUR_set(sv, d - SvPVX_const(sv));
3600                 SvPOK_on(sv);
3601                 *d = '\0';
3602                 /* See Note on sizing above.  */
3603                 sv_utf8_upgrade_flags_grow(sv,
3604                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3605                                         need + (STRLEN)(send - s) + 1);
3606                 d = SvPVX(sv) + SvCUR(sv);
3607                 has_utf8 = TRUE;
3608             } else if (need > len) {
3609                 /* encoded value larger than old, may need extra space (NOTE:
3610                  * SvCUR() is not set correctly here).   See Note on sizing
3611                  * above.  */
3612                 const STRLEN off = d - SvPVX_const(sv);
3613                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3614             }
3615             s += len;
3616
3617             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3618 #ifdef EBCDIC
3619             if (uv > 255 && !dorange)
3620                 native_range = FALSE;
3621 #endif
3622         }
3623         else {
3624             *d++ = *s++;
3625         }
3626     } /* while loop to process each character */
3627
3628     /* terminate the string and set up the sv */
3629     *d = '\0';
3630     SvCUR_set(sv, d - SvPVX_const(sv));
3631     if (SvCUR(sv) >= SvLEN(sv))
3632         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3633                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3634
3635     SvPOK_on(sv);
3636     if (IN_ENCODING && !has_utf8) {
3637         sv_recode_to_utf8(sv, _get_encoding());
3638         if (SvUTF8(sv))
3639             has_utf8 = TRUE;
3640     }
3641     if (has_utf8) {
3642         SvUTF8_on(sv);
3643         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3644             PL_sublex_info.sub_op->op_private |=
3645                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3646         }
3647     }
3648
3649     /* shrink the sv if we allocated more than we used */
3650     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3651         SvPV_shrink_to_cur(sv);
3652     }
3653
3654     /* return the substring (via pl_yylval) only if we parsed anything */
3655     if (s > start) {
3656         char *s2 = start;
3657         for (; s2 < s; s2++) {
3658             if (*s2 == '\n')
3659                 COPLINE_INC_WITH_HERELINES;
3660         }
3661         SvREFCNT_inc_simple_void_NN(sv);
3662         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3663             && ! PL_parser->lex_re_reparsing)
3664         {
3665             const char *const key = PL_lex_inpat ? "qr" : "q";
3666             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3667             const char *type;
3668             STRLEN typelen;
3669
3670             if (PL_lex_inwhat == OP_TRANS) {
3671                 type = "tr";
3672                 typelen = 2;
3673             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3674                 type = "s";
3675                 typelen = 1;
3676             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3677                 type = "q";
3678                 typelen = 1;
3679             } else  {
3680                 type = "qq";
3681                 typelen = 2;
3682             }
3683
3684             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3685                                 type, typelen);
3686         }
3687         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3688     }
3689     LEAVE_with_name("scan_const");
3690     return s;
3691 }
3692
3693 /* S_intuit_more
3694  * Returns TRUE if there's more to the expression (e.g., a subscript),
3695  * FALSE otherwise.
3696  *
3697  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3698  *
3699  * ->[ and ->{ return TRUE
3700  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3701  * { and [ outside a pattern are always subscripts, so return TRUE
3702  * if we're outside a pattern and it's not { or [, then return FALSE
3703  * if we're in a pattern and the first char is a {
3704  *   {4,5} (any digits around the comma) returns FALSE
3705  * if we're in a pattern and the first char is a [
3706  *   [] returns FALSE
3707  *   [SOMETHING] has a funky algorithm to decide whether it's a
3708  *      character class or not.  It has to deal with things like
3709  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3710  * anything else returns TRUE
3711  */
3712
3713 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3714
3715 STATIC int
3716 S_intuit_more(pTHX_ char *s)
3717 {
3718     PERL_ARGS_ASSERT_INTUIT_MORE;
3719
3720     if (PL_lex_brackets)
3721         return TRUE;
3722     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3723         return TRUE;
3724     if (*s == '-' && s[1] == '>'
3725      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3726      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3727         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3728         return TRUE;
3729     if (*s != '{' && *s != '[')
3730         return FALSE;
3731     if (!PL_lex_inpat)
3732         return TRUE;
3733
3734     /* In a pattern, so maybe we have {n,m}. */
3735     if (*s == '{') {
3736         if (regcurly(s)) {
3737             return FALSE;
3738         }
3739         return TRUE;
3740     }
3741
3742     /* On the other hand, maybe we have a character class */
3743
3744     s++;
3745     if (*s == ']' || *s == '^')
3746         return FALSE;
3747     else {
3748         /* this is terrifying, and it works */
3749         int weight;
3750         char seen[256];
3751         const char * const send = strchr(s,']');
3752         unsigned char un_char, last_un_char;
3753         char tmpbuf[sizeof PL_tokenbuf * 4];
3754
3755         if (!send)              /* has to be an expression */
3756             return TRUE;
3757         weight = 2;             /* let's weigh the evidence */
3758
3759         if (*s == '$')
3760             weight -= 3;
3761         else if (isDIGIT(*s)) {
3762             if (s[1] != ']') {
3763                 if (isDIGIT(s[1]) && s[2] == ']')
3764                     weight -= 10;
3765             }
3766             else
3767                 weight -= 100;
3768         }
3769         Zero(seen,256,char);
3770         un_char = 255;
3771         for (; s < send; s++) {
3772             last_un_char = un_char;
3773             un_char = (unsigned char)*s;
3774             switch (*s) {
3775             case '@':
3776             case '&':
3777             case '$':
3778                 weight -= seen[un_char] * 10;
3779                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3780                     int len;
3781                     char *tmp = PL_bufend;
3782                     PL_bufend = (char*)send;
3783                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3784                     PL_bufend = tmp;
3785                     len = (int)strlen(tmpbuf);
3786                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3787                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3788                         weight -= 100;
3789                     else
3790                         weight -= 10;
3791                 }
3792                 else if (*s == '$' && s[1] &&
3793                   strchr("[#!%*<>()-=",s[1])) {
3794                     if (/*{*/ strchr("])} =",s[2]))
3795                         weight -= 10;
3796                     else
3797                         weight -= 1;
3798                 }
3799                 break;
3800             case '\\':
3801                 un_char = 254;
3802                 if (s[1]) {
3803                     if (strchr("wds]",s[1]))
3804                         weight += 100;
3805                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3806                         weight += 1;
3807                     else if (strchr("rnftbxcav",s[1]))
3808                         weight += 40;
3809                     else if (isDIGIT(s[1])) {
3810                         weight += 40;
3811                         while (s[1] && isDIGIT(s[1]))
3812                             s++;
3813                     }
3814                 }
3815                 else
3816                     weight += 100;
3817                 break;
3818             case '-':
3819                 if (s[1] == '\\')
3820                     weight += 50;
3821                 if (strchr("aA01! ",last_un_char))
3822                     weight += 30;
3823                 if (strchr("zZ79~",s[1]))
3824                     weight += 30;
3825                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3826                     weight -= 5;        /* cope with negative subscript */
3827                 break;
3828             default:
3829                 if (!isWORDCHAR(last_un_char)
3830                     && !(last_un_char == '$' || last_un_char == '@'
3831                          || last_un_char == '&')
3832                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3833                     char *d = s;
3834                     while (isALPHA(*s))
3835                         s++;
3836                     if (keyword(d, s - d, 0))
3837                         weight -= 150;
3838                 }
3839                 if (un_char == last_un_char + 1)
3840                     weight += 5;
3841                 weight -= seen[un_char];
3842                 break;
3843             }
3844             seen[un_char]++;
3845         }
3846         if (weight >= 0)        /* probably a character class */
3847             return FALSE;
3848     }
3849
3850     return TRUE;
3851 }
3852
3853 /*
3854  * S_intuit_method
3855  *
3856  * Does all the checking to disambiguate
3857  *   foo bar
3858  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3859  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3860  *
3861  * First argument is the stuff after the first token, e.g. "bar".
3862  *
3863  * Not a method if foo is a filehandle.
3864  * Not a method if foo is a subroutine prototyped to take a filehandle.
3865  * Not a method if it's really "Foo $bar"
3866  * Method if it's "foo $bar"
3867  * Not a method if it's really "print foo $bar"
3868  * Method if it's really "foo package::" (interpreted as package->foo)
3869  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3870  * Not a method if bar is a filehandle or package, but is quoted with
3871  *   =>
3872  */
3873
3874 STATIC int
3875 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
3876 {
3877     char *s = start + (*start == '$');
3878     char tmpbuf[sizeof PL_tokenbuf];
3879     STRLEN len;
3880     GV* indirgv;
3881         /* Mustn't actually add anything to a symbol table.
3882            But also don't want to "initialise" any placeholder
3883            constants that might already be there into full
3884            blown PVGVs with attached PVCV.  */
3885     GV * const gv =
3886         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
3887
3888     PERL_ARGS_ASSERT_INTUIT_METHOD;
3889
3890     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3891             return 0;
3892     if (cv && SvPOK(cv)) {
3893         const char *proto = CvPROTO(cv);
3894         if (proto) {
3895             while (*proto && (isSPACE(*proto) || *proto == ';'))
3896                 proto++;
3897             if (*proto == '*')
3898                 return 0;
3899         }
3900     }
3901
3902     if (*start == '$') {
3903         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3904                 isUPPER(*PL_tokenbuf))
3905             return 0;
3906         s = skipspace(s);
3907         PL_bufptr = start;
3908         PL_expect = XREF;
3909         return *s == '(' ? FUNCMETH : METHOD;
3910     }
3911
3912     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3913     /* start is the beginning of the possible filehandle/object,
3914      * and s is the end of it
3915      * tmpbuf is a copy of it (but with single quotes as double colons)
3916      */
3917
3918     if (!keyword(tmpbuf, len, 0)) {
3919         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3920             len -= 2;
3921             tmpbuf[len] = '\0';
3922             goto bare_package;
3923         }
3924         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3925         if (indirgv && GvCVu(indirgv))
3926             return 0;
3927         /* filehandle or package name makes it a method */
3928         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3929             s = skipspace(s);
3930             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3931                 return 0;       /* no assumptions -- "=>" quotes bareword */
3932       bare_package:
3933             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3934                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3935             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3936             PL_expect = XTERM;
3937             force_next(WORD);
3938             PL_bufptr = s;
3939             return *s == '(' ? FUNCMETH : METHOD;
3940         }
3941     }
3942     return 0;
3943 }
3944
3945 /* Encoded script support. filter_add() effectively inserts a
3946  * 'pre-processing' function into the current source input stream.
3947  * Note that the filter function only applies to the current source file
3948  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3949  *
3950  * The datasv parameter (which may be NULL) can be used to pass
3951  * private data to this instance of the filter. The filter function
3952  * can recover the SV using the FILTER_DATA macro and use it to
3953  * store private buffers and state information.
3954  *
3955  * The supplied datasv parameter is upgraded to a PVIO type
3956  * and the IoDIRP/IoANY field is used to store the function pointer,
3957  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3958  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3959  * private use must be set using malloc'd pointers.
3960  */
3961
3962 SV *
3963 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3964 {
3965     if (!funcp)
3966         return NULL;
3967
3968     if (!PL_parser)
3969         return NULL;
3970
3971     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3972         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3973
3974     if (!PL_rsfp_filters)
3975         PL_rsfp_filters = newAV();
3976     if (!datasv)
3977         datasv = newSV(0);
3978     SvUPGRADE(datasv, SVt_PVIO);
3979     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3980     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3981     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3982                           FPTR2DPTR(void *, IoANY(datasv)),
3983                           SvPV_nolen(datasv)));
3984     av_unshift(PL_rsfp_filters, 1);
3985     av_store(PL_rsfp_filters, 0, datasv) ;
3986     if (
3987         !PL_parser->filtered
3988      && PL_parser->lex_flags & LEX_EVALBYTES
3989      && PL_bufptr < PL_bufend
3990     ) {
3991         const char *s = PL_bufptr;
3992         while (s < PL_bufend) {
3993             if (*s == '\n') {
3994                 SV *linestr = PL_parser->linestr;
3995                 char *buf = SvPVX(linestr);
3996                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3997                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3998                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3999                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4000                 STRLEN const last_uni_pos =
4001                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4002                 STRLEN const last_lop_pos =
4003                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4004                 av_push(PL_rsfp_filters, linestr);
4005                 PL_parser->linestr =
4006                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4007                 buf = SvPVX(PL_parser->linestr);
4008                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4009                 PL_parser->bufptr = buf + bufptr_pos;
4010                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4011                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4012                 PL_parser->linestart = buf + linestart_pos;
4013                 if (PL_parser->last_uni)
4014                     PL_parser->last_uni = buf + last_uni_pos;
4015                 if (PL_parser->last_lop)
4016                     PL_parser->last_lop = buf + last_lop_pos;
4017                 SvLEN(linestr) = SvCUR(linestr);
4018                 SvCUR(linestr) = s-SvPVX(linestr);
4019                 PL_parser->filtered = 1;
4020                 break;
4021             }
4022             s++;
4023         }
4024     }
4025     return(datasv);
4026 }
4027
4028
4029 /* Delete most recently added instance of this filter function. */
4030 void
4031 Perl_filter_del(pTHX_ filter_t funcp)
4032 {
4033     SV *datasv;
4034
4035     PERL_ARGS_ASSERT_FILTER_DEL;
4036
4037 #ifdef DEBUGGING
4038     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4039                           FPTR2DPTR(void*, funcp)));
4040 #endif
4041     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4042         return;
4043     /* if filter is on top of stack (usual case) just pop it off */
4044     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4045     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4046         sv_free(av_pop(PL_rsfp_filters));
4047
4048         return;
4049     }
4050     /* we need to search for the correct entry and clear it     */
4051     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4052 }
4053
4054
4055 /* Invoke the idxth filter function for the current rsfp.        */
4056 /* maxlen 0 = read one text line */
4057 I32
4058 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4059 {
4060     filter_t funcp;
4061     SV *datasv = NULL;
4062     /* This API is bad. It should have been using unsigned int for maxlen.
4063        Not sure if we want to change the API, but if not we should sanity
4064        check the value here.  */
4065     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4066
4067     PERL_ARGS_ASSERT_FILTER_READ;
4068
4069     if (!PL_parser || !PL_rsfp_filters)
4070         return -1;
4071     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4072         /* Provide a default input filter to make life easy.    */
4073         /* Note that we append to the line. This is handy.      */
4074         DEBUG_P(PerlIO_printf(Perl_debug_log,
4075                               "filter_read %d: from rsfp\n", idx));
4076         if (correct_length) {
4077             /* Want a block */
4078             int len ;
4079             const int old_len = SvCUR(buf_sv);
4080
4081             /* ensure buf_sv is large enough */
4082             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4083             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4084                                    correct_length)) <= 0) {
4085                 if (PerlIO_error(PL_rsfp))
4086                     return -1;          /* error */
4087                 else
4088                     return 0 ;          /* end of file */
4089             }
4090             SvCUR_set(buf_sv, old_len + len) ;
4091             SvPVX(buf_sv)[old_len + len] = '\0';
4092         } else {
4093             /* Want a line */
4094             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4095                 if (PerlIO_error(PL_rsfp))
4096                     return -1;          /* error */
4097                 else
4098                     return 0 ;          /* end of file */
4099             }
4100         }
4101         return SvCUR(buf_sv);
4102     }
4103     /* Skip this filter slot if filter has been deleted */
4104     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4105         DEBUG_P(PerlIO_printf(Perl_debug_log,
4106                               "filter_read %d: skipped (filter deleted)\n",
4107                               idx));
4108         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4109     }
4110     if (SvTYPE(datasv) != SVt_PVIO) {
4111         if (correct_length) {
4112             /* Want a block */
4113             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4114             if (!remainder) return 0; /* eof */
4115             if (correct_length > remainder) correct_length = remainder;
4116             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4117             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4118         } else {
4119             /* Want a line */
4120             const char *s = SvEND(datasv);
4121             const char *send = SvPVX(datasv) + SvLEN(datasv);
4122             while (s < send) {
4123                 if (*s == '\n') {
4124                     s++;
4125                     break;
4126                 }
4127                 s++;
4128             }
4129             if (s == send) return 0; /* eof */
4130             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4131             SvCUR_set(datasv, s-SvPVX(datasv));
4132         }
4133         return SvCUR(buf_sv);
4134     }
4135     /* Get function pointer hidden within datasv        */
4136     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4137     DEBUG_P(PerlIO_printf(Perl_debug_log,
4138                           "filter_read %d: via function %p (%s)\n",
4139                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4140     /* Call function. The function is expected to       */
4141     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4142     /* Return: <0:error, =0:eof, >0:not eof             */
4143     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4144 }
4145
4146 STATIC char *
4147 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4148 {
4149     PERL_ARGS_ASSERT_FILTER_GETS;
4150
4151 #ifdef PERL_CR_FILTER
4152     if (!PL_rsfp_filters) {
4153         filter_add(S_cr_textfilter,NULL);
4154     }
4155 #endif
4156     if (PL_rsfp_filters) {
4157         if (!append)
4158             SvCUR_set(sv, 0);   /* start with empty line        */
4159         if (FILTER_READ(0, sv, 0) > 0)
4160             return ( SvPVX(sv) ) ;
4161         else
4162             return NULL ;
4163     }
4164     else
4165         return (sv_gets(sv, PL_rsfp, append));
4166 }
4167
4168 STATIC HV *
4169 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4170 {
4171     GV *gv;
4172
4173     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4174
4175     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4176         return PL_curstash;
4177
4178     if (len > 2 &&
4179         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4180         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4181     {
4182         return GvHV(gv);                        /* Foo:: */
4183     }
4184
4185     /* use constant CLASS => 'MyClass' */
4186     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4187     if (gv && GvCV(gv)) {
4188         SV * const sv = cv_const_sv(GvCV(gv));
4189         if (sv)
4190             return gv_stashsv(sv, 0);
4191     }
4192
4193     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4194 }
4195
4196
4197 STATIC char *
4198 S_tokenize_use(pTHX_ int is_use, char *s) {
4199     PERL_ARGS_ASSERT_TOKENIZE_USE;
4200
4201     if (PL_expect != XSTATE)
4202         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4203                     is_use ? "use" : "no"));
4204     PL_expect = XTERM;
4205     s = skipspace(s);
4206     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4207         s = force_version(s, TRUE);
4208         if (*s == ';' || *s == '}'
4209                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4210             NEXTVAL_NEXTTOKE.opval = NULL;
4211             force_next(WORD);
4212         }
4213         else if (*s == 'v') {
4214             s = force_word(s,WORD,FALSE,TRUE);
4215             s = force_version(s, FALSE);
4216         }
4217     }
4218     else {
4219         s = force_word(s,WORD,FALSE,TRUE);
4220         s = force_version(s, FALSE);
4221     }
4222     pl_yylval.ival = is_use;
4223     return s;
4224 }
4225 #ifdef DEBUGGING
4226     static const char* const exp_name[] =
4227         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4228           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4229           "TERMORDORDOR"
4230         };
4231 #endif
4232
4233 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4234 STATIC bool
4235 S_word_takes_any_delimeter(char *p, STRLEN len)
4236 {
4237     return (len == 1 && strchr("msyq", p[0])) ||
4238            (len == 2 && (
4239             (p[0] == 't' && p[1] == 'r') ||
4240             (p[0] == 'q' && strchr("qwxr", p[1]))));
4241 }
4242
4243 static void
4244 S_check_scalar_slice(pTHX_ char *s)
4245 {
4246     s++;
4247     while (*s == ' ' || *s == '\t') s++;
4248     if (*s == 'q' && s[1] == 'w'
4249      && !isWORDCHAR_lazy_if(s+2,UTF))
4250         return;
4251     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4252         s += UTF ? UTF8SKIP(s) : 1;
4253     if (*s == '}' || *s == ']')
4254         pl_yylval.ival = OPpSLICEWARNING;
4255 }
4256
4257 /*
4258   yylex
4259
4260   Works out what to call the token just pulled out of the input
4261   stream.  The yacc parser takes care of taking the ops we return and
4262   stitching them into a tree.
4263
4264   Returns:
4265     The type of the next token
4266
4267   Structure:
4268       Switch based on the current state:
4269           - if we already built the token before, use it
4270           - if we have a case modifier in a string, deal with that
4271           - handle other cases of interpolation inside a string
4272           - scan the next line if we are inside a format
4273       In the normal state switch on the next character:
4274           - default:
4275             if alphabetic, go to key lookup
4276             unrecoginized character - croak
4277           - 0/4/26: handle end-of-line or EOF
4278           - cases for whitespace
4279           - \n and #: handle comments and line numbers
4280           - various operators, brackets and sigils
4281           - numbers
4282           - quotes
4283           - 'v': vstrings (or go to key lookup)
4284           - 'x' repetition operator (or go to key lookup)
4285           - other ASCII alphanumerics (key lookup begins here):
4286               word before => ?
4287               keyword plugin
4288               scan built-in keyword (but do nothing with it yet)
4289               check for statement label
4290               check for lexical subs
4291                   goto just_a_word if there is one
4292               see whether built-in keyword is overridden
4293               switch on keyword number:
4294                   - default: just_a_word:
4295                       not a built-in keyword; handle bareword lookup
4296                       disambiguate between method and sub call
4297                       fall back to bareword
4298                   - cases for built-in keywords
4299 */
4300
4301
4302 int
4303 Perl_yylex(pTHX)
4304 {
4305     dVAR;
4306     char *s = PL_bufptr;
4307     char *d;
4308     STRLEN len;
4309     bool bof = FALSE;
4310     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4311     U8 formbrack = 0;
4312     U32 fake_eof = 0;
4313
4314     /* orig_keyword, gvp, and gv are initialized here because
4315      * jump to the label just_a_word_zero can bypass their
4316      * initialization later. */
4317     I32 orig_keyword = 0;
4318     GV *gv = NULL;
4319     GV **gvp = NULL;
4320
4321     DEBUG_T( {
4322         SV* tmp = newSVpvs("");
4323         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4324             (IV)CopLINE(PL_curcop),
4325             lex_state_names[PL_lex_state],
4326             exp_name[PL_expect],
4327             pv_display(tmp, s, strlen(s), 0, 60));
4328         SvREFCNT_dec(tmp);
4329     } );
4330
4331     switch (PL_lex_state) {
4332     case LEX_NORMAL:
4333     case LEX_INTERPNORMAL:
4334         break;
4335
4336     /* when we've already built the next token, just pull it out of the queue */
4337     case LEX_KNOWNEXT:
4338         PL_nexttoke--;
4339         pl_yylval = PL_nextval[PL_nexttoke];
4340         if (!PL_nexttoke) {
4341             PL_lex_state = PL_lex_defer;
4342             PL_lex_defer = LEX_NORMAL;
4343         }
4344         {
4345             I32 next_type;
4346             next_type = PL_nexttype[PL_nexttoke];
4347             if (next_type & (7<<24)) {
4348                 if (next_type & (1<<24)) {
4349                     if (PL_lex_brackets > 100)
4350                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4351                     PL_lex_brackstack[PL_lex_brackets++] =
4352                         (char) ((next_type >> 16) & 0xff);
4353                 }
4354                 if (next_type & (2<<24))
4355                     PL_lex_allbrackets++;
4356                 if (next_type & (4<<24))
4357                     PL_lex_allbrackets--;
4358                 next_type &= 0xffff;
4359             }
4360             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4361         }
4362
4363     /* interpolated case modifiers like \L \U, including \Q and \E.
4364        when we get here, PL_bufptr is at the \
4365     */
4366     case LEX_INTERPCASEMOD:
4367 #ifdef DEBUGGING
4368         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4369             Perl_croak(aTHX_
4370                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4371                        PL_bufptr, PL_bufend, *PL_bufptr);
4372 #endif
4373         /* handle \E or end of string */
4374         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4375             /* if at a \E */
4376             if (PL_lex_casemods) {
4377                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4378                 PL_lex_casestack[PL_lex_casemods] = '\0';
4379
4380                 if (PL_bufptr != PL_bufend
4381                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4382                         || oldmod == 'F')) {
4383                     PL_bufptr += 2;
4384                     PL_lex_state = LEX_INTERPCONCAT;
4385                 }
4386                 PL_lex_allbrackets--;
4387                 return REPORT(')');
4388             }
4389             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4390                /* Got an unpaired \E */
4391                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4392                         "Useless use of \\E");
4393             }
4394             if (PL_bufptr != PL_bufend)
4395                 PL_bufptr += 2;
4396             PL_lex_state = LEX_INTERPCONCAT;
4397             return yylex();
4398         }
4399         else {
4400             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4401               "### Saw case modifier\n"); });
4402             s = PL_bufptr + 1;
4403             if (s[1] == '\\' && s[2] == 'E') {
4404                 PL_bufptr = s + 3;
4405                 PL_lex_state = LEX_INTERPCONCAT;
4406                 return yylex();
4407             }
4408             else {
4409                 I32 tmp;
4410                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4411                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4412                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4413                     (strchr(PL_lex_casestack, 'L')
4414                         || strchr(PL_lex_casestack, 'U')
4415                         || strchr(PL_lex_casestack, 'F'))) {
4416                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4417                     PL_lex_allbrackets--;
4418                     return REPORT(')');
4419                 }
4420                 if (PL_lex_casemods > 10)
4421                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4422                 PL_lex_casestack[PL_lex_casemods++] = *s;
4423                 PL_lex_casestack[PL_lex_casemods] = '\0';
4424                 PL_lex_state = LEX_INTERPCONCAT;
4425                 NEXTVAL_NEXTTOKE.ival = 0;
4426                 force_next((2<<24)|'(');
4427                 if (*s == 'l')
4428                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4429                 else if (*s == 'u')
4430                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4431                 else if (*s == 'L')
4432                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4433                 else if (*s == 'U')
4434                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4435                 else if (*s == 'Q')
4436                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4437                 else if (*s == 'F')
4438                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4439                 else
4440                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4441                 PL_bufptr = s + 1;
4442             }
4443             force_next(FUNC);
4444             if (PL_lex_starts) {
4445                 s = PL_bufptr;
4446                 PL_lex_starts = 0;
4447                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4448                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4449                     TOKEN(',');
4450                 else
4451                     AopNOASSIGN(OP_CONCAT);
4452             }
4453             else
4454                 return yylex();
4455         }
4456
4457     case LEX_INTERPPUSH:
4458         return REPORT(sublex_push());
4459
4460     case LEX_INTERPSTART:
4461         if (PL_bufptr == PL_bufend)
4462             return REPORT(sublex_done());
4463         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4464               "### Interpolated variable\n"); });
4465         PL_expect = XTERM;
4466         /* for /@a/, we leave the joining for the regex engine to do
4467          * (unless we're within \Q etc) */
4468         PL_lex_dojoin = (*PL_bufptr == '@'
4469                             && (!PL_lex_inpat || PL_lex_casemods));
4470         PL_lex_state = LEX_INTERPNORMAL;
4471         if (PL_lex_dojoin) {
4472             NEXTVAL_NEXTTOKE.ival = 0;
4473             force_next(',');
4474             force_ident("\"", '$');
4475             NEXTVAL_NEXTTOKE.ival = 0;
4476             force_next('$');
4477             NEXTVAL_NEXTTOKE.ival = 0;
4478             force_next((2<<24)|'(');
4479             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4480             force_next(FUNC);
4481         }
4482         /* Convert (?{...}) and friends to 'do {...}' */
4483         if (PL_lex_inpat && *PL_bufptr == '(') {
4484             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4485             PL_bufptr += 2;
4486             if (*PL_bufptr != '{')
4487                 PL_bufptr++;
4488             PL_expect = XTERMBLOCK;
4489             force_next(DO);
4490         }
4491
4492         if (PL_lex_starts++) {
4493             s = PL_bufptr;
4494             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495             if (!PL_lex_casemods && PL_lex_inpat)
4496                 TOKEN(',');
4497             else
4498                 AopNOASSIGN(OP_CONCAT);
4499         }
4500         return yylex();
4501
4502     case LEX_INTERPENDMAYBE:
4503         if (intuit_more(PL_bufptr)) {
4504             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4505             break;
4506         }
4507         /* FALLTHROUGH */
4508
4509     case LEX_INTERPEND:
4510         if (PL_lex_dojoin) {
4511             const U8 dojoin_was = PL_lex_dojoin;
4512             PL_lex_dojoin = FALSE;
4513             PL_lex_state = LEX_INTERPCONCAT;
4514             PL_lex_allbrackets--;
4515             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4516         }
4517         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4518             && SvEVALED(PL_lex_repl))
4519         {
4520             if (PL_bufptr != PL_bufend)
4521                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4522             PL_lex_repl = NULL;
4523         }
4524         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4525            re_eval_str.  If the here-doc body’s length equals the previous
4526            value of re_eval_start, re_eval_start will now be null.  So
4527            check re_eval_str as well. */
4528         if (PL_parser->lex_shared->re_eval_start
4529          || PL_parser->lex_shared->re_eval_str) {
4530             SV *sv;
4531             if (*PL_bufptr != ')')
4532                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4533             PL_bufptr++;
4534             /* having compiled a (?{..}) expression, return the original
4535              * text too, as a const */
4536             if (PL_parser->lex_shared->re_eval_str) {
4537                 sv = PL_parser->lex_shared->re_eval_str;
4538                 PL_parser->lex_shared->re_eval_str = NULL;
4539                 SvCUR_set(sv,
4540                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4541                 SvPV_shrink_to_cur(sv);
4542             }
4543             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4544                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4545             NEXTVAL_NEXTTOKE.opval =
4546                     (OP*)newSVOP(OP_CONST, 0,
4547                                  sv);
4548             force_next(THING);
4549             PL_parser->lex_shared->re_eval_start = NULL;
4550             PL_expect = XTERM;
4551             return REPORT(',');
4552         }
4553
4554         /* FALLTHROUGH */
4555     case LEX_INTERPCONCAT:
4556 #ifdef DEBUGGING
4557         if (PL_lex_brackets)
4558             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4559                        (long) PL_lex_brackets);
4560 #endif
4561         if (PL_bufptr == PL_bufend)
4562             return REPORT(sublex_done());
4563
4564         /* m'foo' still needs to be parsed for possible (?{...}) */
4565         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4566             SV *sv = newSVsv(PL_linestr);
4567             sv = tokeq(sv);
4568             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4569             s = PL_bufend;
4570         }
4571         else {
4572             s = scan_const(PL_bufptr);
4573             if (*s == '\\')
4574                 PL_lex_state = LEX_INTERPCASEMOD;
4575             else
4576                 PL_lex_state = LEX_INTERPSTART;
4577         }
4578
4579         if (s != PL_bufptr) {
4580             NEXTVAL_NEXTTOKE = pl_yylval;
4581             PL_expect = XTERM;
4582             force_next(THING);
4583             if (PL_lex_starts++) {
4584                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4585                 if (!PL_lex_casemods && PL_lex_inpat)
4586                     TOKEN(',');
4587                 else
4588                     AopNOASSIGN(OP_CONCAT);
4589             }
4590             else {
4591                 PL_bufptr = s;
4592                 return yylex();
4593             }
4594         }
4595
4596         return yylex();
4597     case LEX_FORMLINE:
4598         s = scan_formline(PL_bufptr);
4599         if (!PL_lex_formbrack)
4600         {
4601             formbrack = 1;
4602             goto rightbracket;
4603         }
4604         PL_bufptr = s;
4605         return yylex();
4606     }
4607
4608     /* We really do *not* want PL_linestr ever becoming a COW. */
4609     assert (!SvIsCOW(PL_linestr));
4610     s = PL_bufptr;
4611     PL_oldoldbufptr = PL_oldbufptr;
4612     PL_oldbufptr = s;
4613     PL_parser->saw_infix_sigil = 0;
4614
4615   retry:
4616     switch (*s) {
4617     default:
4618         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4619             goto keylookup;
4620         {
4621         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4622         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4623                                                     UTF8SKIP(s),
4624                                                     SVs_TEMP | SVf_UTF8),
4625                                             10, UNI_DISPLAY_ISPRINT)
4626                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4627         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4628         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4629             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4630         } else {
4631             d = PL_linestart;
4632         }
4633         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4634                           UTF8fARG(UTF, (s - d), d),
4635                          (int) len + 1);
4636     }
4637     case 4:
4638     case 26:
4639         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4640     case 0:
4641         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4642             PL_last_uni = 0;
4643             PL_last_lop = 0;
4644             if (PL_lex_brackets &&
4645                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4646                 yyerror((const char *)
4647                         (PL_lex_formbrack
4648                          ? "Format not terminated"
4649                          : "Missing right curly or square bracket"));
4650             }
4651             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4652                         "### Tokener got EOF\n");
4653             } );
4654             TOKEN(0);
4655         }
4656         if (s++ < PL_bufend)
4657             goto retry;                 /* ignore stray nulls */
4658         PL_last_uni = 0;
4659         PL_last_lop = 0;
4660         if (!PL_in_eval && !PL_preambled) {
4661             PL_preambled = TRUE;
4662             if (PL_perldb) {
4663                 /* Generate a string of Perl code to load the debugger.
4664                  * If PERL5DB is set, it will return the contents of that,
4665                  * otherwise a compile-time require of perl5db.pl.  */
4666
4667                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4668
4669                 if (pdb) {
4670                     sv_setpv(PL_linestr, pdb);
4671                     sv_catpvs(PL_linestr,";");
4672                 } else {
4673                     SETERRNO(0,SS_NORMAL);
4674                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4675                 }
4676                 PL_parser->preambling = CopLINE(PL_curcop);
4677             } else
4678                 sv_setpvs(PL_linestr,"");
4679             if (PL_preambleav) {
4680                 SV **svp = AvARRAY(PL_preambleav);
4681                 SV **const end = svp + AvFILLp(PL_preambleav);
4682                 while(svp <= end) {
4683                     sv_catsv(PL_linestr, *svp);
4684                     ++svp;
4685                     sv_catpvs(PL_linestr, ";");
4686                 }
4687                 sv_free(MUTABLE_SV(PL_preambleav));
4688                 PL_preambleav = NULL;
4689             }
4690             if (PL_minus_E)
4691                 sv_catpvs(PL_linestr,
4692                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4693             if (PL_minus_n || PL_minus_p) {
4694                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4695                 if (PL_minus_l)
4696                     sv_catpvs(PL_linestr,"chomp;");
4697                 if (PL_minus_a) {
4698                     if (PL_minus_F) {
4699                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4700                              || *PL_splitstr == '"')
4701                               && strchr(PL_splitstr + 1, *PL_splitstr))
4702                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4703                         else {
4704                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4705                                bytes can be used as quoting characters.  :-) */
4706                             const char *splits = PL_splitstr;
4707                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4708                             do {
4709                                 /* Need to \ \s  */
4710                                 if (*splits == '\\')
4711                                     sv_catpvn(PL_linestr, splits, 1);
4712                                 sv_catpvn(PL_linestr, splits, 1);
4713                             } while (*splits++);
4714                             /* This loop will embed the trailing NUL of
4715                                PL_linestr as the last thing it does before
4716                                terminating.  */
4717                             sv_catpvs(PL_linestr, ");");
4718                         }
4719                     }
4720                     else
4721                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4722                 }
4723             }
4724             sv_catpvs(PL_linestr, "\n");
4725             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4726             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4727             PL_last_lop = PL_last_uni = NULL;
4728             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4729                 update_debugger_info(PL_linestr, NULL, 0);
4730             goto retry;
4731         }
4732         do {
4733             fake_eof = 0;
4734             bof = PL_rsfp ? TRUE : FALSE;
4735             if (0) {
4736               fake_eof:
4737                 fake_eof = LEX_FAKE_EOF;
4738             }
4739             PL_bufptr = PL_bufend;
4740             COPLINE_INC_WITH_HERELINES;
4741             if (!lex_next_chunk(fake_eof)) {
4742                 CopLINE_dec(PL_curcop);
4743                 s = PL_bufptr;
4744                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4745             }
4746             CopLINE_dec(PL_curcop);
4747             s = PL_bufptr;
4748             /* If it looks like the start of a BOM or raw UTF-16,
4749              * check if it in fact is. */
4750             if (bof && PL_rsfp &&
4751                      (*s == 0 ||
4752                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4753                       *(U8*)s >= 0xFE ||
4754                       s[1] == 0)) {
4755                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4756                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4757 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4758                 /* offset may include swallowed CR */
4759                 if (!bof)
4760                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4761 #endif
4762                 if (bof) {
4763                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4764                     s = swallow_bom((U8*)s);
4765                 }
4766             }
4767             if (PL_parser->in_pod) {
4768                 /* Incest with pod. */
4769                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4770                     sv_setpvs(PL_linestr, "");
4771                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4772                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4773                     PL_last_lop = PL_last_uni = NULL;
4774                     PL_parser->in_pod = 0;
4775                 }
4776             }
4777             if (PL_rsfp || PL_parser->filtered)
4778                 incline(s);
4779         } while (PL_parser->in_pod);
4780         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4781         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4782         PL_last_lop = PL_last_uni = NULL;
4783         if (CopLINE(PL_curcop) == 1) {
4784             while (s < PL_bufend && isSPACE(*s))
4785                 s++;
4786             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4787                 s++;
4788             d = NULL;
4789             if (!PL_in_eval) {
4790                 if (*s == '#' && *(s+1) == '!')
4791                     d = s + 2;
4792 #ifdef ALTERNATE_SHEBANG
4793                 else {
4794                     static char const as[] = ALTERNATE_SHEBANG;
4795                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4796                         d = s + (sizeof(as) - 1);
4797                 }
4798 #endif /* ALTERNATE_SHEBANG */
4799             }
4800             if (d) {
4801                 char *ipath;
4802                 char *ipathend;
4803
4804                 while (isSPACE(*d))
4805                     d++;
4806                 ipath = d;
4807                 while (*d && !isSPACE(*d))
4808                     d++;
4809                 ipathend = d;
4810
4811 #ifdef ARG_ZERO_IS_SCRIPT
4812                 if (ipathend > ipath) {
4813                     /*
4814                      * HP-UX (at least) sets argv[0] to the script name,
4815                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4816                      * at least, set argv[0] to the basename of the Perl
4817                      * interpreter. So, having found "#!", we'll set it right.
4818                      */
4819                     SV* copfilesv = CopFILESV(PL_curcop);
4820                     if (copfilesv) {
4821                         SV * const x =
4822                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4823                                              SVt_PV)); /* $^X */
4824                         assert(SvPOK(x) || SvGMAGICAL(x));
4825                         if (sv_eq(x, copfilesv)) {
4826                             sv_setpvn(x, ipath, ipathend - ipath);
4827                             SvSETMAGIC(x);
4828                         }
4829                         else {
4830                             STRLEN blen;
4831                             STRLEN llen;
4832                             const char *bstart = SvPV_const(copfilesv, blen);
4833                             const char * const lstart = SvPV_const(x, llen);
4834                             if (llen < blen) {
4835                                 bstart += blen - llen;
4836                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
4837                                     sv_setpvn(x, ipath, ipathend - ipath);
4838                                     SvSETMAGIC(x);
4839                                 }
4840                             }
4841                         }
4842                     }
4843                     else {
4844                         /* Anything to do if no copfilesv? */
4845                     }
4846                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4847                 }
4848 #endif /* ARG_ZERO_IS_SCRIPT */
4849
4850                 /*
4851                  * Look for options.
4852                  */
4853                 d = instr(s,"perl -");
4854                 if (!d) {
4855                     d = instr(s,"perl");
4856 #if defined(DOSISH)
4857                     /* avoid getting into infinite loops when shebang
4858                      * line contains "Perl" rather than "perl" */
4859                     if (!d) {
4860                         for (d = ipathend-4; d >= ipath; --d) {
4861                             if (isALPHA_FOLD_EQ(*d, 'p')
4862                                 && !ibcmp(d, "perl", 4))
4863                             {
4864                                 break;
4865                             }
4866                         }
4867                         if (d < ipath)
4868                             d = NULL;
4869                     }
4870 #endif
4871                 }
4872 #ifdef ALTERNATE_SHEBANG
4873                 /*
4874                  * If the ALTERNATE_SHEBANG on this system starts with a
4875                  * character that can be part of a Perl expression, then if
4876                  * we see it but not "perl", we're probably looking at the
4877                  * start of Perl code, not a request to hand off to some
4878                  * other interpreter.  Similarly, if "perl" is there, but
4879                  * not in the first 'word' of the line, we assume the line
4880                  * contains the start of the Perl program.
4881                  */
4882                 if (d && *s != '#') {
4883                     const char *c = ipath;
4884                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4885                         c++;
4886                     if (c < d)
4887                         d = NULL;       /* "perl" not in first word; ignore */
4888                     else
4889                         *s = '#';       /* Don't try to parse shebang line */
4890                 }
4891 #endif /* ALTERNATE_SHEBANG */
4892                 if (!d &&
4893                     *s == '#' &&
4894                     ipathend > ipath &&
4895                     !PL_minus_c &&
4896                     !instr(s,"indir") &&
4897                     instr(PL_origargv[0],"perl"))
4898                 {
4899                     dVAR;
4900                     char **newargv;
4901
4902                     *ipathend = '\0';
4903                     s = ipathend + 1;
4904                     while (s < PL_bufend && isSPACE(*s))
4905                         s++;
4906                     if (s < PL_bufend) {
4907                         Newx(newargv,PL_origargc+3,char*);
4908                         newargv[1] = s;
4909                         while (s < PL_bufend && !isSPACE(*s))
4910                             s++;
4911                         *s = '\0';
4912                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4913                     }
4914                     else
4915                         newargv = PL_origargv;
4916                     newargv[0] = ipath;
4917                     PERL_FPU_PRE_EXEC
4918                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4919                     PERL_FPU_POST_EXEC
4920                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4921                 }
4922                 if (d) {
4923                     while (*d && !isSPACE(*d))
4924                         d++;
4925                     while (SPACE_OR_TAB(*d))
4926                         d++;
4927
4928                     if (*d++ == '-') {
4929                         const bool switches_done = PL_doswitches;
4930                         const U32 oldpdb = PL_perldb;
4931                         const bool oldn = PL_minus_n;
4932                         const bool oldp = PL_minus_p;
4933                         const char *d1 = d;
4934
4935                         do {
4936                             bool baduni = FALSE;
4937                             if (*d1 == 'C') {
4938                                 const char *d2 = d1 + 1;
4939                                 if (parse_unicode_opts((const char **)&d2)
4940                                     != PL_unicode)
4941                                     baduni = TRUE;
4942                             }
4943                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
4944                                 const char * const m = d1;
4945                                 while (*d1 && !isSPACE(*d1))
4946                                     d1++;
4947                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4948                                       (int)(d1 - m), m);
4949                             }
4950                             d1 = moreswitches(d1);
4951                         } while (d1);
4952                         if (PL_doswitches && !switches_done) {
4953                             int argc = PL_origargc;
4954                             char **argv = PL_origargv;
4955                             do {
4956                                 argc--,argv++;
4957                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4958                             init_argv_symbols(argc,argv);
4959                         }
4960                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4961                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4962                               /* if we have already added "LINE: while (<>) {",
4963                                  we must not do it again */
4964                         {
4965                             sv_setpvs(PL_linestr, "");
4966                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4967                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4968                             PL_last_lop = PL_last_uni = NULL;
4969                             PL_preambled = FALSE;
4970                             if (PERLDB_LINE || PERLDB_SAVESRC)
4971                                 (void)gv_fetchfile(PL_origfilename);
4972                             goto retry;
4973                         }
4974                     }
4975                 }
4976             }
4977         }
4978         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4979             PL_lex_state = LEX_FORMLINE;
4980             NEXTVAL_NEXTTOKE.ival = 0;
4981             force_next(FORMRBRACK);
4982             TOKEN(';');
4983         }
4984         goto retry;
4985     case '\r':
4986 #ifdef PERL_STRICT_CR
4987         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4988         Perl_croak(aTHX_
4989       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4990 #endif
4991     case ' ': case '\t': case '\f': case '\v':
4992         s++;
4993         goto retry;
4994     case '#':
4995     case '\n':
4996         if (PL_lex_state != LEX_NORMAL ||
4997              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4998             const bool in_comment = *s == '#';
4999             if (*s == '#' && s == PL_linestart && PL_in_eval
5000              && !PL_rsfp && !PL_parser->filtered) {
5001                 /* handle eval qq[#line 1 "foo"\n ...] */
5002                 CopLINE_dec(PL_curcop);
5003                 incline(s);
5004             }
5005             d = s;
5006             while (d < PL_bufend && *d != '\n')
5007                 d++;
5008             if (d < PL_bufend)
5009                 d++;
5010             else if (d > PL_bufend)
5011                 /* Found by Ilya: feed random input to Perl. */
5012                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5013                            d, PL_bufend);
5014             s = d;
5015             if (in_comment && d == PL_bufend
5016                 && PL_lex_state == LEX_INTERPNORMAL
5017                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5018                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5019             else
5020                 incline(s);
5021             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5022                 PL_lex_state = LEX_FORMLINE;
5023                 NEXTVAL_NEXTTOKE.ival = 0;
5024                 force_next(FORMRBRACK);
5025                 TOKEN(';');
5026             }
5027         }
5028         else {
5029             while (s < PL_bufend && *s != '\n')
5030                 s++;
5031             if (s < PL_bufend)
5032                 {
5033                     s++;
5034                     if (s < PL_bufend)
5035                         incline(s);
5036                 }
5037             else if (s > PL_bufend)
5038                 /* Found by Ilya: feed random input to Perl. */
5039                 Perl_croak(aTHX_ "panic: input overflow");
5040         }
5041         goto retry;
5042     case '-':
5043         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5044             I32 ftst = 0;
5045             char tmp;
5046
5047             s++;
5048             PL_bufptr = s;
5049             tmp = *s++;
5050
5051             while (s < PL_bufend && SPACE_OR_TAB(*s))
5052                 s++;
5053
5054             if (strnEQ(s,"=>",2)) {
5055                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5056                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5057                 OPERATOR('-');          /* unary minus */
5058             }
5059             switch (tmp) {
5060             case 'r': ftst = OP_FTEREAD;        break;
5061             case 'w': ftst = OP_FTEWRITE;       break;
5062             case 'x': ftst = OP_FTEEXEC;        break;
5063             case 'o': ftst = OP_FTEOWNED;       break;
5064             case 'R': ftst = OP_FTRREAD;        break;
5065             case 'W': ftst = OP_FTRWRITE;       break;
5066             case 'X': ftst = OP_FTREXEC;        break;
5067             case 'O': ftst = OP_FTROWNED;       break;
5068             case 'e': ftst = OP_FTIS;           break;
5069             case 'z': ftst = OP_FTZERO;         break;
5070             case 's': ftst = OP_FTSIZE;         break;
5071             case 'f': ftst = OP_FTFILE;         break;
5072             case 'd': ftst = OP_FTDIR;          break;
5073             case 'l': ftst = OP_FTLINK;         break;
5074             case 'p': ftst = OP_FTPIPE;         break;
5075             case 'S': ftst = OP_FTSOCK;         break;
5076             case 'u': ftst = OP_FTSUID;         break;
5077             case 'g': ftst = OP_FTSGID;         break;
5078             case 'k': ftst = OP_FTSVTX;         break;
5079             case 'b': ftst = OP_FTBLK;          break;
5080             case 'c': ftst = OP_FTCHR;          break;
5081             case 't': ftst = OP_FTTTY;          break;
5082             case 'T': ftst = OP_FTTEXT;         break;
5083             case 'B': ftst = OP_FTBINARY;       break;
5084             case 'M': case 'A': case 'C':
5085                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5086                 switch (tmp) {
5087                 case 'M': ftst = OP_FTMTIME;    break;
5088                 case 'A': ftst = OP_FTATIME;    break;
5089                 case 'C': ftst = OP_FTCTIME;    break;
5090                 default:                        break;
5091                 }
5092                 break;
5093             default:
5094                 break;
5095             }
5096             if (ftst) {
5097                 PL_last_uni = PL_oldbufptr;
5098                 PL_last_lop_op = (OPCODE)ftst;
5099                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5100                         "### Saw file test %c\n", (int)tmp);
5101                 } );
5102                 FTST(ftst);
5103             }
5104             else {
5105                 /* Assume it was a minus followed by a one-letter named
5106                  * subroutine call (or a -bareword), then. */
5107                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5108                         "### '-%c' looked like a file test but was not\n",
5109                         (int) tmp);
5110                 } );
5111                 s = --PL_bufptr;
5112             }
5113         }
5114         {
5115             const char tmp = *s++;
5116             if (*s == tmp) {
5117                 s++;
5118                 if (PL_expect == XOPERATOR)
5119                     TERM(POSTDEC);
5120                 else
5121                     OPERATOR(PREDEC);
5122             }
5123             else if (*s == '>') {
5124                 s++;
5125                 s = skipspace(s);
5126                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5127                     ((*s == '$' || *s == '&') && s[1] == '*')
5128                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5129                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5130                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5131                  ))
5132                 {
5133                     Perl_ck_warner_d(aTHX_
5134                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5135                         "Postfix dereference is experimental"
5136                     );
5137                     PL_expect = XPOSTDEREF;
5138                     TOKEN(ARROW);
5139                 }
5140                 if (isIDFIRST_lazy_if(s,UTF)) {
5141                     s = force_word(s,METHOD,FALSE,TRUE);
5142                     TOKEN(ARROW);
5143                 }
5144                 else if (*s == '$')
5145                     OPERATOR(ARROW);
5146                 else
5147                     TERM(ARROW);
5148             }
5149             if (PL_expect == XOPERATOR) {
5150                 if (*s == '=' && !PL_lex_allbrackets &&
5151                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5152                     s--;
5153                     TOKEN(0);
5154                 }
5155                 Aop(OP_SUBTRACT);
5156             }
5157             else {
5158                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5159                     check_uni();
5160                 OPERATOR('-');          /* unary minus */
5161             }
5162         }
5163
5164     case '+':
5165         {
5166             const char tmp = *s++;
5167             if (*s == tmp) {
5168                 s++;
5169                 if (PL_expect == XOPERATOR)
5170                     TERM(POSTINC);
5171                 else
5172                     OPERATOR(PREINC);
5173             }
5174             if (PL_expect == XOPERATOR) {
5175                 if (*s == '=' && !PL_lex_allbrackets &&
5176                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5177                     s--;
5178                     TOKEN(0);
5179                 }
5180                 Aop(OP_ADD);
5181             }
5182             else {
5183                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5184                     check_uni();
5185                 OPERATOR('+');
5186             }
5187         }
5188
5189     case '*':
5190         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5191         if (PL_expect != XOPERATOR) {
5192             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5193             PL_expect = XOPERATOR;
5194             force_ident(PL_tokenbuf, '*');
5195             if (!*PL_tokenbuf)
5196                 PREREF('*');
5197             TERM('*');
5198         }
5199         s++;
5200         if (*s == '*') {
5201             s++;
5202             if (*s == '=' && !PL_lex_allbrackets &&
5203                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5204                 s -= 2;
5205                 TOKEN(0);
5206             }
5207             PWop(OP_POW);
5208         }
5209         if (*s == '=' && !PL_lex_allbrackets &&
5210                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5211             s--;
5212             TOKEN(0);
5213         }
5214         PL_parser->saw_infix_sigil = 1;
5215         Mop(OP_MULTIPLY);
5216
5217     case '%':
5218     {
5219         if (PL_expect == XOPERATOR) {
5220             if (s[1] == '=' && !PL_lex_allbrackets &&
5221                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5222                 TOKEN(0);
5223             ++s;
5224             PL_parser->saw_infix_sigil = 1;
5225             Mop(OP_MODULO);
5226         }
5227         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5228         PL_tokenbuf[0] = '%';
5229         s = scan_ident(s, PL_tokenbuf + 1,
5230                 sizeof PL_tokenbuf - 1, FALSE);
5231         pl_yylval.ival = 0;
5232         if (!PL_tokenbuf[1]) {
5233             PREREF('%');
5234         }
5235         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5236             if (*s == '[')
5237                 PL_tokenbuf[0] = '@';
5238         }
5239         PL_expect = XOPERATOR;
5240         force_ident_maybe_lex('%');
5241         TERM('%');
5242     }
5243     case '^':
5244         d = s;
5245         bof = FEATURE_BITWISE_IS_ENABLED;
5246         if (bof && s[1] == '.')
5247             s++;
5248         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5249                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5250         {
5251             s = d;
5252             TOKEN(0);
5253         }
5254         s++;
5255         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5256     case '[':
5257         if (PL_lex_brackets > 100)
5258             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5259         PL_lex_brackstack[PL_lex_brackets++] = 0;
5260         PL_lex_allbrackets++;
5261         {
5262             const char tmp = *s++;
5263             OPERATOR(tmp);
5264         }
5265     case '~':
5266         if (s[1] == '~'
5267             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5268         {
5269             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5270                 TOKEN(0);
5271             s += 2;
5272             Perl_ck_warner_d(aTHX_
5273                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5274                 "Smartmatch is experimental");
5275             Eop(OP_SMARTMATCH);
5276         }
5277         s++;
5278         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5279             s++;
5280             BCop(OP_SCOMPLEMENT);
5281         }
5282         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5283     case ',':
5284         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5285             TOKEN(0);
5286         s++;
5287         OPERATOR(',');
5288     case ':':
5289         if (s[1] == ':') {
5290             len = 0;
5291             goto just_a_word_zero_gv;
5292         }
5293         s++;
5294         {
5295         OP *attrs;
5296
5297         switch (PL_expect) {
5298         case XOPERATOR:
5299             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5300                 break;
5301             PL_bufptr = s;      /* update in case we back off */
5302             if (*s == '=') {
5303                 Perl_croak(aTHX_
5304                            "Use of := for an empty attribute list is not allowed");
5305             }
5306             goto grabattrs;
5307         case XATTRBLOCK:
5308             PL_expect = XBLOCK;
5309             goto grabattrs;
5310         case XATTRTERM:
5311             PL_expect = XTERMBLOCK;
5312          grabattrs:
5313             s = skipspace(s);
5314             attrs = NULL;
5315             while (isIDFIRST_lazy_if(s,UTF)) {
5316                 I32 tmp;
5317                 SV *sv;
5318                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5319                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5320                     if (tmp < 0) tmp = -tmp;
5321                     switch (tmp) {
5322                     case KEY_or:
5323                     case KEY_and:
5324                     case KEY_for:
5325                     case KEY_foreach:
5326                     case KEY_unless:
5327                     case KEY_if:
5328                     case KEY_while:
5329                     case KEY_until:
5330                         goto got_attrs;
5331                     default:
5332                         break;
5333                     }
5334                 }
5335                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5336                 if (*d == '(') {
5337                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5338                     COPLINE_SET_FROM_MULTI_END;
5339                     if (!d) {
5340                         /* MUST advance bufptr here to avoid bogus
5341                            "at end of line" context messages from yyerror().
5342                          */
5343                         PL_bufptr = s + len;
5344                         yyerror("Unterminated attribute parameter in attribute list");
5345                         if (attrs)
5346                             op_free(attrs);
5347                         sv_free(sv);
5348                         return REPORT(0);       /* EOF indicator */
5349                     }
5350                 }
5351                 if (PL_lex_stuff) {
5352                     sv_catsv(sv, PL_lex_stuff);
5353                     attrs = op_append_elem(OP_LIST, attrs,
5354                                         newSVOP(OP_CONST, 0, sv));
5355                     SvREFCNT_dec(PL_lex_stuff);
5356                     PL_lex_stuff = NULL;
5357                 }
5358                 else {
5359                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5360                         sv_free(sv);
5361                         if (PL_in_my == KEY_our) {
5362                             deprecate(":unique");
5363                         }
5364                         else
5365                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5366                     }
5367
5368                     /* NOTE: any CV attrs applied here need to be part of
5369                        the CVf_BUILTIN_ATTRS define in cv.h! */
5370                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5371                         sv_free(sv);
5372                         CvLVALUE_on(PL_compcv);
5373                     }
5374                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5375                         sv_free(sv);
5376                         deprecate(":locked");
5377                     }
5378                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5379                         sv_free(sv);
5380                         CvMETHOD_on(PL_compcv);
5381                     }
5382                     else if (!PL_in_my && len == 5
5383                           && strnEQ(SvPVX(sv), "const", len))
5384                     {
5385                         sv_free(sv);
5386                         Perl_ck_warner_d(aTHX_
5387                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5388                            ":const is experimental"
5389                         );
5390                         CvANONCONST_on(PL_compcv);
5391                         if (!CvANON(PL_compcv))
5392                             yyerror(":const is not permitted on named "
5393                                     "subroutines");
5394                     }
5395                     /* After we've set the flags, it could be argued that
5396                        we don't need to do the attributes.pm-based setting
5397                        process, and shouldn't bother appending recognized
5398                        flags.  To experiment with that, uncomment the
5399                        following "else".  (Note that's already been
5400                        uncommented.  That keeps the above-applied built-in
5401                        attributes from being intercepted (and possibly
5402                        rejected) by a package's attribute routines, but is
5403                        justified by the performance win for the common case
5404                        of applying only built-in attributes.) */
5405                     else
5406                         attrs = op_append_elem(OP_LIST, attrs,
5407                                             newSVOP(OP_CONST, 0,
5408                                                     sv));
5409                 }
5410                 s = skipspace(d);
5411                 if (*s == ':' && s[1] != ':')
5412                     s = skipspace(s+1);
5413                 else if (s == d)
5414                     break;      /* require real whitespace or :'s */
5415                 /* XXX losing whitespace on sequential attributes here */
5416             }
5417             {
5418                 if (*s != ';' && *s != '}' &&
5419                     !(PL_expect == XOPERATOR
5420                         ? (*s == '=' ||  *s == ')')
5421                         : (*s == '{' ||  *s == '('))) {
5422                     const char q = ((*s == '\'') ? '"' : '\'');
5423                     /* If here for an expression, and parsed no attrs, back
5424                        off. */
5425                     if (PL_expect == XOPERATOR && !attrs) {
5426                         s = PL_bufptr;
5427                         break;
5428                     }
5429                     /* MUST advance bufptr here to avoid bogus "at end of line"
5430                        context messages from yyerror().
5431                     */
5432                     PL_bufptr = s;
5433                     yyerror( (const char *)
5434                              (*s
5435                               ? Perl_form(aTHX_ "Invalid separator character "
5436                                           "%c%c%c in attribute list", q, *s, q)
5437                               : "Unterminated attribute list" ) );
5438                     if (attrs)
5439                         op_free(attrs);
5440                     OPERATOR(':');
5441                 }
5442             }
5443         got_attrs:
5444             if (attrs) {
5445                 NEXTVAL_NEXTTOKE.opval = attrs;
5446                 force_next(THING);
5447             }
5448             TOKEN(COLONATTR);
5449         }
5450         }
5451         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5452             s--;
5453             TOKEN(0);
5454         }
5455         PL_lex_allbrackets--;
5456         OPERATOR(':');
5457     case '(':
5458         s++;
5459         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5460             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5461         else
5462             PL_expect = XTERM;
5463         s = skipspace(s);
5464         PL_lex_allbrackets++;
5465         TOKEN('(');
5466     case ';':
5467         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5468             TOKEN(0);
5469         CLINE;
5470         s++;
5471         PL_expect = XSTATE;
5472         TOKEN(';');
5473     case ')':
5474         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5475             TOKEN(0);
5476         s++;
5477         PL_lex_allbrackets--;
5478         s = skipspace(s);
5479         if (*s == '{')
5480             PREBLOCK(')');
5481         TERM(')');
5482     case ']':
5483         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5484             TOKEN(0);
5485         s++;
5486         if (PL_lex_brackets <= 0)
5487             /* diag_listed_as: Unmatched right %s bracket */
5488             yyerror("Unmatched right square bracket");
5489         else
5490             --PL_lex_brackets;
5491         PL_lex_allbrackets--;
5492         if (PL_lex_state == LEX_INTERPNORMAL) {
5493             if (PL_lex_brackets == 0) {
5494                 if (*s == '-' && s[1] == '>')
5495                     PL_lex_state = LEX_INTERPENDMAYBE;
5496                 else if (*s != '[' && *s != '{')
5497                     PL_lex_state = LEX_INTERPEND;
5498             }
5499         }
5500         TERM(']');
5501     case '{':
5502         s++;
5503       leftbracket:
5504         if (PL_lex_brackets > 100) {
5505             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5506         }
5507         switch (PL_expect) {
5508         case XTERM:
5509             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5510             PL_lex_allbrackets++;
5511             OPERATOR(HASHBRACK);
5512         case XOPERATOR:
5513             while (s < PL_bufend && SPACE_OR_TAB(*s))
5514                 s++;
5515             d = s;
5516             PL_tokenbuf[0] = '\0';
5517             if (d < PL_bufend && *d == '-') {
5518                 PL_tokenbuf[0] = '-';
5519                 d++;
5520                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5521                     d++;
5522             }
5523             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5524                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5525                               FALSE, &len);
5526                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5527                     d++;
5528                 if (*d == '}') {
5529                     const char minus = (PL_tokenbuf[0] == '-');
5530                     s = force_word(s + minus, WORD, FALSE, TRUE);
5531                     if (minus)
5532                         force_next('-');
5533                 }
5534             }
5535             /* FALLTHROUGH */
5536         case XATTRTERM:
5537         case XTERMBLOCK:
5538             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5539             PL_lex_allbrackets++;
5540             PL_expect = XSTATE;
5541             break;
5542         case XATTRBLOCK:
5543         case XBLOCK:
5544             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5545             PL_lex_allbrackets++;
5546             PL_expect = XSTATE;
5547             break;
5548         case XBLOCKTERM:
5549             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5550             PL_lex_allbrackets++;
5551             PL_expect = XSTATE;
5552             break;
5553         default: {
5554                 const char *t;
5555                 if (PL_oldoldbufptr == PL_last_lop)
5556                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5557                 else
5558                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5559                 PL_lex_allbrackets++;
5560                 s = skipspace(s);
5561                 if (*s == '}') {
5562                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5563                         PL_expect = XTERM;
5564                         /* This hack is to get the ${} in the message. */
5565                         PL_bufptr = s+1;
5566                         yyerror("syntax error");
5567                         break;
5568                     }
5569                     OPERATOR(HASHBRACK);
5570                 }
5571                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5572                     /* ${...} or @{...} etc., but not print {...}
5573                      * Skip the disambiguation and treat this as a block.
5574                      */
5575                     goto block_expectation;
5576                 }
5577                 /* This hack serves to disambiguate a pair of curlies
5578                  * as being a block or an anon hash.  Normally, expectation
5579                  * determines that, but in cases where we're not in a
5580                  * position to expect anything in particular (like inside
5581                  * eval"") we have to resolve the ambiguity.  This code
5582                  * covers the case where the first term in the curlies is a
5583                  * quoted string.  Most other cases need to be explicitly
5584                  * disambiguated by prepending a "+" before the opening
5585                  * curly in order to force resolution as an anon hash.
5586                  *
5587                  * XXX should probably propagate the outer expectation
5588                  * into eval"" to rely less on this hack, but that could
5589                  * potentially break current behavior of eval"".
5590                  * GSAR 97-07-21
5591                  */
5592                 t = s;
5593                 if (*s == '\'' || *s == '"' || *s == '`') {
5594                     /* common case: get past first string, handling escapes */
5595                     for (t++; t < PL_bufend && *t != *s;)
5596                         if (*t++ == '\\')
5597                             t++;
5598                     t++;
5599                 }
5600                 else if (*s == 'q') {
5601                     if (++t < PL_bufend
5602                         && (!isWORDCHAR(*t)
5603                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5604                                 && !isWORDCHAR(*t))))
5605                     {
5606                         /* skip q//-like construct */
5607                         const char *tmps;
5608                         char open, close, term;
5609                         I32 brackets = 1;
5610
5611                         while (t < PL_bufend && isSPACE(*t))
5612                             t++;
5613                         /* check for q => */
5614                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5615                             OPERATOR(HASHBRACK);
5616                         }
5617                         term = *t;
5618                         open = term;
5619                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5620                             term = tmps[5];
5621                         close = term;
5622                         if (open == close)
5623                             for (t++; t < PL_bufend; t++) {
5624                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5625                                     t++;
5626                                 else if (*t == open)
5627                                     break;
5628                             }
5629                         else {
5630                             for (t++; t < PL_bufend; t++) {
5631                                 if (*t == '\\' && t+1 < PL_bufend)
5632                                     t++;
5633                                 else if (*t == close && --brackets <= 0)
5634                                     break;
5635                                 else if (*t == open)
5636                                     brackets++;
5637                             }
5638                         }
5639                         t++;
5640                     }
5641                     else
5642                         /* skip plain q word */
5643                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5644                              t += UTF8SKIP(t);
5645                 }
5646                 else if (isWORDCHAR_lazy_if(t,UTF)) {
5647                     t += UTF8SKIP(t);
5648                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5649                          t += UTF8SKIP(t);
5650                 }
5651                 while (t < PL_bufend && isSPACE(*t))
5652                     t++;
5653                 /* if comma follows first term, call it an anon hash */
5654                 /* XXX it could be a comma expression with loop modifiers */
5655                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5656                                    || (*t == '=' && t[1] == '>')))
5657                     OPERATOR(HASHBRACK);
5658                 if (PL_expect == XREF)
5659                 {
5660                   block_expectation:
5661                     /* If there is an opening brace or 'sub:', treat it
5662                        as a term to make ${{...}}{k} and &{sub:attr...}
5663                        dwim.  Otherwise, treat it as a statement, so
5664                        map {no strict; ...} works.
5665                      */
5666                     s = skipspace(s);
5667                     if (*s == '{') {
5668                         PL_expect = XTERM;
5669                         break;
5670                     }
5671                     if (strnEQ(s, "sub", 3)) {
5672                         d = s + 3;
5673                         d = skipspace(d);
5674                         if (*d == ':') {
5675                             PL_expect = XTERM;
5676                             break;
5677                         }
5678                     }
5679                     PL_expect = XSTATE;
5680                 }
5681                 else {
5682                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5683                     PL_expect = XSTATE;
5684                 }
5685             }
5686             break;
5687         }
5688         pl_yylval.ival = CopLINE(PL_curcop);
5689         PL_copline = NOLINE;   /* invalidate current command line number */
5690         TOKEN(formbrack ? '=' : '{');
5691     case '}':
5692         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5693             TOKEN(0);
5694       rightbracket:
5695         s++;
5696         if (PL_lex_brackets <= 0)
5697             /* diag_listed_as: Unmatched right %s bracket */
5698             yyerror("Unmatched right curly bracket");
5699         else
5700             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5701         PL_lex_allbrackets--;
5702         if (PL_lex_state == LEX_INTERPNORMAL) {
5703             if (PL_lex_brackets == 0) {
5704                 if (PL_expect & XFAKEBRACK) {
5705                     PL_expect &= XENUMMASK;
5706                     PL_lex_state = LEX_INTERPEND;
5707                     PL_bufptr = s;
5708                     return yylex();     /* ignore fake brackets */
5709                 }
5710                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5711                  && SvEVALED(PL_lex_repl))
5712                     PL_lex_state = LEX_INTERPEND;
5713                 else if (*s == '-' && s[1] == '>')
5714                     PL_lex_state = LEX_INTERPENDMAYBE;
5715                 else if (*s != '[' && *s != '{')
5716                     PL_lex_state = LEX_INTERPEND;
5717             }
5718         }
5719         if (PL_expect & XFAKEBRACK) {
5720             PL_expect &= XENUMMASK;
5721             PL_bufptr = s;
5722             return yylex();             /* ignore fake brackets */
5723         }
5724         force_next(formbrack ? '.' : '}');
5725         if (formbrack) LEAVE;
5726         if (formbrack == 2) { /* means . where arguments were expected */
5727             force_next(';');
5728             TOKEN(FORMRBRACK);
5729         }
5730         TOKEN(';');
5731     case '&':
5732         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5733         s++;
5734         if (*s++ == '&') {
5735             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5736                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5737                 s -= 2;
5738                 TOKEN(0);
5739             }
5740             AOPERATOR(ANDAND);
5741         }
5742         s--;
5743         if (PL_expect == XOPERATOR) {
5744             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5745                 && isIDFIRST_lazy_if(s,UTF))
5746             {
5747                 CopLINE_dec(PL_curcop);
5748                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5749                 CopLINE_inc(PL_curcop);
5750             }
5751             d = s;
5752             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5753                 s++;
5754             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5756                 s = d;
5757                 s--;
5758                 TOKEN(0);
5759             }
5760             if (d == s) {
5761                 PL_parser->saw_infix_sigil = 1;
5762                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5763             }
5764             else
5765                 BAop(OP_SBIT_AND);
5766         }
5767
5768         PL_tokenbuf[0] = '&';
5769         s = scan_ident(s - 1, PL_tokenbuf + 1,
5770                        sizeof PL_tokenbuf - 1, TRUE);
5771         if (PL_tokenbuf[1]) {
5772             PL_expect = XOPERATOR;
5773             force_ident_maybe_lex('&');
5774         }
5775         else
5776             PREREF('&');
5777         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5778         TERM('&');
5779
5780     case '|':
5781         s++;
5782         if (*s++ == '|') {
5783             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5784                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5785                 s -= 2;
5786                 TOKEN(0);
5787             }
5788             AOPERATOR(OROR);
5789         }
5790         s--;
5791         d = s;
5792         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5793             s++;
5794         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5795                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5796             s = d - 1;
5797             TOKEN(0);
5798         }
5799         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
5800     case '=':
5801         s++;
5802         {
5803             const char tmp = *s++;
5804             if (tmp == '=') {
5805                 if (!PL_lex_allbrackets &&
5806                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5807                     s -= 2;
5808                     TOKEN(0);
5809                 }
5810                 Eop(OP_EQ);
5811             }
5812             if (tmp == '>') {
5813                 if (!PL_lex_allbrackets &&
5814                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5815                     s -= 2;
5816                     TOKEN(0);
5817                 }
5818                 OPERATOR(',');
5819             }
5820             if (tmp == '~')
5821                 PMop(OP_MATCH);
5822             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5823                 && strchr("+-*/%.^&|<",tmp))
5824                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5825                             "Reversed %c= operator",(int)tmp);
5826             s--;
5827             if (PL_expect == XSTATE && isALPHA(tmp) &&
5828                 (s == PL_linestart+1 || s[-2] == '\n') )
5829             {
5830                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5831                     || PL_lex_state != LEX_NORMAL) {
5832                     d = PL_bufend;
5833                     while (s < d) {
5834                         if (*s++ == '\n') {
5835                             incline(s);
5836                             if (strnEQ(s,"=cut",4)) {
5837                                 s = strchr(s,'\n');
5838                                 if (s)
5839                                     s++;
5840                                 else
5841                                     s = d;
5842                                 incline(s);
5843                                 goto retry;
5844                             }
5845                         }
5846                     }
5847                     goto retry;
5848                 }
5849                 s = PL_bufend;
5850                 PL_parser->in_pod = 1;
5851                 goto retry;
5852             }
5853         }
5854         if (PL_expect == XBLOCK) {
5855             const char *t = s;
5856 #ifdef PERL_STRICT_CR
5857             while (SPACE_OR_TAB(*t))
5858 #else
5859             while (SPACE_OR_TAB(*t) || *t == '\r')
5860 #endif
5861                 t++;
5862             if (*t == '\n' || *t == '#') {
5863                 formbrack = 1;
5864                 ENTER;
5865                 SAVEI8(PL_parser->form_lex_state);
5866                 SAVEI32(PL_lex_formbrack);
5867                 PL_parser->form_lex_state = PL_lex_state;
5868                 PL_lex_formbrack = PL_lex_brackets + 1;
5869                 goto leftbracket;
5870             }
5871         }
5872         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5873             s--;
5874             TOKEN(0);
5875         }
5876         pl_yylval.ival = 0;
5877         OPERATOR(ASSIGNOP);
5878     case '!':
5879         s++;
5880         {
5881             const char tmp = *s++;
5882             if (tmp == '=') {
5883                 /* was this !=~ where !~ was meant?
5884                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5885
5886                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5887                     const char *t = s+1;
5888
5889                     while (t < PL_bufend && isSPACE(*t))
5890                         ++t;
5891
5892                     if (*t == '/' || *t == '?' ||
5893                         ((*t == 'm' || *t == 's' || *t == 'y')
5894                          && !isWORDCHAR(t[1])) ||
5895                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5896                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5897                                     "!=~ should be !~");
5898                 }
5899                 if (!PL_lex_allbrackets &&
5900                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5901                     s -= 2;
5902                     TOKEN(0);
5903                 }
5904                 Eop(OP_NE);
5905             }
5906             if (tmp == '~')
5907                 PMop(OP_NOT);
5908         }
5909         s--;
5910         OPERATOR('!');
5911     case '<':
5912         if (PL_expect != XOPERATOR) {
5913             if (s[1] != '<' && !strchr(s,'>'))
5914                 check_uni();
5915             if (s[1] == '<' && s[2] != '>')
5916                 s = scan_heredoc(s);
5917             else
5918                 s = scan_inputsymbol(s);
5919             PL_expect = XOPERATOR;
5920             TOKEN(sublex_start());
5921         }
5922         s++;
5923         {
5924             char tmp = *s++;
5925             if (tmp == '<') {
5926                 if (*s == '=' && !PL_lex_allbrackets &&
5927                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5928                     s -= 2;
5929                     TOKEN(0);
5930                 }
5931                 SHop(OP_LEFT_SHIFT);
5932             }
5933             if (tmp == '=') {
5934                 tmp = *s++;
5935                 if (tmp == '>') {
5936                     if (!PL_lex_allbrackets &&
5937                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5938                         s -= 3;
5939                         TOKEN(0);
5940                     }
5941                     Eop(OP_NCMP);
5942                 }
5943                 s--;
5944                 if (!PL_lex_allbrackets &&
5945                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5946                     s -= 2;
5947                     TOKEN(0);
5948                 }
5949                 Rop(OP_LE);
5950             }
5951         }
5952         s--;
5953         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5954             s--;
5955             TOKEN(0);
5956         }
5957         Rop(OP_LT);
5958     case '>':
5959         s++;
5960         {
5961             const char tmp = *s++;
5962             if (tmp == '>') {
5963                 if (*s == '=' && !PL_lex_allbrackets &&
5964                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5965                     s -= 2;
5966                     TOKEN(0);
5967                 }
5968                 SHop(OP_RIGHT_SHIFT);
5969             }
5970             else if (tmp == '=') {
5971                 if (!PL_lex_allbrackets &&
5972                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5973                     s -= 2;
5974                     TOKEN(0);
5975                 }
5976                 Rop(OP_GE);
5977             }
5978         }
5979         s--;
5980         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5981             s--;
5982             TOKEN(0);
5983         }
5984         Rop(OP_GT);
5985
5986     case '$':
5987         CLINE;
5988
5989         if (PL_expect == XOPERATOR) {
5990             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5991                 return deprecate_commaless_var_list();
5992             }
5993         }
5994         else if (PL_expect == XPOSTDEREF) {
5995             if (s[1] == '#') {
5996                 s++;
5997                 POSTDEREF(DOLSHARP);
5998             }
5999             POSTDEREF('$');
6000         }
6001
6002         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6003             PL_tokenbuf[0] = '@';
6004             s = scan_ident(s + 1, PL_tokenbuf + 1,
6005                            sizeof PL_tokenbuf - 1, FALSE);
6006             if (PL_expect == XOPERATOR)
6007                 no_op("Array length", s);
6008             if (!PL_tokenbuf[1])
6009                 PREREF(DOLSHARP);
6010             PL_expect = XOPERATOR;
6011             force_ident_maybe_lex('#');
6012             TOKEN(DOLSHARP);
6013         }
6014
6015         PL_tokenbuf[0] = '$';
6016         s = scan_ident(s, PL_tokenbuf + 1,
6017                        sizeof PL_tokenbuf - 1, FALSE);
6018         if (PL_expect == XOPERATOR)
6019             no_op("Scalar", s);
6020         if (!PL_tokenbuf[1]) {
6021             if (s == PL_bufend)
6022                 yyerror("Final $ should be \\$ or $name");
6023             PREREF('$');
6024         }
6025
6026         d = s;
6027         {
6028             const char tmp = *s;
6029             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6030                 s = skipspace(s);
6031
6032             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6033                 && intuit_more(s)) {
6034                 if (*s == '[') {
6035                     PL_tokenbuf[0] = '@';
6036                     if (ckWARN(WARN_SYNTAX)) {
6037                         char *t = s+1;
6038
6039                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6040                             t++;
6041                         if (*t++ == ',') {
6042                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6043                             while (t < PL_bufend && *t != ']')
6044                                 t++;
6045                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6046                                         "Multidimensional syntax %.*s not supported",
6047                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6048                         }
6049                     }
6050                 }
6051                 else if (*s == '{') {
6052                     char *t;
6053                     PL_tokenbuf[0] = '%';
6054                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6055                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6056                         {
6057                             char tmpbuf[sizeof PL_tokenbuf];
6058                             do {
6059                                 t++;
6060                             } while (isSPACE(*t));
6061                             if (isIDFIRST_lazy_if(t,UTF)) {
6062                                 STRLEN len;
6063                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6064                                               &len);
6065                                 while (isSPACE(*t))
6066                                     t++;
6067                                 if (*t == ';'
6068                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6069                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6070                                         "You need to quote \"%"UTF8f"\"",
6071                                          UTF8fARG(UTF, len, tmpbuf));
6072                             }
6073                         }
6074                 }
6075             }
6076
6077             PL_expect = XOPERATOR;
6078             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6079                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6080                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6081                     PL_expect = XOPERATOR;
6082                 else if (strchr("$@\"'`q", *s))
6083                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6084                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6085                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6086                 else if (isIDFIRST_lazy_if(s,UTF)) {
6087                     char tmpbuf[sizeof PL_tokenbuf];
6088                     int t2;
6089                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6090                     if ((t2 = keyword(tmpbuf, len, 0))) {
6091                         /* binary operators exclude handle interpretations */
6092                         switch (t2) {
6093                         case -KEY_x:
6094                         case -KEY_eq:
6095                         case -KEY_ne:
6096                         case -KEY_gt:
6097                         case -KEY_lt:
6098                         case -KEY_ge:
6099                         case -KEY_le:
6100                         case -KEY_cmp:
6101                             break;
6102                         default:
6103                             PL_expect = XTERM;  /* e.g. print $fh length() */
6104                             break;
6105                         }
6106                     }
6107                     else {
6108                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6109                     }
6110                 }
6111                 else if (isDIGIT(*s))
6112                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6113                 else if (*s == '.' && isDIGIT(s[1]))
6114                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6115                 else if ((*s == '?' || *s == '-' || *s == '+')
6116                          && !isSPACE(s[1]) && s[1] != '=')
6117                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6118                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6119                          && s[1] != '/')
6120                     PL_expect = XTERM;          /* e.g. print $fh /.../
6121                                                    XXX except DORDOR operator
6122                                                 */
6123                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6124                          && s[2] != '=')
6125                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6126             }
6127         }
6128         force_ident_maybe_lex('$');
6129         TOKEN('$');
6130
6131     case '@':
6132         if (PL_expect == XOPERATOR)
6133             no_op("Array", s);
6134         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6135         PL_tokenbuf[0] = '@';
6136         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6137         pl_yylval.ival = 0;
6138         if (!PL_tokenbuf[1]) {
6139             PREREF('@');
6140         }
6141         if (PL_lex_state == LEX_NORMAL)
6142             s = skipspace(s);
6143         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6144             if (*s == '{')
6145                 PL_tokenbuf[0] = '%';
6146
6147             /* Warn about @ where they meant $. */
6148             if (*s == '[' || *s == '{') {
6149                 if (ckWARN(WARN_SYNTAX)) {
6150                     S_check_scalar_slice(aTHX_ s);
6151                 }
6152             }
6153         }
6154         PL_expect = XOPERATOR;
6155         force_ident_maybe_lex('@');
6156         TERM('@');
6157
6158      case '/':                  /* may be division, defined-or, or pattern */
6159         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6160             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6161                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6162                 TOKEN(0);
6163             s += 2;
6164             AOPERATOR(DORDOR);
6165         }
6166         else if (PL_expect == XOPERATOR) {
6167             s++;
6168             if (*s == '=' && !PL_lex_allbrackets &&
6169                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6170                 s--;
6171                 TOKEN(0);
6172             }
6173             Mop(OP_DIVIDE);
6174         }
6175         else {
6176             /* Disable warning on "study /blah/" */
6177             if (PL_oldoldbufptr == PL_last_uni
6178              && (*PL_last_uni != 's' || s - PL_last_uni < 5
6179                  || memNE(PL_last_uni, "study", 5)
6180                  || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6181              ))
6182                 check_uni();
6183             s = scan_pat(s,OP_MATCH);
6184             TERM(sublex_start());
6185         }
6186
6187      case '?':                  /* conditional */
6188         s++;
6189         if (!PL_lex_allbrackets &&
6190             PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6191             s--;
6192             TOKEN(0);
6193         }
6194         PL_lex_allbrackets++;
6195         OPERATOR('?');
6196
6197     case '.':
6198         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6199 #ifdef PERL_STRICT_CR
6200             && s[1] == '\n'
6201 #else
6202             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6203 #endif
6204             && (s == PL_linestart || s[-1] == '\n') )
6205         {
6206             PL_expect = XSTATE;
6207             formbrack = 2; /* dot seen where arguments expected */
6208             goto rightbracket;
6209         }
6210         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6211             s += 3;
6212             OPERATOR(YADAYADA);
6213         }
6214         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6215             char tmp = *s++;
6216             if (*s == tmp) {
6217                 if (!PL_lex_allbrackets &&
6218                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6219                     s--;
6220                     TOKEN(0);
6221                 }
6222                 s++;
6223                 if (*s == tmp) {
6224                     s++;
6225                     pl_yylval.ival = OPf_SPECIAL;
6226                 }
6227                 else
6228                     pl_yylval.ival = 0;
6229                 OPERATOR(DOTDOT);
6230             }
6231             if (*s == '=' && !PL_lex_allbrackets &&
6232                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6233                 s--;
6234                 TOKEN(0);
6235             }
6236             Aop(OP_CONCAT);
6237         }
6238         /* FALLTHROUGH */
6239     case '0': case '1': case '2': case '3': case '4':
6240     case '5': case '6': case '7': case '8': case '9':
6241         s = scan_num(s, &pl_yylval);
6242         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6243         if (PL_expect == XOPERATOR)
6244             no_op("Number",s);
6245         TERM(THING);
6246
6247     case '\'':
6248         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6249         if (!s)
6250             missingterm(NULL);
6251         COPLINE_SET_FROM_MULTI_END;
6252         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6253         if (PL_expect == XOPERATOR) {
6254             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6255                 return deprecate_commaless_var_list();
6256             }
6257             else
6258                 no_op("String",s);
6259         }
6260         pl_yylval.ival = OP_CONST;
6261         TERM(sublex_start());
6262
6263     case '"':
6264         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6265         DEBUG_T( {
6266             if (s)
6267                 printbuf("### Saw string before %s\n", s);
6268             else
6269                 PerlIO_printf(Perl_debug_log,
6270                              "### Saw unterminated string\n");
6271         } );
6272         if (PL_expect == XOPERATOR) {
6273             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6274                 return deprecate_commaless_var_list();
6275             }
6276             else
6277                 no_op("String",s);
6278         }
6279         if (!s)
6280             missingterm(NULL);
6281         pl_yylval.ival = OP_CONST;
6282         /* FIXME. I think that this can be const if char *d is replaced by
6283            more localised variables.  */
6284         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6285             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6286                 pl_yylval.ival = OP_STRINGIFY;
6287                 break;
6288             }
6289         }
6290         if (pl_yylval.ival == OP_CONST)
6291             COPLINE_SET_FROM_MULTI_END;
6292         TERM(sublex_start());
6293
6294     case '`':
6295         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6296         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6297         if (PL_expect == XOPERATOR)
6298             no_op("Backticks",s);
6299         if (!s)
6300             missingterm(NULL);
6301         pl_yylval.ival = OP_BACKTICK;
6302         TERM(sublex_start());
6303
6304     case '\\':
6305         s++;
6306         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6307          && isDIGIT(*s))
6308             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6309                            *s, *s);
6310         if (PL_expect == XOPERATOR)
6311             no_op("Backslash",s);
6312         OPERATOR(REFGEN);
6313
6314     case 'v':
6315         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6316             char *start = s + 2;
6317             while (isDIGIT(*start) || *start == '_')
6318                 start++;
6319             if (*start == '.' && isDIGIT(start[1])) {
6320                 s = scan_num(s, &pl_yylval);
6321                 TERM(THING);
6322             }
6323             else if ((*start == ':' && start[1] == ':')
6324                   || (PL_expect == XSTATE && *start == ':'))
6325                 goto keylookup;
6326             else if (PL_expect == XSTATE) {
6327                 d = start;
6328                 while (d < PL_bufend && isSPACE(*d)) d++;
6329                 if (*d == ':') goto keylookup;
6330             }
6331             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6332             if (!isALPHA(*start) && (PL_expect == XTERM
6333                         || PL_expect == XREF || PL_expect == XSTATE
6334                         || PL_expect == XTERMORDORDOR)) {
6335                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6336                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6337                 if (!gv) {
6338                     s = scan_num(s, &pl_yylval);
6339                     TERM(THING);
6340                 }
6341             }
6342         }
6343         goto keylookup;
6344     case 'x':
6345         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6346             s++;
6347             Mop(OP_REPEAT);
6348         }
6349         goto keylookup;
6350
6351     case '_':
6352     case 'a': case 'A':
6353     case 'b': case 'B':
6354     case 'c': case 'C':
6355     case 'd': case 'D':
6356     case 'e': case 'E':
6357     case 'f': case 'F':
6358     case 'g': case 'G':
6359     case 'h': case 'H':
6360     case 'i': case 'I':
6361     case 'j': case 'J':
6362     case 'k': case 'K':
6363     case 'l': case 'L':
6364     case 'm': case 'M':
6365     case 'n': case 'N':
6366     case 'o': case 'O':
6367     case 'p': case 'P':
6368     case 'q': case 'Q':
6369     case 'r': case 'R':
6370     case 's': case 'S':
6371     case 't': case 'T':
6372     case 'u': case 'U':
6373               case 'V':
6374     case 'w': case 'W':
6375               case 'X':
6376     case 'y': case 'Y':
6377     case 'z': case 'Z':
6378
6379       keylookup: {
6380         bool anydelim;
6381         bool lex;
6382         I32 tmp;
6383         SV *sv;
6384         CV *cv;
6385         PADOFFSET off;
6386         OP *rv2cv_op;
6387
6388         lex = FALSE;
6389         orig_keyword = 0;
6390         off = 0;
6391         sv = NULL;
6392         cv = NULL;
6393         gv = NULL;
6394         gvp = NULL;
6395         rv2cv_op = NULL;
6396
6397         PL_bufptr = s;
6398         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6399
6400         /* Some keywords can be followed by any delimiter, including ':' */
6401         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6402
6403         /* x::* is just a word, unless x is "CORE" */
6404         if (!anydelim && *s == ':' && s[1] == ':') {
6405             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6406             goto just_a_word;
6407         }
6408
6409         d = s;
6410         while (d < PL_bufend && isSPACE(*d))
6411                 d++;    /* no comments skipped here, or s### is misparsed */
6412
6413         /* Is this a word before a => operator? */
6414         if (*d == '=' && d[1] == '>') {
6415           fat_arrow:
6416             CLINE;
6417             pl_yylval.opval
6418                 = (OP*)newSVOP(OP_CONST, 0,
6419                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6420             pl_yylval.opval->op_private = OPpCONST_BARE;
6421             TERM(WORD);
6422         }
6423
6424         /* Check for plugged-in keyword */
6425         {
6426             OP *o;
6427             int result;
6428             char *saved_bufptr = PL_bufptr;
6429             PL_bufptr = s;
6430             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6431             s = PL_bufptr;
6432             if (result == KEYWORD_PLUGIN_DECLINE) {
6433                 /* not a plugged-in keyword */
6434                 PL_bufptr = saved_bufptr;
6435             } else if (result == KEYWORD_PLUGIN_STMT) {
6436                 pl_yylval.opval = o;
6437                 CLINE;
6438                 if (!PL_nexttoke) PL_expect = XSTATE;
6439                 return REPORT(PLUGSTMT);
6440             } else if (result == KEYWORD_PLUGIN_EXPR) {
6441                 pl_yylval.opval = o;
6442                 CLINE;
6443                 if (!PL_nexttoke) PL_expect = XOPERATOR;
6444                 return REPORT(PLUGEXPR);
6445             } else {
6446                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6447                                         PL_tokenbuf);
6448             }
6449         }
6450
6451         /* Check for built-in keyword */
6452         tmp = keyword(PL_tokenbuf, len, 0);
6453
6454         /* Is this a label? */
6455         if (!anydelim && PL_expect == XSTATE
6456               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6457             s = d + 1;
6458             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6459             pl_yylval.pval[len] = '\0';
6460             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6461             CLINE;
6462             TOKEN(LABEL);
6463         }
6464
6465         /* Check for lexical sub */
6466         if (PL_expect != XOPERATOR) {
6467             char tmpbuf[sizeof PL_tokenbuf + 1];
6468             *tmpbuf = '&';
6469             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6470             off = pad_findmy_pvn(tmpbuf, len+1, 0);
6471             if (off != NOT_IN_PAD) {
6472                 assert(off); /* we assume this is boolean-true below */
6473                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6474                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6475                     HEK * const stashname = HvNAME_HEK(stash);
6476                     sv = newSVhek(stashname);
6477                     sv_catpvs(sv, "::");
6478                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6479                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6480                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6481                                     SVt_PVCV);
6482                     off = 0;
6483                     if (!gv) {
6484                         sv_free(sv);
6485                         sv = NULL;
6486                         goto just_a_word;
6487                     }
6488                 }
6489                 else {
6490                     rv2cv_op = newOP(OP_PADANY, 0);
6491                     rv2cv_op->op_targ = off;
6492                     cv = find_lexical_cv(off);
6493                 }
6494                 lex = TRUE;
6495                 goto just_a_word;
6496             }
6497             off = 0;
6498         }
6499
6500         if (tmp < 0) {                  /* second-class keyword? */
6501             GV *ogv = NULL;     /* override (winner) */
6502             GV *hgv = NULL;     /* hidden (loser) */
6503             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6504                 CV *cv;
6505                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6506                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6507                                             SVt_PVCV)) &&
6508                     (cv = GvCVu(gv)))
6509                 {
6510                     if (GvIMPORTED_CV(gv))
6511                         ogv = gv;
6512                     else if (! CvMETHOD(cv))
6513                         hgv = gv;
6514                 }
6515                 if (!ogv &&
6516                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6517                                           len, FALSE)) &&
6518                     (gv = *gvp) && (
6519                         isGV_with_GP(gv)
6520                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
6521                             :   SvPCS_IMPORTED(gv)
6522                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6523                                          len, 0), 1)
6524                    ))
6525                 {
6526                     ogv = gv;
6527                 }
6528             }
6529             if (ogv) {
6530                 orig_keyword = tmp;
6531                 tmp = 0;                /* overridden by import or by GLOBAL */
6532             }
6533             else if (gv && !gvp
6534                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6535                      && GvCVu(gv))
6536             {
6537                 tmp = 0;                /* any sub overrides "weak" keyword */
6538             }
6539             else {                      /* no override */
6540                 tmp = -tmp;
6541                 if (tmp == KEY_dump) {
6542                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6543                                    "dump() better written as CORE::dump()");
6544                 }
6545                 gv = NULL;
6546                 gvp = 0;
6547                 if (hgv && tmp != KEY_x)        /* never ambiguous */
6548                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6549                                    "Ambiguous call resolved as CORE::%s(), "
6550                                    "qualify as such or use &",
6551                                    GvENAME(hgv));
6552             }
6553         }
6554
6555         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6556          && (!anydelim || *s != '#')) {
6557             /* no override, and not s### either; skipspace is safe here
6558              * check for => on following line */
6559             bool arrow;
6560             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6561             STRLEN   soff = s         - SvPVX(PL_linestr);
6562             s = skipspace_flags(s, LEX_NO_INCLINE);
6563             arrow = *s == '=' && s[1] == '>';
6564             PL_bufptr = SvPVX(PL_linestr) + bufoff;
6565             s         = SvPVX(PL_linestr) +   soff;
6566             if (arrow)
6567                 goto fat_arrow;
6568         }
6569
6570       reserved_word:
6571         switch (tmp) {
6572
6573         default:                        /* not a keyword */
6574             /* Trade off - by using this evil construction we can pull the
6575                variable gv into the block labelled keylookup. If not, then
6576                we have to give it function scope so that the goto from the
6577                earlier ':' case doesn't bypass the initialisation.  */
6578             if (0) {
6579             just_a_word_zero_gv:
6580                 sv = NULL;
6581                 cv = NULL;
6582                 gv = NULL;
6583                 gvp = NULL;
6584                 rv2cv_op = NULL;
6585                 orig_keyword = 0;
6586                 lex = 0;
6587                 off = 0;
6588             }
6589           just_a_word: {
6590                 int pkgname = 0;
6591                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6592                 bool safebw;
6593
6594
6595                 /* Get the rest if it looks like a package qualifier */
6596
6597                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6598                     STRLEN morelen;
6599                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6600                                   TRUE, &morelen);
6601                     if (!morelen)
6602                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6603                                 UTF8fARG(UTF, len, PL_tokenbuf),
6604                                 *s == '\'' ? "'" : "::");
6605                     len += morelen;
6606                     pkgname = 1;
6607                 }
6608
6609                 if (PL_expect == XOPERATOR) {
6610                     if (PL_bufptr == PL_linestart) {
6611                         CopLINE_dec(PL_curcop);
6612                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6613                         CopLINE_inc(PL_curcop);
6614                     }
6615                     else
6616                         no_op("Bareword",s);
6617                 }
6618
6619                 /* See if the name is "Foo::",
6620                    in which case Foo is a bareword
6621                    (and a package name). */
6622
6623                 if (len > 2 &&
6624                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6625                 {
6626                     if (ckWARN(WARN_BAREWORD)
6627                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6628                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6629                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
6630                            UTF8fARG(UTF, len, PL_tokenbuf));
6631                     len -= 2;
6632                     PL_tokenbuf[len] = '\0';
6633                     gv = NULL;
6634                     gvp = 0;
6635                     safebw = TRUE;
6636                 }
6637                 else {
6638                     safebw = FALSE;
6639                 }
6640
6641                 /* if we saw a global override before, get the right name */
6642
6643                 if (!sv)
6644                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6645                                                 len);
6646                 if (gvp) {
6647                     SV * const tmp_sv = sv;
6648                     sv = newSVpvs("CORE::GLOBAL::");
6649                     sv_catsv(sv, tmp_sv);
6650                     SvREFCNT_dec(tmp_sv);
6651                 }
6652
6653
6654                 /* Presume this is going to be a bareword of some sort. */
6655                 CLINE;
6656                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6657                 pl_yylval.opval->op_private = OPpCONST_BARE;
6658
6659                 /* And if "Foo::", then that's what it certainly is. */
6660                 if (safebw)
6661                     goto safe_bareword;
6662
6663                 if (!off)
6664                 {
6665                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6666                     const_op->op_private = OPpCONST_BARE;
6667                     rv2cv_op =
6668                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6669                     cv = lex
6670                         ? isGV(gv)
6671                             ? GvCV(gv)
6672                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6673                                 ? (CV *)SvRV(gv)
6674                                 : ((CV *)gv)
6675                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6676                 }
6677
6678                 /* Use this var to track whether intuit_method has been
6679                    called.  intuit_method returns 0 or > 255.  */
6680                 tmp = 1;
6681
6682                 /* See if it's the indirect object for a list operator. */
6683
6684                 if (PL_oldoldbufptr &&
6685                     PL_oldoldbufptr < PL_bufptr &&
6686                     (PL_oldoldbufptr == PL_last_lop
6687                      || PL_oldoldbufptr == PL_last_uni) &&
6688                     /* NO SKIPSPACE BEFORE HERE! */
6689                     (PL_expect == XREF ||
6690                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6691                 {
6692                     bool immediate_paren = *s == '(';
6693
6694                     /* (Now we can afford to cross potential line boundary.) */
6695                     s = skipspace(s);
6696
6697                     /* Two barewords in a row may indicate method call. */
6698
6699                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6700                         (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6701                         goto method;
6702                     }
6703
6704                     /* If not a declared subroutine, it's an indirect object. */
6705                     /* (But it's an indir obj regardless for sort.) */
6706                     /* Also, if "_" follows a filetest operator, it's a bareword */
6707
6708                     if (
6709                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6710                          (!cv &&
6711                         (PL_last_lop_op != OP_MAPSTART &&
6712                          PL_last_lop_op != OP_GREPSTART))))
6713                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6714                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6715                        )
6716                     {
6717                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6718                         goto bareword;
6719                     }
6720                 }
6721
6722                 PL_expect = XOPERATOR;
6723                 s = skipspace(s);
6724
6725                 /* Is this a word before a => operator? */
6726                 if (*s == '=' && s[1] == '>' && !pkgname) {
6727                     op_free(rv2cv_op);
6728                     CLINE;
6729                     if (gvp || (lex && !off)) {
6730                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
6731                         /* This is our own scalar, created a few lines
6732                            above, so this is safe. */
6733                         SvREADONLY_off(sv);
6734                         sv_setpv(sv, PL_tokenbuf);
6735                         if (UTF && !IN_BYTES
6736                          && is_utf8_string((U8*)PL_tokenbuf, len))
6737                               SvUTF8_on(sv);
6738                         SvREADONLY_on(sv);
6739                     }
6740                     TERM(WORD);
6741                 }
6742
6743                 /* If followed by a paren, it's certainly a subroutine. */
6744                 if (*s == '(') {
6745                     CLINE;
6746                     if (cv) {
6747                         d = s + 1;
6748                         while (SPACE_OR_TAB(*d))
6749                             d++;
6750                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6751                             s = d + 1;
6752                             goto its_constant;
6753                         }
6754                     }
6755                     NEXTVAL_NEXTTOKE.opval =
6756                         off ? rv2cv_op : pl_yylval.opval;
6757                     if (off)
6758                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
6759                     else op_free(rv2cv_op),        force_next(WORD);
6760                     pl_yylval.ival = 0;
6761                     TOKEN('&');
6762                 }
6763
6764                 /* If followed by var or block, call it a method (unless sub) */
6765
6766                 if ((*s == '$' || *s == '{') && !cv) {
6767                     op_free(rv2cv_op);
6768                     PL_last_lop = PL_oldbufptr;
6769                     PL_last_lop_op = OP_METHOD;
6770                     if (!PL_lex_allbrackets &&
6771                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6772                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6773                     PL_expect = XBLOCKTERM;
6774                     PL_bufptr = s;
6775                     return REPORT(METHOD);
6776                 }
6777
6778                 /* If followed by a bareword, see if it looks like indir obj. */
6779
6780                 if (tmp == 1 && !orig_keyword
6781                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6782                         && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6783                   method:
6784                     if (lex && !off) {
6785                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
6786                         SvREADONLY_off(sv);
6787                         sv_setpvn(sv, PL_tokenbuf, len);
6788                         if (UTF && !IN_BYTES
6789                          && is_utf8_string((U8*)PL_tokenbuf, len))
6790                             SvUTF8_on (sv);
6791                         else SvUTF8_off(sv);
6792                     }
6793                     op_free(rv2cv_op);
6794                     if (tmp == METHOD && !PL_lex_allbrackets &&
6795                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6796                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6797                     return REPORT(tmp);
6798                 }
6799
6800                 /* Not a method, so call it a subroutine (if defined) */
6801
6802                 if (cv) {
6803                     /* Check for a constant sub */
6804                     if ((sv = cv_const_sv_or_av(cv))) {
6805                   its_constant:
6806                         op_free(rv2cv_op);
6807                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6808                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6809                         if (SvTYPE(sv) == SVt_PVAV)
6810                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6811                                                       pl_yylval.opval);
6812                         else {
6813                             pl_yylval.opval->op_private = 0;
6814                             pl_yylval.opval->op_folded = 1;
6815                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
6816                         }
6817                         TOKEN(WORD);
6818                     }
6819
6820                     op_free(pl_yylval.opval);
6821                     pl_yylval.opval =
6822                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6823                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6824                     PL_last_lop = PL_oldbufptr;
6825                     PL_last_lop_op = OP_ENTERSUB;
6826                     /* Is there a prototype? */
6827                     if (
6828                         SvPOK(cv))
6829                     {
6830                         STRLEN protolen = CvPROTOLEN(cv);
6831                         const char *proto = CvPROTO(cv);
6832                         bool optional;
6833                         proto = S_strip_spaces(aTHX_ proto, &protolen);
6834                         if (!protolen)
6835                             TERM(FUNC0SUB);
6836                         if ((optional = *proto == ';'))
6837                           do
6838                             proto++;
6839                           while (*proto == ';');
6840                         if (
6841                             (
6842                                 (
6843                                     *proto == '$' || *proto == '_'
6844                                  || *proto == '*' || *proto == '+'
6845                                 )
6846                              && proto[1] == '\0'
6847                             )
6848                          || (
6849                              *proto == '\\' && proto[1] && proto[2] == '\0'
6850                             )
6851                         )
6852                             UNIPROTO(UNIOPSUB,optional);
6853                         if (*proto == '\\' && proto[1] == '[') {
6854                             const char *p = proto + 2;
6855                             while(*p && *p != ']')
6856                                 ++p;
6857                             if(*p == ']' && !p[1])
6858                                 UNIPROTO(UNIOPSUB,optional);
6859                         }
6860                         if (*proto == '&' && *s == '{') {
6861                             if (PL_curstash)
6862                                 sv_setpvs(PL_subname, "__ANON__");
6863                             else
6864                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6865                             if (!PL_lex_allbrackets &&
6866                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6867                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6868                             PREBLOCK(LSTOPSUB);
6869                         }
6870                     }
6871                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6872                     PL_expect = XTERM;
6873                     force_next(off ? PRIVATEREF : WORD);
6874                     if (!PL_lex_allbrackets &&
6875                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6876                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6877                     TOKEN(NOAMP);
6878                 }
6879
6880                 /* Call it a bare word */
6881
6882                 if (PL_hints & HINT_STRICT_SUBS)
6883                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6884                 else {
6885                 bareword:
6886                     /* after "print" and similar functions (corresponding to
6887                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6888                      * a filehandle should be subject to "strict subs".
6889                      * Likewise for the optional indirect-object argument to system
6890                      * or exec, which can't be a bareword */
6891                     if ((PL_last_lop_op == OP_PRINT
6892                             || PL_last_lop_op == OP_PRTF
6893                             || PL_last_lop_op == OP_SAY
6894                             || PL_last_lop_op == OP_SYSTEM
6895                             || PL_last_lop_op == OP_EXEC)
6896                             && (PL_hints & HINT_STRICT_SUBS))
6897                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6898                     if (lastchar != '-') {
6899                         if (ckWARN(WARN_RESERVED)) {
6900                             d = PL_tokenbuf;
6901                             while (isLOWER(*d))
6902                                 d++;
6903                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6904                             {
6905                                 /* PL_warn_reserved is constant */
6906                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6907                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6908                                        PL_tokenbuf);
6909                                 GCC_DIAG_RESTORE;
6910                             }
6911                         }
6912                     }
6913                 }
6914                 op_free(rv2cv_op);
6915
6916             safe_bareword:
6917                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6918                  && saw_infix_sigil) {
6919                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6920                                      "Operator or semicolon missing before %c%"UTF8f,
6921                                      lastchar,
6922                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
6923                                               PL_tokenbuf));
6924                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6925                                      "Ambiguous use of %c resolved as operator %c",
6926                                      lastchar, lastchar);
6927                 }
6928                 TOKEN(WORD);
6929             }
6930
6931         case KEY___FILE__:
6932             FUN0OP(
6933                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6934             );
6935
6936         case KEY___LINE__:
6937             FUN0OP(
6938                 (OP*)newSVOP(OP_CONST, 0,
6939                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6940             );
6941
6942         case KEY___PACKAGE__:
6943             FUN0OP(
6944                 (OP*)newSVOP(OP_CONST, 0,
6945                                         (PL_curstash
6946                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6947                                          : &PL_sv_undef))
6948             );
6949
6950         case KEY___DATA__:
6951         case KEY___END__: {
6952             GV *gv;
6953             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6954                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6955                                         ? PL_curstash
6956                                         : PL_defstash;
6957                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6958                 if (!isGV(gv))
6959                     gv_init(gv,stash,"DATA",4,0);
6960                 GvMULTI_on(gv);
6961                 if (!GvIO(gv))
6962                     GvIOp(gv) = newIO();
6963                 IoIFP(GvIOp(gv)) = PL_rsfp;
6964 #if defined(HAS_FCNTL) && defined(F_SETFD)
6965                 {
6966                     const int fd = PerlIO_fileno(PL_rsfp);
6967                     fcntl(fd,F_SETFD,fd >= 3);
6968                 }
6969 #endif
6970                 /* Mark this internal pseudo-handle as clean */
6971                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6972                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6973                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6974                 else
6975                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6976 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6977                 /* if the script was opened in binmode, we need to revert
6978                  * it to text mode for compatibility; but only iff it has CRs
6979                  * XXX this is a questionable hack at best. */
6980                 if (PL_bufend-PL_bufptr > 2
6981                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6982                 {
6983                     Off_t loc = 0;
6984                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6985                         loc = PerlIO_tell(PL_rsfp);
6986                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6987                     }
6988 #ifdef NETWARE
6989                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6990 #else
6991                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6992 #endif  /* NETWARE */
6993                         if (loc > 0)
6994                             PerlIO_seek(PL_rsfp, loc, 0);
6995                     }
6996                 }
6997 #endif
6998 #ifdef PERLIO_LAYERS
6999                 if (!IN_BYTES) {
7000                     if (UTF)
7001                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7002                     else if (IN_ENCODING) {
7003                         SV *name;
7004                         dSP;
7005                         ENTER;
7006                         SAVETMPS;
7007                         PUSHMARK(sp);
7008                         XPUSHs(_get_encoding());
7009                         PUTBACK;
7010                         call_method("name", G_SCALAR);
7011                         SPAGAIN;
7012                         name = POPs;
7013                         PUTBACK;
7014                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7015                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7016                                                       SVfARG(name)));
7017                         FREETMPS;
7018                         LEAVE;
7019                     }
7020                 }
7021 #endif
7022                 PL_rsfp = NULL;
7023             }
7024             goto fake_eof;
7025         }
7026
7027         case KEY___SUB__:
7028             FUN0OP(CvCLONE(PL_compcv)
7029                         ? newOP(OP_RUNCV, 0)
7030                         : newPVOP(OP_RUNCV,0,NULL));
7031
7032         case KEY_AUTOLOAD:
7033         case KEY_DESTROY:
7034         case KEY_BEGIN:
7035         case KEY_UNITCHECK:
7036         case KEY_CHECK:
7037         case KEY_INIT:
7038         case KEY_END:
7039             if (PL_expect == XSTATE) {
7040                 s = PL_bufptr;
7041                 goto really_sub;
7042             }
7043             goto just_a_word;
7044
7045         case_KEY_CORE:
7046             {
7047                 STRLEN olen = len;
7048                 d = s;
7049                 s += 2;
7050                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7051                 if ((*s == ':' && s[1] == ':')
7052                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7053                 {
7054                     s = d;
7055                     len = olen;
7056                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7057                     goto just_a_word;
7058                 }
7059                 if (!tmp)
7060                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7061                                       UTF8fARG(UTF, len, PL_tokenbuf));
7062                 if (tmp < 0)
7063                     tmp = -tmp;
7064                 else if (tmp == KEY_require || tmp == KEY_do
7065                       || tmp == KEY_glob)
7066                     /* that's a way to remember we saw "CORE::" */
7067                     orig_keyword = tmp;
7068                 goto reserved_word;
7069             }
7070
7071         case KEY_abs:
7072             UNI(OP_ABS);
7073
7074         case KEY_alarm:
7075             UNI(OP_ALARM);
7076
7077         case KEY_accept:
7078             LOP(OP_ACCEPT,XTERM);
7079
7080         case KEY_and:
7081             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7082                 return REPORT(0);
7083             OPERATOR(ANDOP);
7084
7085         case KEY_atan2:
7086             LOP(OP_ATAN2,XTERM);
7087
7088         case KEY_bind:
7089             LOP(OP_BIND,XTERM);
7090
7091         case KEY_binmode:
7092             LOP(OP_BINMODE,XTERM);
7093
7094         case KEY_bless:
7095             LOP(OP_BLESS,XTERM);
7096
7097         case KEY_break:
7098             FUN0(OP_BREAK);
7099
7100         case KEY_chop:
7101             UNI(OP_CHOP);
7102
7103         case KEY_continue:
7104                     /* We have to disambiguate the two senses of
7105                       "continue". If the next token is a '{' then
7106                       treat it as the start of a continue block;
7107                       otherwise treat it as a control operator.
7108                      */
7109                     s = skipspace(s);
7110                     if (*s == '{')
7111             PREBLOCK(CONTINUE);
7112                     else
7113                         FUN0(OP_CONTINUE);
7114
7115         case KEY_chdir:
7116             /* may use HOME */
7117             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7118             UNI(OP_CHDIR);
7119
7120         case KEY_close:
7121             UNI(OP_CLOSE);
7122
7123         case KEY_closedir:
7124             UNI(OP_CLOSEDIR);
7125
7126         case KEY_cmp:
7127             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7128                 return REPORT(0);
7129             Eop(OP_SCMP);
7130
7131         case KEY_caller:
7132             UNI(OP_CALLER);
7133
7134         case KEY_crypt:
7135 #ifdef FCRYPT
7136             if (!PL_cryptseen) {
7137                 PL_cryptseen = TRUE;
7138                 init_des();
7139             }
7140 #endif
7141             LOP(OP_CRYPT,XTERM);
7142
7143         case KEY_chmod:
7144             LOP(OP_CHMOD,XTERM);
7145
7146         case KEY_chown:
7147             LOP(OP_CHOWN,XTERM);
7148
7149         case KEY_connect:
7150             LOP(OP_CONNECT,XTERM);
7151
7152         case KEY_chr:
7153             UNI(OP_CHR);
7154
7155         case KEY_cos:
7156             UNI(OP_COS);
7157
7158         case KEY_chroot:
7159             UNI(OP_CHROOT);
7160
7161         case KEY_default:
7162             PREBLOCK(DEFAULT);
7163
7164         case KEY_do:
7165             s = skipspace(s);
7166             if (*s == '{')
7167                 PRETERMBLOCK(DO);
7168             if (*s != '\'') {
7169                 *PL_tokenbuf = '&';
7170                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7171                               1, &len);
7172                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7173                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7174                     d = skipspace(d);
7175                     if (*d == '(') {
7176                         force_ident_maybe_lex('&');
7177                         s = d;
7178                     }
7179                 }
7180             }
7181             if (orig_keyword == KEY_do) {
7182                 orig_keyword = 0;
7183                 pl_yylval.ival = 1;
7184             }
7185             else
7186                 pl_yylval.ival = 0;
7187             OPERATOR(DO);
7188
7189         case KEY_die:
7190             PL_hints |= HINT_BLOCK_SCOPE;
7191             LOP(OP_DIE,XTERM);
7192
7193         case KEY_defined:
7194             UNI(OP_DEFINED);
7195
7196         case KEY_delete:
7197             UNI(OP_DELETE);
7198
7199         case KEY_dbmopen:
7200             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7201                               STR_WITH_LEN("NDBM_File::"),
7202                               STR_WITH_LEN("DB_File::"),
7203                               STR_WITH_LEN("GDBM_File::"),
7204                               STR_WITH_LEN("SDBM_File::"),
7205                               STR_WITH_LEN("ODBM_File::"),
7206                               NULL);
7207             LOP(OP_DBMOPEN,XTERM);
7208
7209         case KEY_dbmclose:
7210             UNI(OP_DBMCLOSE);
7211
7212         case KEY_dump:
7213             LOOPX(OP_DUMP);
7214
7215         case KEY_else:
7216             PREBLOCK(ELSE);
7217
7218         case KEY_elsif:
7219             pl_yylval.ival = CopLINE(PL_curcop);
7220             OPERATOR(ELSIF);
7221
7222         case KEY_eq:
7223             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7224                 return REPORT(0);
7225             Eop(OP_SEQ);
7226
7227         case KEY_exists:
7228             UNI(OP_EXISTS);
7229
7230         case KEY_exit:
7231             UNI(OP_EXIT);
7232
7233         case KEY_eval:
7234             s = skipspace(s);
7235             if (*s == '{') { /* block eval */
7236                 PL_expect = XTERMBLOCK;
7237                 UNIBRACK(OP_ENTERTRY);
7238             }
7239             else { /* string eval */
7240                 PL_expect = XTERM;
7241                 UNIBRACK(OP_ENTEREVAL);
7242             }
7243
7244         case KEY_evalbytes:
7245             PL_expect = XTERM;
7246             UNIBRACK(-OP_ENTEREVAL);
7247
7248         case KEY_eof:
7249             UNI(OP_EOF);
7250
7251         case KEY_exp:
7252             UNI(OP_EXP);
7253
7254         case KEY_each:
7255             UNI(OP_EACH);
7256
7257         case KEY_exec:
7258             LOP(OP_EXEC,XREF);
7259
7260         case KEY_endhostent:
7261             FUN0(OP_EHOSTENT);
7262
7263         case KEY_endnetent:
7264             FUN0(OP_ENETENT);
7265
7266         case KEY_endservent:
7267             FUN0(OP_ESERVENT);
7268
7269         case KEY_endprotoent:
7270             FUN0(OP_EPROTOENT);
7271
7272         case KEY_endpwent:
7273             FUN0(OP_EPWENT);
7274
7275         case KEY_endgrent:
7276             FUN0(OP_EGRENT);
7277
7278         case KEY_for:
7279         case KEY_foreach:
7280             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7281                 return REPORT(0);
7282             pl_yylval.ival = CopLINE(PL_curcop);
7283             s = skipspace(s);
7284             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7285                 char *p = s;
7286
7287                 if ((PL_bufend - p) >= 3 &&
7288                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7289                     p += 2;
7290                 else if ((PL_bufend - p) >= 4 &&
7291                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7292                     p += 3;
7293                 p = skipspace(p);
7294                 /* skip optional package name, as in "for my abc $x (..)" */
7295                 if (isIDFIRST_lazy_if(p,UTF)) {
7296                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7297                     p = skipspace(p);
7298                 }
7299                 if (*p != '$')
7300                     Perl_croak(aTHX_ "Missing $ on loop variable");
7301             }
7302             OPERATOR(FOR);
7303
7304         case KEY_formline:
7305             LOP(OP_FORMLINE,XTERM);
7306
7307         case KEY_fork:
7308             FUN0(OP_FORK);
7309
7310         case KEY_fc:
7311             UNI(OP_FC);
7312
7313         case KEY_fcntl:
7314             LOP(OP_FCNTL,XTERM);
7315
7316         case KEY_fileno:
7317             UNI(OP_FILENO);
7318
7319         case KEY_flock:
7320             LOP(OP_FLOCK,XTERM);
7321
7322         case KEY_gt:
7323             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7324                 return REPORT(0);
7325             Rop(OP_SGT);
7326
7327         case KEY_ge:
7328             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7329                 return REPORT(0);
7330             Rop(OP_SGE);
7331
7332         case KEY_grep:
7333             LOP(OP_GREPSTART, XREF);
7334
7335         case KEY_goto:
7336             LOOPX(OP_GOTO);
7337
7338         case KEY_gmtime:
7339             UNI(OP_GMTIME);
7340
7341         case KEY_getc:
7342             UNIDOR(OP_GETC);
7343
7344         case KEY_getppid:
7345             FUN0(OP_GETPPID);
7346
7347         case KEY_getpgrp:
7348             UNI(OP_GETPGRP);
7349
7350         case KEY_getpriority:
7351             LOP(OP_GETPRIORITY,XTERM);
7352
7353         case KEY_getprotobyname:
7354             UNI(OP_GPBYNAME);
7355
7356         case KEY_getprotobynumber:
7357             LOP(OP_GPBYNUMBER,XTERM);
7358
7359         case KEY_getprotoent:
7360             FUN0(OP_GPROTOENT);
7361
7362         case KEY_getpwent:
7363             FUN0(OP_GPWENT);
7364
7365         case KEY_getpwnam:
7366             UNI(OP_GPWNAM);
7367
7368         case KEY_getpwuid:
7369             UNI(OP_GPWUID);
7370
7371         case KEY_getpeername:
7372             UNI(OP_GETPEERNAME);
7373
7374         case KEY_gethostbyname:
7375             UNI(OP_GHBYNAME);
7376
7377         case KEY_gethostbyaddr:
7378             LOP(OP_GHBYADDR,XTERM);
7379
7380         case KEY_gethostent:
7381             FUN0(OP_GHOSTENT);
7382
7383         case KEY_getnetbyname:
7384             UNI(OP_GNBYNAME);
7385
7386         case KEY_getnetbyaddr:
7387             LOP(OP_GNBYADDR,XTERM);
7388
7389         case KEY_getnetent:
7390             FUN0(OP_GNETENT);
7391
7392         case KEY_getservbyname:
7393             LOP(OP_GSBYNAME,XTERM);
7394
7395         case KEY_getservbyport:
7396             LOP(OP_GSBYPORT,XTERM);
7397
7398         case KEY_getservent:
7399             FUN0(OP_GSERVENT);
7400
7401         case KEY_getsockname:
7402             UNI(OP_GETSOCKNAME);
7403
7404         case KEY_getsockopt:
7405             LOP(OP_GSOCKOPT,XTERM);
7406
7407         case KEY_getgrent:
7408             FUN0(OP_GGRENT);
7409
7410         case KEY_getgrnam:
7411             UNI(OP_GGRNAM);
7412
7413         case KEY_getgrgid:
7414             UNI(OP_GGRGID);
7415
7416         case KEY_getlogin:
7417             FUN0(OP_GETLOGIN);
7418
7419         case KEY_given:
7420             pl_yylval.ival = CopLINE(PL_curcop);
7421             Perl_ck_warner_d(aTHX_
7422                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7423                 "given is experimental");
7424             OPERATOR(GIVEN);
7425
7426         case KEY_glob:
7427             LOP(
7428              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7429              XTERM
7430             );
7431
7432         case KEY_hex:
7433             UNI(OP_HEX);
7434
7435         case KEY_if:
7436             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7437                 return REPORT(0);
7438             pl_yylval.ival = CopLINE(PL_curcop);
7439             OPERATOR(IF);
7440
7441         case KEY_index:
7442             LOP(OP_INDEX,XTERM);
7443
7444         case KEY_int:
7445             UNI(OP_INT);
7446
7447         case KEY_ioctl:
7448             LOP(OP_IOCTL,XTERM);
7449
7450         case KEY_join:
7451             LOP(OP_JOIN,XTERM);
7452
7453         case KEY_keys:
7454             UNI(OP_KEYS);
7455
7456         case KEY_kill:
7457             LOP(OP_KILL,XTERM);
7458
7459         case KEY_last:
7460             LOOPX(OP_LAST);
7461
7462         case KEY_lc:
7463             UNI(OP_LC);
7464
7465         case KEY_lcfirst:
7466             UNI(OP_LCFIRST);
7467
7468         case KEY_local:
7469             pl_yylval.ival = 0;
7470             OPERATOR(LOCAL);
7471
7472         case KEY_length:
7473             UNI(OP_LENGTH);
7474
7475         case KEY_lt:
7476             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7477                 return REPORT(0);
7478             Rop(OP_SLT);
7479
7480         case KEY_le:
7481             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7482                 return REPORT(0);
7483             Rop(OP_SLE);
7484
7485         case KEY_localtime:
7486             UNI(OP_LOCALTIME);
7487
7488         case KEY_log:
7489             UNI(OP_LOG);
7490
7491         case KEY_link:
7492             LOP(OP_LINK,XTERM);
7493
7494         case KEY_listen:
7495             LOP(OP_LISTEN,XTERM);
7496
7497         case KEY_lock:
7498             UNI(OP_LOCK);
7499
7500         case KEY_lstat:
7501             UNI(OP_LSTAT);
7502
7503         case KEY_m:
7504             s = scan_pat(s,OP_MATCH);
7505             TERM(sublex_start());
7506
7507         case KEY_map:
7508             LOP(OP_MAPSTART, XREF);
7509
7510         case KEY_mkdir:
7511             LOP(OP_MKDIR,XTERM);
7512
7513         case KEY_msgctl:
7514             LOP(OP_MSGCTL,XTERM);
7515
7516         case KEY_msgget:
7517             LOP(OP_MSGGET,XTERM);
7518
7519         case KEY_msgrcv:
7520             LOP(OP_MSGRCV,XTERM);
7521
7522         case KEY_msgsnd:
7523             LOP(OP_MSGSND,XTERM);
7524
7525         case KEY_our:
7526         case KEY_my:
7527         case KEY_state:
7528             PL_in_my = (U16)tmp;
7529             s = skipspace(s);
7530             if (isIDFIRST_lazy_if(s,UTF)) {
7531                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7532                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7533                 {
7534                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7535                         Perl_croak(aTHX_
7536                                   "Experimental \"%s\" subs not enabled",
7537                                    tmp == KEY_my    ? "my"    :
7538                                    tmp == KEY_state ? "state" : "our");
7539                     Perl_ck_warner_d(aTHX_
7540                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7541                         "The lexical_subs feature is experimental");
7542                     goto really_sub;
7543                 }
7544                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7545                 if (!PL_in_my_stash) {
7546                     char tmpbuf[1024];
7547                     int len;
7548                     PL_bufptr = s;
7549                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7550                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7551                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7552                 }
7553             }
7554             pl_yylval.ival = 1;
7555             OPERATOR(MY);
7556
7557         case KEY_next:
7558             LOOPX(OP_NEXT);
7559
7560         case KEY_ne:
7561             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7562                 return REPORT(0);
7563             Eop(OP_SNE);
7564
7565         case KEY_no:
7566             s = tokenize_use(0, s);
7567             TOKEN(USE);
7568
7569         case KEY_not:
7570             if (*s == '(' || (s = skipspace(s), *s == '('))
7571                 FUN1(OP_NOT);
7572             else {
7573                 if (!PL_lex_allbrackets &&
7574                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7575                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7576                 OPERATOR(NOTOP);
7577             }
7578
7579         case KEY_open:
7580             s = skipspace(s);
7581             if (isIDFIRST_lazy_if(s,UTF)) {
7582           const char *t;
7583           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7584               &len);
7585                 for (t=d; isSPACE(*t);)
7586                     t++;
7587                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7588                     /* [perl #16184] */
7589                     && !(t[0] == '=' && t[1] == '>')
7590                     && !(t[0] == ':' && t[1] == ':')
7591                     && !keyword(s, d-s, 0)
7592                 ) {
7593                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7594                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7595                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7596                 }
7597             }
7598             LOP(OP_OPEN,XTERM);
7599
7600         case KEY_or:
7601             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7602                 return REPORT(0);
7603             pl_yylval.ival = OP_OR;
7604             OPERATOR(OROP);
7605
7606         case KEY_ord:
7607             UNI(OP_ORD);
7608
7609         case KEY_oct:
7610             UNI(OP_OCT);
7611
7612         case KEY_opendir:
7613             LOP(OP_OPEN_DIR,XTERM);
7614
7615         case KEY_print:
7616             checkcomma(s,PL_tokenbuf,"filehandle");
7617             LOP(OP_PRINT,XREF);
7618
7619         case KEY_printf:
7620             checkcomma(s,PL_tokenbuf,"filehandle");
7621             LOP(OP_PRTF,XREF);
7622
7623         case KEY_prototype:
7624             UNI(OP_PROTOTYPE);
7625
7626         case KEY_push:
7627             LOP(OP_PUSH,XTERM);
7628
7629         case KEY_pop:
7630             UNIDOR(OP_POP);
7631
7632         case KEY_pos:
7633             UNIDOR(OP_POS);
7634
7635         case KEY_pack:
7636             LOP(OP_PACK,XTERM);
7637
7638         case KEY_package:
7639             s = force_word(s,WORD,FALSE,TRUE);
7640             s = skipspace(s);
7641             s = force_strict_version(s);
7642             PREBLOCK(PACKAGE);
7643
7644         case KEY_pipe:
7645             LOP(OP_PIPE_OP,XTERM);
7646
7647         case KEY_q:
7648             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7649             if (!s)
7650                 missingterm(NULL);
7651             COPLINE_SET_FROM_MULTI_END;
7652             pl_yylval.ival = OP_CONST;
7653             TERM(sublex_start());
7654
7655         case KEY_quotemeta:
7656             UNI(OP_QUOTEMETA);
7657
7658         case KEY_qw: {
7659             OP *words = NULL;
7660             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7661             if (!s)
7662                 missingterm(NULL);
7663             COPLINE_SET_FROM_MULTI_END;
7664             PL_expect = XOPERATOR;
7665             if (SvCUR(PL_lex_stuff)) {
7666                 int warned_comma = !ckWARN(WARN_QW);
7667                 int warned_comment = warned_comma;
7668                 d = SvPV_force(PL_lex_stuff, len);
7669                 while (len) {
7670                     for (; isSPACE(*d) && len; --len, ++d)
7671                         /**/;
7672                     if (len) {
7673                         SV *sv;
7674                         const char *b = d;
7675                         if (!warned_comma || !warned_comment) {
7676                             for (; !isSPACE(*d) && len; --len, ++d) {
7677                                 if (!warned_comma && *d == ',') {
7678                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7679                                         "Possible attempt to separate words with commas");
7680                                     ++warned_comma;
7681                                 }
7682                                 else if (!warned_comment && *d == '#') {
7683                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7684                                         "Possible attempt to put comments in qw() list");
7685                                     ++warned_comment;
7686                                 }
7687                             }
7688                         }
7689                         else {
7690                             for (; !isSPACE(*d) && len; --len, ++d)
7691                                 /**/;
7692                         }
7693                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7694                         words = op_append_elem(OP_LIST, words,
7695                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7696                     }
7697                 }
7698             }
7699             if (!words)
7700                 words = newNULLLIST();
7701             if (PL_lex_stuff) {
7702                 SvREFCNT_dec(PL_lex_stuff);
7703                 PL_lex_stuff = NULL;
7704             }
7705             PL_expect = XOPERATOR;
7706             pl_yylval.opval = sawparens(words);
7707             TOKEN(QWLIST);
7708         }
7709
7710         case KEY_qq:
7711             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7712             if (!s)
7713                 missingterm(NULL);
7714             pl_yylval.ival = OP_STRINGIFY;
7715             if (SvIVX(PL_lex_stuff) == '\'')
7716                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7717             TERM(sublex_start());
7718
7719         case KEY_qr:
7720             s = scan_pat(s,OP_QR);
7721             TERM(sublex_start());
7722
7723         case KEY_qx:
7724             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7725             if (!s)
7726                 missingterm(NULL);
7727             pl_yylval.ival = OP_BACKTICK;
7728             TERM(sublex_start());
7729
7730         case KEY_return:
7731             OLDLOP(OP_RETURN);
7732
7733         case KEY_require:
7734             s = skipspace(s);
7735             if (isDIGIT(*s)) {
7736                 s = force_version(s, FALSE);
7737             }
7738             else if (*s != 'v' || !isDIGIT(s[1])
7739                     || (s = force_version(s, TRUE), *s == 'v'))
7740             {
7741                 *PL_tokenbuf = '\0';
7742                 s = force_word(s,WORD,TRUE,TRUE);
7743                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7744                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7745                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7746                 else if (*s == '<')
7747                     yyerror("<> at require-statement should be quotes");
7748             }
7749             if (orig_keyword == KEY_require) {
7750                 orig_keyword = 0;
7751                 pl_yylval.ival = 1;
7752             }
7753             else
7754                 pl_yylval.ival = 0;
7755             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7756             PL_bufptr = s;
7757             PL_last_uni = PL_oldbufptr;
7758             PL_last_lop_op = OP_REQUIRE;
7759             s = skipspace(s);
7760             return REPORT( (int)REQUIRE );
7761
7762         case KEY_reset:
7763             UNI(OP_RESET);
7764
7765         case KEY_redo:
7766             LOOPX(OP_REDO);
7767
7768         case KEY_rename:
7769             LOP(OP_RENAME,XTERM);
7770
7771         case KEY_rand:
7772             UNI(OP_RAND);
7773
7774         case KEY_rmdir:
7775             UNI(OP_RMDIR);
7776
7777         case KEY_rindex:
7778             LOP(OP_RINDEX,XTERM);
7779
7780         case KEY_read:
7781             LOP(OP_READ,XTERM);
7782
7783         case KEY_readdir:
7784             UNI(OP_READDIR);
7785
7786         case KEY_readline:
7787             UNIDOR(OP_READLINE);
7788
7789         case KEY_readpipe:
7790             UNIDOR(OP_BACKTICK);
7791
7792         case KEY_rewinddir:
7793             UNI(OP_REWINDDIR);
7794
7795         case KEY_recv:
7796             LOP(OP_RECV,XTERM);
7797
7798         case KEY_reverse:
7799             LOP(OP_REVERSE,XTERM);
7800
7801         case KEY_readlink:
7802             UNIDOR(OP_READLINK);
7803
7804         case KEY_ref:
7805             UNI(OP_REF);
7806
7807         case KEY_s:
7808             s = scan_subst(s);
7809             if (pl_yylval.opval)
7810                 TERM(sublex_start());
7811             else
7812                 TOKEN(1);       /* force error */
7813
7814         case KEY_say:
7815             checkcomma(s,PL_tokenbuf,"filehandle");
7816             LOP(OP_SAY,XREF);
7817
7818         case KEY_chomp:
7819             UNI(OP_CHOMP);
7820
7821         case KEY_scalar:
7822             UNI(OP_SCALAR);
7823
7824         case KEY_select:
7825             LOP(OP_SELECT,XTERM);
7826
7827         case KEY_seek:
7828             LOP(OP_SEEK,XTERM);
7829
7830         case KEY_semctl:
7831             LOP(OP_SEMCTL,XTERM);
7832
7833         case KEY_semget:
7834             LOP(OP_SEMGET,XTERM);
7835
7836         case KEY_semop:
7837             LOP(OP_SEMOP,XTERM);
7838
7839         case KEY_send:
7840             LOP(OP_SEND,XTERM);
7841
7842         case KEY_setpgrp:
7843             LOP(OP_SETPGRP,XTERM);
7844
7845         case KEY_setpriority:
7846             LOP(OP_SETPRIORITY,XTERM);
7847
7848         case KEY_sethostent:
7849             UNI(OP_SHOSTENT);
7850
7851         case KEY_setnetent:
7852             UNI(OP_SNETENT);
7853
7854         case KEY_setservent:
7855             UNI(OP_SSERVENT);
7856
7857         case KEY_setprotoent:
7858             UNI(OP_SPROTOENT);
7859
7860         case KEY_setpwent:
7861             FUN0(OP_SPWENT);
7862
7863         case KEY_setgrent:
7864             FUN0(OP_SGRENT);
7865
7866         case KEY_seekdir:
7867             LOP(OP_SEEKDIR,XTERM);
7868
7869         case KEY_setsockopt:
7870             LOP(OP_SSOCKOPT,XTERM);
7871
7872         case KEY_shift:
7873             UNIDOR(OP_SHIFT);
7874
7875         case KEY_shmctl:
7876             LOP(OP_SHMCTL,XTERM);
7877
7878         case KEY_shmget:
7879             LOP(OP_SHMGET,XTERM);
7880
7881         case KEY_shmread:
7882             LOP(OP_SHMREAD,XTERM);
7883
7884         case KEY_shmwrite:
7885             LOP(OP_SHMWRITE,XTERM);
7886
7887         case KEY_shutdown:
7888             LOP(OP_SHUTDOWN,XTERM);
7889
7890         case KEY_sin:
7891             UNI(OP_SIN);
7892
7893         case KEY_sleep:
7894             UNI(OP_SLEEP);
7895
7896         case KEY_socket:
7897             LOP(OP_SOCKET,XTERM);
7898
7899         case KEY_socketpair:
7900             LOP(OP_SOCKPAIR,XTERM);
7901
7902         case KEY_sort:
7903             checkcomma(s,PL_tokenbuf,"subroutine name");
7904             s = skipspace(s);
7905             PL_expect = XTERM;
7906             s = force_word(s,WORD,TRUE,TRUE);
7907             LOP(OP_SORT,XREF);
7908
7909         case KEY_split:
7910             LOP(OP_SPLIT,XTERM);
7911
7912         case KEY_sprintf:
7913             LOP(OP_SPRINTF,XTERM);
7914
7915         case KEY_splice:
7916             LOP(OP_SPLICE,XTERM);
7917
7918         case KEY_sqrt:
7919             UNI(OP_SQRT);
7920
7921         case KEY_srand:
7922             UNI(OP_SRAND);
7923
7924         case KEY_stat:
7925             UNI(OP_STAT);
7926
7927         case KEY_study:
7928             UNI(OP_STUDY);
7929
7930         case KEY_substr:
7931             LOP(OP_SUBSTR,XTERM);
7932
7933         case KEY_format:
7934         case KEY_sub:
7935           really_sub:
7936             {
7937                 char * const tmpbuf = PL_tokenbuf + 1;
7938                 expectation attrful;
7939                 bool have_name, have_proto;
7940                 const int key = tmp;
7941                 SV *format_name = NULL;
7942
7943                 d = s;
7944                 s = skipspace(s);
7945
7946                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7947                     (*s == ':' && s[1] == ':'))
7948                 {
7949
7950                     PL_expect = XBLOCK;
7951                     attrful = XATTRBLOCK;
7952                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7953                                   &len);
7954                     if (key == KEY_format)
7955                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7956                     *PL_tokenbuf = '&';
7957                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
7958                      || pad_findmy_pvn(
7959                             PL_tokenbuf, len + 1, 0
7960                         ) != NOT_IN_PAD)
7961                         sv_setpvn(PL_subname, tmpbuf, len);
7962                     else {
7963                         sv_setsv(PL_subname,PL_curstname);
7964                         sv_catpvs(PL_subname,"::");
7965                         sv_catpvn(PL_subname,tmpbuf,len);
7966                     }
7967                     if (SvUTF8(PL_linestr))
7968                         SvUTF8_on(PL_subname);
7969                     have_name = TRUE;
7970
7971
7972                     s = skipspace(d);
7973                 }
7974                 else {
7975                     if (key == KEY_my || key == KEY_our || key==KEY_state)
7976                     {
7977                         *d = '\0';
7978                         /* diag_listed_as: Missing name in "%s sub" */
7979                         Perl_croak(aTHX_
7980                                   "Missing name in \"%s\"", PL_bufptr);
7981                     }
7982                     PL_expect = XTERMBLOCK;
7983                     attrful = XATTRTERM;
7984                     sv_setpvs(PL_subname,"?");
7985                     have_name = FALSE;
7986                 }
7987
7988                 if (key == KEY_format) {
7989                     if (format_name) {
7990                         NEXTVAL_NEXTTOKE.opval
7991                             = (OP*)newSVOP(OP_CONST,0, format_name);
7992                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7993                         force_next(WORD);
7994                     }
7995                     PREBLOCK(FORMAT);
7996                 }
7997
7998                 /* Look for a prototype */
7999                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8000                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8001                     COPLINE_SET_FROM_MULTI_END;
8002                     if (!s)
8003                         Perl_croak(aTHX_ "Prototype not terminated");
8004                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8005                     have_proto = TRUE;
8006
8007                     s = skipspace(s);
8008                 }
8009                 else
8010                     have_proto = FALSE;
8011
8012                 if (*s == ':' && s[1] != ':')
8013                     PL_expect = attrful;
8014                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8015                     if (!have_name)
8016                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8017                     else if (*s != ';' && *s != '}')
8018                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8019                 }
8020
8021                 if (have_proto) {
8022                     NEXTVAL_NEXTTOKE.opval =
8023                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8024                     PL_lex_stuff = NULL;
8025                     force_next(THING);
8026                 }
8027                 if (!have_name) {
8028                     if (PL_curstash)
8029                         sv_setpvs(PL_subname, "__ANON__");
8030                     else
8031                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8032                     TOKEN(ANONSUB);
8033                 }
8034                 force_ident_maybe_lex('&');
8035                 TOKEN(SUB);
8036             }
8037
8038         case KEY_system:
8039             LOP(OP_SYSTEM,XREF);
8040
8041         case KEY_symlink:
8042             LOP(OP_SYMLINK,XTERM);
8043
8044         case KEY_syscall:
8045             LOP(OP_SYSCALL,XTERM);
8046
8047         case KEY_sysopen:
8048             LOP(OP_SYSOPEN,XTERM);
8049
8050         case KEY_sysseek:
8051             LOP(OP_SYSSEEK,XTERM);
8052
8053         case KEY_sysread:
8054             LOP(OP_SYSREAD,XTERM);
8055
8056         case KEY_syswrite:
8057             LOP(OP_SYSWRITE,XTERM);
8058
8059         case KEY_tr:
8060         case KEY_y:
8061             s = scan_trans(s);
8062             TERM(sublex_start());
8063
8064         case KEY_tell:
8065             UNI(OP_TELL);
8066
8067         case KEY_telldir:
8068             UNI(OP_TELLDIR);
8069
8070         case KEY_tie:
8071             LOP(OP_TIE,XTERM);
8072
8073         case KEY_tied:
8074             UNI(OP_TIED);
8075
8076         case KEY_time:
8077             FUN0(OP_TIME);
8078
8079         case KEY_times:
8080             FUN0(OP_TMS);
8081
8082         case KEY_truncate:
8083             LOP(OP_TRUNCATE,XTERM);
8084
8085         case KEY_uc:
8086             UNI(OP_UC);
8087
8088         case KEY_ucfirst:
8089             UNI(OP_UCFIRST);
8090
8091         case KEY_untie:
8092             UNI(OP_UNTIE);
8093
8094         case KEY_until:
8095             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8096                 return REPORT(0);
8097             pl_yylval.ival = CopLINE(PL_curcop);
8098             OPERATOR(UNTIL);
8099
8100         case KEY_unless:
8101             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8102                 return REPORT(0);
8103             pl_yylval.ival = CopLINE(PL_curcop);
8104             OPERATOR(UNLESS);
8105
8106         case KEY_unlink:
8107             LOP(OP_UNLINK,XTERM);
8108
8109         case KEY_undef:
8110             UNIDOR(OP_UNDEF);
8111
8112         case KEY_unpack:
8113             LOP(OP_UNPACK,XTERM);
8114
8115         case KEY_utime:
8116             LOP(OP_UTIME,XTERM);
8117
8118         case KEY_umask:
8119             UNIDOR(OP_UMASK);
8120
8121         case KEY_unshift:
8122             LOP(OP_UNSHIFT,XTERM);
8123
8124         case KEY_use:
8125             s = tokenize_use(1, s);
8126             TOKEN(USE);
8127
8128         case KEY_values:
8129             UNI(OP_VALUES);
8130
8131         case KEY_vec:
8132             LOP(OP_VEC,XTERM);
8133
8134         case KEY_when:
8135             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8136                 return REPORT(0);
8137             pl_yylval.ival = CopLINE(PL_curcop);
8138             Perl_ck_warner_d(aTHX_
8139                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8140                 "when is experimental");
8141             OPERATOR(WHEN);
8142
8143         case KEY_while:
8144             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8145                 return REPORT(0);
8146             pl_yylval.ival = CopLINE(PL_curcop);
8147             OPERATOR(WHILE);
8148
8149         case KEY_warn:
8150             PL_hints |= HINT_BLOCK_SCOPE;
8151             LOP(OP_WARN,XTERM);
8152
8153         case KEY_wait:
8154             FUN0(OP_WAIT);
8155
8156         case KEY_waitpid:
8157             LOP(OP_WAITPID,XTERM);
8158
8159         case KEY_wantarray:
8160             FUN0(OP_WANTARRAY);
8161
8162         case KEY_write:
8163             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8164              * we use the same number on EBCDIC */
8165             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8166             UNI(OP_ENTERWRITE);
8167
8168         case KEY_x:
8169             if (PL_expect == XOPERATOR) {
8170                 if (*s == '=' && !PL_lex_allbrackets &&
8171                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8172                     return REPORT(0);
8173                 Mop(OP_REPEAT);
8174             }
8175             check_uni();
8176             goto just_a_word;
8177
8178         case KEY_xor:
8179             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8180                 return REPORT(0);
8181             pl_yylval.ival = OP_XOR;
8182             OPERATOR(OROP);
8183         }
8184     }}
8185 }
8186
8187 /*
8188   S_pending_ident
8189
8190   Looks up an identifier in the pad or in a package
8191
8192   Returns:
8193     PRIVATEREF if this is a lexical name.
8194     WORD       if this belongs to a package.
8195
8196   Structure:
8197       if we're in a my declaration
8198           croak if they tried to say my($foo::bar)
8199           build the ops for a my() declaration
8200       if it's an access to a my() variable
8201           build ops for access to a my() variable
8202       if in a dq string, and they've said @foo and we can't find @foo
8203           warn
8204       build ops for a bareword
8205 */
8206
8207 static int
8208 S_pending_ident(pTHX)
8209 {
8210     PADOFFSET tmp = 0;
8211     const char pit = (char)pl_yylval.ival;
8212     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8213     /* All routes through this function want to know if there is a colon.  */
8214     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8215
8216     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8217           "### Pending identifier '%s'\n", PL_tokenbuf); });
8218
8219     /* if we're in a my(), we can't allow dynamics here.
8220        $foo'bar has already been turned into $foo::bar, so
8221        just check for colons.
8222
8223        if it's a legal name, the OP is a PADANY.
8224     */
8225     if (PL_in_my) {
8226         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8227             if (has_colon)
8228                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8229                                   "variable %s in \"our\"",
8230                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8231             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8232         }
8233         else {
8234             if (has_colon) {
8235                 /* "my" variable %s can't be in a package */
8236                 /* PL_no_myglob is constant */
8237                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8238                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8239                             PL_in_my == KEY_my ? "my" : "state",
8240                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8241                             PL_tokenbuf),
8242                             UTF ? SVf_UTF8 : 0);
8243                 GCC_DIAG_RESTORE;
8244             }
8245
8246             pl_yylval.opval = newOP(OP_PADANY, 0);
8247             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8248                                                         UTF ? SVf_UTF8 : 0);
8249             return PRIVATEREF;
8250         }
8251     }
8252
8253     /*
8254        build the ops for accesses to a my() variable.
8255     */
8256
8257     if (!has_colon) {
8258         if (!PL_in_my)
8259             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8260                                  0);
8261         if (tmp != NOT_IN_PAD) {
8262             /* might be an "our" variable" */
8263             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8264                 /* build ops for a bareword */
8265                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8266                 HEK * const stashname = HvNAME_HEK(stash);
8267                 SV *  const sym = newSVhek(stashname);
8268                 sv_catpvs(sym, "::");
8269                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8270                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8271                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8272                 if (pit != '&')
8273                   gv_fetchsv(sym,
8274                     GV_ADDMULTI,
8275                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8276                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8277                      : SVt_PVHV));
8278                 return WORD;
8279             }
8280
8281             pl_yylval.opval = newOP(OP_PADANY, 0);
8282             pl_yylval.opval->op_targ = tmp;
8283             return PRIVATEREF;
8284         }
8285     }
8286
8287     /*
8288        Whine if they've said @foo in a doublequoted string,
8289        and @foo isn't a variable we can find in the symbol
8290        table.
8291     */
8292     if (ckWARN(WARN_AMBIGUOUS) &&
8293         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8294         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8295                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8296         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8297                 /* DO NOT warn for @- and @+ */
8298                 && !( PL_tokenbuf[2] == '\0' &&
8299                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8300            )
8301         {
8302             /* Downgraded from fatal to warning 20000522 mjd */
8303             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8304                         "Possible unintended interpolation of %"UTF8f
8305                         " in string",
8306                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8307         }
8308     }
8309
8310     /* build ops for a bareword */
8311     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8312                                    newSVpvn_flags(PL_tokenbuf + 1,
8313                                                       tokenbuf_len - 1,
8314                                                       UTF ? SVf_UTF8 : 0 ));
8315     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8316     if (pit != '&')
8317         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8318                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8319                      | ( UTF ? SVf_UTF8 : 0 ),
8320                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8321                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8322                       : SVt_PVHV));
8323     return WORD;
8324 }
8325
8326 STATIC void
8327 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8328 {
8329     PERL_ARGS_ASSERT_CHECKCOMMA;
8330
8331     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8332         if (ckWARN(WARN_SYNTAX)) {
8333             int level = 1;
8334             const char *w;
8335             for (w = s+2; *w && level; w++) {
8336                 if (*w == '(')
8337                     ++level;
8338                 else if (*w == ')')
8339                     --level;
8340             }
8341             while (isSPACE(*w))
8342                 ++w;
8343             /* the list of chars below is for end of statements or
8344              * block / parens, boolean operators (&&, ||, //) and branch
8345              * constructs (or, and, if, until, unless, while, err, for).
8346              * Not a very solid hack... */
8347             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8348                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8349                             "%s (...) interpreted as function",name);
8350         }
8351     }
8352     while (s < PL_bufend && isSPACE(*s))
8353         s++;
8354     if (*s == '(')
8355         s++;
8356     while (s < PL_bufend && isSPACE(*s))
8357         s++;
8358     if (isIDFIRST_lazy_if(s,UTF)) {
8359         const char * const w = s;
8360         s += UTF ? UTF8SKIP(s) : 1;
8361         while (isWORDCHAR_lazy_if(s,UTF))
8362             s += UTF ? UTF8SKIP(s) : 1;
8363         while (s < PL_bufend && isSPACE(*s))
8364             s++;
8365         if (*s == ',') {
8366             GV* gv;
8367             PADOFFSET off;
8368             if (keyword(w, s - w, 0))
8369                 return;
8370
8371             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8372             if (gv && GvCVu(gv))
8373                 return;
8374             if (s - w <= 254) {
8375                 char tmpbuf[256];
8376                 Copy(w, tmpbuf+1, s - w, char);
8377                 *tmpbuf = '&';
8378                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8379                 if (off != NOT_IN_PAD) return;
8380             }
8381             Perl_croak(aTHX_ "No comma allowed after %s", what);
8382         }
8383     }
8384 }
8385
8386 /* S_new_constant(): do any overload::constant lookup.
8387
8388    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8389    Best used as sv=new_constant(..., sv, ...).
8390    If s, pv are NULL, calls subroutine with one argument,
8391    and <type> is used with error messages only.
8392    <type> is assumed to be well formed UTF-8 */
8393
8394 STATIC SV *
8395 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8396                SV *sv, SV *pv, const char *type, STRLEN typelen)
8397 {
8398     dSP;
8399     HV * table = GvHV(PL_hintgv);                /* ^H */
8400     SV *res;
8401     SV *errsv = NULL;
8402     SV **cvp;
8403     SV *cv, *typesv;
8404     const char *why1 = "", *why2 = "", *why3 = "";
8405
8406     PERL_ARGS_ASSERT_NEW_CONSTANT;
8407     /* We assume that this is true: */
8408     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8409     assert(type || s);
8410
8411     /* charnames doesn't work well if there have been errors found */
8412     if (PL_error_count > 0 && *key == 'c')
8413     {
8414         SvREFCNT_dec_NN(sv);
8415         return &PL_sv_undef;
8416     }
8417
8418     sv_2mortal(sv);                     /* Parent created it permanently */
8419     if (!table
8420         || ! (PL_hints & HINT_LOCALIZE_HH)
8421         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8422         || ! SvOK(*cvp))
8423     {
8424         char *msg;
8425
8426         /* Here haven't found what we're looking for.  If it is charnames,
8427          * perhaps it needs to be loaded.  Try doing that before giving up */
8428         if (*key == 'c') {
8429             Perl_load_module(aTHX_
8430                             0,
8431                             newSVpvs("_charnames"),
8432                              /* version parameter; no need to specify it, as if
8433                               * we get too early a version, will fail anyway,
8434                               * not being able to find '_charnames' */
8435                             NULL,
8436                             newSVpvs(":full"),
8437                             newSVpvs(":short"),
8438                             NULL);
8439             assert(sp == PL_stack_sp);
8440             table = GvHV(PL_hintgv);
8441             if (table
8442                 && (PL_hints & HINT_LOCALIZE_HH)
8443                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8444                 && SvOK(*cvp))
8445             {
8446                 goto now_ok;
8447             }
8448         }
8449         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8450             msg = Perl_form(aTHX_
8451                                "Constant(%.*s) unknown",
8452                                 (int)(type ? typelen : len),
8453                                 (type ? type: s));
8454         }
8455         else {
8456             why1 = "$^H{";
8457             why2 = key;
8458             why3 = "} is not defined";
8459         report:
8460             if (*key == 'c') {
8461                 msg = Perl_form(aTHX_
8462                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8463                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8464                       );
8465             }
8466             else {
8467                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8468                                     (int)(type ? typelen : len),
8469                                     (type ? type: s), why1, why2, why3);
8470             }
8471         }
8472         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8473         return SvREFCNT_inc_simple_NN(sv);
8474     }
8475   now_ok:
8476     cv = *cvp;
8477     if (!pv && s)
8478         pv = newSVpvn_flags(s, len, SVs_TEMP);
8479     if (type && pv)
8480         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8481     else
8482         typesv = &PL_sv_undef;
8483
8484     PUSHSTACKi(PERLSI_OVERLOAD);
8485     ENTER ;
8486     SAVETMPS;
8487
8488     PUSHMARK(SP) ;
8489     EXTEND(sp, 3);
8490     if (pv)
8491         PUSHs(pv);
8492     PUSHs(sv);
8493     if (pv)
8494         PUSHs(typesv);
8495     PUTBACK;
8496     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8497
8498     SPAGAIN ;
8499
8500     /* Check the eval first */
8501     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8502         STRLEN errlen;
8503         const char * errstr;
8504         sv_catpvs(errsv, "Propagated");
8505         errstr = SvPV_const(errsv, errlen);
8506         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8507         (void)POPs;
8508         res = SvREFCNT_inc_simple_NN(sv);
8509     }
8510     else {
8511         res = POPs;
8512         SvREFCNT_inc_simple_void_NN(res);
8513     }
8514
8515     PUTBACK ;
8516     FREETMPS ;
8517     LEAVE ;
8518     POPSTACK;
8519
8520     if (!SvOK(res)) {
8521         why1 = "Call to &{$^H{";
8522         why2 = key;
8523         why3 = "}} did not return a defined value";
8524         sv = res;
8525         (void)sv_2mortal(sv);
8526         goto report;
8527     }
8528
8529     return res;
8530 }
8531
8532 PERL_STATIC_INLINE void
8533 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8534     PERL_ARGS_ASSERT_PARSE_IDENT;
8535
8536     for (;;) {
8537         if (*d >= e)
8538             Perl_croak(aTHX_ "%s", ident_too_long);
8539         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8540              /* The UTF-8 case must come first, otherwise things
8541              * like c\N{COMBINING TILDE} would start failing, as the
8542              * isWORDCHAR_A case below would gobble the 'c' up.
8543              */
8544
8545             char *t = *s + UTF8SKIP(*s);
8546             while (isIDCONT_utf8((U8*)t))
8547                 t += UTF8SKIP(t);
8548             if (*d + (t - *s) > e)
8549                 Perl_croak(aTHX_ "%s", ident_too_long);
8550             Copy(*s, *d, t - *s, char);
8551             *d += t - *s;
8552             *s = t;
8553         }
8554         else if ( isWORDCHAR_A(**s) ) {
8555             do {
8556                 *(*d)++ = *(*s)++;
8557             } while (isWORDCHAR_A(**s) && *d < e);
8558         }
8559         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8560             *(*d)++ = ':';
8561             *(*d)++ = ':';
8562             (*s)++;
8563         }
8564         else if (allow_package && **s == ':' && (*s)[1] == ':'
8565            /* Disallow things like Foo::$bar. For the curious, this is
8566             * the code path that triggers the "Bad name after" warning
8567             * when looking for barewords.
8568             */
8569            && (*s)[2] != '$') {
8570             *(*d)++ = *(*s)++;
8571             *(*d)++ = *(*s)++;
8572         }
8573         else
8574             break;
8575     }
8576     return;
8577 }
8578
8579 /* Returns a NUL terminated string, with the length of the string written to
8580    *slp
8581    */
8582 STATIC char *
8583 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8584 {
8585     char *d = dest;
8586     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8587     bool is_utf8 = cBOOL(UTF);
8588
8589     PERL_ARGS_ASSERT_SCAN_WORD;
8590
8591     parse_ident(&s, &d, e, allow_package, is_utf8);
8592     *d = '\0';
8593     *slp = d - dest;
8594     return s;
8595 }
8596
8597 STATIC char *
8598 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8599 {
8600     I32 herelines = PL_parser->herelines;
8601     SSize_t bracket = -1;
8602     char funny = *s++;
8603     char *d = dest;
8604     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8605     bool is_utf8 = cBOOL(UTF);
8606     I32 orig_copline = 0, tmp_copline = 0;
8607
8608     PERL_ARGS_ASSERT_SCAN_IDENT;
8609
8610     if (isSPACE(*s))
8611         s = skipspace(s);
8612     if (isDIGIT(*s)) {
8613         while (isDIGIT(*s)) {
8614             if (d >= e)
8615                 Perl_croak(aTHX_ "%s", ident_too_long);
8616             *d++ = *s++;
8617         }
8618     }
8619     else {
8620         parse_ident(&s, &d, e, 1, is_utf8);
8621     }
8622     *d = '\0';
8623     d = dest;
8624     if (*d) {
8625         /* Either a digit variable, or parse_ident() found an identifier
8626            (anything valid as a bareword), so job done and return.  */
8627         if (PL_lex_state != LEX_NORMAL)
8628             PL_lex_state = LEX_INTERPENDMAYBE;
8629         return s;
8630     }
8631     if (*s == '$' && s[1] &&
8632       (isIDFIRST_lazy_if(s+1,is_utf8)
8633          || isDIGIT_A((U8)s[1])
8634          || s[1] == '$'
8635          || s[1] == '{'
8636          || strnEQ(s+1,"::",2)) )
8637     {
8638         /* Dereferencing a value in a scalar variable.
8639            The alternatives are different syntaxes for a scalar variable.
8640            Using ' as a leading package separator isn't allowed. :: is.   */
8641         return s;
8642     }
8643     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8644     if (*s == '{') {
8645         bracket = s - SvPVX(PL_linestr);
8646         s++;
8647         orig_copline = CopLINE(PL_curcop);
8648         if (s < PL_bufend && isSPACE(*s)) {
8649             s = skipspace(s);
8650         }
8651     }
8652
8653 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8654  * iff Unicode semantics are to be used.  The legal ones are any of:
8655  *  a) all ASCII characters except:
8656  *          1) space-type ones, like \t and SPACE;
8657             2) NUL;
8658  *          3) '{'
8659  *     The final case currently doesn't get this far in the program, so we
8660  *     don't test for it.  If that were to change, it would be ok to allow it.
8661  *  c) When not under Unicode rules, any upper Latin1 character
8662  *  d) Otherwise, when unicode rules are used, all XIDS characters.
8663  *
8664  *      Because all ASCII characters have the same representation whether
8665  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8666  *      '{' without knowing if is UTF-8 or not.
8667  * EBCDIC already uses the rules that ASCII platforms will use after the
8668  * deprecation cycle; see comment below about the deprecation. */
8669 #ifdef EBCDIC
8670 #   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
8671     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8672                          ? isIDFIRST_utf8((U8*) (s))                          \
8673                          : (isGRAPH_L1(*s)                                    \
8674                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8675 #else
8676 #   define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
8677                                             && LIKELY(*(s) != '\0')           \
8678                                             && (! is_utf8                     \
8679                                                 || isASCII_utf8((U8*) (s))    \
8680                                                 || isIDFIRST_utf8((U8*) (s))))
8681 #endif
8682     if ((s <= PL_bufend - (is_utf8)
8683                           ? UTF8SKIP(s)
8684                           : 1)
8685         && VALID_LEN_ONE_IDENT(s, is_utf8))
8686     {
8687         /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
8688          * because often it has no graphic representation.  (We can't get to
8689          * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8690          * test for it.) */
8691         if ((is_utf8)
8692             ? ! isGRAPH_utf8( (U8*) s)
8693             : (! isGRAPH_L1( (U8) *s)
8694                || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
8695         {
8696             /* Split messages for back compat */
8697             if (isCNTRL_A( (U8) *s)) {
8698                 deprecate("literal control characters in variable names");
8699             }
8700             else {
8701                 deprecate("literal non-graphic characters in variable names");
8702             }
8703         }
8704
8705         if (is_utf8) {
8706             const STRLEN skip = UTF8SKIP(s);
8707             STRLEN i;
8708             d[skip] = '\0';
8709             for ( i = 0; i < skip; i++ )
8710                 d[i] = *s++;
8711         }
8712         else {
8713             *d = *s++;
8714             d[1] = '\0';
8715         }
8716     }
8717     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8718     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8719         *d = toCTRL(*s);
8720         s++;
8721     }
8722     /* Warn about ambiguous code after unary operators if {...} notation isn't
8723        used.  There's no difference in ambiguity; it's merely a heuristic
8724        about when not to warn.  */
8725     else if (ck_uni && bracket == -1)
8726         check_uni();
8727     if (bracket != -1) {
8728         /* If we were processing {...} notation then...  */
8729         if (isIDFIRST_lazy_if(d,is_utf8)) {
8730             /* if it starts as a valid identifier, assume that it is one.
8731                (the later check for } being at the expected point will trap
8732                cases where this doesn't pan out.)  */
8733         d += is_utf8 ? UTF8SKIP(d) : 1;
8734         parse_ident(&s, &d, e, 1, is_utf8);
8735             *d = '\0';
8736             tmp_copline = CopLINE(PL_curcop);
8737             if (s < PL_bufend && isSPACE(*s)) {
8738                 s = skipspace(s);
8739             }
8740             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8741                 /* ${foo[0]} and ${foo{bar}} notation.  */
8742                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8743                     const char * const brack =
8744                         (const char *)
8745                         ((*s == '[') ? "[...]" : "{...}");
8746                     orig_copline = CopLINE(PL_curcop);
8747                     CopLINE_set(PL_curcop, tmp_copline);
8748    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8749                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8750                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8751                         funny, dest, brack, funny, dest, brack);
8752                     CopLINE_set(PL_curcop, orig_copline);
8753                 }
8754                 bracket++;
8755                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8756                 PL_lex_allbrackets++;
8757                 return s;
8758             }
8759         }
8760         /* Handle extended ${^Foo} variables
8761          * 1999-02-27 mjd-perl-patch@plover.com */
8762         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8763                  && isWORDCHAR(*s))
8764         {
8765             d++;
8766             while (isWORDCHAR(*s) && d < e) {
8767                 *d++ = *s++;
8768             }
8769             if (d >= e)
8770                 Perl_croak(aTHX_ "%s", ident_too_long);
8771             *d = '\0';
8772         }
8773
8774         if ( !tmp_copline )
8775             tmp_copline = CopLINE(PL_curcop);
8776         if (s < PL_bufend && isSPACE(*s)) {
8777             s = skipspace(s);
8778         }
8779
8780         /* Expect to find a closing } after consuming any trailing whitespace.
8781          */
8782         if (*s == '}') {
8783             s++;
8784             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8785                 PL_lex_state = LEX_INTERPEND;
8786                 PL_expect = XREF;
8787             }
8788             if (PL_lex_state == LEX_NORMAL) {
8789                 if (ckWARN(WARN_AMBIGUOUS) &&
8790                     (keyword(dest, d - dest, 0)
8791                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8792                 {
8793                     SV *tmp = newSVpvn_flags( dest, d - dest,
8794                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8795                     if (funny == '#')
8796                         funny = '@';
8797                     orig_copline = CopLINE(PL_curcop);
8798                     CopLINE_set(PL_curcop, tmp_copline);
8799                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8800                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8801                         funny, SVfARG(tmp), funny, SVfARG(tmp));
8802                     CopLINE_set(PL_curcop, orig_copline);
8803                 }
8804             }
8805         }
8806         else {
8807             /* Didn't find the closing } at the point we expected, so restore
8808                state such that the next thing to process is the opening { and */
8809             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8810             CopLINE_set(PL_curcop, orig_copline);
8811             PL_parser->herelines = herelines;
8812             *dest = '\0';
8813         }
8814     }
8815     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8816         PL_lex_state = LEX_INTERPEND;
8817     return s;
8818 }
8819
8820 static bool
8821 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
8822
8823     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
8824      * found in the parse starting at 's', based on the subset that are valid
8825      * in this context input to this routine in 'valid_flags'. Advances s.
8826      * Returns TRUE if the input should be treated as a valid flag, so the next
8827      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
8828      * upon first call on the current regex.  This routine will set it to any
8829      * charset modifier found.  The caller shouldn't change it.  This way,
8830      * another charset modifier encountered in the parse can be detected as an
8831      * error, as we have decided to allow only one */
8832
8833     const char c = **s;
8834     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8835
8836     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8837         if (isWORDCHAR_lazy_if(*s, UTF)) {
8838             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8839                        UTF ? SVf_UTF8 : 0);
8840             (*s) += charlen;
8841             /* Pretend that it worked, so will continue processing before
8842              * dieing */
8843             return TRUE;
8844         }
8845         return FALSE;
8846     }
8847
8848     switch (c) {
8849
8850         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
8851         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8852         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8853         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8854         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8855         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8856         case LOCALE_PAT_MOD:
8857             if (*charset) {
8858                 goto multiple_charsets;
8859             }
8860             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8861             *charset = c;
8862             break;
8863         case UNICODE_PAT_MOD:
8864             if (*charset) {
8865                 goto multiple_charsets;
8866             }
8867             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8868             *charset = c;
8869             break;
8870         case ASCII_RESTRICT_PAT_MOD:
8871             if (! *charset) {
8872                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8873             }
8874             else {
8875
8876                 /* Error if previous modifier wasn't an 'a', but if it was, see
8877                  * if, and accept, a second occurrence (only) */
8878                 if (*charset != 'a'
8879                     || get_regex_charset(*pmfl)
8880                         != REGEX_ASCII_RESTRICTED_CHARSET)
8881                 {
8882                         goto multiple_charsets;
8883                 }
8884                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8885             }
8886             *charset = c;
8887             break;
8888         case DEPENDS_PAT_MOD:
8889             if (*charset) {
8890                 goto multiple_charsets;
8891             }
8892             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8893             *charset = c;
8894             break;
8895     }
8896
8897     (*s)++;
8898     return TRUE;
8899
8900     multiple_charsets:
8901         if (*charset != c) {
8902             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8903         }
8904         else if (c == 'a') {
8905   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8906             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8907         }
8908         else {
8909             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8910         }
8911
8912         /* Pretend that it worked, so will continue processing before dieing */
8913         (*s)++;
8914         return TRUE;
8915 }
8916
8917 STATIC char *
8918 S_scan_pat(pTHX_ char *start, I32 type)
8919 {
8920     PMOP *pm;
8921     char *s;
8922     const char * const valid_flags =
8923         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8924     char charset = '\0';    /* character set modifier */
8925     unsigned int x_mod_count = 0;
8926
8927     PERL_ARGS_ASSERT_SCAN_PAT;
8928
8929     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8930     if (!s)
8931         Perl_croak(aTHX_ "Search pattern not terminated");
8932
8933     pm = (PMOP*)newPMOP(type, 0);
8934     if (PL_multi_open == '?') {
8935         /* This is the only point in the code that sets PMf_ONCE:  */
8936         pm->op_pmflags |= PMf_ONCE;
8937
8938         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8939            allows us to restrict the list needed by reset to just the ??
8940            matches.  */
8941         assert(type != OP_TRANS);
8942         if (PL_curstash) {
8943             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8944             U32 elements;
8945             if (!mg) {
8946                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8947                                  0);
8948             }
8949             elements = mg->mg_len / sizeof(PMOP**);
8950             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8951             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8952             mg->mg_len = elements * sizeof(PMOP**);
8953             PmopSTASH_set(pm,PL_curstash);
8954         }
8955     }
8956
8957     /* if qr/...(?{..}).../, then need to parse the pattern within a new
8958      * anon CV. False positives like qr/[(?{]/ are harmless */
8959
8960     if (type == OP_QR) {
8961         STRLEN len;
8962         char *e, *p = SvPV(PL_lex_stuff, len);
8963         e = p + len;
8964         for (; p < e; p++) {
8965             if (p[0] == '(' && p[1] == '?'
8966                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8967             {
8968                 pm->op_pmflags |= PMf_HAS_CV;
8969                 break;
8970             }
8971         }
8972         pm->op_pmflags |= PMf_IS_QR;
8973     }
8974
8975     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
8976                                 &s, &charset, &x_mod_count))
8977     {};
8978     /* issue a warning if /c is specified,but /g is not */
8979     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8980     {
8981         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8982                        "Use of /c modifier is meaningless without /g" );
8983     }
8984
8985     STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
8986
8987     PL_lex_op = (OP*)pm;
8988     pl_yylval.ival = OP_MATCH;
8989     return s;
8990 }
8991
8992 STATIC char *
8993 S_scan_subst(pTHX_ char *start)
8994 {
8995     char *s;
8996     PMOP *pm;
8997     I32 first_start;
8998     line_t first_line;
8999     I32 es = 0;
9000     char charset = '\0';    /* character set modifier */
9001     unsigned int x_mod_count = 0;
9002     char *t;
9003
9004     PERL_ARGS_ASSERT_SCAN_SUBST;
9005
9006     pl_yylval.ival = OP_NULL;
9007
9008     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9009
9010     if (!s)
9011         Perl_croak(aTHX_ "Substitution pattern not terminated");
9012
9013     s = t;
9014
9015     first_start = PL_multi_start;
9016     first_line = CopLINE(PL_curcop);
9017     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9018     if (!s) {
9019         if (PL_lex_stuff) {
9020             SvREFCNT_dec(PL_lex_stuff);
9021             PL_lex_stuff = NULL;
9022         }
9023         Perl_croak(aTHX_ "Substitution replacement not terminated");
9024     }
9025     PL_multi_start = first_start;       /* so whole substitution is taken together */
9026
9027     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9028
9029
9030     while (*s) {
9031         if (*s == EXEC_PAT_MOD) {
9032             s++;
9033             es++;
9034         }
9035         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9036                                   &s, &charset, &x_mod_count))
9037         {
9038             break;
9039         }
9040     }
9041
9042     STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9043
9044     if ((pm->op_pmflags & PMf_CONTINUE)) {
9045         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9046     }
9047
9048     if (es) {
9049         SV * const repl = newSVpvs("");
9050
9051         PL_multi_end = 0;
9052         pm->op_pmflags |= PMf_EVAL;
9053         while (es-- > 0) {
9054             if (es)
9055                 sv_catpvs(repl, "eval ");
9056             else
9057                 sv_catpvs(repl, "do ");
9058         }
9059         sv_catpvs(repl, "{");
9060         sv_catsv(repl, PL_sublex_info.repl);
9061         sv_catpvs(repl, "}");
9062         SvEVALED_on(repl);
9063         SvREFCNT_dec(PL_sublex_info.repl);
9064         PL_sublex_info.repl = repl;
9065     }
9066     if (CopLINE(PL_curcop) != first_line) {
9067         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9068         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9069             CopLINE(PL_curcop) - first_line;
9070         CopLINE_set(PL_curcop, first_line);
9071     }
9072
9073     PL_lex_op = (OP*)pm;
9074     pl_yylval.ival = OP_SUBST;
9075     return s;
9076 }
9077
9078 STATIC char *
9079 S_scan_trans(pTHX_ char *start)
9080 {
9081     char* s;
9082     OP *o;
9083     U8 squash;
9084     U8 del;
9085     U8 complement;
9086     bool nondestruct = 0;
9087     char *t;
9088
9089     PERL_ARGS_ASSERT_SCAN_TRANS;
9090
9091     pl_yylval.ival = OP_NULL;
9092
9093     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9094     if (!s)
9095         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9096
9097     s = t;
9098
9099     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9100     if (!s) {
9101         if (PL_lex_stuff) {
9102             SvREFCNT_dec(PL_lex_stuff);
9103             PL_lex_stuff = NULL;
9104         }
9105         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9106     }
9107
9108     complement = del = squash = 0;
9109     while (1) {
9110         switch (*s) {
9111         case 'c':
9112             complement = OPpTRANS_COMPLEMENT;
9113             break;
9114         case 'd':
9115             del = OPpTRANS_DELETE;
9116             break;
9117         case 's':
9118             squash = OPpTRANS_SQUASH;
9119             break;
9120         case 'r':
9121             nondestruct = 1;
9122             break;
9123         default:
9124             goto no_more;
9125         }
9126         s++;
9127     }
9128   no_more:
9129
9130     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9131     o->op_private &= ~OPpTRANS_ALL;
9132     o->op_private |= del|squash|complement|
9133       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9134       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9135
9136     PL_lex_op = o;
9137     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9138
9139
9140     return s;
9141 }
9142
9143 /* scan_heredoc
9144    Takes a pointer to the first < in <<FOO.
9145    Returns a pointer to the byte following <<FOO.
9146
9147    This function scans a heredoc, which involves different methods
9148    depending on whether we are in a string eval, quoted construct, etc.
9149    This is because PL_linestr could containing a single line of input, or
9150    a whole string being evalled, or the contents of the current quote-
9151    like operator.
9152
9153    The two basic methods are:
9154     - Steal lines from the input stream
9155     - Scan the heredoc in PL_linestr and remove it therefrom
9156
9157    In a file scope or filtered eval, the first method is used; in a
9158    string eval, the second.
9159
9160    In a quote-like operator, we have to choose between the two,
9161    depending on where we can find a newline.  We peek into outer lex-
9162    ing scopes until we find one with a newline in it.  If we reach the
9163    outermost lexing scope and it is a file, we use the stream method.
9164    Otherwise it is treated as an eval.
9165 */
9166
9167 STATIC char *
9168 S_scan_heredoc(pTHX_ char *s)
9169 {
9170     I32 op_type = OP_SCALAR;
9171     I32 len;
9172     SV *tmpstr;
9173     char term;
9174     char *d;
9175     char *e;
9176     char *peek;
9177     const bool infile = PL_rsfp || PL_parser->filtered;
9178     const line_t origline = CopLINE(PL_curcop);
9179     LEXSHARED *shared = PL_parser->lex_shared;
9180
9181     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9182
9183     s += 2;
9184     d = PL_tokenbuf + 1;
9185     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9186     *PL_tokenbuf = '\n';
9187     peek = s;
9188     while (SPACE_OR_TAB(*peek))
9189         peek++;
9190     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9191         s = peek;
9192         term = *s++;
9193         s = delimcpy(d, e, s, PL_bufend, term, &len);
9194         if (s == PL_bufend)
9195             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9196         d += len;
9197         s++;
9198     }
9199     else {
9200         if (*s == '\\')
9201             /* <<\FOO is equivalent to <<'FOO' */
9202             s++, term = '\'';
9203         else
9204             term = '"';
9205         if (!isWORDCHAR_lazy_if(s,UTF))
9206             deprecate("bare << to mean <<\"\"");
9207         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9208             if (d < e)
9209                 *d++ = *s;
9210         }
9211     }
9212     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9213         Perl_croak(aTHX_ "Delimiter for here document is too long");
9214     *d++ = '\n';
9215     *d = '\0';
9216     len = d - PL_tokenbuf;
9217
9218 #ifndef PERL_STRICT_CR
9219     d = strchr(s, '\r');
9220     if (d) {
9221         char * const olds = s;
9222         s = d;
9223         while (s < PL_bufend) {
9224             if (*s == '\r') {
9225                 *d++ = '\n';
9226                 if (*++s == '\n')
9227                     s++;
9228             }
9229             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9230                 *d++ = *s++;
9231                 s++;
9232             }
9233             else
9234                 *d++ = *s++;
9235         }
9236         *d = '\0';
9237         PL_bufend = d;
9238         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9239         s = olds;
9240     }
9241 #endif
9242
9243     tmpstr = newSV_type(SVt_PVIV);
9244     SvGROW(tmpstr, 80);
9245     if (term == '\'') {
9246         op_type = OP_CONST;
9247         SvIV_set(tmpstr, -1);
9248     }
9249     else if (term == '`') {
9250         op_type = OP_BACKTICK;
9251         SvIV_set(tmpstr, '\\');
9252     }
9253
9254     PL_multi_start = origline + 1 + PL_parser->herelines;
9255     PL_multi_open = PL_multi_close = '<';
9256     /* inside a string eval or quote-like operator */
9257     if (!infile || PL_lex_inwhat) {
9258         SV *linestr;
9259         char *bufend;
9260         char * const olds = s;
9261         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9262         /* These two fields are not set until an inner lexing scope is
9263            entered.  But we need them set here. */
9264         shared->ls_bufptr  = s;
9265         shared->ls_linestr = PL_linestr;
9266         if (PL_lex_inwhat)
9267           /* Look for a newline.  If the current buffer does not have one,
9268              peek into the line buffer of the parent lexing scope, going
9269              up as many levels as necessary to find one with a newline
9270              after bufptr.
9271            */
9272           while (!(s = (char *)memchr(
9273                     (void *)shared->ls_bufptr, '\n',
9274                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9275                 ))) {
9276             shared = shared->ls_prev;
9277             /* shared is only null if we have gone beyond the outermost
9278                lexing scope.  In a file, we will have broken out of the
9279                loop in the previous iteration.  In an eval, the string buf-
9280                fer ends with "\n;", so the while condition above will have
9281                evaluated to false.  So shared can never be null. */
9282             assert(shared);
9283             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9284                most lexing scope.  In a file, shared->ls_linestr at that
9285                level is just one line, so there is no body to steal. */
9286             if (infile && !shared->ls_prev) {
9287                 s = olds;
9288                 goto streaming;
9289             }
9290           }
9291         else {  /* eval */
9292             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9293             assert(s);
9294         }
9295         linestr = shared->ls_linestr;
9296         bufend = SvEND(linestr);
9297         d = s;
9298         while (s < bufend - len + 1 &&
9299           memNE(s,PL_tokenbuf,len) ) {
9300             if (*s++ == '\n')
9301                 ++PL_parser->herelines;
9302         }
9303         if (s >= bufend - len + 1) {
9304             goto interminable;
9305         }
9306         sv_setpvn(tmpstr,d+1,s-d);
9307         s += len - 1;
9308         /* the preceding stmt passes a newline */
9309         PL_parser->herelines++;
9310
9311         /* s now points to the newline after the heredoc terminator.
9312            d points to the newline before the body of the heredoc.
9313          */
9314
9315         /* We are going to modify linestr in place here, so set
9316            aside copies of the string if necessary for re-evals or
9317            (caller $n)[6]. */
9318         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9319            check shared->re_eval_str. */
9320         if (shared->re_eval_start || shared->re_eval_str) {
9321             /* Set aside the rest of the regexp */
9322             if (!shared->re_eval_str)
9323                 shared->re_eval_str =
9324                        newSVpvn(shared->re_eval_start,
9325                                 bufend - shared->re_eval_start);
9326             shared->re_eval_start -= s-d;
9327         }
9328         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9329             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9330             cx->blk_eval.cur_text == linestr)
9331         {
9332             cx->blk_eval.cur_text = newSVsv(linestr);
9333             SvSCREAM_on(cx->blk_eval.cur_text);
9334         }
9335         /* Copy everything from s onwards back to d. */
9336         Move(s,d,bufend-s + 1,char);
9337         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9338         /* Setting PL_bufend only applies when we have not dug deeper
9339            into other scopes, because sublex_done sets PL_bufend to
9340            SvEND(PL_linestr). */
9341         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9342         s = olds;
9343     }
9344     else
9345     {
9346       SV *linestr_save;
9347      streaming:
9348       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9349       term = PL_tokenbuf[1];
9350       len--;
9351       linestr_save = PL_linestr; /* must restore this afterwards */
9352       d = s;                     /* and this */
9353       PL_linestr = newSVpvs("");
9354       PL_bufend = SvPVX(PL_linestr);
9355       while (1) {
9356         PL_bufptr = PL_bufend;
9357         CopLINE_set(PL_curcop,
9358                     origline + 1 + PL_parser->herelines);
9359         if (!lex_next_chunk(LEX_NO_TERM)
9360          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9361             /* Simply freeing linestr_save might seem simpler here, as it
9362                does not matter what PL_linestr points to, since we are
9363                about to croak; but in a quote-like op, linestr_save
9364                will have been prospectively freed already, via
9365                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9366                restore PL_linestr. */
9367             SvREFCNT_dec_NN(PL_linestr);
9368             PL_linestr = linestr_save;
9369             goto interminable;
9370         }
9371         CopLINE_set(PL_curcop, origline);
9372         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9373             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9374             /* ^That should be enough to avoid this needing to grow:  */
9375             sv_catpvs(PL_linestr, "\n\0");
9376             assert(s == SvPVX(PL_linestr));
9377             PL_bufend = SvEND(PL_linestr);
9378         }
9379         s = PL_bufptr;
9380         PL_parser->herelines++;
9381         PL_last_lop = PL_last_uni = NULL;
9382 #ifndef PERL_STRICT_CR
9383         if (PL_bufend - PL_linestart >= 2) {
9384             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9385                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9386             {
9387                 PL_bufend[-2] = '\n';
9388                 PL_bufend--;
9389                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9390             }
9391             else if (PL_bufend[-1] == '\r')
9392                 PL_bufend[-1] = '\n';
9393         }
9394         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9395             PL_bufend[-1] = '\n';
9396 #endif
9397         if (*s == term && PL_bufend-s >= len
9398          && memEQ(s,PL_tokenbuf + 1,len)) {
9399             SvREFCNT_dec(PL_linestr);
9400             PL_linestr = linestr_save;
9401             PL_linestart = SvPVX(linestr_save);
9402             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9403             s = d;
9404             break;
9405         }
9406         else {
9407             sv_catsv(tmpstr,PL_linestr);
9408         }
9409       }
9410     }
9411     PL_multi_end = origline + PL_parser->herelines;
9412     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9413         SvPV_shrink_to_cur(tmpstr);
9414     }
9415     if (!IN_BYTES) {
9416         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9417             SvUTF8_on(tmpstr);
9418         else if (IN_ENCODING)
9419             sv_recode_to_utf8(tmpstr, _get_encoding());
9420     }
9421     PL_lex_stuff = tmpstr;
9422     pl_yylval.ival = op_type;
9423     return s;
9424
9425   interminable:
9426     SvREFCNT_dec(tmpstr);
9427     CopLINE_set(PL_curcop, origline);
9428     missingterm(PL_tokenbuf + 1);
9429 }
9430
9431 /* scan_inputsymbol
9432    takes: current position in input buffer
9433    returns: new position in input buffer
9434    side-effects: pl_yylval and lex_op are set.
9435
9436    This code handles:
9437
9438    <>           read from ARGV
9439    <<>>         read from ARGV without magic open
9440    <FH>         read from filehandle
9441    <pkg::FH>    read from package qualified filehandle
9442    <pkg'FH>     read from package qualified filehandle
9443    <$fh>        read from filehandle in $fh
9444    <*.h>        filename glob
9445
9446 */
9447
9448 STATIC char *
9449 S_scan_inputsymbol(pTHX_ char *start)
9450 {
9451     char *s = start;            /* current position in buffer */
9452     char *end;
9453     I32 len;
9454     bool nomagicopen = FALSE;
9455     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9456     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9457
9458     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9459
9460     end = strchr(s, '\n');
9461     if (!end)
9462         end = PL_bufend;
9463     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9464         nomagicopen = TRUE;
9465         *d = '\0';
9466         len = 0;
9467         s += 3;
9468     }
9469     else
9470         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
9471
9472     /* die if we didn't have space for the contents of the <>,
9473        or if it didn't end, or if we see a newline
9474     */
9475
9476     if (len >= (I32)sizeof PL_tokenbuf)
9477         Perl_croak(aTHX_ "Excessively long <> operator");
9478     if (s >= end)
9479         Perl_croak(aTHX_ "Unterminated <> operator");
9480
9481     s++;
9482
9483     /* check for <$fh>
9484        Remember, only scalar variables are interpreted as filehandles by
9485        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9486        treated as a glob() call.
9487        This code makes use of the fact that except for the $ at the front,
9488        a scalar variable and a filehandle look the same.
9489     */
9490     if (*d == '$' && d[1]) d++;
9491
9492     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9493     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9494         d += UTF ? UTF8SKIP(d) : 1;
9495
9496     /* If we've tried to read what we allow filehandles to look like, and
9497        there's still text left, then it must be a glob() and not a getline.
9498        Use scan_str to pull out the stuff between the <> and treat it
9499        as nothing more than a string.
9500     */
9501
9502     if (d - PL_tokenbuf != len) {
9503         pl_yylval.ival = OP_GLOB;
9504         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9505         if (!s)
9506            Perl_croak(aTHX_ "Glob not terminated");
9507         return s;
9508     }
9509     else {
9510         bool readline_overriden = FALSE;
9511         GV *gv_readline;
9512         /* we're in a filehandle read situation */
9513         d = PL_tokenbuf;
9514
9515         /* turn <> into <ARGV> */
9516         if (!len)
9517             Copy("ARGV",d,5,char);
9518
9519         /* Check whether readline() is overriden */
9520         if ((gv_readline = gv_override("readline",8)))
9521             readline_overriden = TRUE;
9522
9523         /* if <$fh>, create the ops to turn the variable into a
9524            filehandle
9525         */
9526         if (*d == '$') {
9527             /* try to find it in the pad for this block, otherwise find
9528                add symbol table ops
9529             */
9530             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9531             if (tmp != NOT_IN_PAD) {
9532                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9533                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9534                     HEK * const stashname = HvNAME_HEK(stash);
9535                     SV * const sym = sv_2mortal(newSVhek(stashname));
9536                     sv_catpvs(sym, "::");
9537                     sv_catpv(sym, d+1);
9538                     d = SvPVX(sym);
9539                     goto intro_sym;
9540                 }
9541                 else {
9542                     OP * const o = newOP(OP_PADSV, 0);
9543                     o->op_targ = tmp;
9544                     PL_lex_op = readline_overriden
9545                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9546                                 op_append_elem(OP_LIST, o,
9547                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9548                         : (OP*)newUNOP(OP_READLINE, 0, o);
9549                 }
9550             }
9551             else {
9552                 GV *gv;
9553                 ++d;
9554               intro_sym:
9555                 gv = gv_fetchpv(d,
9556                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9557                                 SVt_PV);
9558                 PL_lex_op = readline_overriden
9559                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9560                             op_append_elem(OP_LIST,
9561                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9562                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9563                     : (OP*)newUNOP(OP_READLINE, 0,
9564                             newUNOP(OP_RV2SV, 0,
9565                                 newGVOP(OP_GV, 0, gv)));
9566             }
9567             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9568             pl_yylval.ival = OP_NULL;
9569         }
9570
9571         /* If it's none of the above, it must be a literal filehandle
9572            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9573         else {
9574             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9575             PL_lex_op = readline_overriden
9576                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9577                         op_append_elem(OP_LIST,
9578                             newGVOP(OP_GV, 0, gv),
9579                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9580                 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9581             pl_yylval.ival = OP_NULL;
9582         }
9583     }
9584
9585     return s;
9586 }
9587
9588
9589 /* scan_str
9590    takes:
9591         start                   position in buffer
9592         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9593                                 only if they are of the open/close form
9594         keep_delims             preserve the delimiters around the string
9595         re_reparse              compiling a run-time /(?{})/:
9596                                    collapse // to /,  and skip encoding src
9597         delimp                  if non-null, this is set to the position of
9598                                 the closing delimiter, or just after it if
9599                                 the closing and opening delimiters differ
9600                                 (i.e., the opening delimiter of a substitu-
9601                                 tion replacement)
9602    returns: position to continue reading from buffer
9603    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9604         updates the read buffer.
9605
9606    This subroutine pulls a string out of the input.  It is called for:
9607         q               single quotes           q(literal text)
9608         '               single quotes           'literal text'
9609         qq              double quotes           qq(interpolate $here please)
9610         "               double quotes           "interpolate $here please"
9611         qx              backticks               qx(/bin/ls -l)
9612         `               backticks               `/bin/ls -l`
9613         qw              quote words             @EXPORT_OK = qw( func() $spam )
9614         m//             regexp match            m/this/
9615         s///            regexp substitute       s/this/that/
9616         tr///           string transliterate    tr/this/that/
9617         y///            string transliterate    y/this/that/
9618         ($*@)           sub prototypes          sub foo ($)
9619         (stuff)         sub attr parameters     sub foo : attr(stuff)
9620         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9621
9622    In most of these cases (all but <>, patterns and transliterate)
9623    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9624    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9625    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9626    calls scan_str().
9627
9628    It skips whitespace before the string starts, and treats the first
9629    character as the delimiter.  If the delimiter is one of ([{< then
9630    the corresponding "close" character )]}> is used as the closing
9631    delimiter.  It allows quoting of delimiters, and if the string has
9632    balanced delimiters ([{<>}]) it allows nesting.
9633
9634    On success, the SV with the resulting string is put into lex_stuff or,
9635    if that is already non-NULL, into lex_repl. The second case occurs only
9636    when parsing the RHS of the special constructs s/// and tr/// (y///).
9637    For convenience, the terminating delimiter character is stuffed into
9638    SvIVX of the SV.
9639 */
9640
9641 STATIC char *
9642 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9643                  char **delimp
9644     )
9645 {
9646     SV *sv;                     /* scalar value: string */
9647     const char *tmps;           /* temp string, used for delimiter matching */
9648     char *s = start;            /* current position in the buffer */
9649     char term;                  /* terminating character */
9650     char *to;                   /* current position in the sv's data */
9651     I32 brackets = 1;           /* bracket nesting level */
9652     bool has_utf8 = FALSE;      /* is there any utf8 content? */
9653     I32 termcode;               /* terminating char. code */
9654     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
9655     STRLEN termlen;             /* length of terminating string */
9656     int last_off = 0;           /* last position for nesting bracket */
9657     line_t herelines;
9658
9659     PERL_ARGS_ASSERT_SCAN_STR;
9660
9661     /* skip space before the delimiter */
9662     if (isSPACE(*s)) {
9663         s = skipspace(s);
9664     }
9665
9666     /* mark where we are, in case we need to report errors */
9667     CLINE;
9668
9669     /* after skipping whitespace, the next character is the terminator */
9670     term = *s;
9671     if (!UTF) {
9672         termcode = termstr[0] = term;
9673         termlen = 1;
9674     }
9675     else {
9676         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9677         Copy(s, termstr, termlen, U8);
9678         if (!UTF8_IS_INVARIANT(term))
9679             has_utf8 = TRUE;
9680     }
9681
9682     /* mark where we are */
9683     PL_multi_start = CopLINE(PL_curcop);
9684     PL_multi_open = term;
9685     herelines = PL_parser->herelines;
9686
9687     /* find corresponding closing delimiter */
9688     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9689         termcode = termstr[0] = term = tmps[5];
9690
9691     PL_multi_close = term;
9692
9693     if (PL_multi_open == PL_multi_close) {
9694         keep_bracketed_quoted = FALSE;
9695     }
9696
9697     /* create a new SV to hold the contents.  79 is the SV's initial length.
9698        What a random number. */
9699     sv = newSV_type(SVt_PVIV);
9700     SvGROW(sv, 80);
9701     SvIV_set(sv, termcode);
9702     (void)SvPOK_only(sv);               /* validate pointer */
9703
9704     /* move past delimiter and try to read a complete string */
9705     if (keep_delims)
9706         sv_catpvn(sv, s, termlen);
9707     s += termlen;
9708     for (;;) {
9709         if (IN_ENCODING && !UTF && !re_reparse) {
9710             bool cont = TRUE;
9711
9712             while (cont) {
9713                 int offset = s - SvPVX_const(PL_linestr);
9714                 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
9715                                            &offset, (char*)termstr, termlen);
9716                 const char *ns;
9717                 char *svlast;
9718
9719                 if (SvIsCOW(PL_linestr)) {
9720                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9721                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9722                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9723                     char *buf = SvPVX(PL_linestr);
9724                     bufend_pos = PL_parser->bufend - buf;
9725                     bufptr_pos = PL_parser->bufptr - buf;
9726                     oldbufptr_pos = PL_parser->oldbufptr - buf;
9727                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9728                     linestart_pos = PL_parser->linestart - buf;
9729                     last_uni_pos = PL_parser->last_uni
9730                         ? PL_parser->last_uni - buf
9731                         : 0;
9732                     last_lop_pos = PL_parser->last_lop
9733                         ? PL_parser->last_lop - buf
9734                         : 0;
9735                     re_eval_start_pos =
9736                         PL_parser->lex_shared->re_eval_start ?
9737                             PL_parser->lex_shared->re_eval_start - buf : 0;
9738                     s_pos = s - buf;
9739
9740                     sv_force_normal(PL_linestr);
9741
9742                     buf = SvPVX(PL_linestr);
9743                     PL_parser->bufend = buf + bufend_pos;
9744                     PL_parser->bufptr = buf + bufptr_pos;
9745                     PL_parser->oldbufptr = buf + oldbufptr_pos;
9746                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9747                     PL_parser->linestart = buf + linestart_pos;
9748                     if (PL_parser->last_uni)
9749                         PL_parser->last_uni = buf + last_uni_pos;
9750                     if (PL_parser->last_lop)
9751                         PL_parser->last_lop = buf + last_lop_pos;
9752                     if (PL_parser->lex_shared->re_eval_start)
9753                         PL_parser->lex_shared->re_eval_start  =
9754                             buf + re_eval_start_pos;
9755                     s = buf + s_pos;
9756                 }
9757                 ns = SvPVX_const(PL_linestr) + offset;
9758                 svlast = SvEND(sv) - 1;
9759
9760                 for (; s < ns; s++) {
9761                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9762                         COPLINE_INC_WITH_HERELINES;
9763                 }
9764                 if (!found)
9765                     goto read_more_line;
9766                 else {
9767                     /* handle quoted delimiters */
9768                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9769                         const char *t;
9770                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9771                             t--;
9772                         if ((svlast-1 - t) % 2) {
9773                             if (!keep_bracketed_quoted) {
9774                                 *(svlast-1) = term;
9775                                 *svlast = '\0';
9776                                 SvCUR_set(sv, SvCUR(sv) - 1);
9777                             }
9778                             continue;
9779                         }
9780                     }
9781                     if (PL_multi_open == PL_multi_close) {
9782                         cont = FALSE;
9783                     }
9784                     else {
9785                         const char *t;
9786                         char *w;
9787                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9788                             /* At here, all closes are "was quoted" one,
9789                                so we don't check PL_multi_close. */
9790                             if (*t == '\\') {
9791                                 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9792                                     t++;
9793                                 else
9794                                     *w++ = *t++;
9795                             }
9796                             else if (*t == PL_multi_open)
9797                                 brackets++;
9798
9799                             *w = *t;
9800                         }
9801                         if (w < t) {
9802                             *w++ = term;
9803                             *w = '\0';
9804                             SvCUR_set(sv, w - SvPVX_const(sv));
9805                         }
9806                         last_off = w - SvPVX(sv);
9807                         if (--brackets <= 0)
9808                             cont = FALSE;
9809                     }
9810                 }
9811             }
9812             if (!keep_delims) {
9813                 SvCUR_set(sv, SvCUR(sv) - 1);
9814                 *SvEND(sv) = '\0';
9815             }
9816             break;
9817         }
9818
9819         /* extend sv if need be */
9820         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9821         /* set 'to' to the next character in the sv's string */
9822         to = SvPVX(sv)+SvCUR(sv);
9823
9824         /* if open delimiter is the close delimiter read unbridle */
9825         if (PL_multi_open == PL_multi_close) {
9826             for (; s < PL_bufend; s++,to++) {
9827                 /* embedded newlines increment the current line number */
9828                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9829                     COPLINE_INC_WITH_HERELINES;
9830                 /* handle quoted delimiters */
9831                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9832                     if (!keep_bracketed_quoted
9833                         && (s[1] == term
9834                             || (re_reparse && s[1] == '\\'))
9835                     )
9836                         s++;
9837                     else /* any other quotes are simply copied straight through */
9838                         *to++ = *s++;
9839                 }
9840                 /* terminate when run out of buffer (the for() condition), or
9841                    have found the terminator */
9842                 else if (*s == term) {
9843                     if (termlen == 1)
9844                         break;
9845                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9846                         break;
9847                 }
9848                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9849                     has_utf8 = TRUE;
9850                 *to = *s;
9851             }
9852         }
9853
9854         /* if the terminator isn't the same as the start character (e.g.,
9855            matched brackets), we have to allow more in the quoting, and
9856            be prepared for nested brackets.
9857         */
9858         else {
9859             /* read until we run out of string, or we find the terminator */
9860             for (; s < PL_bufend; s++,to++) {
9861                 /* embedded newlines increment the line count */
9862                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9863                     COPLINE_INC_WITH_HERELINES;
9864                 /* backslashes can escape the open or closing characters */
9865                 if (*s == '\\' && s+1 < PL_bufend) {
9866                     if (!keep_bracketed_quoted &&
9867                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9868                     {
9869                         s++;
9870                     }
9871                     else
9872                         *to++ = *s++;
9873                 }
9874                 /* allow nested opens and closes */
9875                 else if (*s == PL_multi_close && --brackets <= 0)
9876                     break;
9877                 else if (*s == PL_multi_open)
9878                     brackets++;
9879                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9880                     has_utf8 = TRUE;
9881                 *to = *s;
9882             }
9883         }
9884         /* terminate the copied string and update the sv's end-of-string */
9885         *to = '\0';
9886         SvCUR_set(sv, to - SvPVX_const(sv));
9887
9888         /*
9889          * this next chunk reads more into the buffer if we're not done yet
9890          */
9891
9892         if (s < PL_bufend)
9893             break;              /* handle case where we are done yet :-) */
9894
9895 #ifndef PERL_STRICT_CR
9896         if (to - SvPVX_const(sv) >= 2) {
9897             if ((to[-2] == '\r' && to[-1] == '\n') ||
9898                 (to[-2] == '\n' && to[-1] == '\r'))
9899             {
9900                 to[-2] = '\n';
9901                 to--;
9902                 SvCUR_set(sv, to - SvPVX_const(sv));
9903             }
9904             else if (to[-1] == '\r')
9905                 to[-1] = '\n';
9906         }
9907         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9908             to[-1] = '\n';
9909 #endif
9910
9911      read_more_line:
9912         /* if we're out of file, or a read fails, bail and reset the current
9913            line marker so we can report where the unterminated string began
9914         */
9915         COPLINE_INC_WITH_HERELINES;
9916         PL_bufptr = PL_bufend;
9917         if (!lex_next_chunk(0)) {
9918             sv_free(sv);
9919             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9920             return NULL;
9921         }
9922         s = PL_bufptr;
9923     }
9924
9925     /* at this point, we have successfully read the delimited string */
9926
9927     if (!IN_ENCODING || UTF || re_reparse) {
9928
9929         if (keep_delims)
9930             sv_catpvn(sv, s, termlen);
9931         s += termlen;
9932     }
9933     if (has_utf8 || (IN_ENCODING && !re_reparse))
9934         SvUTF8_on(sv);
9935
9936     PL_multi_end = CopLINE(PL_curcop);
9937     CopLINE_set(PL_curcop, PL_multi_start);
9938     PL_parser->herelines = herelines;
9939
9940     /* if we allocated too much space, give some back */
9941     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9942         SvLEN_set(sv, SvCUR(sv) + 1);
9943         SvPV_renew(sv, SvLEN(sv));
9944     }
9945
9946     /* decide whether this is the first or second quoted string we've read
9947        for this op
9948     */
9949
9950     if (PL_lex_stuff)
9951         PL_sublex_info.repl = sv;
9952     else
9953         PL_lex_stuff = sv;
9954     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9955     return s;
9956 }
9957
9958 /*
9959   scan_num
9960   takes: pointer to position in buffer
9961   returns: pointer to new position in buffer
9962   side-effects: builds ops for the constant in pl_yylval.op
9963
9964   Read a number in any of the formats that Perl accepts:
9965
9966   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9967   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9968   0b[01](_?[01])*                                       binary integers
9969   0[0-7](_?[0-7])*                                      octal integers
9970   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
9971   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
9972
9973   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9974   thing it reads.
9975
9976   If it reads a number without a decimal point or an exponent, it will
9977   try converting the number to an integer and see if it can do so
9978   without loss of precision.
9979 */
9980
9981 char *
9982 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9983 {
9984     const char *s = start;      /* current position in buffer */
9985     char *d;                    /* destination in temp buffer */
9986     char *e;                    /* end of temp buffer */
9987     NV nv;                              /* number read, as a double */
9988     SV *sv = NULL;                      /* place to put the converted number */
9989     bool floatit;                       /* boolean: int or float? */
9990     const char *lastub = NULL;          /* position of last underbar */
9991     static const char* const number_too_long = "Number too long";
9992     /* Hexadecimal floating point.
9993      *
9994      * In many places (where we have quads and NV is IEEE 754 double)
9995      * we can fit the mantissa bits of a NV into an unsigned quad.
9996      * (Note that UVs might not be quads even when we have quads.)
9997      * This will not work everywhere, though (either no quads, or
9998      * using long doubles), in which case we have to resort to NV,
9999      * which will probably mean horrible loss of precision due to
10000      * multiple fp operations. */
10001     bool hexfp = FALSE;
10002     int total_bits = 0;
10003 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10004 #  define HEXFP_UQUAD
10005     Uquad_t hexfp_uquad = 0;
10006     int hexfp_frac_bits = 0;
10007 #else
10008 #  define HEXFP_NV
10009     NV hexfp_nv = 0.0;
10010 #endif
10011     NV hexfp_mult = 1.0;
10012     UV high_non_zero = 0; /* highest digit */
10013
10014     PERL_ARGS_ASSERT_SCAN_NUM;
10015
10016     /* We use the first character to decide what type of number this is */
10017
10018     switch (*s) {
10019     default:
10020         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10021
10022     /* if it starts with a 0, it could be an octal number, a decimal in
10023        0.13 disguise, or a hexadecimal number, or a binary number. */
10024     case '0':
10025         {
10026           /* variables:
10027              u          holds the "number so far"
10028              shift      the power of 2 of the base
10029                         (hex == 4, octal == 3, binary == 1)
10030              overflowed was the number more than we can hold?
10031
10032              Shift is used when we add a digit.  It also serves as an "are
10033              we in octal/hex/binary?" indicator to disallow hex characters
10034              when in octal mode.
10035            */
10036             NV n = 0.0;
10037             UV u = 0;
10038             I32 shift;
10039             bool overflowed = FALSE;
10040             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10041             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10042             static const char* const bases[5] =
10043               { "", "binary", "", "octal", "hexadecimal" };
10044             static const char* const Bases[5] =
10045               { "", "Binary", "", "Octal", "Hexadecimal" };
10046             static const char* const maxima[5] =
10047               { "",
10048                 "0b11111111111111111111111111111111",
10049                 "",
10050                 "037777777777",
10051                 "0xffffffff" };
10052             const char *base, *Base, *max;
10053
10054             /* check for hex */
10055             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10056                 shift = 4;
10057                 s += 2;
10058                 just_zero = FALSE;
10059             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10060                 shift = 1;
10061                 s += 2;
10062                 just_zero = FALSE;
10063             }
10064             /* check for a decimal in disguise */
10065             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10066                 goto decimal;
10067             /* so it must be octal */
10068             else {
10069                 shift = 3;
10070                 s++;
10071             }
10072
10073             if (*s == '_') {
10074                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10075                                "Misplaced _ in number");
10076                lastub = s++;
10077             }
10078
10079             base = bases[shift];
10080             Base = Bases[shift];
10081             max  = maxima[shift];
10082
10083             /* read the rest of the number */
10084             for (;;) {
10085                 /* x is used in the overflow test,
10086                    b is the digit we're adding on. */
10087                 UV x, b;
10088
10089                 switch (*s) {
10090
10091                 /* if we don't mention it, we're done */
10092                 default:
10093                     goto out;
10094
10095                 /* _ are ignored -- but warned about if consecutive */
10096                 case '_':
10097                     if (lastub && s == lastub + 1)
10098                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10099                                        "Misplaced _ in number");
10100                     lastub = s++;
10101                     break;
10102
10103                 /* 8 and 9 are not octal */
10104                 case '8': case '9':
10105                     if (shift == 3)
10106                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10107                     /* FALLTHROUGH */
10108
10109                 /* octal digits */
10110                 case '2': case '3': case '4':
10111                 case '5': case '6': case '7':
10112                     if (shift == 1)
10113                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10114                     /* FALLTHROUGH */
10115
10116                 case '0': case '1':
10117                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10118                     goto digit;
10119
10120                 /* hex digits */
10121                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10122                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10123                     /* make sure they said 0x */
10124                     if (shift != 4)
10125                         goto out;
10126                     b = (*s++ & 7) + 9;
10127
10128                     /* Prepare to put the digit we have onto the end
10129                        of the number so far.  We check for overflows.
10130                     */
10131
10132                   digit:
10133                     just_zero = FALSE;
10134                     if (!overflowed) {
10135                         x = u << shift; /* make room for the digit */
10136
10137                         total_bits += shift;
10138
10139                         if ((x >> shift) != u
10140                             && !(PL_hints & HINT_NEW_BINARY)) {
10141                             overflowed = TRUE;
10142                             n = (NV) u;
10143                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10144                                              "Integer overflow in %s number",
10145                                              base);
10146                         } else
10147                             u = x | b;          /* add the digit to the end */
10148                     }
10149                     if (overflowed) {
10150                         n *= nvshift[shift];
10151                         /* If an NV has not enough bits in its
10152                          * mantissa to represent an UV this summing of
10153                          * small low-order numbers is a waste of time
10154                          * (because the NV cannot preserve the
10155                          * low-order bits anyway): we could just
10156                          * remember when did we overflow and in the
10157                          * end just multiply n by the right
10158                          * amount. */
10159                         n += (NV) b;
10160                     }
10161
10162                     if (high_non_zero == 0 && b > 0)
10163                         high_non_zero = b;
10164
10165                     /* this could be hexfp, but peek ahead
10166                      * to avoid matching ".." */
10167                     if (UNLIKELY(HEXFP_PEEK(s))) {
10168                         goto out;
10169                     }
10170
10171                     break;
10172                 }
10173             }
10174
10175           /* if we get here, we had success: make a scalar value from
10176              the number.
10177           */
10178           out:
10179
10180             /* final misplaced underbar check */
10181             if (s[-1] == '_') {
10182                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10183             }
10184
10185             if (UNLIKELY(HEXFP_PEEK(s))) {
10186                 /* Do sloppy (on the underbars) but quick detection
10187                  * (and value construction) for hexfp, the decimal
10188                  * detection will shortly be more thorough with the
10189                  * underbar checks. */
10190                 const char* h = s;
10191 #ifdef HEXFP_UQUAD
10192                 hexfp_uquad = u;
10193 #else /* HEXFP_NV */
10194                 hexfp_nv = u;
10195 #endif
10196                 if (*h == '.') {
10197 #ifdef HEXFP_NV
10198                     NV mult = 1 / 16.0;
10199 #endif
10200                     h++;
10201                     while (isXDIGIT(*h) || *h == '_') {
10202                         if (isXDIGIT(*h)) {
10203                             U8 b = XDIGIT_VALUE(*h);
10204                             total_bits += shift;
10205 #ifdef HEXFP_UQUAD
10206                             hexfp_uquad <<= shift;
10207                             hexfp_uquad |= b;
10208                             hexfp_frac_bits += shift;
10209 #else /* HEXFP_NV */
10210                             hexfp_nv += b * mult;
10211                             mult /= 16.0;
10212 #endif
10213                         }
10214                         h++;
10215                     }
10216                 }
10217
10218                 if (total_bits >= 4) {
10219                     if (high_non_zero < 0x8)
10220                         total_bits--;
10221                     if (high_non_zero < 0x4)
10222                         total_bits--;
10223                     if (high_non_zero < 0x2)
10224                         total_bits--;
10225                 }
10226
10227                 if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
10228                     bool negexp = FALSE;
10229                     h++;
10230                     if (*h == '+')
10231                         h++;
10232                     else if (*h == '-') {
10233                         negexp = TRUE;
10234                         h++;
10235                     }
10236                     if (isDIGIT(*h)) {
10237                         I32 hexfp_exp = 0;
10238                         while (isDIGIT(*h) || *h == '_') {
10239                             if (isDIGIT(*h)) {
10240                                 hexfp_exp *= 10;
10241                                 hexfp_exp += *h - '0';
10242 #ifdef NV_MIN_EXP
10243                                 if (negexp &&
10244                                     -hexfp_exp < NV_MIN_EXP - 1) {
10245                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10246                                                    "Hexadecimal float: exponent underflow");
10247 #endif
10248                                     break;
10249                                 }
10250                                 else {
10251 #ifdef NV_MAX_EXP
10252                                     if (!negexp &&
10253                                         hexfp_exp > NV_MAX_EXP - 1) {
10254                                         Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10255                                                    "Hexadecimal float: exponent overflow");
10256                                         break;
10257                                     }
10258 #endif
10259                                 }
10260                             }
10261                             h++;
10262                         }
10263                         if (negexp)
10264                             hexfp_exp = -hexfp_exp;
10265 #ifdef HEXFP_UQUAD
10266                         hexfp_exp -= hexfp_frac_bits;
10267 #endif
10268                         hexfp_mult = pow(2.0, hexfp_exp);
10269                         hexfp = TRUE;
10270                         goto decimal;
10271                     }
10272                 }
10273             }
10274
10275             if (overflowed) {
10276                 if (n > 4294967295.0)
10277                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10278                                    "%s number > %s non-portable",
10279                                    Base, max);
10280                 sv = newSVnv(n);
10281             }
10282             else {
10283 #if UVSIZE > 4
10284                 if (u > 0xffffffff)
10285                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10286                                    "%s number > %s non-portable",
10287                                    Base, max);
10288 #endif
10289                 sv = newSVuv(u);
10290             }
10291             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10292                 sv = new_constant(start, s - start, "integer",
10293                                   sv, NULL, NULL, 0);
10294             else if (PL_hints & HINT_NEW_BINARY)
10295                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10296         }
10297         break;
10298
10299     /*
10300       handle decimal numbers.
10301       we're also sent here when we read a 0 as the first digit
10302     */
10303     case '1': case '2': case '3': case '4': case '5':
10304     case '6': case '7': case '8': case '9': case '.':
10305       decimal:
10306         d = PL_tokenbuf;
10307         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10308         floatit = FALSE;
10309         if (hexfp) {
10310             floatit = TRUE;
10311             *d++ = '0';
10312             *d++ = 'x';
10313             s = start + 2;
10314         }
10315
10316         /* read next group of digits and _ and copy into d */
10317         while (isDIGIT(*s) || *s == '_' ||
10318                UNLIKELY(hexfp && isXDIGIT(*s))) {
10319             /* skip underscores, checking for misplaced ones
10320                if -w is on
10321             */
10322             if (*s == '_') {
10323                 if (lastub && s == lastub + 1)
10324                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10325                                    "Misplaced _ in number");
10326                 lastub = s++;
10327             }
10328             else {
10329                 /* check for end of fixed-length buffer */
10330                 if (d >= e)
10331                     Perl_croak(aTHX_ "%s", number_too_long);
10332                 /* if we're ok, copy the character */
10333                 *d++ = *s++;
10334             }
10335         }
10336
10337         /* final misplaced underbar check */
10338         if (lastub && s == lastub + 1) {
10339             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10340         }
10341
10342         /* read a decimal portion if there is one.  avoid
10343            3..5 being interpreted as the number 3. followed
10344            by .5
10345         */
10346         if (*s == '.' && s[1] != '.') {
10347             floatit = TRUE;
10348             *d++ = *s++;
10349
10350             if (*s == '_') {
10351                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10352                                "Misplaced _ in number");
10353                 lastub = s;
10354             }
10355
10356             /* copy, ignoring underbars, until we run out of digits.
10357             */
10358             for (; isDIGIT(*s) || *s == '_' ||
10359                      UNLIKELY(hexfp && isXDIGIT(*s));
10360                  s++) {
10361                 /* fixed length buffer check */
10362                 if (d >= e)
10363                     Perl_croak(aTHX_ "%s", number_too_long);
10364                 if (*s == '_') {
10365                    if (lastub && s == lastub + 1)
10366                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10367                                       "Misplaced _ in number");
10368                    lastub = s;
10369                 }
10370                 else
10371                     *d++ = *s;
10372             }
10373             /* fractional part ending in underbar? */
10374             if (s[-1] == '_') {
10375                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10376                                "Misplaced _ in number");
10377             }
10378             if (*s == '.' && isDIGIT(s[1])) {
10379                 /* oops, it's really a v-string, but without the "v" */
10380                 s = start;
10381                 goto vstring;
10382             }
10383         }
10384
10385         /* read exponent part, if present */
10386         if ((isALPHA_FOLD_EQ(*s, 'e')
10387               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10388             && strchr("+-0123456789_", s[1]))
10389         {
10390             floatit = TRUE;
10391
10392             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10393                ditto for p (hexfloats) */
10394             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10395                 /* At least some Mach atof()s don't grok 'E' */
10396                 *d++ = 'e';
10397             }
10398             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10399                 *d++ = 'p';
10400             }
10401
10402             s++;
10403
10404
10405             /* stray preinitial _ */
10406             if (*s == '_') {
10407                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10408                                "Misplaced _ in number");
10409                 lastub = s++;
10410             }
10411
10412             /* allow positive or negative exponent */
10413             if (*s == '+' || *s == '-')
10414                 *d++ = *s++;
10415
10416             /* stray initial _ */
10417             if (*s == '_') {
10418                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10419                                "Misplaced _ in number");
10420                 lastub = s++;
10421             }
10422
10423             /* read digits of exponent */
10424             while (isDIGIT(*s) || *s == '_') {
10425                 if (isDIGIT(*s)) {
10426                     if (d >= e)
10427                         Perl_croak(aTHX_ "%s", number_too_long);
10428                     *d++ = *s++;
10429                 }
10430                 else {
10431                    if (((lastub && s == lastub + 1) ||
10432                         (!isDIGIT(s[1]) && s[1] != '_')))
10433                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10434                                       "Misplaced _ in number");
10435                    lastub = s++;
10436                 }
10437             }
10438         }
10439
10440
10441         /*
10442            We try to do an integer conversion first if no characters
10443            indicating "float" have been found.
10444          */
10445
10446         if (!floatit) {
10447             UV uv;
10448             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10449
10450             if (flags == IS_NUMBER_IN_UV) {
10451               if (uv <= IV_MAX)
10452                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10453               else
10454                 sv = newSVuv(uv);
10455             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10456               if (uv <= (UV) IV_MIN)
10457                 sv = newSViv(-(IV)uv);
10458               else
10459                 floatit = TRUE;
10460             } else
10461               floatit = TRUE;
10462         }
10463         if (floatit) {
10464             STORE_NUMERIC_LOCAL_SET_STANDARD();
10465             /* terminate the string */
10466             *d = '\0';
10467             if (UNLIKELY(hexfp)) {
10468 #  ifdef NV_MANT_DIG
10469                 if (total_bits > NV_MANT_DIG)
10470                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10471                                    "Hexadecimal float: mantissa overflow");
10472 #  endif
10473 #ifdef HEXFP_UQUAD
10474                 nv = hexfp_uquad * hexfp_mult;
10475 #else /* HEXFP_NV */
10476                 nv = hexfp_nv * hexfp_mult;
10477 #endif
10478             } else {
10479                 nv = Atof(PL_tokenbuf);
10480             }
10481             RESTORE_NUMERIC_LOCAL();
10482             sv = newSVnv(nv);
10483         }
10484
10485         if ( floatit
10486              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10487             const char *const key = floatit ? "float" : "integer";
10488             const STRLEN keylen = floatit ? 5 : 7;
10489             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10490                                 key, keylen, sv, NULL, NULL, 0);
10491         }
10492         break;
10493
10494     /* if it starts with a v, it could be a v-string */
10495     case 'v':
10496     vstring:
10497                 sv = newSV(5); /* preallocate storage space */
10498                 ENTER_with_name("scan_vstring");
10499                 SAVEFREESV(sv);
10500                 s = scan_vstring(s, PL_bufend, sv);
10501                 SvREFCNT_inc_simple_void_NN(sv);
10502                 LEAVE_with_name("scan_vstring");
10503         break;
10504     }
10505
10506     /* make the op for the constant and return */
10507
10508     if (sv)
10509         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10510     else
10511         lvalp->opval = NULL;
10512
10513     return (char *)s;
10514 }
10515
10516 STATIC char *
10517 S_scan_formline(pTHX_ char *s)
10518 {
10519     char *eol;
10520     char *t;
10521     SV * const stuff = newSVpvs("");
10522     bool needargs = FALSE;
10523     bool eofmt = FALSE;
10524
10525     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10526
10527     while (!needargs) {
10528         if (*s == '.') {
10529             t = s+1;
10530 #ifdef PERL_STRICT_CR
10531             while (SPACE_OR_TAB(*t))
10532                 t++;
10533 #else
10534             while (SPACE_OR_TAB(*t) || *t == '\r')
10535                 t++;
10536 #endif
10537             if (*t == '\n' || t == PL_bufend) {
10538                 eofmt = TRUE;
10539                 break;
10540             }
10541         }
10542         eol = (char *) memchr(s,'\n',PL_bufend-s);
10543         if (!eol++)
10544                 eol = PL_bufend;
10545         if (*s != '#') {
10546             for (t = s; t < eol; t++) {
10547                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10548                     needargs = FALSE;
10549                     goto enough;        /* ~~ must be first line in formline */
10550                 }
10551                 if (*t == '@' || *t == '^')
10552                     needargs = TRUE;
10553             }
10554             if (eol > s) {
10555                 sv_catpvn(stuff, s, eol-s);
10556 #ifndef PERL_STRICT_CR
10557                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10558                     char *end = SvPVX(stuff) + SvCUR(stuff);
10559                     end[-2] = '\n';
10560                     end[-1] = '\0';
10561                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10562                 }
10563 #endif
10564             }
10565             else
10566               break;
10567         }
10568         s = (char*)eol;
10569         if ((PL_rsfp || PL_parser->filtered)
10570          && PL_parser->form_lex_state == LEX_NORMAL) {
10571             bool got_some;
10572             PL_bufptr = PL_bufend;
10573             COPLINE_INC_WITH_HERELINES;
10574             got_some = lex_next_chunk(0);
10575             CopLINE_dec(PL_curcop);
10576             s = PL_bufptr;
10577             if (!got_some)
10578                 break;
10579         }
10580         incline(s);
10581     }
10582   enough:
10583     if (!SvCUR(stuff) || needargs)
10584         PL_lex_state = PL_parser->form_lex_state;
10585     if (SvCUR(stuff)) {
10586         PL_expect = XSTATE;
10587         if (needargs) {
10588             const char *s2 = s;
10589             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10590                 || *s2 == '\v')
10591                 s2++;
10592             if (*s2 == '{') {
10593                 PL_expect = XTERMBLOCK;
10594                 NEXTVAL_NEXTTOKE.ival = 0;
10595                 force_next(DO);
10596             }
10597             NEXTVAL_NEXTTOKE.ival = 0;
10598             force_next(FORMLBRACK);
10599         }
10600         if (!IN_BYTES) {
10601             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10602                 SvUTF8_on(stuff);
10603             else if (IN_ENCODING)
10604                 sv_recode_to_utf8(stuff, _get_encoding());
10605         }
10606         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10607         force_next(THING);
10608     }
10609     else {
10610         SvREFCNT_dec(stuff);
10611         if (eofmt)
10612             PL_lex_formbrack = 0;
10613     }
10614     return s;
10615 }
10616
10617 I32
10618 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10619 {
10620     const I32 oldsavestack_ix = PL_savestack_ix;
10621     CV* const outsidecv = PL_compcv;
10622
10623     SAVEI32(PL_subline);
10624     save_item(PL_subname);
10625     SAVESPTR(PL_compcv);
10626
10627     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10628     CvFLAGS(PL_compcv) |= flags;
10629
10630     PL_subline = CopLINE(PL_curcop);
10631     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10632     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10633     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10634     if (outsidecv && CvPADLIST(outsidecv))
10635         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10636
10637     return oldsavestack_ix;
10638 }
10639
10640 static int
10641 S_yywarn(pTHX_ const char *const s, U32 flags)
10642 {
10643     PERL_ARGS_ASSERT_YYWARN;
10644
10645     PL_in_eval |= EVAL_WARNONLY;
10646     yyerror_pv(s, flags);
10647     return 0;
10648 }
10649
10650 int
10651 Perl_yyerror(pTHX_ const char *const s)
10652 {
10653     PERL_ARGS_ASSERT_YYERROR;
10654     return yyerror_pvn(s, strlen(s), 0);
10655 }
10656
10657 int
10658 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10659 {
10660     PERL_ARGS_ASSERT_YYERROR_PV;
10661     return yyerror_pvn(s, strlen(s), flags);
10662 }
10663
10664 int
10665 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10666 {
10667     const char *context = NULL;
10668     int contlen = -1;
10669     SV *msg;
10670     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10671     int yychar  = PL_parser->yychar;
10672
10673     PERL_ARGS_ASSERT_YYERROR_PVN;
10674
10675     if (!yychar || (yychar == ';' && !PL_rsfp))
10676         sv_catpvs(where_sv, "at EOF");
10677     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10678       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10679       PL_oldbufptr != PL_bufptr) {
10680         /*
10681                 Only for NetWare:
10682                 The code below is removed for NetWare because it abends/crashes on NetWare
10683                 when the script has error such as not having the closing quotes like:
10684                     if ($var eq "value)
10685                 Checking of white spaces is anyway done in NetWare code.
10686         */
10687 #ifndef NETWARE
10688         while (isSPACE(*PL_oldoldbufptr))
10689             PL_oldoldbufptr++;
10690 #endif
10691         context = PL_oldoldbufptr;
10692         contlen = PL_bufptr - PL_oldoldbufptr;
10693     }
10694     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10695       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10696         /*
10697                 Only for NetWare:
10698                 The code below is removed for NetWare because it abends/crashes on NetWare
10699                 when the script has error such as not having the closing quotes like:
10700                     if ($var eq "value)
10701                 Checking of white spaces is anyway done in NetWare code.
10702         */
10703 #ifndef NETWARE
10704         while (isSPACE(*PL_oldbufptr))
10705             PL_oldbufptr++;
10706 #endif
10707         context = PL_oldbufptr;
10708         contlen = PL_bufptr - PL_oldbufptr;
10709     }
10710     else if (yychar > 255)
10711         sv_catpvs(where_sv, "next token ???");
10712     else if (yychar == YYEMPTY) {
10713         if (PL_lex_state == LEX_NORMAL ||
10714            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10715             sv_catpvs(where_sv, "at end of line");
10716         else if (PL_lex_inpat)
10717             sv_catpvs(where_sv, "within pattern");
10718         else
10719             sv_catpvs(where_sv, "within string");
10720     }
10721     else {
10722         sv_catpvs(where_sv, "next char ");
10723         if (yychar < 32)
10724             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10725         else if (isPRINT_LC(yychar)) {
10726             const char string = yychar;
10727             sv_catpvn(where_sv, &string, 1);
10728         }
10729         else
10730             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10731     }
10732     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10733     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10734         OutCopFILE(PL_curcop),
10735         (IV)(PL_parser->preambling == NOLINE
10736                ? CopLINE(PL_curcop)
10737                : PL_parser->preambling));
10738     if (context)
10739         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10740                              UTF8fARG(UTF, contlen, context));
10741     else
10742         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10743     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10744         Perl_sv_catpvf(aTHX_ msg,
10745         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10746                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10747         PL_multi_end = 0;
10748     }
10749     if (PL_in_eval & EVAL_WARNONLY) {
10750         PL_in_eval &= ~EVAL_WARNONLY;
10751         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10752     }
10753     else
10754         qerror(msg);
10755     if (PL_error_count >= 10) {
10756         SV * errsv;
10757         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10758             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10759                        SVfARG(errsv), OutCopFILE(PL_curcop));
10760         else
10761             Perl_croak(aTHX_ "%s has too many errors.\n",
10762             OutCopFILE(PL_curcop));
10763     }
10764     PL_in_my = 0;
10765     PL_in_my_stash = NULL;
10766     return 0;
10767 }
10768
10769 STATIC char*
10770 S_swallow_bom(pTHX_ U8 *s)
10771 {
10772     const STRLEN slen = SvCUR(PL_linestr);
10773
10774     PERL_ARGS_ASSERT_SWALLOW_BOM;
10775
10776     switch (s[0]) {
10777     case 0xFF:
10778         if (s[1] == 0xFE) {
10779             /* UTF-16 little-endian? (or UTF-32LE?) */
10780             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10781                 /* diag_listed_as: Unsupported script encoding %s */
10782                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10783 #ifndef PERL_NO_UTF16_FILTER
10784             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10785             s += 2;
10786             if (PL_bufend > (char*)s) {
10787                 s = add_utf16_textfilter(s, TRUE);
10788             }
10789 #else
10790             /* diag_listed_as: Unsupported script encoding %s */
10791             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10792 #endif
10793         }
10794         break;
10795     case 0xFE:
10796         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10797 #ifndef PERL_NO_UTF16_FILTER
10798             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10799             s += 2;
10800             if (PL_bufend > (char *)s) {
10801                 s = add_utf16_textfilter(s, FALSE);
10802             }
10803 #else
10804             /* diag_listed_as: Unsupported script encoding %s */
10805             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10806 #endif
10807         }
10808         break;
10809     case BOM_UTF8_FIRST_BYTE: {
10810         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10811         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10812             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10813             s += len + 1;                      /* UTF-8 */
10814         }
10815         break;
10816     }
10817     case 0:
10818         if (slen > 3) {
10819              if (s[1] == 0) {
10820                   if (s[2] == 0xFE && s[3] == 0xFF) {
10821                        /* UTF-32 big-endian */
10822                        /* diag_listed_as: Unsupported script encoding %s */
10823                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10824                   }
10825              }
10826              else if (s[2] == 0 && s[3] != 0) {
10827                   /* Leading bytes
10828                    * 00 xx 00 xx
10829                    * are a good indicator of UTF-16BE. */
10830 #ifndef PERL_NO_UTF16_FILTER
10831                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10832                   s = add_utf16_textfilter(s, FALSE);
10833 #else
10834                   /* diag_listed_as: Unsupported script encoding %s */
10835                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10836 #endif
10837              }
10838         }
10839         break;
10840
10841     default:
10842          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10843                   /* Leading bytes
10844                    * xx 00 xx 00
10845                    * are a good indicator of UTF-16LE. */
10846 #ifndef PERL_NO_UTF16_FILTER
10847               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10848               s = add_utf16_textfilter(s, TRUE);
10849 #else
10850               /* diag_listed_as: Unsupported script encoding %s */
10851               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10852 #endif
10853          }
10854     }
10855     return (char*)s;
10856 }
10857
10858
10859 #ifndef PERL_NO_UTF16_FILTER
10860 static I32
10861 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10862 {
10863     SV *const filter = FILTER_DATA(idx);
10864     /* We re-use this each time round, throwing the contents away before we
10865        return.  */
10866     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10867     SV *const utf8_buffer = filter;
10868     IV status = IoPAGE(filter);
10869     const bool reverse = cBOOL(IoLINES(filter));
10870     I32 retval;
10871
10872     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10873
10874     /* As we're automatically added, at the lowest level, and hence only called
10875        from this file, we can be sure that we're not called in block mode. Hence
10876        don't bother writing code to deal with block mode.  */
10877     if (maxlen) {
10878         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10879     }
10880     if (status < 0) {
10881         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10882     }
10883     DEBUG_P(PerlIO_printf(Perl_debug_log,
10884                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10885                           FPTR2DPTR(void *, S_utf16_textfilter),
10886                           reverse ? 'l' : 'b', idx, maxlen, status,
10887                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10888
10889     while (1) {
10890         STRLEN chars;
10891         STRLEN have;
10892         I32 newlen;
10893         U8 *end;
10894         /* First, look in our buffer of existing UTF-8 data:  */
10895         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10896
10897         if (nl) {
10898             ++nl;
10899         } else if (status == 0) {
10900             /* EOF */
10901             IoPAGE(filter) = 0;
10902             nl = SvEND(utf8_buffer);
10903         }
10904         if (nl) {
10905             STRLEN got = nl - SvPVX(utf8_buffer);
10906             /* Did we have anything to append?  */
10907             retval = got != 0;
10908             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10909             /* Everything else in this code works just fine if SVp_POK isn't
10910                set.  This, however, needs it, and we need it to work, else
10911                we loop infinitely because the buffer is never consumed.  */
10912             sv_chop(utf8_buffer, nl);
10913             break;
10914         }
10915
10916         /* OK, not a complete line there, so need to read some more UTF-16.
10917            Read an extra octect if the buffer currently has an odd number. */
10918         while (1) {
10919             if (status <= 0)
10920                 break;
10921             if (SvCUR(utf16_buffer) >= 2) {
10922                 /* Location of the high octet of the last complete code point.
10923                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10924                    *coupled* with all the benefits of partial reads and
10925                    endianness.  */
10926                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10927                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10928
10929                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10930                     break;
10931                 }
10932
10933                 /* We have the first half of a surrogate. Read more.  */
10934                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10935             }
10936
10937             status = FILTER_READ(idx + 1, utf16_buffer,
10938                                  160 + (SvCUR(utf16_buffer) & 1));
10939             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10940             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10941             if (status < 0) {
10942                 /* Error */
10943                 IoPAGE(filter) = status;
10944                 return status;
10945             }
10946         }
10947
10948         chars = SvCUR(utf16_buffer) >> 1;
10949         have = SvCUR(utf8_buffer);
10950         SvGROW(utf8_buffer, have + chars * 3 + 1);
10951
10952         if (reverse) {
10953             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10954                                          (U8*)SvPVX_const(utf8_buffer) + have,
10955                                          chars * 2, &newlen);
10956         } else {
10957             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10958                                 (U8*)SvPVX_const(utf8_buffer) + have,
10959                                 chars * 2, &newlen);
10960         }
10961         SvCUR_set(utf8_buffer, have + newlen);
10962         *end = '\0';
10963
10964         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10965            it's private to us, and utf16_to_utf8{,reversed} take a
10966            (pointer,length) pair, rather than a NUL-terminated string.  */
10967         if(SvCUR(utf16_buffer) & 1) {
10968             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10969             SvCUR_set(utf16_buffer, 1);
10970         } else {
10971             SvCUR_set(utf16_buffer, 0);
10972         }
10973     }
10974     DEBUG_P(PerlIO_printf(Perl_debug_log,
10975                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10976                           status,
10977                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10978     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10979     return retval;
10980 }
10981
10982 static U8 *
10983 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10984 {
10985     SV *filter = filter_add(S_utf16_textfilter, NULL);
10986
10987     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10988
10989     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10990     sv_setpvs(filter, "");
10991     IoLINES(filter) = reversed;
10992     IoPAGE(filter) = 1; /* Not EOF */
10993
10994     /* Sadly, we have to return a valid pointer, come what may, so we have to
10995        ignore any error return from this.  */
10996     SvCUR_set(PL_linestr, 0);
10997     if (FILTER_READ(0, PL_linestr, 0)) {
10998         SvUTF8_on(PL_linestr);
10999     } else {
11000         SvUTF8_on(PL_linestr);
11001     }
11002     PL_bufend = SvEND(PL_linestr);
11003     return (U8*)SvPVX(PL_linestr);
11004 }
11005 #endif
11006
11007 /*
11008 Returns a pointer to the next character after the parsed
11009 vstring, as well as updating the passed in sv.
11010
11011 Function must be called like
11012
11013         sv = sv_2mortal(newSV(5));
11014         s = scan_vstring(s,e,sv);
11015
11016 where s and e are the start and end of the string.
11017 The sv should already be large enough to store the vstring
11018 passed in, for performance reasons.
11019
11020 This function may croak if fatal warnings are enabled in the
11021 calling scope, hence the sv_2mortal in the example (to prevent
11022 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11023 sv_2mortal.
11024
11025 */
11026
11027 char *
11028 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11029 {
11030     const char *pos = s;
11031     const char *start = s;
11032
11033     PERL_ARGS_ASSERT_SCAN_VSTRING;
11034
11035     if (*pos == 'v') pos++;  /* get past 'v' */
11036     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11037         pos++;
11038     if ( *pos != '.') {
11039         /* this may not be a v-string if followed by => */
11040         const char *next = pos;
11041         while (next < e && isSPACE(*next))
11042             ++next;
11043         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11044             /* return string not v-string */
11045             sv_setpvn(sv,(char *)s,pos-s);
11046             return (char *)pos;
11047         }
11048     }
11049
11050     if (!isALPHA(*pos)) {
11051         U8 tmpbuf[UTF8_MAXBYTES+1];
11052
11053         if (*s == 'v')
11054             s++;  /* get past 'v' */
11055
11056         sv_setpvs(sv, "");
11057
11058         for (;;) {
11059             /* this is atoi() that tolerates underscores */
11060             U8 *tmpend;
11061             UV rev = 0;
11062             const char *end = pos;
11063             UV mult = 1;
11064             while (--end >= s) {
11065                 if (*end != '_') {
11066                     const UV orev = rev;
11067                     rev += (*end - '0') * mult;
11068                     mult *= 10;
11069                     if (orev > rev)
11070                         /* diag_listed_as: Integer overflow in %s number */
11071                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11072                                          "Integer overflow in decimal number");
11073                 }
11074             }
11075 #ifdef EBCDIC
11076             if (rev > 0x7FFFFFFF)
11077                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11078 #endif
11079             /* Append native character for the rev point */
11080             tmpend = uvchr_to_utf8(tmpbuf, rev);
11081             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11082             if (!UVCHR_IS_INVARIANT(rev))
11083                  SvUTF8_on(sv);
11084             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11085                  s = ++pos;
11086             else {
11087                  s = pos;
11088                  break;
11089             }
11090             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11091                  pos++;
11092         }
11093         SvPOK_on(sv);
11094         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11095         SvRMAGICAL_on(sv);
11096     }
11097     return (char *)s;
11098 }
11099
11100 int
11101 Perl_keyword_plugin_standard(pTHX_
11102         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11103 {
11104     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11105     PERL_UNUSED_CONTEXT;
11106     PERL_UNUSED_ARG(keyword_ptr);
11107     PERL_UNUSED_ARG(keyword_len);
11108     PERL_UNUSED_ARG(op_ptr);
11109     return KEYWORD_PLUGIN_DECLINE;
11110 }
11111
11112 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11113 static void
11114 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11115 {
11116     SAVEI32(PL_lex_brackets);
11117     if (PL_lex_brackets > 100)
11118         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11119     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11120     SAVEI32(PL_lex_allbrackets);
11121     PL_lex_allbrackets = 0;
11122     SAVEI8(PL_lex_fakeeof);
11123     PL_lex_fakeeof = (U8)fakeeof;
11124     if(yyparse(gramtype) && !PL_parser->error_count)
11125         qerror(Perl_mess(aTHX_ "Parse error"));
11126 }
11127
11128 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11129 static OP *
11130 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11131 {
11132     OP *o;
11133     ENTER;
11134     SAVEVPTR(PL_eval_root);
11135     PL_eval_root = NULL;
11136     parse_recdescent(gramtype, fakeeof);
11137     o = PL_eval_root;
11138     LEAVE;
11139     return o;
11140 }
11141
11142 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11143 static OP *
11144 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11145 {
11146     OP *exprop;
11147     if (flags & ~PARSE_OPTIONAL)
11148         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11149     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11150     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11151         if (!PL_parser->error_count)
11152             qerror(Perl_mess(aTHX_ "Parse error"));
11153         exprop = newOP(OP_NULL, 0);
11154     }
11155     return exprop;
11156 }
11157
11158 /*
11159 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11160
11161 Parse a Perl arithmetic expression.  This may contain operators of precedence
11162 down to the bit shift operators.  The expression must be followed (and thus
11163 terminated) either by a comparison or lower-precedence operator or by
11164 something that would normally terminate an expression such as semicolon.
11165 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11166 otherwise it is mandatory.  It is up to the caller to ensure that the
11167 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11168 the source of the code to be parsed and the lexical context for the
11169 expression.
11170
11171 The op tree representing the expression is returned.  If an optional
11172 expression is absent, a null pointer is returned, otherwise the pointer
11173 will be non-null.
11174
11175 If an error occurs in parsing or compilation, in most cases a valid op
11176 tree is returned anyway.  The error is reflected in the parser state,
11177 normally resulting in a single exception at the top level of parsing
11178 which covers all the compilation errors that occurred.  Some compilation
11179 errors, however, will throw an exception immediately.
11180
11181 =cut
11182 */
11183
11184 OP *
11185 Perl_parse_arithexpr(pTHX_ U32 flags)
11186 {
11187     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11188 }
11189
11190 /*
11191 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11192
11193 Parse a Perl term expression.  This may contain operators of precedence
11194 down to the assignment operators.  The expression must be followed (and thus
11195 terminated) either by a comma or lower-precedence operator or by
11196 something that would normally terminate an expression such as semicolon.
11197 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11198 otherwise it is mandatory.  It is up to the caller to ensure that the
11199 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11200 the source of the code to be parsed and the lexical context for the
11201 expression.
11202
11203 The op tree representing the expression is returned.  If an optional
11204 expression is absent, a null pointer is returned, otherwise the pointer
11205 will be non-null.
11206
11207 If an error occurs in parsing or compilation, in most cases a valid op
11208 tree is returned anyway.  The error is reflected in the parser state,
11209 normally resulting in a single exception at the top level of parsing
11210 which covers all the compilation errors that occurred.  Some compilation
11211 errors, however, will throw an exception immediately.
11212
11213 =cut
11214 */
11215
11216 OP *
11217 Perl_parse_termexpr(pTHX_ U32 flags)
11218 {
11219     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11220 }
11221
11222 /*
11223 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11224
11225 Parse a Perl list expression.  This may contain operators of precedence
11226 down to the comma operator.  The expression must be followed (and thus
11227 terminated) either by a low-precedence logic operator such as C<or> or by
11228 something that would normally terminate an expression such as semicolon.
11229 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11230 otherwise it is mandatory.  It is up to the caller to ensure that the
11231 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11232 the source of the code to be parsed and the lexical context for the
11233 expression.
11234
11235 The op tree representing the expression is returned.  If an optional
11236 expression is absent, a null pointer is returned, otherwise the pointer
11237 will be non-null.
11238
11239 If an error occurs in parsing or compilation, in most cases a valid op
11240 tree is returned anyway.  The error is reflected in the parser state,
11241 normally resulting in a single exception at the top level of parsing
11242 which covers all the compilation errors that occurred.  Some compilation
11243 errors, however, will throw an exception immediately.
11244
11245 =cut
11246 */
11247
11248 OP *
11249 Perl_parse_listexpr(pTHX_ U32 flags)
11250 {
11251     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11252 }
11253
11254 /*
11255 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11256
11257 Parse a single complete Perl expression.  This allows the full
11258 expression grammar, including the lowest-precedence operators such
11259 as C<or>.  The expression must be followed (and thus terminated) by a
11260 token that an expression would normally be terminated by: end-of-file,
11261 closing bracketing punctuation, semicolon, or one of the keywords that
11262 signals a postfix expression-statement modifier.  If I<flags> includes
11263 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11264 mandatory.  It is up to the caller to ensure that the dynamic parser
11265 state (L</PL_parser> et al) is correctly set to reflect the source of
11266 the code to be parsed and the lexical context for the expression.
11267
11268 The op tree representing the expression is returned.  If an optional
11269 expression is absent, a null pointer is returned, otherwise the pointer
11270 will be non-null.
11271
11272 If an error occurs in parsing or compilation, in most cases a valid op
11273 tree is returned anyway.  The error is reflected in the parser state,
11274 normally resulting in a single exception at the top level of parsing
11275 which covers all the compilation errors that occurred.  Some compilation
11276 errors, however, will throw an exception immediately.
11277
11278 =cut
11279 */
11280
11281 OP *
11282 Perl_parse_fullexpr(pTHX_ U32 flags)
11283 {
11284     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11285 }
11286
11287 /*
11288 =for apidoc Amx|OP *|parse_block|U32 flags
11289
11290 Parse a single complete Perl code block.  This consists of an opening
11291 brace, a sequence of statements, and a closing brace.  The block
11292 constitutes a lexical scope, so C<my> variables and various compile-time
11293 effects can be contained within it.  It is up to the caller to ensure
11294 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11295 reflect the source of the code to be parsed and the lexical context for
11296 the statement.
11297
11298 The op tree representing the code block is returned.  This is always a
11299 real op, never a null pointer.  It will normally be a C<lineseq> list,
11300 including C<nextstate> or equivalent ops.  No ops to construct any kind
11301 of runtime scope are included by virtue of it being a block.
11302
11303 If an error occurs in parsing or compilation, in most cases a valid op
11304 tree (most likely null) is returned anyway.  The error is reflected in
11305 the parser state, normally resulting in a single exception at the top
11306 level of parsing which covers all the compilation errors that occurred.
11307 Some compilation errors, however, will throw an exception immediately.
11308
11309 The I<flags> parameter is reserved for future use, and must always
11310 be zero.
11311
11312 =cut
11313 */
11314
11315 OP *
11316 Perl_parse_block(pTHX_ U32 flags)
11317 {
11318     if (flags)
11319         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11320     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11321 }
11322
11323 /*
11324 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11325
11326 Parse a single unadorned Perl statement.  This may be a normal imperative
11327 statement or a declaration that has compile-time effect.  It does not
11328 include any label or other affixture.  It is up to the caller to ensure
11329 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11330 reflect the source of the code to be parsed and the lexical context for
11331 the statement.
11332
11333 The op tree representing the statement is returned.  This may be a
11334 null pointer if the statement is null, for example if it was actually
11335 a subroutine definition (which has compile-time side effects).  If not
11336 null, it will be ops directly implementing the statement, suitable to
11337 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11338 equivalent op (except for those embedded in a scope contained entirely
11339 within the statement).
11340
11341 If an error occurs in parsing or compilation, in most cases a valid op
11342 tree (most likely null) is returned anyway.  The error is reflected in
11343 the parser state, normally resulting in a single exception at the top
11344 level of parsing which covers all the compilation errors that occurred.
11345 Some compilation errors, however, will throw an exception immediately.
11346
11347 The I<flags> parameter is reserved for future use, and must always
11348 be zero.
11349
11350 =cut
11351 */
11352
11353 OP *
11354 Perl_parse_barestmt(pTHX_ U32 flags)
11355 {
11356     if (flags)
11357         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11358     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11359 }
11360
11361 /*
11362 =for apidoc Amx|SV *|parse_label|U32 flags
11363
11364 Parse a single label, possibly optional, of the type that may prefix a
11365 Perl statement.  It is up to the caller to ensure that the dynamic parser
11366 state (L</PL_parser> et al) is correctly set to reflect the source of
11367 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11368 label is optional, otherwise it is mandatory.
11369
11370 The name of the label is returned in the form of a fresh scalar.  If an
11371 optional label is absent, a null pointer is returned.
11372
11373 If an error occurs in parsing, which can only occur if the label is
11374 mandatory, a valid label is returned anyway.  The error is reflected in
11375 the parser state, normally resulting in a single exception at the top
11376 level of parsing which covers all the compilation errors that occurred.
11377
11378 =cut
11379 */
11380
11381 SV *
11382 Perl_parse_label(pTHX_ U32 flags)
11383 {
11384     if (flags & ~PARSE_OPTIONAL)
11385         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11386     if (PL_lex_state == LEX_KNOWNEXT) {
11387         PL_parser->yychar = yylex();
11388         if (PL_parser->yychar == LABEL) {
11389             char * const lpv = pl_yylval.pval;
11390             STRLEN llen = strlen(lpv);
11391             PL_parser->yychar = YYEMPTY;
11392             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11393         } else {
11394             yyunlex();
11395             goto no_label;
11396         }
11397     } else {
11398         char *s, *t;
11399         STRLEN wlen, bufptr_pos;
11400         lex_read_space(0);
11401         t = s = PL_bufptr;
11402         if (!isIDFIRST_lazy_if(s, UTF))
11403             goto no_label;
11404         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11405         if (word_takes_any_delimeter(s, wlen))
11406             goto no_label;
11407         bufptr_pos = s - SvPVX(PL_linestr);
11408         PL_bufptr = t;
11409         lex_read_space(LEX_KEEP_PREVIOUS);
11410         t = PL_bufptr;
11411         s = SvPVX(PL_linestr) + bufptr_pos;
11412         if (t[0] == ':' && t[1] != ':') {
11413             PL_oldoldbufptr = PL_oldbufptr;
11414             PL_oldbufptr = s;
11415             PL_bufptr = t+1;
11416             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11417         } else {
11418             PL_bufptr = s;
11419             no_label:
11420             if (flags & PARSE_OPTIONAL) {
11421                 return NULL;
11422             } else {
11423                 qerror(Perl_mess(aTHX_ "Parse error"));
11424                 return newSVpvs("x");
11425             }
11426         }
11427     }
11428 }
11429
11430 /*
11431 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11432
11433 Parse a single complete Perl statement.  This may be a normal imperative
11434 statement or a declaration that has compile-time effect, and may include
11435 optional labels.  It is up to the caller to ensure that the dynamic
11436 parser state (L</PL_parser> et al) is correctly set to reflect the source
11437 of the code to be parsed and the lexical context for the statement.
11438
11439 The op tree representing the statement is returned.  This may be a
11440 null pointer if the statement is null, for example if it was actually
11441 a subroutine definition (which has compile-time side effects).  If not
11442 null, it will be the result of a L</newSTATEOP> call, normally including
11443 a C<nextstate> or equivalent op.
11444
11445 If an error occurs in parsing or compilation, in most cases a valid op
11446 tree (most likely null) is returned anyway.  The error is reflected in
11447 the parser state, normally resulting in a single exception at the top
11448 level of parsing which covers all the compilation errors that occurred.
11449 Some compilation errors, however, will throw an exception immediately.
11450
11451 The I<flags> parameter is reserved for future use, and must always
11452 be zero.
11453
11454 =cut
11455 */
11456
11457 OP *
11458 Perl_parse_fullstmt(pTHX_ U32 flags)
11459 {
11460     if (flags)
11461         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11462     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11463 }
11464
11465 /*
11466 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11467
11468 Parse a sequence of zero or more Perl statements.  These may be normal
11469 imperative statements, including optional labels, or declarations
11470 that have compile-time effect, or any mixture thereof.  The statement
11471 sequence ends when a closing brace or end-of-file is encountered in a
11472 place where a new statement could have validly started.  It is up to
11473 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11474 is correctly set to reflect the source of the code to be parsed and the
11475 lexical context for the statements.
11476
11477 The op tree representing the statement sequence is returned.  This may
11478 be a null pointer if the statements were all null, for example if there
11479 were no statements or if there were only subroutine definitions (which
11480 have compile-time side effects).  If not null, it will be a C<lineseq>
11481 list, normally including C<nextstate> or equivalent ops.
11482
11483 If an error occurs in parsing or compilation, in most cases a valid op
11484 tree is returned anyway.  The error is reflected in the parser state,
11485 normally resulting in a single exception at the top level of parsing
11486 which covers all the compilation errors that occurred.  Some compilation
11487 errors, however, will throw an exception immediately.
11488
11489 The I<flags> parameter is reserved for future use, and must always
11490 be zero.
11491
11492 =cut
11493 */
11494
11495 OP *
11496 Perl_parse_stmtseq(pTHX_ U32 flags)
11497 {
11498     OP *stmtseqop;
11499     I32 c;
11500     if (flags)
11501         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11502     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11503     c = lex_peek_unichar(0);
11504     if (c != -1 && c != /*{*/'}')
11505         qerror(Perl_mess(aTHX_ "Parse error"));
11506     return stmtseqop;
11507 }
11508
11509 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11510 static void
11511 S_lex_token_boundary(pTHX)
11512 {
11513     PL_oldoldbufptr = PL_oldbufptr;
11514     PL_oldbufptr = PL_bufptr;
11515 }
11516
11517 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11518 static OP *
11519 S_parse_opt_lexvar(pTHX)
11520 {
11521     I32 sigil, c;
11522     char *s, *d;
11523     OP *var;
11524     lex_token_boundary();
11525     sigil = lex_read_unichar(0);
11526     if (lex_peek_unichar(0) == '#') {
11527         qerror(Perl_mess(aTHX_ "Parse error"));
11528         return NULL;
11529     }
11530     lex_read_space(0);
11531     c = lex_peek_unichar(0);
11532     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11533         return NULL;
11534     s = PL_bufptr;
11535     d = PL_tokenbuf + 1;
11536     PL_tokenbuf[0] = (char)sigil;
11537     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11538     PL_bufptr = s;
11539     if (d == PL_tokenbuf+1)
11540         return NULL;
11541     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11542                 OPf_MOD | (OPpLVAL_INTRO<<8));
11543     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11544     return var;
11545 }
11546
11547 OP *
11548 Perl_parse_subsignature(pTHX)
11549 {
11550     I32 c;
11551     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11552     OP *initops = NULL;
11553     lex_read_space(0);
11554     c = lex_peek_unichar(0);
11555     while (c != /*(*/')') {
11556         switch (c) {
11557             case '$': {
11558                 OP *var, *expr;
11559                 if (prev_type == 2)
11560                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11561                 var = parse_opt_lexvar();
11562                 expr = var ?
11563                     newBINOP(OP_AELEM, 0,
11564                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11565                             OP_RV2AV),
11566                         newSVOP(OP_CONST, 0, newSViv(pos))) :
11567                     NULL;
11568                 lex_read_space(0);
11569                 c = lex_peek_unichar(0);
11570                 if (c == '=') {
11571                     lex_token_boundary();
11572                     lex_read_unichar(0);
11573                     lex_read_space(0);
11574                     c = lex_peek_unichar(0);
11575                     if (c == ',' || c == /*(*/')') {
11576                         if (var)
11577                             qerror(Perl_mess(aTHX_ "Optional parameter "
11578                                     "lacks default expression"));
11579                     } else {
11580                         OP *defexpr = parse_termexpr(0);
11581                         if (defexpr->op_type == OP_UNDEF &&
11582                                 !(defexpr->op_flags & OPf_KIDS)) {
11583                             op_free(defexpr);
11584                         } else {
11585                             OP *ifop =
11586                                 newBINOP(OP_GE, 0,
11587                                     scalar(newUNOP(OP_RV2AV, 0,
11588                                             newGVOP(OP_GV, 0, PL_defgv))),
11589                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
11590                             expr = var ?
11591                                 newCONDOP(0, ifop, expr, defexpr) :
11592                                 newLOGOP(OP_OR, 0, ifop, defexpr);
11593                         }
11594                     }
11595                     prev_type = 1;
11596                 } else {
11597                     if (prev_type == 1)
11598                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
11599                                 "follows optional parameter"));
11600                     prev_type = 0;
11601                     min_arity = pos + 1;
11602                 }
11603                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11604                 if (expr)
11605                     initops = op_append_list(OP_LINESEQ, initops,
11606                                 newSTATEOP(0, NULL, expr));
11607                 max_arity = ++pos;
11608             } break;
11609             case '@':
11610             case '%': {
11611                 OP *var;
11612                 if (prev_type == 2)
11613                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11614                 var = parse_opt_lexvar();
11615                 if (c == '%') {
11616                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11617                             newBINOP(OP_BIT_AND, 0,
11618                                 scalar(newUNOP(OP_RV2AV, 0,
11619                                     newGVOP(OP_GV, 0, PL_defgv))),
11620                                 newSVOP(OP_CONST, 0, newSViv(1))),
11621                             op_convert_list(OP_DIE, 0,
11622                                 op_convert_list(OP_SPRINTF, 0,
11623                                     op_append_list(OP_LIST,
11624                                         newSVOP(OP_CONST, 0,
11625                                             newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11626                                         newSLICEOP(0,
11627                                             op_append_list(OP_LIST,
11628                                                 newSVOP(OP_CONST, 0, newSViv(1)),
11629                                                 newSVOP(OP_CONST, 0, newSViv(2))),
11630                                             newOP(OP_CALLER, 0))))));
11631                     if (pos != min_arity)
11632                         chkop = newLOGOP(OP_AND, 0,
11633                                     newBINOP(OP_GT, 0,
11634                                         scalar(newUNOP(OP_RV2AV, 0,
11635                                             newGVOP(OP_GV, 0, PL_defgv))),
11636                                         newSVOP(OP_CONST, 0, newSViv(pos))),
11637                                     chkop);
11638                     initops = op_append_list(OP_LINESEQ,
11639                                 newSTATEOP(0, NULL, chkop),
11640                                 initops);
11641                 }
11642                 if (var) {
11643                     OP *slice = pos ?
11644                         op_prepend_elem(OP_ASLICE,
11645                             newOP(OP_PUSHMARK, 0),
11646                             newLISTOP(OP_ASLICE, 0,
11647                                 list(newRANGE(0,
11648                                     newSVOP(OP_CONST, 0, newSViv(pos)),
11649                                     newUNOP(OP_AV2ARYLEN, 0,
11650                                         ref(newUNOP(OP_RV2AV, 0,
11651                                                 newGVOP(OP_GV, 0, PL_defgv)),
11652                                             OP_AV2ARYLEN)))),
11653                                 ref(newUNOP(OP_RV2AV, 0,
11654                                         newGVOP(OP_GV, 0, PL_defgv)),
11655                                     OP_ASLICE))) :
11656                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11657                     initops = op_append_list(OP_LINESEQ, initops,
11658                         newSTATEOP(0, NULL,
11659                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
11660                 }
11661                 prev_type = 2;
11662                 max_arity = -1;
11663             } break;
11664             default:
11665                 parse_error:
11666                 qerror(Perl_mess(aTHX_ "Parse error"));
11667                 return NULL;
11668         }
11669         lex_read_space(0);
11670         c = lex_peek_unichar(0);
11671         switch (c) {
11672             case /*(*/')': break;
11673             case ',':
11674                 do {
11675                     lex_token_boundary();
11676                     lex_read_unichar(0);
11677                     lex_read_space(0);
11678                     c = lex_peek_unichar(0);
11679                 } while (c == ',');
11680                 break;
11681             default:
11682                 goto parse_error;
11683         }
11684     }
11685     if (min_arity != 0) {
11686         initops = op_append_list(OP_LINESEQ,
11687             newSTATEOP(0, NULL,
11688                 newLOGOP(OP_OR, 0,
11689                     newBINOP(OP_GE, 0,
11690                         scalar(newUNOP(OP_RV2AV, 0,
11691                             newGVOP(OP_GV, 0, PL_defgv))),
11692                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
11693                     op_convert_list(OP_DIE, 0,
11694                         op_convert_list(OP_SPRINTF, 0,
11695                             op_append_list(OP_LIST,
11696                                 newSVOP(OP_CONST, 0,
11697                                     newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
11698                                 newSLICEOP(0,
11699                                     op_append_list(OP_LIST,
11700                                         newSVOP(OP_CONST, 0, newSViv(1)),
11701                                         newSVOP(OP_CONST, 0, newSViv(2))),
11702                                     newOP(OP_CALLER, 0))))))),
11703             initops);
11704     }
11705     if (max_arity != -1) {
11706         initops = op_append_list(OP_LINESEQ,
11707             newSTATEOP(0, NULL,
11708                 newLOGOP(OP_OR, 0,
11709                     newBINOP(OP_LE, 0,
11710                         scalar(newUNOP(OP_RV2AV, 0,
11711                             newGVOP(OP_GV, 0, PL_defgv))),
11712                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
11713                     op_convert_list(OP_DIE, 0,
11714                         op_convert_list(OP_SPRINTF, 0,
11715                             op_append_list(OP_LIST,
11716                                 newSVOP(OP_CONST, 0,
11717                                     newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
11718                                 newSLICEOP(0,
11719                                     op_append_list(OP_LIST,
11720                                         newSVOP(OP_CONST, 0, newSViv(1)),
11721                                         newSVOP(OP_CONST, 0, newSViv(2))),
11722                                     newOP(OP_CALLER, 0))))))),
11723             initops);
11724     }
11725     return initops;
11726 }
11727
11728 /*
11729  * Local variables:
11730  * c-indentation-style: bsd
11731  * c-basic-offset: 4
11732  * indent-tabs-mode: nil
11733  * End:
11734  *
11735  * ex: set ts=8 sts=4 sw=4 et:
11736  */