toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_static.c"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_defer            (PL_parser->lex_defer)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_preambled            (PL_parser->preambled)
  68 #define PL_sublex_info          (PL_parser->sublex_info)
  69 #define PL_linestr              (PL_parser->linestr)
  70 #define PL_expect               (PL_parser->expect)
  71 #define PL_copline              (PL_parser->copline)
  72 #define PL_bufptr               (PL_parser->bufptr)
  73 #define PL_oldbufptr            (PL_parser->oldbufptr)
  74 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  75 #define PL_linestart            (PL_parser->linestart)
  76 #define PL_bufend               (PL_parser->bufend)
  77 #define PL_last_uni             (PL_parser->last_uni)
  78 #define PL_last_lop             (PL_parser->last_lop)
  79 #define PL_last_lop_op          (PL_parser->last_lop_op)
  80 #define PL_lex_state            (PL_parser->lex_state)
  81 #define PL_rsfp                 (PL_parser->rsfp)
  82 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  83 #define PL_in_my                (PL_parser->in_my)
  84 #define PL_in_my_stash          (PL_parser->in_my_stash)
  85 #define PL_tokenbuf             (PL_parser->tokenbuf)
  86 #define PL_multi_end            (PL_parser->multi_end)
  87 #define PL_error_count          (PL_parser->error_count)
  88
  89 #  define PL_nexttoke           (PL_parser->nexttoke)
  90 #  define PL_nexttype           (PL_parser->nexttype)
  91 #  define PL_nextval            (PL_parser->nextval)
  92
  93 static const char* const ident_too_long = "Identifier too long";
  94
  95 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  96
  97 #define XENUMMASK  0x3f
  98 #define XFAKEEOF   0x40
  99 #define XFAKEBRACK 0x80
 100
 101 #ifdef USE_UTF8_SCRIPTS
 102 #   define UTF cBOOL(!IN_BYTES)
 103 #else
 104 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 105 #endif
 106
 107 /* The maximum number of characters preceding the unrecognized one to display */
 108 #define UNRECOGNIZED_PRECEDE_COUNT 10
 109
 110 /* In variables named $^X, these are the legal values for X.
 111  * 1999-02-27 mjd-perl-patch@plover.com */
 112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 113
 114 #define SPACE_OR_TAB(c) isBLANK_A(c)
 115
 116 #define HEXFP_PEEK(s)     \
 117     (((s[0] == '.') && \
 118       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 119      isALPHA_FOLD_EQ(s[0], 'p'))
 120
 121 /* LEX_* are values for PL_lex_state, the state of the lexer.
 122  * They are arranged oddly so that the guard on the switch statement
 123  * can get by with a single comparison (if the compiler is smart enough).
 124  *
 125  * These values refer to the various states within a sublex parse,
 126  * i.e. within a double quotish string
 127  */
 128
 129 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 130
 131 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 132 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 133 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 134 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 135 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 136
 137                                    /* at end of code, eg "$x" followed by:  */
 138 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 139 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 140
 141 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 142                                         string or after \E, $foo, etc       */
 143 #define LEX_INTERPCONST          2 /* NOT USED */
 144 #define LEX_FORMLINE             1 /* expecting a format line               */
 145 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 146
 147
 148 #ifdef DEBUGGING
 149 static const char* const lex_state_names[] = {
 150     "KNOWNEXT",
 151     "FORMLINE",
 152     "INTERPCONST",
 153     "INTERPCONCAT",
 154     "INTERPENDMAYBE",
 155     "INTERPEND",
 156     "INTERPSTART",
 157     "INTERPPUSH",
 158     "INTERPCASEMOD",
 159     "INTERPNORMAL",
 160     "NORMAL"
 161 };
 162 #endif
 163
 164 #include "keywords.h"
 165
 166 /* CLINE is a macro that ensures PL_copline has a sane value */
 167
 168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 169
 170 /*
 171  * Convenience functions to return different tokens and prime the
 172  * lexer for the next token.  They all take an argument.
 173  *
 174  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 175  * OPERATOR     : generic operator
 176  * AOPERATOR    : assignment operator
 177  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 178  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 179  * PREREF       : *EXPR where EXPR is not a simple identifier
 180  * TERM         : expression term
 181  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 182  * LOOPX        : loop exiting command (goto, last, dump, etc)
 183  * FTST         : file test operator
 184  * FUN0         : zero-argument function
 185  * FUN0OP       : zero-argument function, with its op created in this file
 186  * FUN1         : not used, except for not, which isn't a UNIOP
 187  * BOop         : bitwise or or xor
 188  * BAop         : bitwise and
 189  * BCop         : bitwise complement
 190  * SHop         : shift operator
 191  * PWop         : power operator
 192  * PMop         : pattern-matching operator
 193  * Aop          : addition-level operator
 194  * AopNOASSIGN  : addition-level operator that is never part of .=
 195  * Mop          : multiplication-level operator
 196  * Eop          : equality-testing operator
 197  * Rop          : relational operator <= != gt
 198  *
 199  * Also see LOP and lop() below.
 200  */
 201
 202 #ifdef DEBUGGING /* Serve -DT. */
 203 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 204 #else
 205 #   define REPORT(retval) (retval)
 206 #endif
 207
 208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
 217                          pl_yylval.ival=f, \
 218                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 219                          REPORT((int)LOOPEX))
 220 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 221 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 222 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 223 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 224 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 225 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 227                        REPORT('~')
 228 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 229 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 230 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 231 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 233 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 234 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 235 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 236
 237 /* This bit of chicanery makes a unary function followed by
 238  * a parenthesis into a function with one argument, highest precedence.
 239  * The UNIDOR macro is for unary functions that can be followed by the //
 240  * operator (such as C<shift // 0>).
 241  */
 242 #define UNI3(f,x,have_x) { \
 243         pl_yylval.ival = f; \
 244         if (have_x) PL_expect = x; \
 245         PL_bufptr = s; \
 246         PL_last_uni = PL_oldbufptr; \
 247         PL_last_lop_op = f; \
 248         if (*s == '(') \
 249             return REPORT( (int)FUNC1 ); \
 250         s = skipspace(s); \
 251         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 252         }
 253 #define UNI(f)    UNI3(f,XTERM,1)
 254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 255 #define UNIPROTO(f,optional) { \
 256         if (optional) PL_last_uni = PL_oldbufptr; \
 257         OPERATOR(f); \
 258         }
 259
 260 #define UNIBRACK(f) UNI3(f,0,0)
 261
 262 /* grandfather return to old style */
 263 #define OLDLOP(f) \
 264         do { \
 265             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 266                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 267             pl_yylval.ival = (f); \
 268             PL_expect = XTERM; \
 269             PL_bufptr = s; \
 270             return (int)LSTOP; \
 271         } while(0)
 272
 273 #define COPLINE_INC_WITH_HERELINES                  \
 274     STMT_START {                                     \
 275         CopLINE_inc(PL_curcop);                       \
 276         if (PL_parser->herelines)                      \
 277             CopLINE(PL_curcop) += PL_parser->herelines, \
 278             PL_parser->herelines = 0;                    \
 279     } STMT_END
 280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 281  * is no sublex_push to follow. */
 282 #define COPLINE_SET_FROM_MULTI_END            \
 283     STMT_START {                               \
 284         CopLINE_set(PL_curcop, PL_multi_end);   \
 285         if (PL_multi_end != PL_multi_start)      \
 286             PL_parser->herelines = 0;             \
 287     } STMT_END
 288
 289
 290 #ifdef DEBUGGING
 291
 292 /* how to interpret the pl_yylval associated with the token */
 293 enum token_type {
 294     TOKENTYPE_NONE,
 295     TOKENTYPE_IVAL,
 296     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 297     TOKENTYPE_PVAL,
 298     TOKENTYPE_OPVAL
 299 };
 300
 301 static struct debug_tokens {
 302     const int token;
 303     enum token_type type;
 304     const char *name;
 305 } const debug_tokens[] =
 306 {
 307     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 308     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 309     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 310     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 311     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 312     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 313     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 314     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 315     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 316     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 317     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 318     { DO,               TOKENTYPE_NONE,         "DO" },
 319     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 320     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 321     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 322     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 323     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 324     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 325     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 326     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 327     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 328     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 329     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 330     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 331     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 332     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 333     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 334     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 335     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 336     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 337     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 338     { IF,               TOKENTYPE_IVAL,         "IF" },
 339     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 340     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 341     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 342     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 343     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 344     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 345     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 346     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 347     { MY,               TOKENTYPE_IVAL,         "MY" },
 348     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 349     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 350     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 351     { OROR,             TOKENTYPE_NONE,         "OROR" },
 352     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 353     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 354     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 355     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 356     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 357     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 358     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 359     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 360     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 361     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 362     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 363     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 364     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 365     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 366     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 367     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 368     { SUB,              TOKENTYPE_NONE,         "SUB" },
 369     { THING,            TOKENTYPE_OPVAL,        "THING" },
 370     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 371     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 372     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 373     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 374     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 375     { USE,              TOKENTYPE_IVAL,         "USE" },
 376     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 377     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 378     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 379     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 380     { 0,                TOKENTYPE_NONE,         NULL }
 381 };
 382
 383 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 384
 385 STATIC int
 386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 387 {
 388     PERL_ARGS_ASSERT_TOKEREPORT;
 389
 390     if (DEBUG_T_TEST) {
 391         const char *name = NULL;
 392         enum token_type type = TOKENTYPE_NONE;
 393         const struct debug_tokens *p;
 394         SV* const report = newSVpvs("<== ");
 395
 396         for (p = debug_tokens; p->token; p++) {
 397             if (p->token == (int)rv) {
 398                 name = p->name;
 399                 type = p->type;
 400                 break;
 401             }
 402         }
 403         if (name)
 404             Perl_sv_catpv(aTHX_ report, name);
 405         else if (isGRAPH(rv))
 406         {
 407             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 408             if ((char)rv == 'p')
 409                 sv_catpvs(report, " (pending identifier)");
 410         }
 411         else if (!rv)
 412             sv_catpvs(report, "EOF");
 413         else
 414             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 415         switch (type) {
 416         case TOKENTYPE_NONE:
 417             break;
 418         case TOKENTYPE_IVAL:
 419             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 420             break;
 421         case TOKENTYPE_OPNUM:
 422             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 423                                     PL_op_name[lvalp->ival]);
 424             break;
 425         case TOKENTYPE_PVAL:
 426             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 427             break;
 428         case TOKENTYPE_OPVAL:
 429             if (lvalp->opval) {
 430                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 431                                     PL_op_name[lvalp->opval->op_type]);
 432                 if (lvalp->opval->op_type == OP_CONST) {
 433                     Perl_sv_catpvf(aTHX_ report, " %s",
 434                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 435                 }
 436
 437             }
 438             else
 439                 sv_catpvs(report, "(opval=null)");
 440             break;
 441         }
 442         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 443     };
 444     return (int)rv;
 445 }
 446
 447
 448 /* print the buffer with suitable escapes */
 449
 450 STATIC void
 451 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 452 {
 453     SV* const tmp = newSVpvs("");
 454
 455     PERL_ARGS_ASSERT_PRINTBUF;
 456
 457     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 458     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 459     GCC_DIAG_RESTORE;
 460     SvREFCNT_dec(tmp);
 461 }
 462
 463 #endif
 464
 465 static int
 466 S_deprecate_commaless_var_list(pTHX) {
 467     PL_expect = XTERM;
 468     deprecate("comma-less variable list");
 469     return REPORT(','); /* grandfather non-comma-format format */
 470 }
 471
 472 /*
 473  * S_ao
 474  *
 475  * This subroutine looks for an '=' next to the operator that has just been
 476  * parsed and turns it into an ASSIGNOP if it finds one.
 477  */
 478
 479 STATIC int
 480 S_ao(pTHX_ int toketype)
 481 {
 482     if (*PL_bufptr == '=') {
 483         PL_bufptr++;
 484         if (toketype == ANDAND)
 485             pl_yylval.ival = OP_ANDASSIGN;
 486         else if (toketype == OROR)
 487             pl_yylval.ival = OP_ORASSIGN;
 488         else if (toketype == DORDOR)
 489             pl_yylval.ival = OP_DORASSIGN;
 490         toketype = ASSIGNOP;
 491     }
 492     return REPORT(toketype);
 493 }
 494
 495 /*
 496  * S_no_op
 497  * When Perl expects an operator and finds something else, no_op
 498  * prints the warning.  It always prints "<something> found where
 499  * operator expected.  It prints "Missing semicolon on previous line?"
 500  * if the surprise occurs at the start of the line.  "do you need to
 501  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 502  * where the compiler doesn't know if foo is a method call or a function.
 503  * It prints "Missing operator before end of line" if there's nothing
 504  * after the missing operator, or "... before <...>" if there is something
 505  * after the missing operator.
 506  *
 507  * PL_bufptr is expected to point to the start of the thing that was found,
 508  * and s after the next token or partial token.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     char * const oldbp = PL_bufptr;
 515     const bool is_first = (PL_oldbufptr == PL_linestart);
 516
 517     PERL_ARGS_ASSERT_NO_OP;
 518
 519     if (!s)
 520         s = oldbp;
 521     else
 522         PL_bufptr = s;
 523     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 524     if (ckWARN_d(WARN_SYNTAX)) {
 525         if (is_first)
 526             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 527                     "\t(Missing semicolon on previous line?)\n");
 528         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 529             const char *t;
 530             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 531                                                             t += UTF ? UTF8SKIP(t) : 1)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 536                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %"UTF8f"?)\n",
 542                      UTF8fARG(UTF, s - oldbp, oldbp));
 543         }
 544     }
 545     PL_bufptr = oldbp;
 546 }
 547
 548 /*
 549  * S_missingterm
 550  * Complain about missing quote/regexp/heredoc terminator.
 551  * If it's called with NULL then it cauterizes the line buffer.
 552  * If we're in a delimited string and the delimiter is a control
 553  * character, it's reformatted into a two-char sequence like ^C.
 554  * This is fatal.
 555  */
 556
 557 STATIC void
 558 S_missingterm(pTHX_ char *s)
 559 {
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if ((U8) PL_multi_close < 32) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #include "feature.h"
 583
 584 /*
 585  * Check whether the named feature is enabled.
 586  */
 587 bool
 588 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 589 {
 590     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 591
 592     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 593
 594     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 595
 596     if (namelen > MAX_FEATURE_LEN)
 597         return FALSE;
 598     memcpy(&he_name[8], name, namelen);
 599
 600     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 601                                      REFCOUNTED_HE_EXISTS));
 602 }
 603
 604 /*
 605  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 606  * utf16-to-utf8-reversed.
 607  */
 608
 609 #ifdef PERL_CR_FILTER
 610 static void
 611 strip_return(SV *sv)
 612 {
 613     const char *s = SvPVX_const(sv);
 614     const char * const e = s + SvCUR(sv);
 615
 616     PERL_ARGS_ASSERT_STRIP_RETURN;
 617
 618     /* outer loop optimized to do nothing if there are no CR-LFs */
 619     while (s < e) {
 620         if (*s++ == '\r' && *s == '\n') {
 621             /* hit a CR-LF, need to copy the rest */
 622             char *d = s - 1;
 623             *d++ = *s++;
 624             while (s < e) {
 625                 if (*s == '\r' && s[1] == '\n')
 626                     s++;
 627                 *d++ = *s++;
 628             }
 629             SvCUR(sv) -= s - d;
 630             return;
 631         }
 632     }
 633 }
 634
 635 STATIC I32
 636 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 637 {
 638     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 639     if (count > 0 && !maxlen)
 640         strip_return(sv);
 641     return count;
 642 }
 643 #endif
 644
 645 /*
 646 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 647
 648 Creates and initialises a new lexer/parser state object, supplying
 649 a context in which to lex and parse from a new source of Perl code.
 650 A pointer to the new state object is placed in L</PL_parser>.  An entry
 651 is made on the save stack so that upon unwinding the new state object
 652 will be destroyed and the former value of L</PL_parser> will be restored.
 653 Nothing else need be done to clean up the parsing context.
 654
 655 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 656 non-null, provides a string (in SV form) containing code to be parsed.
 657 A copy of the string is made, so subsequent modification of I<line>
 658 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 659 from which code will be read to be parsed.  If both are non-null, the
 660 code in I<line> comes first and must consist of complete lines of input,
 661 and I<rsfp> supplies the remainder of the source.
 662
 663 The I<flags> parameter is reserved for future use.  Currently it is only
 664 used by perl internally, so extensions should always pass zero.
 665
 666 =cut
 667 */
 668
 669 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 670    can share filters with the current parser.
 671    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 672    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 673    destruction. This is used to handle the case of defaulting to reading the
 674    script from the standard input because no filename was given on the command
 675    line (without getting confused by situation where STDIN has been closed, so
 676    the script handle is opened on fd 0)  */
 677
 678 void
 679 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 680 {
 681     const char *s = NULL;
 682     yy_parser *parser, *oparser;
 683     if (flags && flags & ~LEX_START_FLAGS)
 684         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 685
 686     /* create and initialise a parser */
 687
 688     Newxz(parser, 1, yy_parser);
 689     parser->old_parser = oparser = PL_parser;
 690     PL_parser = parser;
 691
 692     parser->stack = NULL;
 693     parser->ps = NULL;
 694     parser->stack_size = 0;
 695
 696     /* on scope exit, free this parser and restore any outer one */
 697     SAVEPARSER(parser);
 698     parser->saved_curcop = PL_curcop;
 699
 700     /* initialise lexer state */
 701
 702     parser->nexttoke = 0;
 703     parser->error_count = oparser ? oparser->error_count : 0;
 704     parser->copline = parser->preambling = NOLINE;
 705     parser->lex_state = LEX_NORMAL;
 706     parser->expect = XSTATE;
 707     parser->rsfp = rsfp;
 708     parser->rsfp_filters =
 709       !(flags & LEX_START_SAME_FILTER) || !oparser
 710         ? NULL
 711         : MUTABLE_AV(SvREFCNT_inc(
 712             oparser->rsfp_filters
 713              ? oparser->rsfp_filters
 714              : (oparser->rsfp_filters = newAV())
 715           ));
 716
 717     Newx(parser->lex_brackstack, 120, char);
 718     Newx(parser->lex_casestack, 12, char);
 719     *parser->lex_casestack = '\0';
 720     Newxz(parser->lex_shared, 1, LEXSHARED);
 721
 722     if (line) {
 723         STRLEN len;
 724         s = SvPV_const(line, len);
 725         parser->linestr = flags & LEX_START_COPIED
 726                             ? SvREFCNT_inc_simple_NN(line)
 727                             : newSVpvn_flags(s, len, SvUTF8(line));
 728         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 729     } else {
 730         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 731     }
 732     parser->oldoldbufptr =
 733         parser->oldbufptr =
 734         parser->bufptr =
 735         parser->linestart = SvPVX(parser->linestr);
 736     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 737     parser->last_lop = parser->last_uni = NULL;
 738
 739     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 740                                                         |LEX_DONT_CLOSE_RSFP));
 741     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 742                                                         |LEX_DONT_CLOSE_RSFP));
 743
 744     parser->in_pod = parser->filtered = 0;
 745 }
 746
 747
 748 /* delete a parser object */
 749
 750 void
 751 Perl_parser_free(pTHX_  const yy_parser *parser)
 752 {
 753     PERL_ARGS_ASSERT_PARSER_FREE;
 754
 755     PL_curcop = parser->saved_curcop;
 756     SvREFCNT_dec(parser->linestr);
 757
 758     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 759         PerlIO_clearerr(parser->rsfp);
 760     else if (parser->rsfp && (!parser->old_parser ||
 761                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 762         PerlIO_close(parser->rsfp);
 763     SvREFCNT_dec(parser->rsfp_filters);
 764     SvREFCNT_dec(parser->lex_stuff);
 765     SvREFCNT_dec(parser->sublex_info.repl);
 766
 767     Safefree(parser->lex_brackstack);
 768     Safefree(parser->lex_casestack);
 769     Safefree(parser->lex_shared);
 770     PL_parser = parser->old_parser;
 771     Safefree(parser);
 772 }
 773
 774 void
 775 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 776 {
 777     I32 nexttoke = parser->nexttoke;
 778     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 779     while (nexttoke--) {
 780         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 781          && parser->nextval[nexttoke].opval
 782          && parser->nextval[nexttoke].opval->op_slabbed
 783          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 784             op_free(parser->nextval[nexttoke].opval);
 785             parser->nextval[nexttoke].opval = NULL;
 786         }
 787     }
 788 }
 789
 790
 791 /*
 792 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 793
 794 Buffer scalar containing the chunk currently under consideration of the
 795 text currently being lexed.  This is always a plain string scalar (for
 796 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 797 normal scalar means; instead refer to the buffer directly by the pointer
 798 variables described below.
 799
 800 The lexer maintains various C<char*> pointers to things in the
 801 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 802 reallocated, all of these pointers must be updated.  Don't attempt to
 803 do this manually, but rather use L</lex_grow_linestr> if you need to
 804 reallocate the buffer.
 805
 806 The content of the text chunk in the buffer is commonly exactly one
 807 complete line of input, up to and including a newline terminator,
 808 but there are situations where it is otherwise.  The octets of the
 809 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 810 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 811 flag on this scalar, which may disagree with it.
 812
 813 For direct examination of the buffer, the variable
 814 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 815 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 816 of these pointers is usually preferable to examination of the scalar
 817 through normal scalar means.
 818
 819 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 820
 821 Direct pointer to the end of the chunk of text currently being lexed, the
 822 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 823 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 824 always located at the end of the buffer, and does not count as part of
 825 the buffer's contents.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 828
 829 Points to the current position of lexing inside the lexer buffer.
 830 Characters around this point may be freely examined, within
 831 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 832 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 833 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 834
 835 Lexing code (whether in the Perl core or not) moves this pointer past
 836 the characters that it consumes.  It is also expected to perform some
 837 bookkeeping whenever a newline character is consumed.  This movement
 838 can be more conveniently performed by the function L</lex_read_to>,
 839 which handles newlines appropriately.
 840
 841 Interpretation of the buffer's octets can be abstracted out by
 842 using the slightly higher-level functions L</lex_peek_unichar> and
 843 L</lex_read_unichar>.
 844
 845 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 846
 847 Points to the start of the current line inside the lexer buffer.
 848 This is useful for indicating at which column an error occurred, and
 849 not much else.  This must be updated by any lexing code that consumes
 850 a newline; the function L</lex_read_to> handles this detail.
 851
 852 =cut
 853 */
 854
 855 /*
 856 =for apidoc Amx|bool|lex_bufutf8
 857
 858 Indicates whether the octets in the lexer buffer
 859 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 860 of Unicode characters.  If not, they should be interpreted as Latin-1
 861 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 862
 863 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 864 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 865 encoding.
 866
 867 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 868 is significant, but not the whole story regarding the input character
 869 encoding.  Normally, when a file is being read, the scalar contains octets
 870 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 871 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 872 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 873 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 874 is in effect.  This logic may change in the future; use this function
 875 instead of implementing the logic yourself.
 876
 877 =cut
 878 */
 879
 880 bool
 881 Perl_lex_bufutf8(pTHX)
 882 {
 883     return UTF;
 884 }
 885
 886 /*
 887 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 888
 889 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 890 at least I<len> octets (including terminating C<NUL>).  Returns a
 891 pointer to the reallocated buffer.  This is necessary before making
 892 any direct modification of the buffer that would increase its length.
 893 L</lex_stuff_pvn> provides a more convenient way to insert text into
 894 the buffer.
 895
 896 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 897 this function updates all of the lexer's variables that point directly
 898 into the buffer.
 899
 900 =cut
 901 */
 902
 903 char *
 904 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 905 {
 906     SV *linestr;
 907     char *buf;
 908     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 909     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 910     linestr = PL_parser->linestr;
 911     buf = SvPVX(linestr);
 912     if (len <= SvLEN(linestr))
 913         return buf;
 914     bufend_pos = PL_parser->bufend - buf;
 915     bufptr_pos = PL_parser->bufptr - buf;
 916     oldbufptr_pos = PL_parser->oldbufptr - buf;
 917     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 918     linestart_pos = PL_parser->linestart - buf;
 919     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 920     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 921     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 922                             PL_parser->lex_shared->re_eval_start - buf : 0;
 923
 924     buf = sv_grow(linestr, len);
 925
 926     PL_parser->bufend = buf + bufend_pos;
 927     PL_parser->bufptr = buf + bufptr_pos;
 928     PL_parser->oldbufptr = buf + oldbufptr_pos;
 929     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 930     PL_parser->linestart = buf + linestart_pos;
 931     if (PL_parser->last_uni)
 932         PL_parser->last_uni = buf + last_uni_pos;
 933     if (PL_parser->last_lop)
 934         PL_parser->last_lop = buf + last_lop_pos;
 935     if (PL_parser->lex_shared->re_eval_start)
 936         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 937     return buf;
 938 }
 939
 940 /*
 941 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 942
 943 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 944 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 945 reallocating the buffer if necessary.  This means that lexing code that
 946 runs later will see the characters as if they had appeared in the input.
 947 It is not recommended to do this as part of normal parsing, and most
 948 uses of this facility run the risk of the inserted characters being
 949 interpreted in an unintended manner.
 950
 951 The string to be inserted is represented by I<len> octets starting
 952 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 953 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 954 The characters are recoded for the lexer buffer, according to how the
 955 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 956 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 957 function is more convenient.
 958
 959 =cut
 960 */
 961
 962 void
 963 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 964 {
 965     dVAR;
 966     char *bufptr;
 967     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 968     if (flags & ~(LEX_STUFF_UTF8))
 969         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 970     if (UTF) {
 971         if (flags & LEX_STUFF_UTF8) {
 972             goto plain_copy;
 973         } else {
 974             STRLEN highhalf = 0;    /* Count of variants */
 975             const char *p, *e = pv+len;
 976             for (p = pv; p != e; p++) {
 977                 if (! UTF8_IS_INVARIANT(*p)) {
 978                     highhalf++;
 979                 }
 980             }
 981             if (!highhalf)
 982                 goto plain_copy;
 983             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 984             bufptr = PL_parser->bufptr;
 985             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 986             SvCUR_set(PL_parser->linestr,
 987                 SvCUR(PL_parser->linestr) + len+highhalf);
 988             PL_parser->bufend += len+highhalf;
 989             for (p = pv; p != e; p++) {
 990                 U8 c = (U8)*p;
 991                 if (! UTF8_IS_INVARIANT(c)) {
 992                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
 993                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
 994                 } else {
 995                     *bufptr++ = (char)c;
 996                 }
 997             }
 998         }
 999     } else {
1000         if (flags & LEX_STUFF_UTF8) {
1001             STRLEN highhalf = 0;
1002             const char *p, *e = pv+len;
1003             for (p = pv; p != e; p++) {
1004                 U8 c = (U8)*p;
1005                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1006                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1007                                 "non-Latin-1 character into Latin-1 input");
1008                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1009                     p++;
1010                     highhalf++;
1011                 } else if (! UTF8_IS_INVARIANT(c)) {
1012                     /* malformed UTF-8 */
1013                     ENTER;
1014                     SAVESPTR(PL_warnhook);
1015                     PL_warnhook = PERL_WARNHOOK_FATAL;
1016                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1017                     LEAVE;
1018                 }
1019             }
1020             if (!highhalf)
1021                 goto plain_copy;
1022             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023             bufptr = PL_parser->bufptr;
1024             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1025             SvCUR_set(PL_parser->linestr,
1026                 SvCUR(PL_parser->linestr) + len-highhalf);
1027             PL_parser->bufend += len-highhalf;
1028             p = pv;
1029             while (p < e) {
1030                 if (UTF8_IS_INVARIANT(*p)) {
1031                     *bufptr++ = *p;
1032                     p++;
1033                 }
1034                 else {
1035                     assert(p < e -1 );
1036                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1037                     p += 2;
1038                 }
1039             }
1040         } else {
1041           plain_copy:
1042             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043             bufptr = PL_parser->bufptr;
1044             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046             PL_parser->bufend += len;
1047             Copy(pv, bufptr, len, char);
1048         }
1049     }
1050 }
1051
1052 /*
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1054
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary.  This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1062
1063 The string to be inserted is represented by octets starting at I<pv>
1064 and continuing to the first nul.  These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in I<flags>.  The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1076 {
1077     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078     lex_stuff_pvn(pv, strlen(pv), flags);
1079 }
1080
1081 /*
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1083
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary.  This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1091
1092 The string to be inserted is the string value of I<sv>.  The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1097
1098 =cut
1099 */
1100
1101 void
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 {
1104     char *pv;
1105     STRLEN len;
1106     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1107     if (flags)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1109     pv = SvPV(sv, len);
1110     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 }
1112
1113 /*
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1115
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1120
1121 This is not the normal way to consume lexed text.  For that, use
1122 L</lex_read_to>.
1123
1124 =cut
1125 */
1126
1127 void
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1129 {
1130     char *buf, *bufend;
1131     STRLEN unstuff_len;
1132     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133     buf = PL_parser->bufptr;
1134     if (ptr < buf)
1135         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1136     if (ptr == buf)
1137         return;
1138     bufend = PL_parser->bufend;
1139     if (ptr > bufend)
1140         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141     unstuff_len = ptr - buf;
1142     Move(ptr, buf, bufend+1-ptr, char);
1143     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144     PL_parser->bufend = bufend - unstuff_len;
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1149
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1154
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_read_to(pTHX_ char *ptr)
1164 {
1165     char *s;
1166     PERL_ARGS_ASSERT_LEX_READ_TO;
1167     s = PL_parser->bufptr;
1168     if (ptr < s || ptr > PL_parser->bufend)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170     for (; s != ptr; s++)
1171         if (*s == '\n') {
1172             COPLINE_INC_WITH_HERELINES;
1173             PL_parser->linestart = s+1;
1174         }
1175     PL_parser->bufptr = ptr;
1176 }
1177
1178 /*
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1180
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to I<ptr>.  The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately.  I<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1186
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it.  However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1194
1195 =cut
1196 */
1197
1198 void
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1200 {
1201     char *buf;
1202     STRLEN discard_len;
1203     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204     buf = SvPVX(PL_parser->linestr);
1205     if (ptr < buf)
1206         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1207     if (ptr == buf)
1208         return;
1209     if (ptr > PL_parser->bufptr)
1210         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211     discard_len = ptr - buf;
1212     if (PL_parser->oldbufptr < ptr)
1213         PL_parser->oldbufptr = ptr;
1214     if (PL_parser->oldoldbufptr < ptr)
1215         PL_parser->oldoldbufptr = ptr;
1216     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217         PL_parser->last_uni = NULL;
1218     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219         PL_parser->last_lop = NULL;
1220     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222     PL_parser->bufend -= discard_len;
1223     PL_parser->bufptr -= discard_len;
1224     PL_parser->oldbufptr -= discard_len;
1225     PL_parser->oldoldbufptr -= discard_len;
1226     if (PL_parser->last_uni)
1227         PL_parser->last_uni -= discard_len;
1228     if (PL_parser->last_lop)
1229         PL_parser->last_lop -= discard_len;
1230 }
1231
1232 /*
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1234
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more.  It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1240
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245 will not be discarded.  If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1247
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1250
1251 =cut
1252 */
1253
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM  0x40000000 /* here-doc */
1256
1257 bool
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1259 {
1260     SV *linestr;
1261     char *buf;
1262     STRLEN old_bufend_pos, new_bufend_pos;
1263     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265     bool got_some_for_debugger = 0;
1266     bool got_some;
1267     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269     if (!(flags & LEX_NO_TERM) && PL_sublex_info.sub_inwhat)
1270         return FALSE;
1271     linestr = PL_parser->linestr;
1272     buf = SvPVX(linestr);
1273     if (!(flags & LEX_KEEP_PREVIOUS) &&
1274             PL_parser->bufptr == PL_parser->bufend) {
1275         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1276         linestart_pos = 0;
1277         if (PL_parser->last_uni != PL_parser->bufend)
1278             PL_parser->last_uni = NULL;
1279         if (PL_parser->last_lop != PL_parser->bufend)
1280             PL_parser->last_lop = NULL;
1281         last_uni_pos = last_lop_pos = 0;
1282         *buf = 0;
1283         SvCUR(linestr) = 0;
1284     } else {
1285         old_bufend_pos = PL_parser->bufend - buf;
1286         bufptr_pos = PL_parser->bufptr - buf;
1287         oldbufptr_pos = PL_parser->oldbufptr - buf;
1288         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1289         linestart_pos = PL_parser->linestart - buf;
1290         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1291         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1292     }
1293     if (flags & LEX_FAKE_EOF) {
1294         goto eof;
1295     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1296         got_some = 0;
1297     } else if (filter_gets(linestr, old_bufend_pos)) {
1298         got_some = 1;
1299         got_some_for_debugger = 1;
1300     } else if (flags & LEX_NO_TERM) {
1301         got_some = 0;
1302     } else {
1303         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1304             sv_setpvs(linestr, "");
1305         eof:
1306         /* End of real input.  Close filehandle (unless it was STDIN),
1307          * then add implicit termination.
1308          */
1309         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1310             PerlIO_clearerr(PL_parser->rsfp);
1311         else if (PL_parser->rsfp)
1312             (void)PerlIO_close(PL_parser->rsfp);
1313         PL_parser->rsfp = NULL;
1314         PL_parser->in_pod = PL_parser->filtered = 0;
1315         if (!PL_in_eval && PL_minus_p) {
1316             sv_catpvs(linestr,
1317                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1318             PL_minus_n = PL_minus_p = 0;
1319         } else if (!PL_in_eval && PL_minus_n) {
1320             sv_catpvs(linestr, /*{*/";}");
1321             PL_minus_n = 0;
1322         } else
1323             sv_catpvs(linestr, ";");
1324         got_some = 1;
1325     }
1326     buf = SvPVX(linestr);
1327     new_bufend_pos = SvCUR(linestr);
1328     PL_parser->bufend = buf + new_bufend_pos;
1329     PL_parser->bufptr = buf + bufptr_pos;
1330     PL_parser->oldbufptr = buf + oldbufptr_pos;
1331     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1332     PL_parser->linestart = buf + linestart_pos;
1333     if (PL_parser->last_uni)
1334         PL_parser->last_uni = buf + last_uni_pos;
1335     if (PL_parser->last_lop)
1336         PL_parser->last_lop = buf + last_lop_pos;
1337     if (PL_parser->preambling != NOLINE) {
1338         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1339         PL_parser->preambling = NOLINE;
1340     }
1341     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1342             PL_curstash != PL_debstash) {
1343         /* debugger active and we're not compiling the debugger code,
1344          * so store the line into the debugger's array of lines
1345          */
1346         update_debugger_info(NULL, buf+old_bufend_pos,
1347             new_bufend_pos-old_bufend_pos);
1348     }
1349     return got_some;
1350 }
1351
1352 /*
1353 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1354
1355 Looks ahead one (Unicode) character in the text currently being lexed.
1356 Returns the codepoint (unsigned integer value) of the next character,
1357 or -1 if lexing has reached the end of the input text.  To consume the
1358 peeked character, use L</lex_read_unichar>.
1359
1360 If the next character is in (or extends into) the next chunk of input
1361 text, the next chunk will be read in.  Normally the current chunk will be
1362 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1363 then the current chunk will not be discarded.
1364
1365 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1366 is encountered, an exception is generated.
1367
1368 =cut
1369 */
1370
1371 I32
1372 Perl_lex_peek_unichar(pTHX_ U32 flags)
1373 {
1374     dVAR;
1375     char *s, *bufend;
1376     if (flags & ~(LEX_KEEP_PREVIOUS))
1377         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1378     s = PL_parser->bufptr;
1379     bufend = PL_parser->bufend;
1380     if (UTF) {
1381         U8 head;
1382         I32 unichar;
1383         STRLEN len, retlen;
1384         if (s == bufend) {
1385             if (!lex_next_chunk(flags))
1386                 return -1;
1387             s = PL_parser->bufptr;
1388             bufend = PL_parser->bufend;
1389         }
1390         head = (U8)*s;
1391         if (UTF8_IS_INVARIANT(head))
1392             return head;
1393         if (UTF8_IS_START(head)) {
1394             len = UTF8SKIP(&head);
1395             while ((STRLEN)(bufend-s) < len) {
1396                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1397                     break;
1398                 s = PL_parser->bufptr;
1399                 bufend = PL_parser->bufend;
1400             }
1401         }
1402         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1403         if (retlen == (STRLEN)-1) {
1404             /* malformed UTF-8 */
1405             ENTER;
1406             SAVESPTR(PL_warnhook);
1407             PL_warnhook = PERL_WARNHOOK_FATAL;
1408             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1409             LEAVE;
1410         }
1411         return unichar;
1412     } else {
1413         if (s == bufend) {
1414             if (!lex_next_chunk(flags))
1415                 return -1;
1416             s = PL_parser->bufptr;
1417         }
1418         return (U8)*s;
1419     }
1420 }
1421
1422 /*
1423 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1424
1425 Reads the next (Unicode) character in the text currently being lexed.
1426 Returns the codepoint (unsigned integer value) of the character read,
1427 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1428 if lexing has reached the end of the input text.  To non-destructively
1429 examine the next character, use L</lex_peek_unichar> instead.
1430
1431 If the next character is in (or extends into) the next chunk of input
1432 text, the next chunk will be read in.  Normally the current chunk will be
1433 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1434 then the current chunk will not be discarded.
1435
1436 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1437 is encountered, an exception is generated.
1438
1439 =cut
1440 */
1441
1442 I32
1443 Perl_lex_read_unichar(pTHX_ U32 flags)
1444 {
1445     I32 c;
1446     if (flags & ~(LEX_KEEP_PREVIOUS))
1447         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1448     c = lex_peek_unichar(flags);
1449     if (c != -1) {
1450         if (c == '\n')
1451             COPLINE_INC_WITH_HERELINES;
1452         if (UTF)
1453             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1454         else
1455             ++(PL_parser->bufptr);
1456     }
1457     return c;
1458 }
1459
1460 /*
1461 =for apidoc Amx|void|lex_read_space|U32 flags
1462
1463 Reads optional spaces, in Perl style, in the text currently being
1464 lexed.  The spaces may include ordinary whitespace characters and
1465 Perl-style comments.  C<#line> directives are processed if encountered.
1466 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1467 at a non-space character (or the end of the input text).
1468
1469 If spaces extend into the next chunk of input text, the next chunk will
1470 be read in.  Normally the current chunk will be discarded at the same
1471 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1472 chunk will not be discarded.
1473
1474 =cut
1475 */
1476
1477 #define LEX_NO_INCLINE    0x40000000
1478 #define LEX_NO_NEXT_CHUNK 0x80000000
1479
1480 void
1481 Perl_lex_read_space(pTHX_ U32 flags)
1482 {
1483     char *s, *bufend;
1484     const bool can_incline = !(flags & LEX_NO_INCLINE);
1485     bool need_incline = 0;
1486     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1487         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1488     s = PL_parser->bufptr;
1489     bufend = PL_parser->bufend;
1490     while (1) {
1491         char c = *s;
1492         if (c == '#') {
1493             do {
1494                 c = *++s;
1495             } while (!(c == '\n' || (c == 0 && s == bufend)));
1496         } else if (c == '\n') {
1497             s++;
1498             if (can_incline) {
1499                 PL_parser->linestart = s;
1500                 if (s == bufend)
1501                     need_incline = 1;
1502                 else
1503                     incline(s);
1504             }
1505         } else if (isSPACE(c)) {
1506             s++;
1507         } else if (c == 0 && s == bufend) {
1508             bool got_more;
1509             line_t l;
1510             if (flags & LEX_NO_NEXT_CHUNK)
1511                 break;
1512             PL_parser->bufptr = s;
1513             l = CopLINE(PL_curcop);
1514             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1515             got_more = lex_next_chunk(flags);
1516             CopLINE_set(PL_curcop, l);
1517             s = PL_parser->bufptr;
1518             bufend = PL_parser->bufend;
1519             if (!got_more)
1520                 break;
1521             if (can_incline && need_incline && PL_parser->rsfp) {
1522                 incline(s);
1523                 need_incline = 0;
1524             }
1525         } else if (!c) {
1526             s++;
1527         } else {
1528             break;
1529         }
1530     }
1531     PL_parser->bufptr = s;
1532 }
1533
1534 /*
1535
1536 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1537
1538 This function performs syntax checking on a prototype, C<proto>.
1539 If C<warn> is true, any illegal characters or mismatched brackets
1540 will trigger illegalproto warnings, declaring that they were
1541 detected in the prototype for C<name>.
1542
1543 The return value is C<true> if this is a valid prototype, and
1544 C<false> if it is not, regardless of whether C<warn> was C<true> or
1545 C<false>.
1546
1547 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1548
1549 =cut
1550
1551  */
1552
1553 bool
1554 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1555 {
1556     STRLEN len, origlen;
1557     char *p = proto ? SvPV(proto, len) : NULL;
1558     bool bad_proto = FALSE;
1559     bool in_brackets = FALSE;
1560     bool after_slash = FALSE;
1561     char greedy_proto = ' ';
1562     bool proto_after_greedy_proto = FALSE;
1563     bool must_be_last = FALSE;
1564     bool underscore = FALSE;
1565     bool bad_proto_after_underscore = FALSE;
1566
1567     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1568
1569     if (!proto)
1570         return TRUE;
1571
1572     origlen = len;
1573     for (; len--; p++) {
1574         if (!isSPACE(*p)) {
1575             if (must_be_last)
1576                 proto_after_greedy_proto = TRUE;
1577             if (underscore) {
1578                 if (!strchr(";@%", *p))
1579                     bad_proto_after_underscore = TRUE;
1580                 underscore = FALSE;
1581             }
1582             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1583                 bad_proto = TRUE;
1584             }
1585             else {
1586                 if (*p == '[')
1587                     in_brackets = TRUE;
1588                 else if (*p == ']')
1589                     in_brackets = FALSE;
1590                 else if ((*p == '@' || *p == '%') &&
1591                     !after_slash &&
1592                     !in_brackets ) {
1593                     must_be_last = TRUE;
1594                     greedy_proto = *p;
1595                 }
1596                 else if (*p == '_')
1597                     underscore = TRUE;
1598             }
1599             if (*p == '\\')
1600                 after_slash = TRUE;
1601             else
1602                 after_slash = FALSE;
1603         }
1604     }
1605
1606     if (warn) {
1607         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1608         p -= origlen;
1609         p = SvUTF8(proto)
1610             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1611                              origlen, UNI_DISPLAY_ISPRINT)
1612             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1613
1614         if (proto_after_greedy_proto)
1615             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1616                         "Prototype after '%c' for %"SVf" : %s",
1617                         greedy_proto, SVfARG(name), p);
1618         if (in_brackets)
1619             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1620                         "Missing ']' in prototype for %"SVf" : %s",
1621                         SVfARG(name), p);
1622         if (bad_proto)
1623             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1624                         "Illegal character in prototype for %"SVf" : %s",
1625                         SVfARG(name), p);
1626         if (bad_proto_after_underscore)
1627             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1628                         "Illegal character after '_' in prototype for %"SVf" : %s",
1629                         SVfARG(name), p);
1630     }
1631
1632     return (! (proto_after_greedy_proto || bad_proto) );
1633 }
1634
1635 /*
1636  * S_incline
1637  * This subroutine has nothing to do with tilting, whether at windmills
1638  * or pinball tables.  Its name is short for "increment line".  It
1639  * increments the current line number in CopLINE(PL_curcop) and checks
1640  * to see whether the line starts with a comment of the form
1641  *    # line 500 "foo.pm"
1642  * If so, it sets the current line number and file to the values in the comment.
1643  */
1644
1645 STATIC void
1646 S_incline(pTHX_ const char *s)
1647 {
1648     const char *t;
1649     const char *n;
1650     const char *e;
1651     line_t line_num;
1652
1653     PERL_ARGS_ASSERT_INCLINE;
1654
1655     COPLINE_INC_WITH_HERELINES;
1656     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1657      && s+1 == PL_bufend && *s == ';') {
1658         /* fake newline in string eval */
1659         CopLINE_dec(PL_curcop);
1660         return;
1661     }
1662     if (*s++ != '#')
1663         return;
1664     while (SPACE_OR_TAB(*s))
1665         s++;
1666     if (strnEQ(s, "line", 4))
1667         s += 4;
1668     else
1669         return;
1670     if (SPACE_OR_TAB(*s))
1671         s++;
1672     else
1673         return;
1674     while (SPACE_OR_TAB(*s))
1675         s++;
1676     if (!isDIGIT(*s))
1677         return;
1678
1679     n = s;
1680     while (isDIGIT(*s))
1681         s++;
1682     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1683         return;
1684     while (SPACE_OR_TAB(*s))
1685         s++;
1686     if (*s == '"' && (t = strchr(s+1, '"'))) {
1687         s++;
1688         e = t + 1;
1689     }
1690     else {
1691         t = s;
1692         while (!isSPACE(*t))
1693             t++;
1694         e = t;
1695     }
1696     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1697         e++;
1698     if (*e != '\n' && *e != '\0')
1699         return;         /* false alarm */
1700
1701     line_num = grok_atou(n, &e) - 1;
1702
1703     if (t - s > 0) {
1704         const STRLEN len = t - s;
1705
1706         if (!PL_rsfp && !PL_parser->filtered) {
1707             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1708              * to *{"::_<newfilename"} */
1709             /* However, the long form of evals is only turned on by the
1710                debugger - usually they're "(eval %lu)" */
1711             GV * const cfgv = CopFILEGV(PL_curcop);
1712             if (cfgv) {
1713                 char smallbuf[128];
1714                 STRLEN tmplen2 = len;
1715                 char *tmpbuf2;
1716                 GV *gv2;
1717
1718                 if (tmplen2 + 2 <= sizeof smallbuf)
1719                     tmpbuf2 = smallbuf;
1720                 else
1721                     Newx(tmpbuf2, tmplen2 + 2, char);
1722
1723                 tmpbuf2[0] = '_';
1724                 tmpbuf2[1] = '<';
1725
1726                 memcpy(tmpbuf2 + 2, s, tmplen2);
1727                 tmplen2 += 2;
1728
1729                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1730                 if (!isGV(gv2)) {
1731                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1732                     /* adjust ${"::_<newfilename"} to store the new file name */
1733                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1734                     /* The line number may differ. If that is the case,
1735                        alias the saved lines that are in the array.
1736                        Otherwise alias the whole array. */
1737                     if (CopLINE(PL_curcop) == line_num) {
1738                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1739                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1740                     }
1741                     else if (GvAV(cfgv)) {
1742                         AV * const av = GvAV(cfgv);
1743                         const I32 start = CopLINE(PL_curcop)+1;
1744                         I32 items = AvFILLp(av) - start;
1745                         if (items > 0) {
1746                             AV * const av2 = GvAVn(gv2);
1747                             SV **svp = AvARRAY(av) + start;
1748                             I32 l = (I32)line_num+1;
1749                             while (items--)
1750                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1751                         }
1752                     }
1753                 }
1754
1755                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1756             }
1757         }
1758         CopFILE_free(PL_curcop);
1759         CopFILE_setn(PL_curcop, s, len);
1760     }
1761     CopLINE_set(PL_curcop, line_num);
1762 }
1763
1764 #define skipspace(s) skipspace_flags(s, 0)
1765
1766
1767 STATIC void
1768 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1769 {
1770     AV *av = CopFILEAVx(PL_curcop);
1771     if (av) {
1772         SV * sv;
1773         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1774         else {
1775             sv = *av_fetch(av, 0, 1);
1776             SvUPGRADE(sv, SVt_PVMG);
1777         }
1778         if (!SvPOK(sv)) sv_setpvs(sv,"");
1779         if (orig_sv)
1780             sv_catsv(sv, orig_sv);
1781         else
1782             sv_catpvn(sv, buf, len);
1783         if (!SvIOK(sv)) {
1784             (void)SvIOK_on(sv);
1785             SvIV_set(sv, 0);
1786         }
1787         if (PL_parser->preambling == NOLINE)
1788             av_store(av, CopLINE(PL_curcop), sv);
1789     }
1790 }
1791
1792 /*
1793  * S_skipspace
1794  * Called to gobble the appropriate amount and type of whitespace.
1795  * Skips comments as well.
1796  */
1797
1798 STATIC char *
1799 S_skipspace_flags(pTHX_ char *s, U32 flags)
1800 {
1801     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1802     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1803         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1804             s++;
1805     } else {
1806         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1807         PL_bufptr = s;
1808         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1809                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1810                     LEX_NO_NEXT_CHUNK : 0));
1811         s = PL_bufptr;
1812         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1813         if (PL_linestart > PL_bufptr)
1814             PL_bufptr = PL_linestart;
1815         return s;
1816     }
1817     return s;
1818 }
1819
1820 /*
1821  * S_check_uni
1822  * Check the unary operators to ensure there's no ambiguity in how they're
1823  * used.  An ambiguous piece of code would be:
1824  *     rand + 5
1825  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1826  * the +5 is its argument.
1827  */
1828
1829 STATIC void
1830 S_check_uni(pTHX)
1831 {
1832     const char *s;
1833     const char *t;
1834
1835     if (PL_oldoldbufptr != PL_last_uni)
1836         return;
1837     while (isSPACE(*PL_last_uni))
1838         PL_last_uni++;
1839     s = PL_last_uni;
1840     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1841         s++;
1842     if ((t = strchr(s, '(')) && t < PL_bufptr)
1843         return;
1844
1845     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1846                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1847                      (int)(s - PL_last_uni), PL_last_uni);
1848 }
1849
1850 /*
1851  * LOP : macro to build a list operator.  Its behaviour has been replaced
1852  * with a subroutine, S_lop() for which LOP is just another name.
1853  */
1854
1855 #define LOP(f,x) return lop(f,x,s)
1856
1857 /*
1858  * S_lop
1859  * Build a list operator (or something that might be one).  The rules:
1860  *  - if we have a next token, then it's a list operator (no parens) for
1861  *    which the next token has already been parsed; e.g.,
1862  *       sort foo @args
1863  *       sort foo (@args)
1864  *  - if the next thing is an opening paren, then it's a function
1865  *  - else it's a list operator
1866  */
1867
1868 STATIC I32
1869 S_lop(pTHX_ I32 f, int x, char *s)
1870 {
1871     PERL_ARGS_ASSERT_LOP;
1872
1873     pl_yylval.ival = f;
1874     CLINE;
1875     PL_bufptr = s;
1876     PL_last_lop = PL_oldbufptr;
1877     PL_last_lop_op = (OPCODE)f;
1878     if (PL_nexttoke)
1879         goto lstop;
1880     PL_expect = x;
1881     if (*s == '(')
1882         return REPORT(FUNC);
1883     s = skipspace(s);
1884     if (*s == '(')
1885         return REPORT(FUNC);
1886     else {
1887         lstop:
1888         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1889             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1890         return REPORT(LSTOP);
1891     }
1892 }
1893
1894 /*
1895  * S_force_next
1896  * When the lexer realizes it knows the next token (for instance,
1897  * it is reordering tokens for the parser) then it can call S_force_next
1898  * to know what token to return the next time the lexer is called.  Caller
1899  * will need to set PL_nextval[] and possibly PL_expect to ensure
1900  * the lexer handles the token correctly.
1901  */
1902
1903 STATIC void
1904 S_force_next(pTHX_ I32 type)
1905 {
1906 #ifdef DEBUGGING
1907     if (DEBUG_T_TEST) {
1908         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1909         tokereport(type, &NEXTVAL_NEXTTOKE);
1910     }
1911 #endif
1912     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1913     PL_nexttype[PL_nexttoke] = type;
1914     PL_nexttoke++;
1915     if (PL_lex_state != LEX_KNOWNEXT) {
1916         PL_lex_defer = PL_lex_state;
1917         PL_lex_state = LEX_KNOWNEXT;
1918     }
1919 }
1920
1921 /*
1922  * S_postderef
1923  *
1924  * This subroutine handles postfix deref syntax after the arrow has already
1925  * been emitted.  @* $* etc. are emitted as two separate token right here.
1926  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1927  * only the first, leaving yylex to find the next.
1928  */
1929
1930 static int
1931 S_postderef(pTHX_ int const funny, char const next)
1932 {
1933     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1934     assert(strchr("*[{", next));
1935     if (next == '*') {
1936         PL_expect = XOPERATOR;
1937         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1938             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1939             PL_lex_state = LEX_INTERPEND;
1940             force_next(POSTJOIN);
1941         }
1942         force_next(next);
1943         PL_bufptr+=2;
1944     }
1945     else {
1946         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1947          && !PL_lex_brackets)
1948             PL_lex_dojoin = 2;
1949         PL_expect = XOPERATOR;
1950         PL_bufptr++;
1951     }
1952     return funny;
1953 }
1954
1955 void
1956 Perl_yyunlex(pTHX)
1957 {
1958     int yyc = PL_parser->yychar;
1959     if (yyc != YYEMPTY) {
1960         if (yyc) {
1961             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1962             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1963                 PL_lex_allbrackets--;
1964                 PL_lex_brackets--;
1965                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1966             } else if (yyc == '('/*)*/) {
1967                 PL_lex_allbrackets--;
1968                 yyc |= (2<<24);
1969             }
1970             force_next(yyc);
1971         }
1972         PL_parser->yychar = YYEMPTY;
1973     }
1974 }
1975
1976 STATIC SV *
1977 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1978 {
1979     SV * const sv = newSVpvn_utf8(start, len,
1980                                   !IN_BYTES
1981                                   && UTF
1982                                   && !is_invariant_string((const U8*)start, len)
1983                                   && is_utf8_string((const U8*)start, len));
1984     return sv;
1985 }
1986
1987 /*
1988  * S_force_word
1989  * When the lexer knows the next thing is a word (for instance, it has
1990  * just seen -> and it knows that the next char is a word char, then
1991  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1992  * lookahead.
1993  *
1994  * Arguments:
1995  *   char *start : buffer position (must be within PL_linestr)
1996  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1997  *   int check_keyword : if true, Perl checks to make sure the word isn't
1998  *       a keyword (do this if the word is a label, e.g. goto FOO)
1999  *   int allow_pack : if true, : characters will also be allowed (require,
2000  *       use, etc. do this)
2001  */
2002
2003 STATIC char *
2004 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2005 {
2006     char *s;
2007     STRLEN len;
2008
2009     PERL_ARGS_ASSERT_FORCE_WORD;
2010
2011     start = skipspace(start);
2012     s = start;
2013     if (isIDFIRST_lazy_if(s,UTF) ||
2014         (allow_pack && *s == ':') )
2015     {
2016         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2017         if (check_keyword) {
2018           char *s2 = PL_tokenbuf;
2019           STRLEN len2 = len;
2020           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2021             s2 += 6, len2 -= 6;
2022           if (keyword(s2, len2, 0))
2023             return start;
2024         }
2025         if (token == METHOD) {
2026             s = skipspace(s);
2027             if (*s == '(')
2028                 PL_expect = XTERM;
2029             else {
2030                 PL_expect = XOPERATOR;
2031             }
2032         }
2033         NEXTVAL_NEXTTOKE.opval
2034             = (OP*)newSVOP(OP_CONST,0,
2035                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2036         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2037         force_next(token);
2038     }
2039     return s;
2040 }
2041
2042 /*
2043  * S_force_ident
2044  * Called when the lexer wants $foo *foo &foo etc, but the program
2045  * text only contains the "foo" portion.  The first argument is a pointer
2046  * to the "foo", and the second argument is the type symbol to prefix.
2047  * Forces the next token to be a "WORD".
2048  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2049  */
2050
2051 STATIC void
2052 S_force_ident(pTHX_ const char *s, int kind)
2053 {
2054     PERL_ARGS_ASSERT_FORCE_IDENT;
2055
2056     if (s[0]) {
2057         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2058         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2059                                                                 UTF ? SVf_UTF8 : 0));
2060         NEXTVAL_NEXTTOKE.opval = o;
2061         force_next(WORD);
2062         if (kind) {
2063             o->op_private = OPpCONST_ENTERED;
2064             /* XXX see note in pp_entereval() for why we forgo typo
2065                warnings if the symbol must be introduced in an eval.
2066                GSAR 96-10-12 */
2067             gv_fetchpvn_flags(s, len,
2068                               (PL_in_eval ? GV_ADDMULTI
2069                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2070                               kind == '$' ? SVt_PV :
2071                               kind == '@' ? SVt_PVAV :
2072                               kind == '%' ? SVt_PVHV :
2073                               SVt_PVGV
2074                               );
2075         }
2076     }
2077 }
2078
2079 static void
2080 S_force_ident_maybe_lex(pTHX_ char pit)
2081 {
2082     NEXTVAL_NEXTTOKE.ival = pit;
2083     force_next('p');
2084 }
2085
2086 NV
2087 Perl_str_to_version(pTHX_ SV *sv)
2088 {
2089     NV retval = 0.0;
2090     NV nshift = 1.0;
2091     STRLEN len;
2092     const char *start = SvPV_const(sv,len);
2093     const char * const end = start + len;
2094     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2095
2096     PERL_ARGS_ASSERT_STR_TO_VERSION;
2097
2098     while (start < end) {
2099         STRLEN skip;
2100         UV n;
2101         if (utf)
2102             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2103         else {
2104             n = *(U8*)start;
2105             skip = 1;
2106         }
2107         retval += ((NV)n)/nshift;
2108         start += skip;
2109         nshift *= 1000;
2110     }
2111     return retval;
2112 }
2113
2114 /*
2115  * S_force_version
2116  * Forces the next token to be a version number.
2117  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2118  * and if "guessing" is TRUE, then no new token is created (and the caller
2119  * must use an alternative parsing method).
2120  */
2121
2122 STATIC char *
2123 S_force_version(pTHX_ char *s, int guessing)
2124 {
2125     OP *version = NULL;
2126     char *d;
2127
2128     PERL_ARGS_ASSERT_FORCE_VERSION;
2129
2130     s = skipspace(s);
2131
2132     d = s;
2133     if (*d == 'v')
2134         d++;
2135     if (isDIGIT(*d)) {
2136         while (isDIGIT(*d) || *d == '_' || *d == '.')
2137             d++;
2138         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2139             SV *ver;
2140             s = scan_num(s, &pl_yylval);
2141             version = pl_yylval.opval;
2142             ver = cSVOPx(version)->op_sv;
2143             if (SvPOK(ver) && !SvNIOK(ver)) {
2144                 SvUPGRADE(ver, SVt_PVNV);
2145                 SvNV_set(ver, str_to_version(ver));
2146                 SvNOK_on(ver);          /* hint that it is a version */
2147             }
2148         }
2149         else if (guessing) {
2150             return s;
2151         }
2152     }
2153
2154     /* NOTE: The parser sees the package name and the VERSION swapped */
2155     NEXTVAL_NEXTTOKE.opval = version;
2156     force_next(WORD);
2157
2158     return s;
2159 }
2160
2161 /*
2162  * S_force_strict_version
2163  * Forces the next token to be a version number using strict syntax rules.
2164  */
2165
2166 STATIC char *
2167 S_force_strict_version(pTHX_ char *s)
2168 {
2169     OP *version = NULL;
2170     const char *errstr = NULL;
2171
2172     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2173
2174     while (isSPACE(*s)) /* leading whitespace */
2175         s++;
2176
2177     if (is_STRICT_VERSION(s,&errstr)) {
2178         SV *ver = newSV(0);
2179         s = (char *)scan_version(s, ver, 0);
2180         version = newSVOP(OP_CONST, 0, ver);
2181     }
2182     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2183             (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2184     {
2185         PL_bufptr = s;
2186         if (errstr)
2187             yyerror(errstr); /* version required */
2188         return s;
2189     }
2190
2191     /* NOTE: The parser sees the package name and the VERSION swapped */
2192     NEXTVAL_NEXTTOKE.opval = version;
2193     force_next(WORD);
2194
2195     return s;
2196 }
2197
2198 /*
2199  * S_tokeq
2200  * Tokenize a quoted string passed in as an SV.  It finds the next
2201  * chunk, up to end of string or a backslash.  It may make a new
2202  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2203  * turns \\ into \.
2204  */
2205
2206 STATIC SV *
2207 S_tokeq(pTHX_ SV *sv)
2208 {
2209     char *s;
2210     char *send;
2211     char *d;
2212     SV *pv = sv;
2213
2214     PERL_ARGS_ASSERT_TOKEQ;
2215
2216     assert (SvPOK(sv));
2217     assert (SvLEN(sv));
2218     assert (!SvIsCOW(sv));
2219     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2220         goto finish;
2221     s = SvPVX(sv);
2222     send = SvEND(sv);
2223     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2224     while (s < send && !(*s == '\\' && s[1] == '\\'))
2225         s++;
2226     if (s == send)
2227         goto finish;
2228     d = s;
2229     if ( PL_hints & HINT_NEW_STRING ) {
2230         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2231                             SVs_TEMP | SvUTF8(sv));
2232     }
2233     while (s < send) {
2234         if (*s == '\\') {
2235             if (s + 1 < send && (s[1] == '\\'))
2236                 s++;            /* all that, just for this */
2237         }
2238         *d++ = *s++;
2239     }
2240     *d = '\0';
2241     SvCUR_set(sv, d - SvPVX_const(sv));
2242   finish:
2243     if ( PL_hints & HINT_NEW_STRING )
2244        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2245     return sv;
2246 }
2247
2248 /*
2249  * Now come three functions related to double-quote context,
2250  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2251  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2252  * interact with PL_lex_state, and create fake ( ... ) argument lists
2253  * to handle functions and concatenation.
2254  * For example,
2255  *   "foo\lbar"
2256  * is tokenised as
2257  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2258  */
2259
2260 /*
2261  * S_sublex_start
2262  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2263  *
2264  * Pattern matching will set PL_lex_op to the pattern-matching op to
2265  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2266  *
2267  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2268  *
2269  * Everything else becomes a FUNC.
2270  *
2271  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2272  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2273  * call to S_sublex_push().
2274  */
2275
2276 STATIC I32
2277 S_sublex_start(pTHX)
2278 {
2279     const I32 op_type = pl_yylval.ival;
2280
2281     if (op_type == OP_NULL) {
2282         pl_yylval.opval = PL_lex_op;
2283         PL_lex_op = NULL;
2284         return THING;
2285     }
2286     if (op_type == OP_CONST) {
2287         SV *sv = PL_lex_stuff;
2288         PL_lex_stuff = NULL;
2289         sv = tokeq(sv);
2290
2291         if (SvTYPE(sv) == SVt_PVIV) {
2292             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2293             STRLEN len;
2294             const char * const p = SvPV_const(sv, len);
2295             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2296             SvREFCNT_dec(sv);
2297             sv = nsv;
2298         }
2299         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2300         return THING;
2301     }
2302
2303     PL_sublex_info.super_state = PL_lex_state;
2304     PL_sublex_info.sub_inwhat = (U16)op_type;
2305     PL_sublex_info.sub_op = PL_lex_op;
2306     PL_lex_state = LEX_INTERPPUSH;
2307
2308     PL_expect = XTERM;
2309     if (PL_lex_op) {
2310         pl_yylval.opval = PL_lex_op;
2311         PL_lex_op = NULL;
2312         return PMFUNC;
2313     }
2314     else
2315         return FUNC;
2316 }
2317
2318 /*
2319  * S_sublex_push
2320  * Create a new scope to save the lexing state.  The scope will be
2321  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2322  * to the uc, lc, etc. found before.
2323  * Sets PL_lex_state to LEX_INTERPCONCAT.
2324  */
2325
2326 STATIC I32
2327 S_sublex_push(pTHX)
2328 {
2329     LEXSHARED *shared;
2330     const bool is_heredoc = PL_multi_close == '<';
2331     ENTER;
2332
2333     PL_lex_state = PL_sublex_info.super_state;
2334     SAVEI8(PL_lex_dojoin);
2335     SAVEI32(PL_lex_brackets);
2336     SAVEI32(PL_lex_allbrackets);
2337     SAVEI32(PL_lex_formbrack);
2338     SAVEI8(PL_lex_fakeeof);
2339     SAVEI32(PL_lex_casemods);
2340     SAVEI32(PL_lex_starts);
2341     SAVEI8(PL_lex_state);
2342     SAVESPTR(PL_lex_repl);
2343     SAVEVPTR(PL_lex_inpat);
2344     SAVEI16(PL_lex_inwhat);
2345     if (is_heredoc)
2346     {
2347         SAVECOPLINE(PL_curcop);
2348         SAVEI32(PL_multi_end);
2349         SAVEI32(PL_parser->herelines);
2350         PL_parser->herelines = 0;
2351     }
2352     SAVEI8(PL_multi_close);
2353     SAVEPPTR(PL_bufptr);
2354     SAVEPPTR(PL_bufend);
2355     SAVEPPTR(PL_oldbufptr);
2356     SAVEPPTR(PL_oldoldbufptr);
2357     SAVEPPTR(PL_last_lop);
2358     SAVEPPTR(PL_last_uni);
2359     SAVEPPTR(PL_linestart);
2360     SAVESPTR(PL_linestr);
2361     SAVEGENERICPV(PL_lex_brackstack);
2362     SAVEGENERICPV(PL_lex_casestack);
2363     SAVEGENERICPV(PL_parser->lex_shared);
2364     SAVEBOOL(PL_parser->lex_re_reparsing);
2365     SAVEI32(PL_copline);
2366
2367     /* The here-doc parser needs to be able to peek into outer lexing
2368        scopes to find the body of the here-doc.  So we put PL_linestr and
2369        PL_bufptr into lex_shared, to ‘share’ those values.
2370      */
2371     PL_parser->lex_shared->ls_linestr = PL_linestr;
2372     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2373
2374     PL_linestr = PL_lex_stuff;
2375     PL_lex_repl = PL_sublex_info.repl;
2376     PL_lex_stuff = NULL;
2377     PL_sublex_info.repl = NULL;
2378
2379     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2380        set for an inner quote-like operator and then an error causes scope-
2381        popping.  We must not have a PL_lex_stuff value left dangling, as
2382        that breaks assumptions elsewhere.  See bug #123617.  */
2383     SAVEGENERICSV(PL_lex_stuff);
2384
2385     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2386         = SvPVX(PL_linestr);
2387     PL_bufend += SvCUR(PL_linestr);
2388     PL_last_lop = PL_last_uni = NULL;
2389     SAVEFREESV(PL_linestr);
2390     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2391
2392     PL_lex_dojoin = FALSE;
2393     PL_lex_brackets = PL_lex_formbrack = 0;
2394     PL_lex_allbrackets = 0;
2395     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2396     Newx(PL_lex_brackstack, 120, char);
2397     Newx(PL_lex_casestack, 12, char);
2398     PL_lex_casemods = 0;
2399     *PL_lex_casestack = '\0';
2400     PL_lex_starts = 0;
2401     PL_lex_state = LEX_INTERPCONCAT;
2402     if (is_heredoc)
2403         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2404     PL_copline = NOLINE;
2405
2406     Newxz(shared, 1, LEXSHARED);
2407     shared->ls_prev = PL_parser->lex_shared;
2408     PL_parser->lex_shared = shared;
2409
2410     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2411     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2412     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2413         PL_lex_inpat = PL_sublex_info.sub_op;
2414     else
2415         PL_lex_inpat = NULL;
2416
2417     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2418     PL_in_eval &= ~EVAL_RE_REPARSING;
2419
2420     return '(';
2421 }
2422
2423 /*
2424  * S_sublex_done
2425  * Restores lexer state after a S_sublex_push.
2426  */
2427
2428 STATIC I32
2429 S_sublex_done(pTHX)
2430 {
2431     if (!PL_lex_starts++) {
2432         SV * const sv = newSVpvs("");
2433         if (SvUTF8(PL_linestr))
2434             SvUTF8_on(sv);
2435         PL_expect = XOPERATOR;
2436         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2437         return THING;
2438     }
2439
2440     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2441         PL_lex_state = LEX_INTERPCASEMOD;
2442         return yylex();
2443     }
2444
2445     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2446     assert(PL_lex_inwhat != OP_TRANSR);
2447     if (PL_lex_repl) {
2448         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2449         PL_linestr = PL_lex_repl;
2450         PL_lex_inpat = 0;
2451         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2452         PL_bufend += SvCUR(PL_linestr);
2453         PL_last_lop = PL_last_uni = NULL;
2454         PL_lex_dojoin = FALSE;
2455         PL_lex_brackets = 0;
2456         PL_lex_allbrackets = 0;
2457         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2458         PL_lex_casemods = 0;
2459         *PL_lex_casestack = '\0';
2460         PL_lex_starts = 0;
2461         if (SvEVALED(PL_lex_repl)) {
2462             PL_lex_state = LEX_INTERPNORMAL;
2463             PL_lex_starts++;
2464             /*  we don't clear PL_lex_repl here, so that we can check later
2465                 whether this is an evalled subst; that means we rely on the
2466                 logic to ensure sublex_done() is called again only via the
2467                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2468         }
2469         else {
2470             PL_lex_state = LEX_INTERPCONCAT;
2471             PL_lex_repl = NULL;
2472         }
2473         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2474             CopLINE(PL_curcop) +=
2475                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2476                  + PL_parser->herelines;
2477             PL_parser->herelines = 0;
2478         }
2479         return '/';
2480     }
2481     else {
2482         const line_t l = CopLINE(PL_curcop);
2483         LEAVE;
2484         if (PL_multi_close == '<')
2485             PL_parser->herelines += l - PL_multi_end;
2486         PL_bufend = SvPVX(PL_linestr);
2487         PL_bufend += SvCUR(PL_linestr);
2488         PL_expect = XOPERATOR;
2489         PL_sublex_info.sub_inwhat = 0;
2490         return ')';
2491     }
2492 }
2493
2494 PERL_STATIC_INLINE SV*
2495 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2496 {
2497     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2498      * interior, hence to the "}".  Finds what the name resolves to, returning
2499      * an SV* containing it; NULL if no valid one found */
2500
2501     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2502
2503     HV * table;
2504     SV **cvp;
2505     SV *cv;
2506     SV *rv;
2507     HV *stash;
2508     const U8* first_bad_char_loc;
2509     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2510
2511     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2512
2513     if (!SvCUR(res))
2514         return res;
2515
2516     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2517                                      e - backslash_ptr,
2518                                      &first_bad_char_loc))
2519     {
2520         /* If warnings are on, this will print a more detailed analysis of what
2521          * is wrong than the error message below */
2522         utf8n_to_uvchr(first_bad_char_loc,
2523                        e - ((char *) first_bad_char_loc),
2524                        NULL, 0);
2525
2526         /* We deliberately don't try to print the malformed character, which
2527          * might not print very well; it also may be just the first of many
2528          * malformations, so don't print what comes after it */
2529         yyerror(Perl_form(aTHX_
2530             "Malformed UTF-8 character immediately after '%.*s'",
2531             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2532         return NULL;
2533     }
2534
2535     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2536                         /* include the <}> */
2537                         e - backslash_ptr + 1);
2538     if (! SvPOK(res)) {
2539         SvREFCNT_dec_NN(res);
2540         return NULL;
2541     }
2542
2543     /* See if the charnames handler is the Perl core's, and if so, we can skip
2544      * the validation needed for a user-supplied one, as Perl's does its own
2545      * validation. */
2546     table = GvHV(PL_hintgv);             /* ^H */
2547     cvp = hv_fetchs(table, "charnames", FALSE);
2548     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2549         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2550     {
2551         const char * const name = HvNAME(stash);
2552         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2553          && strEQ(name, "_charnames")) {
2554            return res;
2555        }
2556     }
2557
2558     /* Here, it isn't Perl's charname handler.  We can't rely on a
2559      * user-supplied handler to validate the input name.  For non-ut8 input,
2560      * look to see that the first character is legal.  Then loop through the
2561      * rest checking that each is a continuation */
2562
2563     /* This code makes the reasonable assumption that the only Latin1-range
2564      * characters that begin a character name alias are alphabetic, otherwise
2565      * would have to create a isCHARNAME_BEGIN macro */
2566
2567     if (! UTF) {
2568         if (! isALPHAU(*s)) {
2569             goto bad_charname;
2570         }
2571         s++;
2572         while (s < e) {
2573             if (! isCHARNAME_CONT(*s)) {
2574                 goto bad_charname;
2575             }
2576             if (*s == ' ' && *(s-1) == ' ') {
2577                 goto multi_spaces;
2578             }
2579             if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2580                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2581                            "NO-BREAK SPACE in a charnames "
2582                            "alias definition is deprecated");
2583             }
2584             s++;
2585         }
2586     }
2587     else {
2588         /* Similarly for utf8.  For invariants can check directly; for other
2589          * Latin1, can calculate their code point and check; otherwise  use a
2590          * swash */
2591         if (UTF8_IS_INVARIANT(*s)) {
2592             if (! isALPHAU(*s)) {
2593                 goto bad_charname;
2594             }
2595             s++;
2596         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2597             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2598                 goto bad_charname;
2599             }
2600             s += 2;
2601         }
2602         else {
2603             if (! PL_utf8_charname_begin) {
2604                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2605                 PL_utf8_charname_begin = _core_swash_init("utf8",
2606                                                         "_Perl_Charname_Begin",
2607                                                         &PL_sv_undef,
2608                                                         1, 0, NULL, &flags);
2609             }
2610             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2611                 goto bad_charname;
2612             }
2613             s += UTF8SKIP(s);
2614         }
2615
2616         while (s < e) {
2617             if (UTF8_IS_INVARIANT(*s)) {
2618                 if (! isCHARNAME_CONT(*s)) {
2619                     goto bad_charname;
2620                 }
2621                 if (*s == ' ' && *(s-1) == ' ') {
2622                     goto multi_spaces;
2623                 }
2624                 s++;
2625             }
2626             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2627                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2628                 {
2629                     goto bad_charname;
2630                 }
2631                 if (*s == *NBSP_UTF8
2632                     && *(s+1) == *(NBSP_UTF8+1)
2633                     && ckWARN_d(WARN_DEPRECATED))
2634                 {
2635                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2636                                 "NO-BREAK SPACE in a charnames "
2637                                 "alias definition is deprecated");
2638                 }
2639                 s += 2;
2640             }
2641             else {
2642                 if (! PL_utf8_charname_continue) {
2643                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2644                     PL_utf8_charname_continue = _core_swash_init("utf8",
2645                                                 "_Perl_Charname_Continue",
2646                                                 &PL_sv_undef,
2647                                                 1, 0, NULL, &flags);
2648                 }
2649                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2650                     goto bad_charname;
2651                 }
2652                 s += UTF8SKIP(s);
2653             }
2654         }
2655     }
2656     if (*(s-1) == ' ') {
2657         yyerror_pv(
2658             Perl_form(aTHX_
2659             "charnames alias definitions may not contain trailing "
2660             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2661             (int)(s - backslash_ptr + 1), backslash_ptr,
2662             (int)(e - s + 1), s + 1
2663             ),
2664         UTF ? SVf_UTF8 : 0);
2665         return NULL;
2666     }
2667
2668     if (SvUTF8(res)) { /* Don't accept malformed input */
2669         const U8* first_bad_char_loc;
2670         STRLEN len;
2671         const char* const str = SvPV_const(res, len);
2672         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2673             /* If warnings are on, this will print a more detailed analysis of
2674              * what is wrong than the error message below */
2675             utf8n_to_uvchr(first_bad_char_loc,
2676                            (char *) first_bad_char_loc - str,
2677                            NULL, 0);
2678
2679             /* We deliberately don't try to print the malformed character,
2680              * which might not print very well; it also may be just the first
2681              * of many malformations, so don't print what comes after it */
2682             yyerror_pv(
2683               Perl_form(aTHX_
2684                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2685                  (int) (e - backslash_ptr + 1), backslash_ptr,
2686                  (int) ((char *) first_bad_char_loc - str), str
2687               ),
2688               SVf_UTF8);
2689             return NULL;
2690         }
2691     }
2692
2693     return res;
2694
2695   bad_charname: {
2696
2697         /* The final %.*s makes sure that should the trailing NUL be missing
2698          * that this print won't run off the end of the string */
2699         yyerror_pv(
2700           Perl_form(aTHX_
2701             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2702             (int)(s - backslash_ptr + 1), backslash_ptr,
2703             (int)(e - s + 1), s + 1
2704           ),
2705           UTF ? SVf_UTF8 : 0);
2706         return NULL;
2707     }
2708
2709   multi_spaces:
2710         yyerror_pv(
2711           Perl_form(aTHX_
2712             "charnames alias definitions may not contain a sequence of "
2713             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2714             (int)(s - backslash_ptr + 1), backslash_ptr,
2715             (int)(e - s + 1), s + 1
2716           ),
2717           UTF ? SVf_UTF8 : 0);
2718         return NULL;
2719 }
2720
2721 /*
2722   scan_const
2723
2724   Extracts the next constant part of a pattern, double-quoted string,
2725   or transliteration.  This is terrifying code.
2726
2727   For example, in parsing the double-quoted string "ab\x63$d", it would
2728   stop at the '$' and return an OP_CONST containing 'abc'.
2729
2730   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2731   processing a pattern (PL_lex_inpat is true), a transliteration
2732   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2733
2734   Returns a pointer to the character scanned up to. If this is
2735   advanced from the start pointer supplied (i.e. if anything was
2736   successfully parsed), will leave an OP_CONST for the substring scanned
2737   in pl_yylval. Caller must intuit reason for not parsing further
2738   by looking at the next characters herself.
2739
2740   In patterns:
2741     expand:
2742       \N{FOO}  => \N{U+hex_for_character_FOO}
2743       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2744
2745     pass through:
2746         all other \-char, including \N and \N{ apart from \N{ABC}
2747
2748     stops on:
2749         @ and $ where it appears to be a var, but not for $ as tail anchor
2750         \l \L \u \U \Q \E
2751         (?{  or  (??{
2752
2753
2754   In transliterations:
2755     characters are VERY literal, except for - not at the start or end
2756     of the string, which indicates a range. If the range is in bytes,
2757     scan_const expands the range to the full set of intermediate
2758     characters. If the range is in utf8, the hyphen is replaced with
2759     a certain range mark which will be handled by pmtrans() in op.c.
2760
2761   In double-quoted strings:
2762     backslashes:
2763       double-quoted style: \r and \n
2764       constants: \x31, etc.
2765       deprecated backrefs: \1 (in substitution replacements)
2766       case and quoting: \U \Q \E
2767     stops on @ and $
2768
2769   scan_const does *not* construct ops to handle interpolated strings.
2770   It stops processing as soon as it finds an embedded $ or @ variable
2771   and leaves it to the caller to work out what's going on.
2772
2773   embedded arrays (whether in pattern or not) could be:
2774       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2775
2776   $ in double-quoted strings must be the symbol of an embedded scalar.
2777
2778   $ in pattern could be $foo or could be tail anchor.  Assumption:
2779   it's a tail anchor if $ is the last thing in the string, or if it's
2780   followed by one of "()| \r\n\t"
2781
2782   \1 (backreferences) are turned into $1 in substitutions
2783
2784   The structure of the code is
2785       while (there's a character to process) {
2786           handle transliteration ranges
2787           skip regexp comments /(?#comment)/ and codes /(?{code})/
2788           skip #-initiated comments in //x patterns
2789           check for embedded arrays
2790           check for embedded scalars
2791           if (backslash) {
2792               deprecate \1 in substitution replacements
2793               handle string-changing backslashes \l \U \Q \E, etc.
2794               switch (what was escaped) {
2795                   handle \- in a transliteration (becomes a literal -)
2796                   if a pattern and not \N{, go treat as regular character
2797                   handle \132 (octal characters)
2798                   handle \x15 and \x{1234} (hex characters)
2799                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2800                   handle \cV (control characters)
2801                   handle printf-style backslashes (\f, \r, \n, etc)
2802               } (end switch)
2803               continue
2804           } (end if backslash)
2805           handle regular character
2806     } (end while character to read)
2807
2808 */
2809
2810 STATIC char *
2811 S_scan_const(pTHX_ char *start)
2812 {
2813     char *send = PL_bufend;             /* end of the constant */
2814     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2815                                            on sizing. */
2816     char *s = start;                    /* start of the constant */
2817     char *d = SvPVX(sv);                /* destination for copies */
2818     bool dorange = FALSE;               /* are we in a translit range? */
2819     bool didrange = FALSE;              /* did we just finish a range? */
2820     bool in_charclass = FALSE;          /* within /[...]/ */
2821     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2822     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2823                                            UTF8?  But, this can show as true
2824                                            when the source isn't utf8, as for
2825                                            example when it is entirely composed
2826                                            of hex constants */
2827     SV *res;                            /* result from charnames */
2828
2829     /* Note on sizing:  The scanned constant is placed into sv, which is
2830      * initialized by newSV() assuming one byte of output for every byte of
2831      * input.  This routine expects newSV() to allocate an extra byte for a
2832      * trailing NUL, which this routine will append if it gets to the end of
2833      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2834      * CAPITAL LETTER A}), or more output than input if the constant ends up
2835      * recoded to utf8, but each time a construct is found that might increase
2836      * the needed size, SvGROW() is called.  Its size parameter each time is
2837      * based on the best guess estimate at the time, namely the length used so
2838      * far, plus the length the current construct will occupy, plus room for
2839      * the trailing NUL, plus one byte for every input byte still unscanned */
2840
2841     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2842                        before set */
2843 #ifdef EBCDIC
2844     UV literal_endpoint = 0;
2845     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2846 #endif
2847
2848     PERL_ARGS_ASSERT_SCAN_CONST;
2849
2850     assert(PL_lex_inwhat != OP_TRANSR);
2851     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2852         /* If we are doing a trans and we know we want UTF8 set expectation */
2853         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2854         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2855     }
2856
2857     /* Protect sv from errors and fatal warnings. */
2858     ENTER_with_name("scan_const");
2859     SAVEFREESV(sv);
2860
2861     while (s < send || dorange) {
2862
2863         /* get transliterations out of the way (they're most literal) */
2864         if (PL_lex_inwhat == OP_TRANS) {
2865             /* expand a range A-Z to the full set of characters.  AIE! */
2866             if (dorange) {
2867                 I32 i;                          /* current expanded character */
2868                 I32 min;                        /* first character in range */
2869                 I32 max;                        /* last character in range */
2870
2871 #ifdef EBCDIC
2872                 UV uvmax = 0;
2873 #endif
2874
2875                 if (has_utf8
2876 #ifdef EBCDIC
2877                     && !native_range
2878 #endif
2879                 ) {
2880                     char * const c = (char*)utf8_hop((U8*)d, -1);
2881                     char *e = d++;
2882                     while (e-- > c)
2883                         *(e + 1) = *e;
2884                     *c = (char) ILLEGAL_UTF8_BYTE;
2885                     /* mark the range as done, and continue */
2886                     dorange = FALSE;
2887                     didrange = TRUE;
2888                     continue;
2889                 }
2890
2891                 i = d - SvPVX_const(sv);                /* remember current offset */
2892 #ifdef EBCDIC
2893                 SvGROW(sv,
2894                        SvLEN(sv) + ((has_utf8)
2895                                     ?  (512 - UTF_CONTINUATION_MARK
2896                                         + UNISKIP(0x100))
2897                                     : 256));
2898                 /* How many two-byte within 0..255: 128 in UTF-8,
2899                  * 96 in UTF-8-mod. */
2900 #else
2901                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2902 #endif
2903                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2904 #ifdef EBCDIC
2905                 if (has_utf8) {
2906                     int j;
2907                     for (j = 0; j <= 1; j++) {
2908                         char * const c = (char*)utf8_hop((U8*)d, -1);
2909                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2910                         if (j)
2911                             min = (U8)uv;
2912                         else if (uv < 256)
2913                             max = (U8)uv;
2914                         else {
2915                             max = (U8)0xff; /* only to \xff */
2916                             uvmax = uv; /* \x{100} to uvmax */
2917                         }
2918                         d = c; /* eat endpoint chars */
2919                      }
2920                 }
2921                else {
2922 #endif
2923                    d -= 2;              /* eat the first char and the - */
2924                    min = (U8)*d;        /* first char in range */
2925                    max = (U8)d[1];      /* last char in range  */
2926 #ifdef EBCDIC
2927                }
2928 #endif
2929
2930                 if (min > max) {
2931                     Perl_croak(aTHX_
2932                                "Invalid range \"%c-%c\" in transliteration operator",
2933                                (char)min, (char)max);
2934                 }
2935
2936 #ifdef EBCDIC
2937                 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2938                  * any subsets of these ranges into individual characters */
2939                 if (literal_endpoint == 2 &&
2940                     ((isLOWER_A(min) && isLOWER_A(max)) ||
2941                      (isUPPER_A(min) && isUPPER_A(max))))
2942                 {
2943                     for (i = min; i <= max; i++) {
2944                         if (isALPHA_A(i))
2945                             *d++ = i;
2946                     }
2947                 }
2948                 else
2949 #endif
2950                     for (i = min; i <= max; i++)
2951 #ifdef EBCDIC
2952                         if (has_utf8) {
2953                             append_utf8_from_native_byte(i, &d);
2954                         }
2955                         else
2956 #endif
2957                             *d++ = (char)i;
2958
2959 #ifdef EBCDIC
2960                 if (uvmax) {
2961                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2962                     if (uvmax > 0x101)
2963                         *d++ = (char) ILLEGAL_UTF8_BYTE;
2964                     if (uvmax > 0x100)
2965                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2966                 }
2967 #endif
2968
2969                 /* mark the range as done, and continue */
2970                 dorange = FALSE;
2971                 didrange = TRUE;
2972 #ifdef EBCDIC
2973                 literal_endpoint = 0;
2974 #endif
2975                 continue;
2976             }
2977
2978             /* range begins (ignore - as first or last char) */
2979             else if (*s == '-' && s+1 < send  && s != start) {
2980                 if (didrange) {
2981                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2982                 }
2983                 if (has_utf8
2984 #ifdef EBCDIC
2985                     && !native_range
2986 #endif
2987                     ) {
2988                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
2989                     s++;
2990                     continue;
2991                 }
2992                 dorange = TRUE;
2993                 s++;
2994             }
2995             else {
2996                 didrange = FALSE;
2997 #ifdef EBCDIC
2998                 literal_endpoint = 0;
2999                 native_range = TRUE;
3000 #endif
3001             }
3002         }
3003
3004         /* if we get here, we're not doing a transliteration */
3005
3006         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3007             char *s1 = s-1;
3008             int esc = 0;
3009             while (s1 >= start && *s1-- == '\\')
3010                 esc = !esc;
3011             if (!esc)
3012                 in_charclass = TRUE;
3013         }
3014
3015         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3016             char *s1 = s-1;
3017             int esc = 0;
3018             while (s1 >= start && *s1-- == '\\')
3019                 esc = !esc;
3020             if (!esc)
3021                 in_charclass = FALSE;
3022         }
3023
3024         /* skip for regexp comments /(?#comment)/, except for the last
3025          * char, which will be done separately.
3026          * Stop on (?{..}) and friends */
3027
3028         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3029             if (s[2] == '#') {
3030                 while (s+1 < send && *s != ')')
3031                     *d++ = *s++;
3032             }
3033             else if (!PL_lex_casemods &&
3034                      (    s[2] == '{' /* This should match regcomp.c */
3035                       || (s[2] == '?' && s[3] == '{')))
3036             {
3037                 break;
3038             }
3039         }
3040
3041         /* likewise skip #-initiated comments in //x patterns */
3042         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3043           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3044             while (s+1 < send && *s != '\n')
3045                 *d++ = *s++;
3046         }
3047
3048         /* no further processing of single-quoted regex */
3049         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3050             goto default_action;
3051
3052         /* check for embedded arrays
3053            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3054            */
3055         else if (*s == '@' && s[1]) {
3056             if (isWORDCHAR_lazy_if(s+1,UTF))
3057                 break;
3058             if (strchr(":'{$", s[1]))
3059                 break;
3060             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3061                 break; /* in regexp, neither @+ nor @- are interpolated */
3062         }
3063
3064         /* check for embedded scalars.  only stop if we're sure it's a
3065            variable.
3066         */
3067         else if (*s == '$') {
3068             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3069                 break;
3070             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3071                 if (s[1] == '\\') {
3072                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3073                                    "Possible unintended interpolation of $\\ in regex");
3074                 }
3075                 break;          /* in regexp, $ might be tail anchor */
3076             }
3077         }
3078
3079         /* End of else if chain - OP_TRANS rejoin rest */
3080
3081         /* backslashes */
3082         if (*s == '\\' && s+1 < send) {
3083             char* e;    /* Can be used for ending '}', etc. */
3084
3085             s++;
3086
3087             /* warn on \1 - \9 in substitution replacements, but note that \11
3088              * is an octal; and \19 is \1 followed by '9' */
3089             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3090                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3091             {
3092                 /* diag_listed_as: \%d better written as $%d */
3093                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3094                 *--s = '$';
3095                 break;
3096             }
3097
3098             /* string-change backslash escapes */
3099             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3100                 --s;
3101                 break;
3102             }
3103             /* In a pattern, process \N, but skip any other backslash escapes.
3104              * This is because we don't want to translate an escape sequence
3105              * into a meta symbol and have the regex compiler use the meta
3106              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3107              * in spite of this, we do have to process \N here while the proper
3108              * charnames handler is in scope.  See bugs #56444 and #62056.
3109              *
3110              * There is a complication because \N in a pattern may also stand
3111              * for 'match a non-nl', and not mean a charname, in which case its
3112              * processing should be deferred to the regex compiler.  To be a
3113              * charname it must be followed immediately by a '{', and not look
3114              * like \N followed by a curly quantifier, i.e., not something like
3115              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3116              * quantifier */
3117             else if (PL_lex_inpat
3118                     && (*s != 'N'
3119                         || s[1] != '{'
3120                         || regcurly(s + 1)))
3121             {
3122                 *d++ = '\\';
3123                 goto default_action;
3124             }
3125
3126             switch (*s) {
3127
3128             /* quoted - in transliterations */
3129             case '-':
3130                 if (PL_lex_inwhat == OP_TRANS) {
3131                     *d++ = *s++;
3132                     continue;
3133                 }
3134                 /* FALLTHROUGH */
3135             default:
3136                 {
3137                     if ((isALPHANUMERIC(*s)))
3138                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3139                                        "Unrecognized escape \\%c passed through",
3140                                        *s);
3141                     /* default action is to copy the quoted character */
3142                     goto default_action;
3143                 }
3144
3145             /* eg. \132 indicates the octal constant 0132 */
3146             case '0': case '1': case '2': case '3':
3147             case '4': case '5': case '6': case '7':
3148                 {
3149                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3150                     STRLEN len = 3;
3151                     uv = grok_oct(s, &len, &flags, NULL);
3152                     s += len;
3153                     if (len < 3 && s < send && isDIGIT(*s)
3154                         && ckWARN(WARN_MISC))
3155                     {
3156                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3157                                     "%s", form_short_octal_warning(s, len));
3158                     }
3159                 }
3160                 goto NUM_ESCAPE_INSERT;
3161
3162             /* eg. \o{24} indicates the octal constant \024 */
3163             case 'o':
3164                 {
3165                     const char* error;
3166
3167                     bool valid = grok_bslash_o(&s, &uv, &error,
3168                                                TRUE, /* Output warning */
3169                                                FALSE, /* Not strict */
3170                                                TRUE, /* Output warnings for
3171                                                          non-portables */
3172                                                UTF);
3173                     if (! valid) {
3174                         yyerror(error);
3175                         continue;
3176                     }
3177                     goto NUM_ESCAPE_INSERT;
3178                 }
3179
3180             /* eg. \x24 indicates the hex constant 0x24 */
3181             case 'x':
3182                 {
3183                     const char* error;
3184
3185                     bool valid = grok_bslash_x(&s, &uv, &error,
3186                                                TRUE, /* Output warning */
3187                                                FALSE, /* Not strict */
3188                                                TRUE,  /* Output warnings for
3189                                                          non-portables */
3190                                                UTF);
3191                     if (! valid) {
3192                         yyerror(error);
3193                         continue;
3194                     }
3195                 }
3196
3197               NUM_ESCAPE_INSERT:
3198                 /* Insert oct or hex escaped character.  There will always be
3199                  * enough room in sv since such escapes will be longer than any
3200                  * UTF-8 sequence they can end up as, except if they force us
3201                  * to recode the rest of the string into utf8 */
3202
3203                 /* Here uv is the ordinal of the next character being added */
3204                 if (!UVCHR_IS_INVARIANT(uv)) {
3205                     if (!has_utf8 && uv > 255) {
3206                         /* Might need to recode whatever we have accumulated so
3207                          * far if it contains any chars variant in utf8 or
3208                          * utf-ebcdic. */
3209
3210                         SvCUR_set(sv, d - SvPVX_const(sv));
3211                         SvPOK_on(sv);
3212                         *d = '\0';
3213                         /* See Note on sizing above.  */
3214                         sv_utf8_upgrade_flags_grow(
3215                                          sv,
3216                                          SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3217                                                   /* Above-latin1 in string
3218                                                    * implies no encoding */
3219                                                   |SV_UTF8_NO_ENCODING,
3220                                          UNISKIP(uv) + (STRLEN)(send - s) + 1);
3221                         d = SvPVX(sv) + SvCUR(sv);
3222                         has_utf8 = TRUE;
3223                     }
3224
3225                     if (has_utf8) {
3226                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3227                         if (PL_lex_inwhat == OP_TRANS &&
3228                             PL_sublex_info.sub_op) {
3229                             PL_sublex_info.sub_op->op_private |=
3230                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3231                                              : OPpTRANS_TO_UTF);
3232                         }
3233 #ifdef EBCDIC
3234                         if (uv > 255 && !dorange)
3235                             native_range = FALSE;
3236 #endif
3237                     }
3238                     else {
3239                         *d++ = (char)uv;
3240                     }
3241                 }
3242                 else {
3243                     *d++ = (char) uv;
3244                 }
3245                 continue;
3246
3247             case 'N':
3248                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3249                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3250                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3251                  * GRAVE}.  For convenience all three forms are referred to as
3252                  * "named characters" below.
3253                  *
3254                  * For patterns, \N also can mean to match a non-newline.  Code
3255                  * before this 'switch' statement should already have handled
3256                  * this situation, and hence this code only has to deal with
3257                  * the named character cases.
3258                  *
3259                  * For non-patterns, the named characters are converted to
3260                  * their string equivalents.  In patterns, named characters are
3261                  * not converted to their ultimate forms for the same reasons
3262                  * that other escapes aren't.  Instead, they are converted to
3263                  * the \N{U+...} form to get the value from the charnames that
3264                  * is in effect right now, while preserving the fact that it
3265                  * was a named character, so that the regex compiler knows
3266                  * this.
3267                  *
3268                  * The structure of this section of code (besides checking for
3269                  * errors and upgrading to utf8) is:
3270                  *  If the named character is of the form \N{U+...}, pass it
3271                  *      through if a pattern; otherwise convert the code point
3272                  *      to utf8
3273                  *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
3274                  *      if a pattern; otherwise convert to utf8
3275                  *
3276                  * If the regex compiler should ever need to differentiate
3277                  * between the \N{U+...} and \N{name} forms, that could easily
3278                  * be done here by stripping any leading zeros from the
3279                  * \N{U+...} case, and adding them to the other one. */
3280
3281                 /* Here, 's' points to the 'N'; the test below is guaranteed to
3282                  * succeed if we are being called on a pattern, as we already
3283                  * know from a test above that the next character is a '{'.  A
3284                  * non-pattern \N must mean 'named character', which requires
3285                  * braces */
3286                 s++;
3287                 if (*s != '{') {
3288                     yyerror("Missing braces on \\N{}");
3289                     continue;
3290                 }
3291                 s++;
3292
3293                 /* If there is no matching '}', it is an error. */
3294                 if (! (e = strchr(s, '}'))) {
3295                     if (! PL_lex_inpat) {
3296                         yyerror("Missing right brace on \\N{}");
3297                     } else {
3298                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3299                     }
3300                     continue;
3301                 }
3302
3303                 /* Here it looks like a named character */
3304
3305                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3306                     s += 2;         /* Skip to next char after the 'U+' */
3307                     if (PL_lex_inpat) {
3308
3309                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3310                         /* Check the syntax.  */
3311                         const char *orig_s;
3312                         orig_s = s - 5;
3313                         if (!isXDIGIT(*s)) {
3314                           bad_NU:
3315                             yyerror(
3316                                 "Invalid hexadecimal number in \\N{U+...}"
3317                             );
3318                             s = e + 1;
3319                             continue;
3320                         }
3321                         while (++s < e) {
3322                             if (isXDIGIT(*s))
3323                                 continue;
3324                             else if ((*s == '.' || *s == '_')
3325                                   && isXDIGIT(s[1]))
3326                                 continue;
3327                             goto bad_NU;
3328                         }
3329
3330                         /* Pass everything through unchanged.
3331                          * +1 is for the '}' */
3332                         Copy(orig_s, d, e - orig_s + 1, char);
3333                         d += e - orig_s + 1;
3334                     }
3335                     else {  /* Not a pattern: convert the hex to string */
3336                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3337                                 | PERL_SCAN_SILENT_ILLDIGIT
3338                                 | PERL_SCAN_DISALLOW_PREFIX;
3339                         STRLEN len = e - s;
3340                         uv = grok_hex(s, &len, &flags, NULL);
3341                         if (len == 0 || (len != (STRLEN)(e - s)))
3342                             goto bad_NU;
3343
3344                          /* If the destination is not in utf8, unconditionally
3345                           * recode it to be so.  This is because \N{} implies
3346                           * Unicode semantics, and scalars have to be in utf8
3347                           * to guarantee those semantics */
3348                         if (! has_utf8) {
3349                             SvCUR_set(sv, d - SvPVX_const(sv));
3350                             SvPOK_on(sv);
3351                             *d = '\0';
3352                             /* See Note on sizing above.  */
3353                             sv_utf8_upgrade_flags_grow(
3354                                         sv,
3355                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3356                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3357                             d = SvPVX(sv) + SvCUR(sv);
3358                             has_utf8 = TRUE;
3359                         }
3360
3361                         /* Add the (Unicode) code point to the output. */
3362                         if (UNI_IS_INVARIANT(uv)) {
3363                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3364                         }
3365                         else {
3366                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3367                         }
3368                     }
3369                 }
3370                 else /* Here is \N{NAME} but not \N{U+...}. */
3371                      if ((res = get_and_check_backslash_N_name(s, e)))
3372                 {
3373                     STRLEN len;
3374                     const char *str = SvPV_const(res, len);
3375                     if (PL_lex_inpat) {
3376
3377                         if (! len) { /* The name resolved to an empty string */
3378                             Copy("\\N{}", d, 4, char);
3379                             d += 4;
3380                         }
3381                         else {
3382                             /* In order to not lose information for the regex
3383                             * compiler, pass the result in the specially made
3384                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3385                             * the code points in hex of each character
3386                             * returned by charnames */
3387
3388                             const char *str_end = str + len;
3389                             const STRLEN off = d - SvPVX_const(sv);
3390
3391                             if (! SvUTF8(res)) {
3392                                 /* For the non-UTF-8 case, we can determine the
3393                                  * exact length needed without having to parse
3394                                  * through the string.  Each character takes up
3395                                  * 2 hex digits plus either a trailing dot or
3396                                  * the "}" */
3397                                 const char initial_text[] = "\\N{U+";
3398                                 const STRLEN initial_len = sizeof(initial_text)
3399                                                            - 1;
3400                                 d = off + SvGROW(sv, off
3401                                                     + 3 * len
3402
3403                                                     /* +1 for trailing NUL */
3404                                                     + initial_len + 1
3405
3406                                                     + (STRLEN)(send - e));
3407                                 Copy(initial_text, d, initial_len, char);
3408                                 d += initial_len;
3409                                 while (str < str_end) {
3410                                     char hex_string[4];
3411                                     int len =
3412                                         my_snprintf(hex_string,
3413                                                     sizeof(hex_string),
3414                                                     "%02X.", (U8) *str);
3415                                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3416                                     Copy(hex_string, d, 3, char);
3417                                     d += 3;
3418                                     str++;
3419                                 }
3420                                 d--;    /* Below, we will overwrite the final
3421                                            dot with a right brace */
3422                             }
3423                             else {
3424                                 STRLEN char_length; /* cur char's byte length */
3425
3426                                 /* and the number of bytes after this is
3427                                  * translated into hex digits */
3428                                 STRLEN output_length;
3429
3430                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3431                                  * for max('U+', '.'); and 1 for NUL */
3432                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3433
3434                                 /* Get the first character of the result. */
3435                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3436                                                         len,
3437                                                         &char_length,
3438                                                         UTF8_ALLOW_ANYUV);
3439                                 /* Convert first code point to hex, including
3440                                  * the boiler plate before it. */
3441                                 output_length =
3442                                     my_snprintf(hex_string, sizeof(hex_string),
3443                                                 "\\N{U+%X",
3444                                                 (unsigned int) uv);
3445
3446                                 /* Make sure there is enough space to hold it */
3447                                 d = off + SvGROW(sv, off
3448                                                     + output_length
3449                                                     + (STRLEN)(send - e)
3450                                                     + 2);       /* '}' + NUL */
3451                                 /* And output it */
3452                                 Copy(hex_string, d, output_length, char);
3453                                 d += output_length;
3454
3455                                 /* For each subsequent character, append dot and
3456                                 * its ordinal in hex */
3457                                 while ((str += char_length) < str_end) {
3458                                     const STRLEN off = d - SvPVX_const(sv);
3459                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3460                                                             str_end - str,
3461                                                             &char_length,
3462                                                             UTF8_ALLOW_ANYUV);
3463                                     output_length =
3464                                         my_snprintf(hex_string,
3465                                                     sizeof(hex_string),
3466                                                     ".%X",
3467                                                     (unsigned int) uv);
3468
3469                                     d = off + SvGROW(sv, off
3470                                                         + output_length
3471                                                         + (STRLEN)(send - e)
3472                                                         + 2);   /* '}' +  NUL */
3473                                     Copy(hex_string, d, output_length, char);
3474                                     d += output_length;
3475                                 }
3476                             }
3477
3478                             *d++ = '}'; /* Done.  Add the trailing brace */
3479                         }
3480                     }
3481                     else { /* Here, not in a pattern.  Convert the name to a
3482                             * string. */
3483
3484                          /* If destination is not in utf8, unconditionally
3485                           * recode it to be so.  This is because \N{} implies
3486                           * Unicode semantics, and scalars have to be in utf8
3487                           * to guarantee those semantics */
3488                         if (! has_utf8) {
3489                             SvCUR_set(sv, d - SvPVX_const(sv));
3490                             SvPOK_on(sv);
3491                             *d = '\0';
3492                             /* See Note on sizing above.  */
3493                             sv_utf8_upgrade_flags_grow(sv,
3494                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3495                                                 len + (STRLEN)(send - s) + 1);
3496                             d = SvPVX(sv) + SvCUR(sv);
3497                             has_utf8 = TRUE;
3498                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3499
3500                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3501                              * set correctly here). */
3502                             const STRLEN off = d - SvPVX_const(sv);
3503                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3504                         }
3505                         if (! SvUTF8(res)) {    /* Make sure \N{} return is UTF-8 */
3506                             sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3507                             str = SvPV_const(res, len);
3508                         }
3509                         Copy(str, d, len, char);
3510                         d += len;
3511                     }
3512
3513                     SvREFCNT_dec(res);
3514
3515                 } /* End \N{NAME} */
3516 #ifdef EBCDIC
3517                 if (!dorange)
3518                     native_range = FALSE; /* \N{} is defined to be Unicode */
3519 #endif
3520                 s = e + 1;  /* Point to just after the '}' */
3521                 continue;
3522
3523             /* \c is a control character */
3524             case 'c':
3525                 s++;
3526                 if (s < send) {
3527                     *d++ = grok_bslash_c(*s++, 1);
3528                 }
3529                 else {
3530                     yyerror("Missing control char name in \\c");
3531                 }
3532                 continue;
3533
3534             /* printf-style backslashes, formfeeds, newlines, etc */
3535             case 'b':
3536                 *d++ = '\b';
3537                 break;
3538             case 'n':
3539                 *d++ = '\n';
3540                 break;
3541             case 'r':
3542                 *d++ = '\r';
3543                 break;
3544             case 'f':
3545                 *d++ = '\f';
3546                 break;
3547             case 't':
3548                 *d++ = '\t';
3549                 break;
3550             case 'e':
3551                 *d++ = ESC_NATIVE;
3552                 break;
3553             case 'a':
3554                 *d++ = '\a';
3555                 break;
3556             } /* end switch */
3557
3558             s++;
3559             continue;
3560         } /* end if (backslash) */
3561 #ifdef EBCDIC
3562         else
3563             literal_endpoint++;
3564 #endif
3565
3566     default_action:
3567         /* If we started with encoded form, or already know we want it,
3568            then encode the next character */
3569         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3570             STRLEN len  = 1;
3571
3572
3573             /* One might think that it is wasted effort in the case of the
3574              * source being utf8 (this_utf8 == TRUE) to take the next character
3575              * in the source, convert it to an unsigned value, and then convert
3576              * it back again.  But the source has not been validated here.  The
3577              * routine that does the conversion checks for errors like
3578              * malformed utf8 */
3579
3580             const UV nextuv   = (this_utf8)
3581                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3582                                 : (UV) ((U8) *s);
3583             const STRLEN need = UNISKIP(nextuv);
3584             if (!has_utf8) {
3585                 SvCUR_set(sv, d - SvPVX_const(sv));
3586                 SvPOK_on(sv);
3587                 *d = '\0';
3588                 /* See Note on sizing above.  */
3589                 sv_utf8_upgrade_flags_grow(sv,
3590                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3591                                         need + (STRLEN)(send - s) + 1);
3592                 d = SvPVX(sv) + SvCUR(sv);
3593                 has_utf8 = TRUE;
3594             } else if (need > len) {
3595                 /* encoded value larger than old, may need extra space (NOTE:
3596                  * SvCUR() is not set correctly here).   See Note on sizing
3597                  * above.  */
3598                 const STRLEN off = d - SvPVX_const(sv);
3599                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3600             }
3601             s += len;
3602
3603             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3604 #ifdef EBCDIC
3605             if (uv > 255 && !dorange)
3606                 native_range = FALSE;
3607 #endif
3608         }
3609         else {
3610             *d++ = *s++;
3611         }
3612     } /* while loop to process each character */
3613
3614     /* terminate the string and set up the sv */
3615     *d = '\0';
3616     SvCUR_set(sv, d - SvPVX_const(sv));
3617     if (SvCUR(sv) >= SvLEN(sv))
3618         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3619                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3620
3621     SvPOK_on(sv);
3622     if (IN_ENCODING && !has_utf8) {
3623         sv_recode_to_utf8(sv, _get_encoding());
3624         if (SvUTF8(sv))
3625             has_utf8 = TRUE;
3626     }
3627     if (has_utf8) {
3628         SvUTF8_on(sv);
3629         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3630             PL_sublex_info.sub_op->op_private |=
3631                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3632         }
3633     }
3634
3635     /* shrink the sv if we allocated more than we used */
3636     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3637         SvPV_shrink_to_cur(sv);
3638     }
3639
3640     /* return the substring (via pl_yylval) only if we parsed anything */
3641     if (s > start) {
3642         char *s2 = start;
3643         for (; s2 < s; s2++) {
3644             if (*s2 == '\n')
3645                 COPLINE_INC_WITH_HERELINES;
3646         }
3647         SvREFCNT_inc_simple_void_NN(sv);
3648         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3649             && ! PL_parser->lex_re_reparsing)
3650         {
3651             const char *const key = PL_lex_inpat ? "qr" : "q";
3652             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3653             const char *type;
3654             STRLEN typelen;
3655
3656             if (PL_lex_inwhat == OP_TRANS) {
3657                 type = "tr";
3658                 typelen = 2;
3659             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3660                 type = "s";
3661                 typelen = 1;
3662             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3663                 type = "q";
3664                 typelen = 1;
3665             } else  {
3666                 type = "qq";
3667                 typelen = 2;
3668             }
3669
3670             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3671                                 type, typelen);
3672         }
3673         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3674     }
3675     LEAVE_with_name("scan_const");
3676     return s;
3677 }
3678
3679 /* S_intuit_more
3680  * Returns TRUE if there's more to the expression (e.g., a subscript),
3681  * FALSE otherwise.
3682  *
3683  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3684  *
3685  * ->[ and ->{ return TRUE
3686  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3687  * { and [ outside a pattern are always subscripts, so return TRUE
3688  * if we're outside a pattern and it's not { or [, then return FALSE
3689  * if we're in a pattern and the first char is a {
3690  *   {4,5} (any digits around the comma) returns FALSE
3691  * if we're in a pattern and the first char is a [
3692  *   [] returns FALSE
3693  *   [SOMETHING] has a funky algorithm to decide whether it's a
3694  *      character class or not.  It has to deal with things like
3695  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3696  * anything else returns TRUE
3697  */
3698
3699 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3700
3701 STATIC int
3702 S_intuit_more(pTHX_ char *s)
3703 {
3704     PERL_ARGS_ASSERT_INTUIT_MORE;
3705
3706     if (PL_lex_brackets)
3707         return TRUE;
3708     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3709         return TRUE;
3710     if (*s == '-' && s[1] == '>'
3711      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3712      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3713         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3714         return TRUE;
3715     if (*s != '{' && *s != '[')
3716         return FALSE;
3717     if (!PL_lex_inpat)
3718         return TRUE;
3719
3720     /* In a pattern, so maybe we have {n,m}. */
3721     if (*s == '{') {
3722         if (regcurly(s)) {
3723             return FALSE;
3724         }
3725         return TRUE;
3726     }
3727
3728     /* On the other hand, maybe we have a character class */
3729
3730     s++;
3731     if (*s == ']' || *s == '^')
3732         return FALSE;
3733     else {
3734         /* this is terrifying, and it works */
3735         int weight;
3736         char seen[256];
3737         const char * const send = strchr(s,']');
3738         unsigned char un_char, last_un_char;
3739         char tmpbuf[sizeof PL_tokenbuf * 4];
3740
3741         if (!send)              /* has to be an expression */
3742             return TRUE;
3743         weight = 2;             /* let's weigh the evidence */
3744
3745         if (*s == '$')
3746             weight -= 3;
3747         else if (isDIGIT(*s)) {
3748             if (s[1] != ']') {
3749                 if (isDIGIT(s[1]) && s[2] == ']')
3750                     weight -= 10;
3751             }
3752             else
3753                 weight -= 100;
3754         }
3755         Zero(seen,256,char);
3756         un_char = 255;
3757         for (; s < send; s++) {
3758             last_un_char = un_char;
3759             un_char = (unsigned char)*s;
3760             switch (*s) {
3761             case '@':
3762             case '&':
3763             case '$':
3764                 weight -= seen[un_char] * 10;
3765                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3766                     int len;
3767                     char *tmp = PL_bufend;
3768                     PL_bufend = (char*)send;
3769                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3770                     PL_bufend = tmp;
3771                     len = (int)strlen(tmpbuf);
3772                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3773                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3774                         weight -= 100;
3775                     else
3776                         weight -= 10;
3777                 }
3778                 else if (*s == '$' && s[1] &&
3779                   strchr("[#!%*<>()-=",s[1])) {
3780                     if (/*{*/ strchr("])} =",s[2]))
3781                         weight -= 10;
3782                     else
3783                         weight -= 1;
3784                 }
3785                 break;
3786             case '\\':
3787                 un_char = 254;
3788                 if (s[1]) {
3789                     if (strchr("wds]",s[1]))
3790                         weight += 100;
3791                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3792                         weight += 1;
3793                     else if (strchr("rnftbxcav",s[1]))
3794                         weight += 40;
3795                     else if (isDIGIT(s[1])) {
3796                         weight += 40;
3797                         while (s[1] && isDIGIT(s[1]))
3798                             s++;
3799                     }
3800                 }
3801                 else
3802                     weight += 100;
3803                 break;
3804             case '-':
3805                 if (s[1] == '\\')
3806                     weight += 50;
3807                 if (strchr("aA01! ",last_un_char))
3808                     weight += 30;
3809                 if (strchr("zZ79~",s[1]))
3810                     weight += 30;
3811                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3812                     weight -= 5;        /* cope with negative subscript */
3813                 break;
3814             default:
3815                 if (!isWORDCHAR(last_un_char)
3816                     && !(last_un_char == '$' || last_un_char == '@'
3817                          || last_un_char == '&')
3818                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3819                     char *d = s;
3820                     while (isALPHA(*s))
3821                         s++;
3822                     if (keyword(d, s - d, 0))
3823                         weight -= 150;
3824                 }
3825                 if (un_char == last_un_char + 1)
3826                     weight += 5;
3827                 weight -= seen[un_char];
3828                 break;
3829             }
3830             seen[un_char]++;
3831         }
3832         if (weight >= 0)        /* probably a character class */
3833             return FALSE;
3834     }
3835
3836     return TRUE;
3837 }
3838
3839 /*
3840  * S_intuit_method
3841  *
3842  * Does all the checking to disambiguate
3843  *   foo bar
3844  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3845  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3846  *
3847  * First argument is the stuff after the first token, e.g. "bar".
3848  *
3849  * Not a method if foo is a filehandle.
3850  * Not a method if foo is a subroutine prototyped to take a filehandle.
3851  * Not a method if it's really "Foo $bar"
3852  * Method if it's "foo $bar"
3853  * Not a method if it's really "print foo $bar"
3854  * Method if it's really "foo package::" (interpreted as package->foo)
3855  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3856  * Not a method if bar is a filehandle or package, but is quoted with
3857  *   =>
3858  */
3859
3860 STATIC int
3861 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
3862 {
3863     char *s = start + (*start == '$');
3864     char tmpbuf[sizeof PL_tokenbuf];
3865     STRLEN len;
3866     GV* indirgv;
3867         /* Mustn't actually add anything to a symbol table.
3868            But also don't want to "initialise" any placeholder
3869            constants that might already be there into full
3870            blown PVGVs with attached PVCV.  */
3871     GV * const gv =
3872         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
3873
3874     PERL_ARGS_ASSERT_INTUIT_METHOD;
3875
3876     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3877             return 0;
3878     if (cv && SvPOK(cv)) {
3879         const char *proto = CvPROTO(cv);
3880         if (proto) {
3881             while (*proto && (isSPACE(*proto) || *proto == ';'))
3882                 proto++;
3883             if (*proto == '*')
3884                 return 0;
3885         }
3886     }
3887
3888     if (*start == '$') {
3889         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3890                 isUPPER(*PL_tokenbuf))
3891             return 0;
3892         s = skipspace(s);
3893         PL_bufptr = start;
3894         PL_expect = XREF;
3895         return *s == '(' ? FUNCMETH : METHOD;
3896     }
3897
3898     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3899     /* start is the beginning of the possible filehandle/object,
3900      * and s is the end of it
3901      * tmpbuf is a copy of it (but with single quotes as double colons)
3902      */
3903
3904     if (!keyword(tmpbuf, len, 0)) {
3905         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3906             len -= 2;
3907             tmpbuf[len] = '\0';
3908             goto bare_package;
3909         }
3910         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3911         if (indirgv && GvCVu(indirgv))
3912             return 0;
3913         /* filehandle or package name makes it a method */
3914         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3915             s = skipspace(s);
3916             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3917                 return 0;       /* no assumptions -- "=>" quotes bareword */
3918       bare_package:
3919             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3920                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3921             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3922             PL_expect = XTERM;
3923             force_next(WORD);
3924             PL_bufptr = s;
3925             return *s == '(' ? FUNCMETH : METHOD;
3926         }
3927     }
3928     return 0;
3929 }
3930
3931 /* Encoded script support. filter_add() effectively inserts a
3932  * 'pre-processing' function into the current source input stream.
3933  * Note that the filter function only applies to the current source file
3934  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3935  *
3936  * The datasv parameter (which may be NULL) can be used to pass
3937  * private data to this instance of the filter. The filter function
3938  * can recover the SV using the FILTER_DATA macro and use it to
3939  * store private buffers and state information.
3940  *
3941  * The supplied datasv parameter is upgraded to a PVIO type
3942  * and the IoDIRP/IoANY field is used to store the function pointer,
3943  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3944  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3945  * private use must be set using malloc'd pointers.
3946  */
3947
3948 SV *
3949 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3950 {
3951     if (!funcp)
3952         return NULL;
3953
3954     if (!PL_parser)
3955         return NULL;
3956
3957     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3958         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3959
3960     if (!PL_rsfp_filters)
3961         PL_rsfp_filters = newAV();
3962     if (!datasv)
3963         datasv = newSV(0);
3964     SvUPGRADE(datasv, SVt_PVIO);
3965     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3966     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3967     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3968                           FPTR2DPTR(void *, IoANY(datasv)),
3969                           SvPV_nolen(datasv)));
3970     av_unshift(PL_rsfp_filters, 1);
3971     av_store(PL_rsfp_filters, 0, datasv) ;
3972     if (
3973         !PL_parser->filtered
3974      && PL_parser->lex_flags & LEX_EVALBYTES
3975      && PL_bufptr < PL_bufend
3976     ) {
3977         const char *s = PL_bufptr;
3978         while (s < PL_bufend) {
3979             if (*s == '\n') {
3980                 SV *linestr = PL_parser->linestr;
3981                 char *buf = SvPVX(linestr);
3982                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3983                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3984                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3985                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3986                 STRLEN const last_uni_pos =
3987                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3988                 STRLEN const last_lop_pos =
3989                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3990                 av_push(PL_rsfp_filters, linestr);
3991                 PL_parser->linestr =
3992                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3993                 buf = SvPVX(PL_parser->linestr);
3994                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3995                 PL_parser->bufptr = buf + bufptr_pos;
3996                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3997                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3998                 PL_parser->linestart = buf + linestart_pos;
3999                 if (PL_parser->last_uni)
4000                     PL_parser->last_uni = buf + last_uni_pos;
4001                 if (PL_parser->last_lop)
4002                     PL_parser->last_lop = buf + last_lop_pos;
4003                 SvLEN(linestr) = SvCUR(linestr);
4004                 SvCUR(linestr) = s-SvPVX(linestr);
4005                 PL_parser->filtered = 1;
4006                 break;
4007             }
4008             s++;
4009         }
4010     }
4011     return(datasv);
4012 }
4013
4014
4015 /* Delete most recently added instance of this filter function. */
4016 void
4017 Perl_filter_del(pTHX_ filter_t funcp)
4018 {
4019     SV *datasv;
4020
4021     PERL_ARGS_ASSERT_FILTER_DEL;
4022
4023 #ifdef DEBUGGING
4024     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4025                           FPTR2DPTR(void*, funcp)));
4026 #endif
4027     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4028         return;
4029     /* if filter is on top of stack (usual case) just pop it off */
4030     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4031     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4032         sv_free(av_pop(PL_rsfp_filters));
4033
4034         return;
4035     }
4036     /* we need to search for the correct entry and clear it     */
4037     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4038 }
4039
4040
4041 /* Invoke the idxth filter function for the current rsfp.        */
4042 /* maxlen 0 = read one text line */
4043 I32
4044 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4045 {
4046     filter_t funcp;
4047     SV *datasv = NULL;
4048     /* This API is bad. It should have been using unsigned int for maxlen.
4049        Not sure if we want to change the API, but if not we should sanity
4050        check the value here.  */
4051     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4052
4053     PERL_ARGS_ASSERT_FILTER_READ;
4054
4055     if (!PL_parser || !PL_rsfp_filters)
4056         return -1;
4057     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4058         /* Provide a default input filter to make life easy.    */
4059         /* Note that we append to the line. This is handy.      */
4060         DEBUG_P(PerlIO_printf(Perl_debug_log,
4061                               "filter_read %d: from rsfp\n", idx));
4062         if (correct_length) {
4063             /* Want a block */
4064             int len ;
4065             const int old_len = SvCUR(buf_sv);
4066
4067             /* ensure buf_sv is large enough */
4068             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4069             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4070                                    correct_length)) <= 0) {
4071                 if (PerlIO_error(PL_rsfp))
4072                     return -1;          /* error */
4073                 else
4074                     return 0 ;          /* end of file */
4075             }
4076             SvCUR_set(buf_sv, old_len + len) ;
4077             SvPVX(buf_sv)[old_len + len] = '\0';
4078         } else {
4079             /* Want a line */
4080             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4081                 if (PerlIO_error(PL_rsfp))
4082                     return -1;          /* error */
4083                 else
4084                     return 0 ;          /* end of file */
4085             }
4086         }
4087         return SvCUR(buf_sv);
4088     }
4089     /* Skip this filter slot if filter has been deleted */
4090     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4091         DEBUG_P(PerlIO_printf(Perl_debug_log,
4092                               "filter_read %d: skipped (filter deleted)\n",
4093                               idx));
4094         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4095     }
4096     if (SvTYPE(datasv) != SVt_PVIO) {
4097         if (correct_length) {
4098             /* Want a block */
4099             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4100             if (!remainder) return 0; /* eof */
4101             if (correct_length > remainder) correct_length = remainder;
4102             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4103             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4104         } else {
4105             /* Want a line */
4106             const char *s = SvEND(datasv);
4107             const char *send = SvPVX(datasv) + SvLEN(datasv);
4108             while (s < send) {
4109                 if (*s == '\n') {
4110                     s++;
4111                     break;
4112                 }
4113                 s++;
4114             }
4115             if (s == send) return 0; /* eof */
4116             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4117             SvCUR_set(datasv, s-SvPVX(datasv));
4118         }
4119         return SvCUR(buf_sv);
4120     }
4121     /* Get function pointer hidden within datasv        */
4122     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4123     DEBUG_P(PerlIO_printf(Perl_debug_log,
4124                           "filter_read %d: via function %p (%s)\n",
4125                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4126     /* Call function. The function is expected to       */
4127     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4128     /* Return: <0:error, =0:eof, >0:not eof             */
4129     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4130 }
4131
4132 STATIC char *
4133 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4134 {
4135     PERL_ARGS_ASSERT_FILTER_GETS;
4136
4137 #ifdef PERL_CR_FILTER
4138     if (!PL_rsfp_filters) {
4139         filter_add(S_cr_textfilter,NULL);
4140     }
4141 #endif
4142     if (PL_rsfp_filters) {
4143         if (!append)
4144             SvCUR_set(sv, 0);   /* start with empty line        */
4145         if (FILTER_READ(0, sv, 0) > 0)
4146             return ( SvPVX(sv) ) ;
4147         else
4148             return NULL ;
4149     }
4150     else
4151         return (sv_gets(sv, PL_rsfp, append));
4152 }
4153
4154 STATIC HV *
4155 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4156 {
4157     GV *gv;
4158
4159     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4160
4161     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4162         return PL_curstash;
4163
4164     if (len > 2 &&
4165         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4166         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4167     {
4168         return GvHV(gv);                        /* Foo:: */
4169     }
4170
4171     /* use constant CLASS => 'MyClass' */
4172     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4173     if (gv && GvCV(gv)) {
4174         SV * const sv = cv_const_sv(GvCV(gv));
4175         if (sv)
4176             return gv_stashsv(sv, 0);
4177     }
4178
4179     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4180 }
4181
4182
4183 STATIC char *
4184 S_tokenize_use(pTHX_ int is_use, char *s) {
4185     PERL_ARGS_ASSERT_TOKENIZE_USE;
4186
4187     if (PL_expect != XSTATE)
4188         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4189                     is_use ? "use" : "no"));
4190     PL_expect = XTERM;
4191     s = skipspace(s);
4192     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4193         s = force_version(s, TRUE);
4194         if (*s == ';' || *s == '}'
4195                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4196             NEXTVAL_NEXTTOKE.opval = NULL;
4197             force_next(WORD);
4198         }
4199         else if (*s == 'v') {
4200             s = force_word(s,WORD,FALSE,TRUE);
4201             s = force_version(s, FALSE);
4202         }
4203     }
4204     else {
4205         s = force_word(s,WORD,FALSE,TRUE);
4206         s = force_version(s, FALSE);
4207     }
4208     pl_yylval.ival = is_use;
4209     return s;
4210 }
4211 #ifdef DEBUGGING
4212     static const char* const exp_name[] =
4213         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4214           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4215           "TERMORDORDOR"
4216         };
4217 #endif
4218
4219 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4220 STATIC bool
4221 S_word_takes_any_delimeter(char *p, STRLEN len)
4222 {
4223     return (len == 1 && strchr("msyq", p[0])) ||
4224            (len == 2 && (
4225             (p[0] == 't' && p[1] == 'r') ||
4226             (p[0] == 'q' && strchr("qwxr", p[1]))));
4227 }
4228
4229 static void
4230 S_check_scalar_slice(pTHX_ char *s)
4231 {
4232     s++;
4233     while (*s == ' ' || *s == '\t') s++;
4234     if (*s == 'q' && s[1] == 'w'
4235      && !isWORDCHAR_lazy_if(s+2,UTF))
4236         return;
4237     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4238         s += UTF ? UTF8SKIP(s) : 1;
4239     if (*s == '}' || *s == ']')
4240         pl_yylval.ival = OPpSLICEWARNING;
4241 }
4242
4243 /*
4244   yylex
4245
4246   Works out what to call the token just pulled out of the input
4247   stream.  The yacc parser takes care of taking the ops we return and
4248   stitching them into a tree.
4249
4250   Returns:
4251     The type of the next token
4252
4253   Structure:
4254       Switch based on the current state:
4255           - if we already built the token before, use it
4256           - if we have a case modifier in a string, deal with that
4257           - handle other cases of interpolation inside a string
4258           - scan the next line if we are inside a format
4259       In the normal state switch on the next character:
4260           - default:
4261             if alphabetic, go to key lookup
4262             unrecoginized character - croak
4263           - 0/4/26: handle end-of-line or EOF
4264           - cases for whitespace
4265           - \n and #: handle comments and line numbers
4266           - various operators, brackets and sigils
4267           - numbers
4268           - quotes
4269           - 'v': vstrings (or go to key lookup)
4270           - 'x' repetition operator (or go to key lookup)
4271           - other ASCII alphanumerics (key lookup begins here):
4272               word before => ?
4273               keyword plugin
4274               scan built-in keyword (but do nothing with it yet)
4275               check for statement label
4276               check for lexical subs
4277                   goto just_a_word if there is one
4278               see whether built-in keyword is overridden
4279               switch on keyword number:
4280                   - default: just_a_word:
4281                       not a built-in keyword; handle bareword lookup
4282                       disambiguate between method and sub call
4283                       fall back to bareword
4284                   - cases for built-in keywords
4285 */
4286
4287
4288 int
4289 Perl_yylex(pTHX)
4290 {
4291     dVAR;
4292     char *s = PL_bufptr;
4293     char *d;
4294     STRLEN len;
4295     bool bof = FALSE;
4296     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4297     U8 formbrack = 0;
4298     U32 fake_eof = 0;
4299
4300     /* orig_keyword, gvp, and gv are initialized here because
4301      * jump to the label just_a_word_zero can bypass their
4302      * initialization later. */
4303     I32 orig_keyword = 0;
4304     GV *gv = NULL;
4305     GV **gvp = NULL;
4306
4307     DEBUG_T( {
4308         SV* tmp = newSVpvs("");
4309         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4310             (IV)CopLINE(PL_curcop),
4311             lex_state_names[PL_lex_state],
4312             exp_name[PL_expect],
4313             pv_display(tmp, s, strlen(s), 0, 60));
4314         SvREFCNT_dec(tmp);
4315     } );
4316
4317     /* when we've already built the next token, just pull it out of the queue */
4318     if (PL_nexttoke) {
4319         PL_nexttoke--;
4320         pl_yylval = PL_nextval[PL_nexttoke];
4321         if (!PL_nexttoke) {
4322             PL_lex_state = PL_lex_defer;
4323             PL_lex_defer = LEX_NORMAL;
4324         }
4325         {
4326             I32 next_type;
4327             next_type = PL_nexttype[PL_nexttoke];
4328             if (next_type & (7<<24)) {
4329                 if (next_type & (1<<24)) {
4330                     if (PL_lex_brackets > 100)
4331                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4332                     PL_lex_brackstack[PL_lex_brackets++] =
4333                         (char) ((next_type >> 16) & 0xff);
4334                 }
4335                 if (next_type & (2<<24))
4336                     PL_lex_allbrackets++;
4337                 if (next_type & (4<<24))
4338                     PL_lex_allbrackets--;
4339                 next_type &= 0xffff;
4340             }
4341             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4342         }
4343     }
4344
4345     switch (PL_lex_state) {
4346     case LEX_NORMAL:
4347     case LEX_INTERPNORMAL:
4348         break;
4349
4350     /* interpolated case modifiers like \L \U, including \Q and \E.
4351        when we get here, PL_bufptr is at the \
4352     */
4353     case LEX_INTERPCASEMOD:
4354 #ifdef DEBUGGING
4355         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4356             Perl_croak(aTHX_
4357                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4358                        PL_bufptr, PL_bufend, *PL_bufptr);
4359 #endif
4360         /* handle \E or end of string */
4361         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4362             /* if at a \E */
4363             if (PL_lex_casemods) {
4364                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4365                 PL_lex_casestack[PL_lex_casemods] = '\0';
4366
4367                 if (PL_bufptr != PL_bufend
4368                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4369                         || oldmod == 'F')) {
4370                     PL_bufptr += 2;
4371                     PL_lex_state = LEX_INTERPCONCAT;
4372                 }
4373                 PL_lex_allbrackets--;
4374                 return REPORT(')');
4375             }
4376             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4377                /* Got an unpaired \E */
4378                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4379                         "Useless use of \\E");
4380             }
4381             if (PL_bufptr != PL_bufend)
4382                 PL_bufptr += 2;
4383             PL_lex_state = LEX_INTERPCONCAT;
4384             return yylex();
4385         }
4386         else {
4387             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4388               "### Saw case modifier\n"); });
4389             s = PL_bufptr + 1;
4390             if (s[1] == '\\' && s[2] == 'E') {
4391                 PL_bufptr = s + 3;
4392                 PL_lex_state = LEX_INTERPCONCAT;
4393                 return yylex();
4394             }
4395             else {
4396                 I32 tmp;
4397                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4398                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4399                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4400                     (strchr(PL_lex_casestack, 'L')
4401                         || strchr(PL_lex_casestack, 'U')
4402                         || strchr(PL_lex_casestack, 'F'))) {
4403                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4404                     PL_lex_allbrackets--;
4405                     return REPORT(')');
4406                 }
4407                 if (PL_lex_casemods > 10)
4408                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4409                 PL_lex_casestack[PL_lex_casemods++] = *s;
4410                 PL_lex_casestack[PL_lex_casemods] = '\0';
4411                 PL_lex_state = LEX_INTERPCONCAT;
4412                 NEXTVAL_NEXTTOKE.ival = 0;
4413                 force_next((2<<24)|'(');
4414                 if (*s == 'l')
4415                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4416                 else if (*s == 'u')
4417                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4418                 else if (*s == 'L')
4419                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4420                 else if (*s == 'U')
4421                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4422                 else if (*s == 'Q')
4423                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4424                 else if (*s == 'F')
4425                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4426                 else
4427                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4428                 PL_bufptr = s + 1;
4429             }
4430             force_next(FUNC);
4431             if (PL_lex_starts) {
4432                 s = PL_bufptr;
4433                 PL_lex_starts = 0;
4434                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4435                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4436                     TOKEN(',');
4437                 else
4438                     AopNOASSIGN(OP_CONCAT);
4439             }
4440             else
4441                 return yylex();
4442         }
4443
4444     case LEX_INTERPPUSH:
4445         return REPORT(sublex_push());
4446
4447     case LEX_INTERPSTART:
4448         if (PL_bufptr == PL_bufend)
4449             return REPORT(sublex_done());
4450         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4451               "### Interpolated variable\n"); });
4452         PL_expect = XTERM;
4453         /* for /@a/, we leave the joining for the regex engine to do
4454          * (unless we're within \Q etc) */
4455         PL_lex_dojoin = (*PL_bufptr == '@'
4456                             && (!PL_lex_inpat || PL_lex_casemods));
4457         PL_lex_state = LEX_INTERPNORMAL;
4458         if (PL_lex_dojoin) {
4459             NEXTVAL_NEXTTOKE.ival = 0;
4460             force_next(',');
4461             force_ident("\"", '$');
4462             NEXTVAL_NEXTTOKE.ival = 0;
4463             force_next('$');
4464             NEXTVAL_NEXTTOKE.ival = 0;
4465             force_next((2<<24)|'(');
4466             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4467             force_next(FUNC);
4468         }
4469         /* Convert (?{...}) and friends to 'do {...}' */
4470         if (PL_lex_inpat && *PL_bufptr == '(') {
4471             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4472             PL_bufptr += 2;
4473             if (*PL_bufptr != '{')
4474                 PL_bufptr++;
4475             PL_expect = XTERMBLOCK;
4476             force_next(DO);
4477         }
4478
4479         if (PL_lex_starts++) {
4480             s = PL_bufptr;
4481             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4482             if (!PL_lex_casemods && PL_lex_inpat)
4483                 TOKEN(',');
4484             else
4485                 AopNOASSIGN(OP_CONCAT);
4486         }
4487         return yylex();
4488
4489     case LEX_INTERPENDMAYBE:
4490         if (intuit_more(PL_bufptr)) {
4491             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4492             break;
4493         }
4494         /* FALLTHROUGH */
4495
4496     case LEX_INTERPEND:
4497         if (PL_lex_dojoin) {
4498             const U8 dojoin_was = PL_lex_dojoin;
4499             PL_lex_dojoin = FALSE;
4500             PL_lex_state = LEX_INTERPCONCAT;
4501             PL_lex_allbrackets--;
4502             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4503         }
4504         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4505             && SvEVALED(PL_lex_repl))
4506         {
4507             if (PL_bufptr != PL_bufend)
4508                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4509             PL_lex_repl = NULL;
4510         }
4511         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4512            re_eval_str.  If the here-doc body’s length equals the previous
4513            value of re_eval_start, re_eval_start will now be null.  So
4514            check re_eval_str as well. */
4515         if (PL_parser->lex_shared->re_eval_start
4516          || PL_parser->lex_shared->re_eval_str) {
4517             SV *sv;
4518             if (*PL_bufptr != ')')
4519                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4520             PL_bufptr++;
4521             /* having compiled a (?{..}) expression, return the original
4522              * text too, as a const */
4523             if (PL_parser->lex_shared->re_eval_str) {
4524                 sv = PL_parser->lex_shared->re_eval_str;
4525                 PL_parser->lex_shared->re_eval_str = NULL;
4526                 SvCUR_set(sv,
4527                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4528                 SvPV_shrink_to_cur(sv);
4529             }
4530             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4531                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4532             NEXTVAL_NEXTTOKE.opval =
4533                     (OP*)newSVOP(OP_CONST, 0,
4534                                  sv);
4535             force_next(THING);
4536             PL_parser->lex_shared->re_eval_start = NULL;
4537             PL_expect = XTERM;
4538             return REPORT(',');
4539         }
4540
4541         /* FALLTHROUGH */
4542     case LEX_INTERPCONCAT:
4543 #ifdef DEBUGGING
4544         if (PL_lex_brackets)
4545             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4546                        (long) PL_lex_brackets);
4547 #endif
4548         if (PL_bufptr == PL_bufend)
4549             return REPORT(sublex_done());
4550
4551         /* m'foo' still needs to be parsed for possible (?{...}) */
4552         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4553             SV *sv = newSVsv(PL_linestr);
4554             sv = tokeq(sv);
4555             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4556             s = PL_bufend;
4557         }
4558         else {
4559             s = scan_const(PL_bufptr);
4560             if (*s == '\\')
4561                 PL_lex_state = LEX_INTERPCASEMOD;
4562             else
4563                 PL_lex_state = LEX_INTERPSTART;
4564         }
4565
4566         if (s != PL_bufptr) {
4567             NEXTVAL_NEXTTOKE = pl_yylval;
4568             PL_expect = XTERM;
4569             force_next(THING);
4570             if (PL_lex_starts++) {
4571                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4572                 if (!PL_lex_casemods && PL_lex_inpat)
4573                     TOKEN(',');
4574                 else
4575                     AopNOASSIGN(OP_CONCAT);
4576             }
4577             else {
4578                 PL_bufptr = s;
4579                 return yylex();
4580             }
4581         }
4582
4583         return yylex();
4584     case LEX_FORMLINE:
4585         s = scan_formline(PL_bufptr);
4586         if (!PL_lex_formbrack)
4587         {
4588             formbrack = 1;
4589             goto rightbracket;
4590         }
4591         PL_bufptr = s;
4592         return yylex();
4593     }
4594
4595     /* We really do *not* want PL_linestr ever becoming a COW. */
4596     assert (!SvIsCOW(PL_linestr));
4597     s = PL_bufptr;
4598     PL_oldoldbufptr = PL_oldbufptr;
4599     PL_oldbufptr = s;
4600     PL_parser->saw_infix_sigil = 0;
4601
4602   retry:
4603     switch (*s) {
4604     default:
4605         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4606             goto keylookup;
4607         {
4608         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4609         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4610                                                     UTF8SKIP(s),
4611                                                     SVs_TEMP | SVf_UTF8),
4612                                             10, UNI_DISPLAY_ISPRINT)
4613                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4614         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4615         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4616             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4617         } else {
4618             d = PL_linestart;
4619         }
4620         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4621                           UTF8fARG(UTF, (s - d), d),
4622                          (int) len + 1);
4623     }
4624     case 4:
4625     case 26:
4626         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4627     case 0:
4628         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4629             PL_last_uni = 0;
4630             PL_last_lop = 0;
4631             if (PL_lex_brackets &&
4632                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4633                 yyerror((const char *)
4634                         (PL_lex_formbrack
4635                          ? "Format not terminated"
4636                          : "Missing right curly or square bracket"));
4637             }
4638             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4639                         "### Tokener got EOF\n");
4640             } );
4641             TOKEN(0);
4642         }
4643         if (s++ < PL_bufend)
4644             goto retry;                 /* ignore stray nulls */
4645         PL_last_uni = 0;
4646         PL_last_lop = 0;
4647         if (!PL_in_eval && !PL_preambled) {
4648             PL_preambled = TRUE;
4649             if (PL_perldb) {
4650                 /* Generate a string of Perl code to load the debugger.
4651                  * If PERL5DB is set, it will return the contents of that,
4652                  * otherwise a compile-time require of perl5db.pl.  */
4653
4654                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4655
4656                 if (pdb) {
4657                     sv_setpv(PL_linestr, pdb);
4658                     sv_catpvs(PL_linestr,";");
4659                 } else {
4660                     SETERRNO(0,SS_NORMAL);
4661                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4662                 }
4663                 PL_parser->preambling = CopLINE(PL_curcop);
4664             } else
4665                 sv_setpvs(PL_linestr,"");
4666             if (PL_preambleav) {
4667                 SV **svp = AvARRAY(PL_preambleav);
4668                 SV **const end = svp + AvFILLp(PL_preambleav);
4669                 while(svp <= end) {
4670                     sv_catsv(PL_linestr, *svp);
4671                     ++svp;
4672                     sv_catpvs(PL_linestr, ";");
4673                 }
4674                 sv_free(MUTABLE_SV(PL_preambleav));
4675                 PL_preambleav = NULL;
4676             }
4677             if (PL_minus_E)
4678                 sv_catpvs(PL_linestr,
4679                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4680             if (PL_minus_n || PL_minus_p) {
4681                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4682                 if (PL_minus_l)
4683                     sv_catpvs(PL_linestr,"chomp;");
4684                 if (PL_minus_a) {
4685                     if (PL_minus_F) {
4686                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4687                              || *PL_splitstr == '"')
4688                               && strchr(PL_splitstr + 1, *PL_splitstr))
4689                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4690                         else {
4691                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4692                                bytes can be used as quoting characters.  :-) */
4693                             const char *splits = PL_splitstr;
4694                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4695                             do {
4696                                 /* Need to \ \s  */
4697                                 if (*splits == '\\')
4698                                     sv_catpvn(PL_linestr, splits, 1);
4699                                 sv_catpvn(PL_linestr, splits, 1);
4700                             } while (*splits++);
4701                             /* This loop will embed the trailing NUL of
4702                                PL_linestr as the last thing it does before
4703                                terminating.  */
4704                             sv_catpvs(PL_linestr, ");");
4705                         }
4706                     }
4707                     else
4708                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4709                 }
4710             }
4711             sv_catpvs(PL_linestr, "\n");
4712             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4713             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4714             PL_last_lop = PL_last_uni = NULL;
4715             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4716                 update_debugger_info(PL_linestr, NULL, 0);
4717             goto retry;
4718         }
4719         do {
4720             fake_eof = 0;
4721             bof = PL_rsfp ? TRUE : FALSE;
4722             if (0) {
4723               fake_eof:
4724                 fake_eof = LEX_FAKE_EOF;
4725             }
4726             PL_bufptr = PL_bufend;
4727             COPLINE_INC_WITH_HERELINES;
4728             if (!lex_next_chunk(fake_eof)) {
4729                 CopLINE_dec(PL_curcop);
4730                 s = PL_bufptr;
4731                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4732             }
4733             CopLINE_dec(PL_curcop);
4734             s = PL_bufptr;
4735             /* If it looks like the start of a BOM or raw UTF-16,
4736              * check if it in fact is. */
4737             if (bof && PL_rsfp &&
4738                      (*s == 0 ||
4739                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4740                       *(U8*)s >= 0xFE ||
4741                       s[1] == 0)) {
4742                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4743                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4744 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4745                 /* offset may include swallowed CR */
4746                 if (!bof)
4747                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4748 #endif
4749                 if (bof) {
4750                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4751                     s = swallow_bom((U8*)s);
4752                 }
4753             }
4754             if (PL_parser->in_pod) {
4755                 /* Incest with pod. */
4756                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4757                     sv_setpvs(PL_linestr, "");
4758                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4759                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4760                     PL_last_lop = PL_last_uni = NULL;
4761                     PL_parser->in_pod = 0;
4762                 }
4763             }
4764             if (PL_rsfp || PL_parser->filtered)
4765                 incline(s);
4766         } while (PL_parser->in_pod);
4767         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4768         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4769         PL_last_lop = PL_last_uni = NULL;
4770         if (CopLINE(PL_curcop) == 1) {
4771             while (s < PL_bufend && isSPACE(*s))
4772                 s++;
4773             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4774                 s++;
4775             d = NULL;
4776             if (!PL_in_eval) {
4777                 if (*s == '#' && *(s+1) == '!')
4778                     d = s + 2;
4779 #ifdef ALTERNATE_SHEBANG
4780                 else {
4781                     static char const as[] = ALTERNATE_SHEBANG;
4782                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4783                         d = s + (sizeof(as) - 1);
4784                 }
4785 #endif /* ALTERNATE_SHEBANG */
4786             }
4787             if (d) {
4788                 char *ipath;
4789                 char *ipathend;
4790
4791                 while (isSPACE(*d))
4792                     d++;
4793                 ipath = d;
4794                 while (*d && !isSPACE(*d))
4795                     d++;
4796                 ipathend = d;
4797
4798 #ifdef ARG_ZERO_IS_SCRIPT
4799                 if (ipathend > ipath) {
4800                     /*
4801                      * HP-UX (at least) sets argv[0] to the script name,
4802                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4803                      * at least, set argv[0] to the basename of the Perl
4804                      * interpreter. So, having found "#!", we'll set it right.
4805                      */
4806                     SV* copfilesv = CopFILESV(PL_curcop);
4807                     if (copfilesv) {
4808                         SV * const x =
4809                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4810                                              SVt_PV)); /* $^X */
4811                         assert(SvPOK(x) || SvGMAGICAL(x));
4812                         if (sv_eq(x, copfilesv)) {
4813                             sv_setpvn(x, ipath, ipathend - ipath);
4814                             SvSETMAGIC(x);
4815                         }
4816                         else {
4817                             STRLEN blen;
4818                             STRLEN llen;
4819                             const char *bstart = SvPV_const(copfilesv, blen);
4820                             const char * const lstart = SvPV_const(x, llen);
4821                             if (llen < blen) {
4822                                 bstart += blen - llen;
4823                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
4824                                     sv_setpvn(x, ipath, ipathend - ipath);
4825                                     SvSETMAGIC(x);
4826                                 }
4827                             }
4828                         }
4829                     }
4830                     else {
4831                         /* Anything to do if no copfilesv? */
4832                     }
4833                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4834                 }
4835 #endif /* ARG_ZERO_IS_SCRIPT */
4836
4837                 /*
4838                  * Look for options.
4839                  */
4840                 d = instr(s,"perl -");
4841                 if (!d) {
4842                     d = instr(s,"perl");
4843 #if defined(DOSISH)
4844                     /* avoid getting into infinite loops when shebang
4845                      * line contains "Perl" rather than "perl" */
4846                     if (!d) {
4847                         for (d = ipathend-4; d >= ipath; --d) {
4848                             if (isALPHA_FOLD_EQ(*d, 'p')
4849                                 && !ibcmp(d, "perl", 4))
4850                             {
4851                                 break;
4852                             }
4853                         }
4854                         if (d < ipath)
4855                             d = NULL;
4856                     }
4857 #endif
4858                 }
4859 #ifdef ALTERNATE_SHEBANG
4860                 /*
4861                  * If the ALTERNATE_SHEBANG on this system starts with a
4862                  * character that can be part of a Perl expression, then if
4863                  * we see it but not "perl", we're probably looking at the
4864                  * start of Perl code, not a request to hand off to some
4865                  * other interpreter.  Similarly, if "perl" is there, but
4866                  * not in the first 'word' of the line, we assume the line
4867                  * contains the start of the Perl program.
4868                  */
4869                 if (d && *s != '#') {
4870                     const char *c = ipath;
4871                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4872                         c++;
4873                     if (c < d)
4874                         d = NULL;       /* "perl" not in first word; ignore */
4875                     else
4876                         *s = '#';       /* Don't try to parse shebang line */
4877                 }
4878 #endif /* ALTERNATE_SHEBANG */
4879                 if (!d &&
4880                     *s == '#' &&
4881                     ipathend > ipath &&
4882                     !PL_minus_c &&
4883                     !instr(s,"indir") &&
4884                     instr(PL_origargv[0],"perl"))
4885                 {
4886                     dVAR;
4887                     char **newargv;
4888
4889                     *ipathend = '\0';
4890                     s = ipathend + 1;
4891                     while (s < PL_bufend && isSPACE(*s))
4892                         s++;
4893                     if (s < PL_bufend) {
4894                         Newx(newargv,PL_origargc+3,char*);
4895                         newargv[1] = s;
4896                         while (s < PL_bufend && !isSPACE(*s))
4897                             s++;
4898                         *s = '\0';
4899                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4900                     }
4901                     else
4902                         newargv = PL_origargv;
4903                     newargv[0] = ipath;
4904                     PERL_FPU_PRE_EXEC
4905                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4906                     PERL_FPU_POST_EXEC
4907                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4908                 }
4909                 if (d) {
4910                     while (*d && !isSPACE(*d))
4911                         d++;
4912                     while (SPACE_OR_TAB(*d))
4913                         d++;
4914
4915                     if (*d++ == '-') {
4916                         const bool switches_done = PL_doswitches;
4917                         const U32 oldpdb = PL_perldb;
4918                         const bool oldn = PL_minus_n;
4919                         const bool oldp = PL_minus_p;
4920                         const char *d1 = d;
4921
4922                         do {
4923                             bool baduni = FALSE;
4924                             if (*d1 == 'C') {
4925                                 const char *d2 = d1 + 1;
4926                                 if (parse_unicode_opts((const char **)&d2)
4927                                     != PL_unicode)
4928                                     baduni = TRUE;
4929                             }
4930                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
4931                                 const char * const m = d1;
4932                                 while (*d1 && !isSPACE(*d1))
4933                                     d1++;
4934                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4935                                       (int)(d1 - m), m);
4936                             }
4937                             d1 = moreswitches(d1);
4938                         } while (d1);
4939                         if (PL_doswitches && !switches_done) {
4940                             int argc = PL_origargc;
4941                             char **argv = PL_origargv;
4942                             do {
4943                                 argc--,argv++;
4944                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4945                             init_argv_symbols(argc,argv);
4946                         }
4947                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4948                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4949                               /* if we have already added "LINE: while (<>) {",
4950                                  we must not do it again */
4951                         {
4952                             sv_setpvs(PL_linestr, "");
4953                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4954                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4955                             PL_last_lop = PL_last_uni = NULL;
4956                             PL_preambled = FALSE;
4957                             if (PERLDB_LINE || PERLDB_SAVESRC)
4958                                 (void)gv_fetchfile(PL_origfilename);
4959                             goto retry;
4960                         }
4961                     }
4962                 }
4963             }
4964         }
4965         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4966             PL_lex_state = LEX_FORMLINE;
4967             force_next(FORMRBRACK);
4968             TOKEN(';');
4969         }
4970         goto retry;
4971     case '\r':
4972 #ifdef PERL_STRICT_CR
4973         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4974         Perl_croak(aTHX_
4975       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4976 #endif
4977     case ' ': case '\t': case '\f': case '\v':
4978         s++;
4979         goto retry;
4980     case '#':
4981     case '\n':
4982         if (PL_lex_state != LEX_NORMAL ||
4983              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4984             const bool in_comment = *s == '#';
4985             if (*s == '#' && s == PL_linestart && PL_in_eval
4986              && !PL_rsfp && !PL_parser->filtered) {
4987                 /* handle eval qq[#line 1 "foo"\n ...] */
4988                 CopLINE_dec(PL_curcop);
4989                 incline(s);
4990             }
4991             d = s;
4992             while (d < PL_bufend && *d != '\n')
4993                 d++;
4994             if (d < PL_bufend)
4995                 d++;
4996             else if (d > PL_bufend)
4997                 /* Found by Ilya: feed random input to Perl. */
4998                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
4999                            d, PL_bufend);
5000             s = d;
5001             if (in_comment && d == PL_bufend
5002                 && PL_lex_state == LEX_INTERPNORMAL
5003                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5004                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5005             else
5006                 incline(s);
5007             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5008                 PL_lex_state = LEX_FORMLINE;
5009                 force_next(FORMRBRACK);
5010                 TOKEN(';');
5011             }
5012         }
5013         else {
5014             while (s < PL_bufend && *s != '\n')
5015                 s++;
5016             if (s < PL_bufend)
5017                 {
5018                     s++;
5019                     if (s < PL_bufend)
5020                         incline(s);
5021                 }
5022             else if (s > PL_bufend)
5023                 /* Found by Ilya: feed random input to Perl. */
5024                 Perl_croak(aTHX_ "panic: input overflow");
5025         }
5026         goto retry;
5027     case '-':
5028         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5029             I32 ftst = 0;
5030             char tmp;
5031
5032             s++;
5033             PL_bufptr = s;
5034             tmp = *s++;
5035
5036             while (s < PL_bufend && SPACE_OR_TAB(*s))
5037                 s++;
5038
5039             if (strnEQ(s,"=>",2)) {
5040                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5041                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5042                 OPERATOR('-');          /* unary minus */
5043             }
5044             switch (tmp) {
5045             case 'r': ftst = OP_FTEREAD;        break;
5046             case 'w': ftst = OP_FTEWRITE;       break;
5047             case 'x': ftst = OP_FTEEXEC;        break;
5048             case 'o': ftst = OP_FTEOWNED;       break;
5049             case 'R': ftst = OP_FTRREAD;        break;
5050             case 'W': ftst = OP_FTRWRITE;       break;
5051             case 'X': ftst = OP_FTREXEC;        break;
5052             case 'O': ftst = OP_FTROWNED;       break;
5053             case 'e': ftst = OP_FTIS;           break;
5054             case 'z': ftst = OP_FTZERO;         break;
5055             case 's': ftst = OP_FTSIZE;         break;
5056             case 'f': ftst = OP_FTFILE;         break;
5057             case 'd': ftst = OP_FTDIR;          break;
5058             case 'l': ftst = OP_FTLINK;         break;
5059             case 'p': ftst = OP_FTPIPE;         break;
5060             case 'S': ftst = OP_FTSOCK;         break;
5061             case 'u': ftst = OP_FTSUID;         break;
5062             case 'g': ftst = OP_FTSGID;         break;
5063             case 'k': ftst = OP_FTSVTX;         break;
5064             case 'b': ftst = OP_FTBLK;          break;
5065             case 'c': ftst = OP_FTCHR;          break;
5066             case 't': ftst = OP_FTTTY;          break;
5067             case 'T': ftst = OP_FTTEXT;         break;
5068             case 'B': ftst = OP_FTBINARY;       break;
5069             case 'M': case 'A': case 'C':
5070                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5071                 switch (tmp) {
5072                 case 'M': ftst = OP_FTMTIME;    break;
5073                 case 'A': ftst = OP_FTATIME;    break;
5074                 case 'C': ftst = OP_FTCTIME;    break;
5075                 default:                        break;
5076                 }
5077                 break;
5078             default:
5079                 break;
5080             }
5081             if (ftst) {
5082                 PL_last_uni = PL_oldbufptr;
5083                 PL_last_lop_op = (OPCODE)ftst;
5084                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5085                         "### Saw file test %c\n", (int)tmp);
5086                 } );
5087                 FTST(ftst);
5088             }
5089             else {
5090                 /* Assume it was a minus followed by a one-letter named
5091                  * subroutine call (or a -bareword), then. */
5092                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5093                         "### '-%c' looked like a file test but was not\n",
5094                         (int) tmp);
5095                 } );
5096                 s = --PL_bufptr;
5097             }
5098         }
5099         {
5100             const char tmp = *s++;
5101             if (*s == tmp) {
5102                 s++;
5103                 if (PL_expect == XOPERATOR)
5104                     TERM(POSTDEC);
5105                 else
5106                     OPERATOR(PREDEC);
5107             }
5108             else if (*s == '>') {
5109                 s++;
5110                 s = skipspace(s);
5111                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5112                     ((*s == '$' || *s == '&') && s[1] == '*')
5113                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5114                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5115                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5116                  ))
5117                 {
5118                     Perl_ck_warner_d(aTHX_
5119                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5120                         "Postfix dereference is experimental"
5121                     );
5122                     PL_expect = XPOSTDEREF;
5123                     TOKEN(ARROW);
5124                 }
5125                 if (isIDFIRST_lazy_if(s,UTF)) {
5126                     s = force_word(s,METHOD,FALSE,TRUE);
5127                     TOKEN(ARROW);
5128                 }
5129                 else if (*s == '$')
5130                     OPERATOR(ARROW);
5131                 else
5132                     TERM(ARROW);
5133             }
5134             if (PL_expect == XOPERATOR) {
5135                 if (*s == '=' && !PL_lex_allbrackets &&
5136                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5137                     s--;
5138                     TOKEN(0);
5139                 }
5140                 Aop(OP_SUBTRACT);
5141             }
5142             else {
5143                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5144                     check_uni();
5145                 OPERATOR('-');          /* unary minus */
5146             }
5147         }
5148
5149     case '+':
5150         {
5151             const char tmp = *s++;
5152             if (*s == tmp) {
5153                 s++;
5154                 if (PL_expect == XOPERATOR)
5155                     TERM(POSTINC);
5156                 else
5157                     OPERATOR(PREINC);
5158             }
5159             if (PL_expect == XOPERATOR) {
5160                 if (*s == '=' && !PL_lex_allbrackets &&
5161                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5162                     s--;
5163                     TOKEN(0);
5164                 }
5165                 Aop(OP_ADD);
5166             }
5167             else {
5168                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5169                     check_uni();
5170                 OPERATOR('+');
5171             }
5172         }
5173
5174     case '*':
5175         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5176         if (PL_expect != XOPERATOR) {
5177             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5178             PL_expect = XOPERATOR;
5179             force_ident(PL_tokenbuf, '*');
5180             if (!*PL_tokenbuf)
5181                 PREREF('*');
5182             TERM('*');
5183         }
5184         s++;
5185         if (*s == '*') {
5186             s++;
5187             if (*s == '=' && !PL_lex_allbrackets &&
5188                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5189                 s -= 2;
5190                 TOKEN(0);
5191             }
5192             PWop(OP_POW);
5193         }
5194         if (*s == '=' && !PL_lex_allbrackets &&
5195                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5196             s--;
5197             TOKEN(0);
5198         }
5199         PL_parser->saw_infix_sigil = 1;
5200         Mop(OP_MULTIPLY);
5201
5202     case '%':
5203     {
5204         if (PL_expect == XOPERATOR) {
5205             if (s[1] == '=' && !PL_lex_allbrackets &&
5206                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5207                 TOKEN(0);
5208             ++s;
5209             PL_parser->saw_infix_sigil = 1;
5210             Mop(OP_MODULO);
5211         }
5212         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5213         PL_tokenbuf[0] = '%';
5214         s = scan_ident(s, PL_tokenbuf + 1,
5215                 sizeof PL_tokenbuf - 1, FALSE);
5216         pl_yylval.ival = 0;
5217         if (!PL_tokenbuf[1]) {
5218             PREREF('%');
5219         }
5220         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5221             if (*s == '[')
5222                 PL_tokenbuf[0] = '@';
5223         }
5224         PL_expect = XOPERATOR;
5225         force_ident_maybe_lex('%');
5226         TERM('%');
5227     }
5228     case '^':
5229         d = s;
5230         bof = FEATURE_BITWISE_IS_ENABLED;
5231         if (bof && s[1] == '.')
5232             s++;
5233         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5234                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5235         {
5236             s = d;
5237             TOKEN(0);
5238         }
5239         s++;
5240         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5241     case '[':
5242         if (PL_lex_brackets > 100)
5243             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5244         PL_lex_brackstack[PL_lex_brackets++] = 0;
5245         PL_lex_allbrackets++;
5246         {
5247             const char tmp = *s++;
5248             OPERATOR(tmp);
5249         }
5250     case '~':
5251         if (s[1] == '~'
5252             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5253         {
5254             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5255                 TOKEN(0);
5256             s += 2;
5257             Perl_ck_warner_d(aTHX_
5258                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5259                 "Smartmatch is experimental");
5260             Eop(OP_SMARTMATCH);
5261         }
5262         s++;
5263         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5264             s++;
5265             BCop(OP_SCOMPLEMENT);
5266         }
5267         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5268     case ',':
5269         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5270             TOKEN(0);
5271         s++;
5272         OPERATOR(',');
5273     case ':':
5274         if (s[1] == ':') {
5275             len = 0;
5276             goto just_a_word_zero_gv;
5277         }
5278         s++;
5279         {
5280         OP *attrs;
5281
5282         switch (PL_expect) {
5283         case XOPERATOR:
5284             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5285                 break;
5286             PL_bufptr = s;      /* update in case we back off */
5287             if (*s == '=') {
5288                 Perl_croak(aTHX_
5289                            "Use of := for an empty attribute list is not allowed");
5290             }
5291             goto grabattrs;
5292         case XATTRBLOCK:
5293             PL_expect = XBLOCK;
5294             goto grabattrs;
5295         case XATTRTERM:
5296             PL_expect = XTERMBLOCK;
5297          grabattrs:
5298             s = skipspace(s);
5299             attrs = NULL;
5300             while (isIDFIRST_lazy_if(s,UTF)) {
5301                 I32 tmp;
5302                 SV *sv;
5303                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5304                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5305                     if (tmp < 0) tmp = -tmp;
5306                     switch (tmp) {
5307                     case KEY_or:
5308                     case KEY_and:
5309                     case KEY_for:
5310                     case KEY_foreach:
5311                     case KEY_unless:
5312                     case KEY_if:
5313                     case KEY_while:
5314                     case KEY_until:
5315                         goto got_attrs;
5316                     default:
5317                         break;
5318                     }
5319                 }
5320                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5321                 if (*d == '(') {
5322                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5323                     COPLINE_SET_FROM_MULTI_END;
5324                     if (!d) {
5325                         /* MUST advance bufptr here to avoid bogus
5326                            "at end of line" context messages from yyerror().
5327                          */
5328                         PL_bufptr = s + len;
5329                         yyerror("Unterminated attribute parameter in attribute list");
5330                         if (attrs)
5331                             op_free(attrs);
5332                         sv_free(sv);
5333                         return REPORT(0);       /* EOF indicator */
5334                     }
5335                 }
5336                 if (PL_lex_stuff) {
5337                     sv_catsv(sv, PL_lex_stuff);
5338                     attrs = op_append_elem(OP_LIST, attrs,
5339                                         newSVOP(OP_CONST, 0, sv));
5340                     SvREFCNT_dec_NN(PL_lex_stuff);
5341                     PL_lex_stuff = NULL;
5342                 }
5343                 else {
5344                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5345                         sv_free(sv);
5346                         if (PL_in_my == KEY_our) {
5347                             deprecate(":unique");
5348                         }
5349                         else
5350                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5351                     }
5352
5353                     /* NOTE: any CV attrs applied here need to be part of
5354                        the CVf_BUILTIN_ATTRS define in cv.h! */
5355                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5356                         sv_free(sv);
5357                         CvLVALUE_on(PL_compcv);
5358                     }
5359                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5360                         sv_free(sv);
5361                         deprecate(":locked");
5362                     }
5363                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5364                         sv_free(sv);
5365                         CvMETHOD_on(PL_compcv);
5366                     }
5367                     else if (!PL_in_my && len == 5
5368                           && strnEQ(SvPVX(sv), "const", len))
5369                     {
5370                         sv_free(sv);
5371                         Perl_ck_warner_d(aTHX_
5372                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5373                            ":const is experimental"
5374                         );
5375                         CvANONCONST_on(PL_compcv);
5376                         if (!CvANON(PL_compcv))
5377                             yyerror(":const is not permitted on named "
5378                                     "subroutines");
5379                     }
5380                     /* After we've set the flags, it could be argued that
5381                        we don't need to do the attributes.pm-based setting
5382                        process, and shouldn't bother appending recognized
5383                        flags.  To experiment with that, uncomment the
5384                        following "else".  (Note that's already been
5385                        uncommented.  That keeps the above-applied built-in
5386                        attributes from being intercepted (and possibly
5387                        rejected) by a package's attribute routines, but is
5388                        justified by the performance win for the common case
5389                        of applying only built-in attributes.) */
5390                     else
5391                         attrs = op_append_elem(OP_LIST, attrs,
5392                                             newSVOP(OP_CONST, 0,
5393                                                     sv));
5394                 }
5395                 s = skipspace(d);
5396                 if (*s == ':' && s[1] != ':')
5397                     s = skipspace(s+1);
5398                 else if (s == d)
5399                     break;      /* require real whitespace or :'s */
5400                 /* XXX losing whitespace on sequential attributes here */
5401             }
5402             {
5403                 if (*s != ';' && *s != '}' &&
5404                     !(PL_expect == XOPERATOR
5405                         ? (*s == '=' ||  *s == ')')
5406                         : (*s == '{' ||  *s == '('))) {
5407                     const char q = ((*s == '\'') ? '"' : '\'');
5408                     /* If here for an expression, and parsed no attrs, back
5409                        off. */
5410                     if (PL_expect == XOPERATOR && !attrs) {
5411                         s = PL_bufptr;
5412                         break;
5413                     }
5414                     /* MUST advance bufptr here to avoid bogus "at end of line"
5415                        context messages from yyerror().
5416                     */
5417                     PL_bufptr = s;
5418                     yyerror( (const char *)
5419                              (*s
5420                               ? Perl_form(aTHX_ "Invalid separator character "
5421                                           "%c%c%c in attribute list", q, *s, q)
5422                               : "Unterminated attribute list" ) );
5423                     if (attrs)
5424                         op_free(attrs);
5425                     OPERATOR(':');
5426                 }
5427             }
5428         got_attrs:
5429             if (attrs) {
5430                 NEXTVAL_NEXTTOKE.opval = attrs;
5431                 force_next(THING);
5432             }
5433             TOKEN(COLONATTR);
5434         }
5435         }
5436         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5437             s--;
5438             TOKEN(0);
5439         }
5440         PL_lex_allbrackets--;
5441         OPERATOR(':');
5442     case '(':
5443         s++;
5444         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5445             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5446         else
5447             PL_expect = XTERM;
5448         s = skipspace(s);
5449         PL_lex_allbrackets++;
5450         TOKEN('(');
5451     case ';':
5452         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5453             TOKEN(0);
5454         CLINE;
5455         s++;
5456         PL_expect = XSTATE;
5457         TOKEN(';');
5458     case ')':
5459         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5460             TOKEN(0);
5461         s++;
5462         PL_lex_allbrackets--;
5463         s = skipspace(s);
5464         if (*s == '{')
5465             PREBLOCK(')');
5466         TERM(')');
5467     case ']':
5468         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5469             TOKEN(0);
5470         s++;
5471         if (PL_lex_brackets <= 0)
5472             /* diag_listed_as: Unmatched right %s bracket */
5473             yyerror("Unmatched right square bracket");
5474         else
5475             --PL_lex_brackets;
5476         PL_lex_allbrackets--;
5477         if (PL_lex_state == LEX_INTERPNORMAL) {
5478             if (PL_lex_brackets == 0) {
5479                 if (*s == '-' && s[1] == '>')
5480                     PL_lex_state = LEX_INTERPENDMAYBE;
5481                 else if (*s != '[' && *s != '{')
5482                     PL_lex_state = LEX_INTERPEND;
5483             }
5484         }
5485         TERM(']');
5486     case '{':
5487         s++;
5488       leftbracket:
5489         if (PL_lex_brackets > 100) {
5490             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5491         }
5492         switch (PL_expect) {
5493         case XTERM:
5494         case XTERMORDORDOR:
5495             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5496             PL_lex_allbrackets++;
5497             OPERATOR(HASHBRACK);
5498         case XOPERATOR:
5499             while (s < PL_bufend && SPACE_OR_TAB(*s))
5500                 s++;
5501             d = s;
5502             PL_tokenbuf[0] = '\0';
5503             if (d < PL_bufend && *d == '-') {
5504                 PL_tokenbuf[0] = '-';
5505                 d++;
5506                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5507                     d++;
5508             }
5509             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5510                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5511                               FALSE, &len);
5512                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5513                     d++;
5514                 if (*d == '}') {
5515                     const char minus = (PL_tokenbuf[0] == '-');
5516                     s = force_word(s + minus, WORD, FALSE, TRUE);
5517                     if (minus)
5518                         force_next('-');
5519                 }
5520             }
5521             /* FALLTHROUGH */
5522         case XATTRTERM:
5523         case XTERMBLOCK:
5524             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5525             PL_lex_allbrackets++;
5526             PL_expect = XSTATE;
5527             break;
5528         case XATTRBLOCK:
5529         case XBLOCK:
5530             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5531             PL_lex_allbrackets++;
5532             PL_expect = XSTATE;
5533             break;
5534         case XBLOCKTERM:
5535             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5536             PL_lex_allbrackets++;
5537             PL_expect = XSTATE;
5538             break;
5539         default: {
5540                 const char *t;
5541                 if (PL_oldoldbufptr == PL_last_lop)
5542                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5543                 else
5544                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5545                 PL_lex_allbrackets++;
5546                 s = skipspace(s);
5547                 if (*s == '}') {
5548                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5549                         PL_expect = XTERM;
5550                         /* This hack is to get the ${} in the message. */
5551                         PL_bufptr = s+1;
5552                         yyerror("syntax error");
5553                         break;
5554                     }
5555                     OPERATOR(HASHBRACK);
5556                 }
5557                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5558                     /* ${...} or @{...} etc., but not print {...}
5559                      * Skip the disambiguation and treat this as a block.
5560                      */
5561                     goto block_expectation;
5562                 }
5563                 /* This hack serves to disambiguate a pair of curlies
5564                  * as being a block or an anon hash.  Normally, expectation
5565                  * determines that, but in cases where we're not in a
5566                  * position to expect anything in particular (like inside
5567                  * eval"") we have to resolve the ambiguity.  This code
5568                  * covers the case where the first term in the curlies is a
5569                  * quoted string.  Most other cases need to be explicitly
5570                  * disambiguated by prepending a "+" before the opening
5571                  * curly in order to force resolution as an anon hash.
5572                  *
5573                  * XXX should probably propagate the outer expectation
5574                  * into eval"" to rely less on this hack, but that could
5575                  * potentially break current behavior of eval"".
5576                  * GSAR 97-07-21
5577                  */
5578                 t = s;
5579                 if (*s == '\'' || *s == '"' || *s == '`') {
5580                     /* common case: get past first string, handling escapes */
5581                     for (t++; t < PL_bufend && *t != *s;)
5582                         if (*t++ == '\\')
5583                             t++;
5584                     t++;
5585                 }
5586                 else if (*s == 'q') {
5587                     if (++t < PL_bufend
5588                         && (!isWORDCHAR(*t)
5589                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5590                                 && !isWORDCHAR(*t))))
5591                     {
5592                         /* skip q//-like construct */
5593                         const char *tmps;
5594                         char open, close, term;
5595                         I32 brackets = 1;
5596
5597                         while (t < PL_bufend && isSPACE(*t))
5598                             t++;
5599                         /* check for q => */
5600                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5601                             OPERATOR(HASHBRACK);
5602                         }
5603                         term = *t;
5604                         open = term;
5605                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5606                             term = tmps[5];
5607                         close = term;
5608                         if (open == close)
5609                             for (t++; t < PL_bufend; t++) {
5610                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5611                                     t++;
5612                                 else if (*t == open)
5613                                     break;
5614                             }
5615                         else {
5616                             for (t++; t < PL_bufend; t++) {
5617                                 if (*t == '\\' && t+1 < PL_bufend)
5618                                     t++;
5619                                 else if (*t == close && --brackets <= 0)
5620                                     break;
5621                                 else if (*t == open)
5622                                     brackets++;
5623                             }
5624                         }
5625                         t++;
5626                     }
5627                     else
5628                         /* skip plain q word */
5629                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5630                              t += UTF8SKIP(t);
5631                 }
5632                 else if (isWORDCHAR_lazy_if(t,UTF)) {
5633                     t += UTF8SKIP(t);
5634                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5635                          t += UTF8SKIP(t);
5636                 }
5637                 while (t < PL_bufend && isSPACE(*t))
5638                     t++;
5639                 /* if comma follows first term, call it an anon hash */
5640                 /* XXX it could be a comma expression with loop modifiers */
5641                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5642                                    || (*t == '=' && t[1] == '>')))
5643                     OPERATOR(HASHBRACK);
5644                 if (PL_expect == XREF)
5645                 {
5646                   block_expectation:
5647                     /* If there is an opening brace or 'sub:', treat it
5648                        as a term to make ${{...}}{k} and &{sub:attr...}
5649                        dwim.  Otherwise, treat it as a statement, so
5650                        map {no strict; ...} works.
5651                      */
5652                     s = skipspace(s);
5653                     if (*s == '{') {
5654                         PL_expect = XTERM;
5655                         break;
5656                     }
5657                     if (strnEQ(s, "sub", 3)) {
5658                         d = s + 3;
5659                         d = skipspace(d);
5660                         if (*d == ':') {
5661                             PL_expect = XTERM;
5662                             break;
5663                         }
5664                     }
5665                     PL_expect = XSTATE;
5666                 }
5667                 else {
5668                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5669                     PL_expect = XSTATE;
5670                 }
5671             }
5672             break;
5673         }
5674         pl_yylval.ival = CopLINE(PL_curcop);
5675         PL_copline = NOLINE;   /* invalidate current command line number */
5676         TOKEN(formbrack ? '=' : '{');
5677     case '}':
5678         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5679             TOKEN(0);
5680       rightbracket:
5681         s++;
5682         if (PL_lex_brackets <= 0)
5683             /* diag_listed_as: Unmatched right %s bracket */
5684             yyerror("Unmatched right curly bracket");
5685         else
5686             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5687         PL_lex_allbrackets--;
5688         if (PL_lex_state == LEX_INTERPNORMAL) {
5689             if (PL_lex_brackets == 0) {
5690                 if (PL_expect & XFAKEBRACK) {
5691                     PL_expect &= XENUMMASK;
5692                     PL_lex_state = LEX_INTERPEND;
5693                     PL_bufptr = s;
5694                     return yylex();     /* ignore fake brackets */
5695                 }
5696                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5697                  && SvEVALED(PL_lex_repl))
5698                     PL_lex_state = LEX_INTERPEND;
5699                 else if (*s == '-' && s[1] == '>')
5700                     PL_lex_state = LEX_INTERPENDMAYBE;
5701                 else if (*s != '[' && *s != '{')
5702                     PL_lex_state = LEX_INTERPEND;
5703             }
5704         }
5705         if (PL_expect & XFAKEBRACK) {
5706             PL_expect &= XENUMMASK;
5707             PL_bufptr = s;
5708             return yylex();             /* ignore fake brackets */
5709         }
5710         force_next(formbrack ? '.' : '}');
5711         if (formbrack) LEAVE;
5712         if (formbrack == 2) { /* means . where arguments were expected */
5713             force_next(';');
5714             TOKEN(FORMRBRACK);
5715         }
5716         TOKEN(';');
5717     case '&':
5718         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5719         s++;
5720         if (*s++ == '&') {
5721             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5722                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5723                 s -= 2;
5724                 TOKEN(0);
5725             }
5726             AOPERATOR(ANDAND);
5727         }
5728         s--;
5729         if (PL_expect == XOPERATOR) {
5730             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5731                 && isIDFIRST_lazy_if(s,UTF))
5732             {
5733                 CopLINE_dec(PL_curcop);
5734                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5735                 CopLINE_inc(PL_curcop);
5736             }
5737             d = s;
5738             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5739                 s++;
5740             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5741                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5742                 s = d;
5743                 s--;
5744                 TOKEN(0);
5745             }
5746             if (d == s) {
5747                 PL_parser->saw_infix_sigil = 1;
5748                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5749             }
5750             else
5751                 BAop(OP_SBIT_AND);
5752         }
5753
5754         PL_tokenbuf[0] = '&';
5755         s = scan_ident(s - 1, PL_tokenbuf + 1,
5756                        sizeof PL_tokenbuf - 1, TRUE);
5757         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5758         if (PL_tokenbuf[1]) {
5759             force_ident_maybe_lex('&');
5760         }
5761         else
5762             PREREF('&');
5763         TERM('&');
5764
5765     case '|':
5766         s++;
5767         if (*s++ == '|') {
5768             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5769                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5770                 s -= 2;
5771                 TOKEN(0);
5772             }
5773             AOPERATOR(OROR);
5774         }
5775         s--;
5776         d = s;
5777         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5778             s++;
5779         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5780                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5781             s = d - 1;
5782             TOKEN(0);
5783         }
5784         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
5785     case '=':
5786         s++;
5787         {
5788             const char tmp = *s++;
5789             if (tmp == '=') {
5790                 if (!PL_lex_allbrackets &&
5791                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5792                     s -= 2;
5793                     TOKEN(0);
5794                 }
5795                 Eop(OP_EQ);
5796             }
5797             if (tmp == '>') {
5798                 if (!PL_lex_allbrackets &&
5799                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5800                     s -= 2;
5801                     TOKEN(0);
5802                 }
5803                 OPERATOR(',');
5804             }
5805             if (tmp == '~')
5806                 PMop(OP_MATCH);
5807             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5808                 && strchr("+-*/%.^&|<",tmp))
5809                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5810                             "Reversed %c= operator",(int)tmp);
5811             s--;
5812             if (PL_expect == XSTATE && isALPHA(tmp) &&
5813                 (s == PL_linestart+1 || s[-2] == '\n') )
5814             {
5815                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5816                     || PL_lex_state != LEX_NORMAL) {
5817                     d = PL_bufend;
5818                     while (s < d) {
5819                         if (*s++ == '\n') {
5820                             incline(s);
5821                             if (strnEQ(s,"=cut",4)) {
5822                                 s = strchr(s,'\n');
5823                                 if (s)
5824                                     s++;
5825                                 else
5826                                     s = d;
5827                                 incline(s);
5828                                 goto retry;
5829                             }
5830                         }
5831                     }
5832                     goto retry;
5833                 }
5834                 s = PL_bufend;
5835                 PL_parser->in_pod = 1;
5836                 goto retry;
5837             }
5838         }
5839         if (PL_expect == XBLOCK) {
5840             const char *t = s;
5841 #ifdef PERL_STRICT_CR
5842             while (SPACE_OR_TAB(*t))
5843 #else
5844             while (SPACE_OR_TAB(*t) || *t == '\r')
5845 #endif
5846                 t++;
5847             if (*t == '\n' || *t == '#') {
5848                 formbrack = 1;
5849                 ENTER;
5850                 SAVEI8(PL_parser->form_lex_state);
5851                 SAVEI32(PL_lex_formbrack);
5852                 PL_parser->form_lex_state = PL_lex_state;
5853                 PL_lex_formbrack = PL_lex_brackets + 1;
5854                 goto leftbracket;
5855             }
5856         }
5857         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5858             s--;
5859             TOKEN(0);
5860         }
5861         pl_yylval.ival = 0;
5862         OPERATOR(ASSIGNOP);
5863     case '!':
5864         s++;
5865         {
5866             const char tmp = *s++;
5867             if (tmp == '=') {
5868                 /* was this !=~ where !~ was meant?
5869                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5870
5871                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5872                     const char *t = s+1;
5873
5874                     while (t < PL_bufend && isSPACE(*t))
5875                         ++t;
5876
5877                     if (*t == '/' || *t == '?' ||
5878                         ((*t == 'm' || *t == 's' || *t == 'y')
5879                          && !isWORDCHAR(t[1])) ||
5880                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5881                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5882                                     "!=~ should be !~");
5883                 }
5884                 if (!PL_lex_allbrackets &&
5885                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5886                     s -= 2;
5887                     TOKEN(0);
5888                 }
5889                 Eop(OP_NE);
5890             }
5891             if (tmp == '~')
5892                 PMop(OP_NOT);
5893         }
5894         s--;
5895         OPERATOR('!');
5896     case '<':
5897         if (PL_expect != XOPERATOR) {
5898             if (s[1] != '<' && !strchr(s,'>'))
5899                 check_uni();
5900             if (s[1] == '<' && s[2] != '>')
5901                 s = scan_heredoc(s);
5902             else
5903                 s = scan_inputsymbol(s);
5904             PL_expect = XOPERATOR;
5905             TOKEN(sublex_start());
5906         }
5907         s++;
5908         {
5909             char tmp = *s++;
5910             if (tmp == '<') {
5911                 if (*s == '=' && !PL_lex_allbrackets &&
5912                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5913                     s -= 2;
5914                     TOKEN(0);
5915                 }
5916                 SHop(OP_LEFT_SHIFT);
5917             }
5918             if (tmp == '=') {
5919                 tmp = *s++;
5920                 if (tmp == '>') {
5921                     if (!PL_lex_allbrackets &&
5922                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5923                         s -= 3;
5924                         TOKEN(0);
5925                     }
5926                     Eop(OP_NCMP);
5927                 }
5928                 s--;
5929                 if (!PL_lex_allbrackets &&
5930                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5931                     s -= 2;
5932                     TOKEN(0);
5933                 }
5934                 Rop(OP_LE);
5935             }
5936         }
5937         s--;
5938         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5939             s--;
5940             TOKEN(0);
5941         }
5942         Rop(OP_LT);
5943     case '>':
5944         s++;
5945         {
5946             const char tmp = *s++;
5947             if (tmp == '>') {
5948                 if (*s == '=' && !PL_lex_allbrackets &&
5949                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5950                     s -= 2;
5951                     TOKEN(0);
5952                 }
5953                 SHop(OP_RIGHT_SHIFT);
5954             }
5955             else if (tmp == '=') {
5956                 if (!PL_lex_allbrackets &&
5957                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5958                     s -= 2;
5959                     TOKEN(0);
5960                 }
5961                 Rop(OP_GE);
5962             }
5963         }
5964         s--;
5965         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5966             s--;
5967             TOKEN(0);
5968         }
5969         Rop(OP_GT);
5970
5971     case '$':
5972         CLINE;
5973
5974         if (PL_expect == XOPERATOR) {
5975             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5976                 return deprecate_commaless_var_list();
5977             }
5978         }
5979         else if (PL_expect == XPOSTDEREF) {
5980             if (s[1] == '#') {
5981                 s++;
5982                 POSTDEREF(DOLSHARP);
5983             }
5984             POSTDEREF('$');
5985         }
5986
5987         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5988             PL_tokenbuf[0] = '@';
5989             s = scan_ident(s + 1, PL_tokenbuf + 1,
5990                            sizeof PL_tokenbuf - 1, FALSE);
5991             if (PL_expect == XOPERATOR)
5992                 no_op("Array length", s);
5993             if (!PL_tokenbuf[1])
5994                 PREREF(DOLSHARP);
5995             PL_expect = XOPERATOR;
5996             force_ident_maybe_lex('#');
5997             TOKEN(DOLSHARP);
5998         }
5999
6000         PL_tokenbuf[0] = '$';
6001         s = scan_ident(s, PL_tokenbuf + 1,
6002                        sizeof PL_tokenbuf - 1, FALSE);
6003         if (PL_expect == XOPERATOR) {
6004             d = s;
6005             if (PL_bufptr > s) {
6006                 d = PL_bufptr-1;
6007                 PL_bufptr = PL_oldbufptr;
6008             }
6009             no_op("Scalar", d);
6010         }
6011         if (!PL_tokenbuf[1]) {
6012             if (s == PL_bufend)
6013                 yyerror("Final $ should be \\$ or $name");
6014             PREREF('$');
6015         }
6016
6017         d = s;
6018         {
6019             const char tmp = *s;
6020             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6021                 s = skipspace(s);
6022
6023             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6024                 && intuit_more(s)) {
6025                 if (*s == '[') {
6026                     PL_tokenbuf[0] = '@';
6027                     if (ckWARN(WARN_SYNTAX)) {
6028                         char *t = s+1;
6029
6030                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6031                             t++;
6032                         if (*t++ == ',') {
6033                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6034                             while (t < PL_bufend && *t != ']')
6035                                 t++;
6036                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6037                                         "Multidimensional syntax %.*s not supported",
6038                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6039                         }
6040                     }
6041                 }
6042                 else if (*s == '{') {
6043                     char *t;
6044                     PL_tokenbuf[0] = '%';
6045                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6046                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6047                         {
6048                             char tmpbuf[sizeof PL_tokenbuf];
6049                             do {
6050                                 t++;
6051                             } while (isSPACE(*t));
6052                             if (isIDFIRST_lazy_if(t,UTF)) {
6053                                 STRLEN len;
6054                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6055                                               &len);
6056                                 while (isSPACE(*t))
6057                                     t++;
6058                                 if (*t == ';'
6059                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6060                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6061                                         "You need to quote \"%"UTF8f"\"",
6062                                          UTF8fARG(UTF, len, tmpbuf));
6063                             }
6064                         }
6065                 }
6066             }
6067
6068             PL_expect = XOPERATOR;
6069             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6070                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6071                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6072                     PL_expect = XOPERATOR;
6073                 else if (strchr("$@\"'`q", *s))
6074                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6075                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6076                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6077                 else if (isIDFIRST_lazy_if(s,UTF)) {
6078                     char tmpbuf[sizeof PL_tokenbuf];
6079                     int t2;
6080                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6081                     if ((t2 = keyword(tmpbuf, len, 0))) {
6082                         /* binary operators exclude handle interpretations */
6083                         switch (t2) {
6084                         case -KEY_x:
6085                         case -KEY_eq:
6086                         case -KEY_ne:
6087                         case -KEY_gt:
6088                         case -KEY_lt:
6089                         case -KEY_ge:
6090                         case -KEY_le:
6091                         case -KEY_cmp:
6092                             break;
6093                         default:
6094                             PL_expect = XTERM;  /* e.g. print $fh length() */
6095                             break;
6096                         }
6097                     }
6098                     else {
6099                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6100                     }
6101                 }
6102                 else if (isDIGIT(*s))
6103                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6104                 else if (*s == '.' && isDIGIT(s[1]))
6105                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6106                 else if ((*s == '?' || *s == '-' || *s == '+')
6107                          && !isSPACE(s[1]) && s[1] != '=')
6108                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6109                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6110                          && s[1] != '/')
6111                     PL_expect = XTERM;          /* e.g. print $fh /.../
6112                                                    XXX except DORDOR operator
6113                                                 */
6114                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6115                          && s[2] != '=')
6116                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6117             }
6118         }
6119         force_ident_maybe_lex('$');
6120         TOKEN('$');
6121
6122     case '@':
6123         if (PL_expect == XOPERATOR)
6124             no_op("Array", s);
6125         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6126         PL_tokenbuf[0] = '@';
6127         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6128         pl_yylval.ival = 0;
6129         if (!PL_tokenbuf[1]) {
6130             PREREF('@');
6131         }
6132         if (PL_lex_state == LEX_NORMAL)
6133             s = skipspace(s);
6134         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6135             if (*s == '{')
6136                 PL_tokenbuf[0] = '%';
6137
6138             /* Warn about @ where they meant $. */
6139             if (*s == '[' || *s == '{') {
6140                 if (ckWARN(WARN_SYNTAX)) {
6141                     S_check_scalar_slice(aTHX_ s);
6142                 }
6143             }
6144         }
6145         PL_expect = XOPERATOR;
6146         force_ident_maybe_lex('@');
6147         TERM('@');
6148
6149      case '/':                  /* may be division, defined-or, or pattern */
6150         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6151             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6152                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6153                 TOKEN(0);
6154             s += 2;
6155             AOPERATOR(DORDOR);
6156         }
6157         else if (PL_expect == XOPERATOR) {
6158             s++;
6159             if (*s == '=' && !PL_lex_allbrackets &&
6160                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6161                 s--;
6162                 TOKEN(0);
6163             }
6164             Mop(OP_DIVIDE);
6165         }
6166         else {
6167             /* Disable warning on "study /blah/" */
6168             if (PL_oldoldbufptr == PL_last_uni
6169              && (*PL_last_uni != 's' || s - PL_last_uni < 5
6170                  || memNE(PL_last_uni, "study", 5)
6171                  || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6172              ))
6173                 check_uni();
6174             s = scan_pat(s,OP_MATCH);
6175             TERM(sublex_start());
6176         }
6177
6178      case '?':                  /* conditional */
6179         s++;
6180         if (!PL_lex_allbrackets &&
6181             PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6182             s--;
6183             TOKEN(0);
6184         }
6185         PL_lex_allbrackets++;
6186         OPERATOR('?');
6187
6188     case '.':
6189         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6190 #ifdef PERL_STRICT_CR
6191             && s[1] == '\n'
6192 #else
6193             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6194 #endif
6195             && (s == PL_linestart || s[-1] == '\n') )
6196         {
6197             PL_expect = XSTATE;
6198             formbrack = 2; /* dot seen where arguments expected */
6199             goto rightbracket;
6200         }
6201         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6202             s += 3;
6203             OPERATOR(YADAYADA);
6204         }
6205         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6206             char tmp = *s++;
6207             if (*s == tmp) {
6208                 if (!PL_lex_allbrackets &&
6209                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6210                     s--;
6211                     TOKEN(0);
6212                 }
6213                 s++;
6214                 if (*s == tmp) {
6215                     s++;
6216                     pl_yylval.ival = OPf_SPECIAL;
6217                 }
6218                 else
6219                     pl_yylval.ival = 0;
6220                 OPERATOR(DOTDOT);
6221             }
6222             if (*s == '=' && !PL_lex_allbrackets &&
6223                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6224                 s--;
6225                 TOKEN(0);
6226             }
6227             Aop(OP_CONCAT);
6228         }
6229         /* FALLTHROUGH */
6230     case '0': case '1': case '2': case '3': case '4':
6231     case '5': case '6': case '7': case '8': case '9':
6232         s = scan_num(s, &pl_yylval);
6233         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6234         if (PL_expect == XOPERATOR)
6235             no_op("Number",s);
6236         TERM(THING);
6237
6238     case '\'':
6239         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6240         if (!s)
6241             missingterm(NULL);
6242         COPLINE_SET_FROM_MULTI_END;
6243         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6244         if (PL_expect == XOPERATOR) {
6245             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6246                 return deprecate_commaless_var_list();
6247             }
6248             else
6249                 no_op("String",s);
6250         }
6251         pl_yylval.ival = OP_CONST;
6252         TERM(sublex_start());
6253
6254     case '"':
6255         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6256         DEBUG_T( {
6257             if (s)
6258                 printbuf("### Saw string before %s\n", s);
6259             else
6260                 PerlIO_printf(Perl_debug_log,
6261                              "### Saw unterminated string\n");
6262         } );
6263         if (PL_expect == XOPERATOR) {
6264             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6265                 return deprecate_commaless_var_list();
6266             }
6267             else
6268                 no_op("String",s);
6269         }
6270         if (!s)
6271             missingterm(NULL);
6272         pl_yylval.ival = OP_CONST;
6273         /* FIXME. I think that this can be const if char *d is replaced by
6274            more localised variables.  */
6275         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6276             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6277                 pl_yylval.ival = OP_STRINGIFY;
6278                 break;
6279             }
6280         }
6281         if (pl_yylval.ival == OP_CONST)
6282             COPLINE_SET_FROM_MULTI_END;
6283         TERM(sublex_start());
6284
6285     case '`':
6286         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6287         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6288         if (PL_expect == XOPERATOR)
6289             no_op("Backticks",s);
6290         if (!s)
6291             missingterm(NULL);
6292         pl_yylval.ival = OP_BACKTICK;
6293         TERM(sublex_start());
6294
6295     case '\\':
6296         s++;
6297         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6298          && isDIGIT(*s))
6299             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6300                            *s, *s);
6301         if (PL_expect == XOPERATOR)
6302             no_op("Backslash",s);
6303         OPERATOR(REFGEN);
6304
6305     case 'v':
6306         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6307             char *start = s + 2;
6308             while (isDIGIT(*start) || *start == '_')
6309                 start++;
6310             if (*start == '.' && isDIGIT(start[1])) {
6311                 s = scan_num(s, &pl_yylval);
6312                 TERM(THING);
6313             }
6314             else if ((*start == ':' && start[1] == ':')
6315                   || (PL_expect == XSTATE && *start == ':'))
6316                 goto keylookup;
6317             else if (PL_expect == XSTATE) {
6318                 d = start;
6319                 while (d < PL_bufend && isSPACE(*d)) d++;
6320                 if (*d == ':') goto keylookup;
6321             }
6322             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6323             if (!isALPHA(*start) && (PL_expect == XTERM
6324                         || PL_expect == XREF || PL_expect == XSTATE
6325                         || PL_expect == XTERMORDORDOR)) {
6326                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6327                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6328                 if (!gv) {
6329                     s = scan_num(s, &pl_yylval);
6330                     TERM(THING);
6331                 }
6332             }
6333         }
6334         goto keylookup;
6335     case 'x':
6336         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6337             s++;
6338             Mop(OP_REPEAT);
6339         }
6340         goto keylookup;
6341
6342     case '_':
6343     case 'a': case 'A':
6344     case 'b': case 'B':
6345     case 'c': case 'C':
6346     case 'd': case 'D':
6347     case 'e': case 'E':
6348     case 'f': case 'F':
6349     case 'g': case 'G':
6350     case 'h': case 'H':
6351     case 'i': case 'I':
6352     case 'j': case 'J':
6353     case 'k': case 'K':
6354     case 'l': case 'L':
6355     case 'm': case 'M':
6356     case 'n': case 'N':
6357     case 'o': case 'O':
6358     case 'p': case 'P':
6359     case 'q': case 'Q':
6360     case 'r': case 'R':
6361     case 's': case 'S':
6362     case 't': case 'T':
6363     case 'u': case 'U':
6364               case 'V':
6365     case 'w': case 'W':
6366               case 'X':
6367     case 'y': case 'Y':
6368     case 'z': case 'Z':
6369
6370       keylookup: {
6371         bool anydelim;
6372         bool lex;
6373         I32 tmp;
6374         SV *sv;
6375         CV *cv;
6376         PADOFFSET off;
6377         OP *rv2cv_op;
6378
6379         lex = FALSE;
6380         orig_keyword = 0;
6381         off = 0;
6382         sv = NULL;
6383         cv = NULL;
6384         gv = NULL;
6385         gvp = NULL;
6386         rv2cv_op = NULL;
6387
6388         PL_bufptr = s;
6389         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6390
6391         /* Some keywords can be followed by any delimiter, including ':' */
6392         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6393
6394         /* x::* is just a word, unless x is "CORE" */
6395         if (!anydelim && *s == ':' && s[1] == ':') {
6396             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6397             goto just_a_word;
6398         }
6399
6400         d = s;
6401         while (d < PL_bufend && isSPACE(*d))
6402                 d++;    /* no comments skipped here, or s### is misparsed */
6403
6404         /* Is this a word before a => operator? */
6405         if (*d == '=' && d[1] == '>') {
6406           fat_arrow:
6407             CLINE;
6408             pl_yylval.opval
6409                 = (OP*)newSVOP(OP_CONST, 0,
6410                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6411             pl_yylval.opval->op_private = OPpCONST_BARE;
6412             TERM(WORD);
6413         }
6414
6415         /* Check for plugged-in keyword */
6416         {
6417             OP *o;
6418             int result;
6419             char *saved_bufptr = PL_bufptr;
6420             PL_bufptr = s;
6421             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6422             s = PL_bufptr;
6423             if (result == KEYWORD_PLUGIN_DECLINE) {
6424                 /* not a plugged-in keyword */
6425                 PL_bufptr = saved_bufptr;
6426             } else if (result == KEYWORD_PLUGIN_STMT) {
6427                 pl_yylval.opval = o;
6428                 CLINE;
6429                 if (!PL_nexttoke) PL_expect = XSTATE;
6430                 return REPORT(PLUGSTMT);
6431             } else if (result == KEYWORD_PLUGIN_EXPR) {
6432                 pl_yylval.opval = o;
6433                 CLINE;
6434                 if (!PL_nexttoke) PL_expect = XOPERATOR;
6435                 return REPORT(PLUGEXPR);
6436             } else {
6437                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6438                                         PL_tokenbuf);
6439             }
6440         }
6441
6442         /* Check for built-in keyword */
6443         tmp = keyword(PL_tokenbuf, len, 0);
6444
6445         /* Is this a label? */
6446         if (!anydelim && PL_expect == XSTATE
6447               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6448             s = d + 1;
6449             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6450             pl_yylval.pval[len] = '\0';
6451             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6452             CLINE;
6453             TOKEN(LABEL);
6454         }
6455
6456         /* Check for lexical sub */
6457         if (PL_expect != XOPERATOR) {
6458             char tmpbuf[sizeof PL_tokenbuf + 1];
6459             *tmpbuf = '&';
6460             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6461             off = pad_findmy_pvn(tmpbuf, len+1, 0);
6462             if (off != NOT_IN_PAD) {
6463                 assert(off); /* we assume this is boolean-true below */
6464                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6465                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6466                     HEK * const stashname = HvNAME_HEK(stash);
6467                     sv = newSVhek(stashname);
6468                     sv_catpvs(sv, "::");
6469                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6470                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6471                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6472                                     SVt_PVCV);
6473                     off = 0;
6474                     if (!gv) {
6475                         sv_free(sv);
6476                         sv = NULL;
6477                         goto just_a_word;
6478                     }
6479                 }
6480                 else {
6481                     rv2cv_op = newOP(OP_PADANY, 0);
6482                     rv2cv_op->op_targ = off;
6483                     cv = find_lexical_cv(off);
6484                 }
6485                 lex = TRUE;
6486                 goto just_a_word;
6487             }
6488             off = 0;
6489         }
6490
6491         if (tmp < 0) {                  /* second-class keyword? */
6492             GV *ogv = NULL;     /* override (winner) */
6493             GV *hgv = NULL;     /* hidden (loser) */
6494             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6495                 CV *cv;
6496                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6497                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6498                                             SVt_PVCV)) &&
6499                     (cv = GvCVu(gv)))
6500                 {
6501                     if (GvIMPORTED_CV(gv))
6502                         ogv = gv;
6503                     else if (! CvMETHOD(cv))
6504                         hgv = gv;
6505                 }
6506                 if (!ogv &&
6507                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6508                                           len, FALSE)) &&
6509                     (gv = *gvp) && (
6510                         isGV_with_GP(gv)
6511                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
6512                             :   SvPCS_IMPORTED(gv)
6513                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6514                                          len, 0), 1)
6515                    ))
6516                 {
6517                     ogv = gv;
6518                 }
6519             }
6520             if (ogv) {
6521                 orig_keyword = tmp;
6522                 tmp = 0;                /* overridden by import or by GLOBAL */
6523             }
6524             else if (gv && !gvp
6525                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6526                      && GvCVu(gv))
6527             {
6528                 tmp = 0;                /* any sub overrides "weak" keyword */
6529             }
6530             else {                      /* no override */
6531                 tmp = -tmp;
6532                 if (tmp == KEY_dump) {
6533                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6534                                    "dump() better written as CORE::dump()");
6535                 }
6536                 gv = NULL;
6537                 gvp = 0;
6538                 if (hgv && tmp != KEY_x)        /* never ambiguous */
6539                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6540                                    "Ambiguous call resolved as CORE::%s(), "
6541                                    "qualify as such or use &",
6542                                    GvENAME(hgv));
6543             }
6544         }
6545
6546         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6547          && (!anydelim || *s != '#')) {
6548             /* no override, and not s### either; skipspace is safe here
6549              * check for => on following line */
6550             bool arrow;
6551             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6552             STRLEN   soff = s         - SvPVX(PL_linestr);
6553             s = skipspace_flags(s, LEX_NO_INCLINE);
6554             arrow = *s == '=' && s[1] == '>';
6555             PL_bufptr = SvPVX(PL_linestr) + bufoff;
6556             s         = SvPVX(PL_linestr) +   soff;
6557             if (arrow)
6558                 goto fat_arrow;
6559         }
6560
6561       reserved_word:
6562         switch (tmp) {
6563
6564         default:                        /* not a keyword */
6565             /* Trade off - by using this evil construction we can pull the
6566                variable gv into the block labelled keylookup. If not, then
6567                we have to give it function scope so that the goto from the
6568                earlier ':' case doesn't bypass the initialisation.  */
6569             if (0) {
6570             just_a_word_zero_gv:
6571                 sv = NULL;
6572                 cv = NULL;
6573                 gv = NULL;
6574                 gvp = NULL;
6575                 rv2cv_op = NULL;
6576                 orig_keyword = 0;
6577                 lex = 0;
6578                 off = 0;
6579             }
6580           just_a_word: {
6581                 int pkgname = 0;
6582                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6583                 bool safebw;
6584
6585
6586                 /* Get the rest if it looks like a package qualifier */
6587
6588                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6589                     STRLEN morelen;
6590                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6591                                   TRUE, &morelen);
6592                     if (!morelen)
6593                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6594                                 UTF8fARG(UTF, len, PL_tokenbuf),
6595                                 *s == '\'' ? "'" : "::");
6596                     len += morelen;
6597                     pkgname = 1;
6598                 }
6599
6600                 if (PL_expect == XOPERATOR) {
6601                     if (PL_bufptr == PL_linestart) {
6602                         CopLINE_dec(PL_curcop);
6603                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6604                         CopLINE_inc(PL_curcop);
6605                     }
6606                     else
6607                         no_op("Bareword",s);
6608                 }
6609
6610                 /* See if the name is "Foo::",
6611                    in which case Foo is a bareword
6612                    (and a package name). */
6613
6614                 if (len > 2 &&
6615                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6616                 {
6617                     if (ckWARN(WARN_BAREWORD)
6618                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6619                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6620                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
6621                            UTF8fARG(UTF, len, PL_tokenbuf));
6622                     len -= 2;
6623                     PL_tokenbuf[len] = '\0';
6624                     gv = NULL;
6625                     gvp = 0;
6626                     safebw = TRUE;
6627                 }
6628                 else {
6629                     safebw = FALSE;
6630                 }
6631
6632                 /* if we saw a global override before, get the right name */
6633
6634                 if (!sv)
6635                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6636                                                 len);
6637                 if (gvp) {
6638                     SV * const tmp_sv = sv;
6639                     sv = newSVpvs("CORE::GLOBAL::");
6640                     sv_catsv(sv, tmp_sv);
6641                     SvREFCNT_dec(tmp_sv);
6642                 }
6643
6644
6645                 /* Presume this is going to be a bareword of some sort. */
6646                 CLINE;
6647                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6648                 pl_yylval.opval->op_private = OPpCONST_BARE;
6649
6650                 /* And if "Foo::", then that's what it certainly is. */
6651                 if (safebw)
6652                     goto safe_bareword;
6653
6654                 if (!off)
6655                 {
6656                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6657                     const_op->op_private = OPpCONST_BARE;
6658                     rv2cv_op =
6659                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6660                     cv = lex
6661                         ? isGV(gv)
6662                             ? GvCV(gv)
6663                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6664                                 ? (CV *)SvRV(gv)
6665                                 : ((CV *)gv)
6666                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6667                 }
6668
6669                 /* Use this var to track whether intuit_method has been
6670                    called.  intuit_method returns 0 or > 255.  */
6671                 tmp = 1;
6672
6673                 /* See if it's the indirect object for a list operator. */
6674
6675                 if (PL_oldoldbufptr &&
6676                     PL_oldoldbufptr < PL_bufptr &&
6677                     (PL_oldoldbufptr == PL_last_lop
6678                      || PL_oldoldbufptr == PL_last_uni) &&
6679                     /* NO SKIPSPACE BEFORE HERE! */
6680                     (PL_expect == XREF ||
6681                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6682                 {
6683                     bool immediate_paren = *s == '(';
6684
6685                     /* (Now we can afford to cross potential line boundary.) */
6686                     s = skipspace(s);
6687
6688                     /* Two barewords in a row may indicate method call. */
6689
6690                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6691                         (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6692                         goto method;
6693                     }
6694
6695                     /* If not a declared subroutine, it's an indirect object. */
6696                     /* (But it's an indir obj regardless for sort.) */
6697                     /* Also, if "_" follows a filetest operator, it's a bareword */
6698
6699                     if (
6700                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6701                          (!cv &&
6702                         (PL_last_lop_op != OP_MAPSTART &&
6703                          PL_last_lop_op != OP_GREPSTART))))
6704                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6705                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6706                        )
6707                     {
6708                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6709                         goto bareword;
6710                     }
6711                 }
6712
6713                 PL_expect = XOPERATOR;
6714                 s = skipspace(s);
6715
6716                 /* Is this a word before a => operator? */
6717                 if (*s == '=' && s[1] == '>' && !pkgname) {
6718                     op_free(rv2cv_op);
6719                     CLINE;
6720                     if (gvp || (lex && !off)) {
6721                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
6722                         /* This is our own scalar, created a few lines
6723                            above, so this is safe. */
6724                         SvREADONLY_off(sv);
6725                         sv_setpv(sv, PL_tokenbuf);
6726                         if (UTF && !IN_BYTES
6727                          && is_utf8_string((U8*)PL_tokenbuf, len))
6728                               SvUTF8_on(sv);
6729                         SvREADONLY_on(sv);
6730                     }
6731                     TERM(WORD);
6732                 }
6733
6734                 /* If followed by a paren, it's certainly a subroutine. */
6735                 if (*s == '(') {
6736                     CLINE;
6737                     if (cv) {
6738                         d = s + 1;
6739                         while (SPACE_OR_TAB(*d))
6740                             d++;
6741                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6742                             s = d + 1;
6743                             goto its_constant;
6744                         }
6745                     }
6746                     NEXTVAL_NEXTTOKE.opval =
6747                         off ? rv2cv_op : pl_yylval.opval;
6748                     if (off)
6749                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
6750                     else op_free(rv2cv_op),        force_next(WORD);
6751                     pl_yylval.ival = 0;
6752                     TOKEN('&');
6753                 }
6754
6755                 /* If followed by var or block, call it a method (unless sub) */
6756
6757                 if ((*s == '$' || *s == '{') && !cv) {
6758                     op_free(rv2cv_op);
6759                     PL_last_lop = PL_oldbufptr;
6760                     PL_last_lop_op = OP_METHOD;
6761                     if (!PL_lex_allbrackets &&
6762                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6763                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6764                     PL_expect = XBLOCKTERM;
6765                     PL_bufptr = s;
6766                     return REPORT(METHOD);
6767                 }
6768
6769                 /* If followed by a bareword, see if it looks like indir obj. */
6770
6771                 if (tmp == 1 && !orig_keyword
6772                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6773                         && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6774                   method:
6775                     if (lex && !off) {
6776                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
6777                         SvREADONLY_off(sv);
6778                         sv_setpvn(sv, PL_tokenbuf, len);
6779                         if (UTF && !IN_BYTES
6780                          && is_utf8_string((U8*)PL_tokenbuf, len))
6781                             SvUTF8_on (sv);
6782                         else SvUTF8_off(sv);
6783                     }
6784                     op_free(rv2cv_op);
6785                     if (tmp == METHOD && !PL_lex_allbrackets &&
6786                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6787                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6788                     return REPORT(tmp);
6789                 }
6790
6791                 /* Not a method, so call it a subroutine (if defined) */
6792
6793                 if (cv) {
6794                     /* Check for a constant sub */
6795                     if ((sv = cv_const_sv_or_av(cv))) {
6796                   its_constant:
6797                         op_free(rv2cv_op);
6798                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6799                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6800                         if (SvTYPE(sv) == SVt_PVAV)
6801                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6802                                                       pl_yylval.opval);
6803                         else {
6804                             pl_yylval.opval->op_private = 0;
6805                             pl_yylval.opval->op_folded = 1;
6806                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
6807                         }
6808                         TOKEN(WORD);
6809                     }
6810
6811                     op_free(pl_yylval.opval);
6812                     pl_yylval.opval =
6813                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6814                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6815                     PL_last_lop = PL_oldbufptr;
6816                     PL_last_lop_op = OP_ENTERSUB;
6817                     /* Is there a prototype? */
6818                     if (
6819                         SvPOK(cv))
6820                     {
6821                         STRLEN protolen = CvPROTOLEN(cv);
6822                         const char *proto = CvPROTO(cv);
6823                         bool optional;
6824                         proto = S_strip_spaces(aTHX_ proto, &protolen);
6825                         if (!protolen)
6826                             TERM(FUNC0SUB);
6827                         if ((optional = *proto == ';'))
6828                           do
6829                             proto++;
6830                           while (*proto == ';');
6831                         if (
6832                             (
6833                                 (
6834                                     *proto == '$' || *proto == '_'
6835                                  || *proto == '*' || *proto == '+'
6836                                 )
6837                              && proto[1] == '\0'
6838                             )
6839                          || (
6840                              *proto == '\\' && proto[1] && proto[2] == '\0'
6841                             )
6842                         )
6843                             UNIPROTO(UNIOPSUB,optional);
6844                         if (*proto == '\\' && proto[1] == '[') {
6845                             const char *p = proto + 2;
6846                             while(*p && *p != ']')
6847                                 ++p;
6848                             if(*p == ']' && !p[1])
6849                                 UNIPROTO(UNIOPSUB,optional);
6850                         }
6851                         if (*proto == '&' && *s == '{') {
6852                             if (PL_curstash)
6853                                 sv_setpvs(PL_subname, "__ANON__");
6854                             else
6855                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6856                             if (!PL_lex_allbrackets &&
6857                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6858                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6859                             PREBLOCK(LSTOPSUB);
6860                         }
6861                     }
6862                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6863                     PL_expect = XTERM;
6864                     force_next(off ? PRIVATEREF : WORD);
6865                     if (!PL_lex_allbrackets &&
6866                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6867                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6868                     TOKEN(NOAMP);
6869                 }
6870
6871                 /* Call it a bare word */
6872
6873                 if (PL_hints & HINT_STRICT_SUBS)
6874                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6875                 else {
6876                 bareword:
6877                     /* after "print" and similar functions (corresponding to
6878                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6879                      * a filehandle should be subject to "strict subs".
6880                      * Likewise for the optional indirect-object argument to system
6881                      * or exec, which can't be a bareword */
6882                     if ((PL_last_lop_op == OP_PRINT
6883                             || PL_last_lop_op == OP_PRTF
6884                             || PL_last_lop_op == OP_SAY
6885                             || PL_last_lop_op == OP_SYSTEM
6886                             || PL_last_lop_op == OP_EXEC)
6887                             && (PL_hints & HINT_STRICT_SUBS))
6888                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6889                     if (lastchar != '-') {
6890                         if (ckWARN(WARN_RESERVED)) {
6891                             d = PL_tokenbuf;
6892                             while (isLOWER(*d))
6893                                 d++;
6894                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6895                             {
6896                                 /* PL_warn_reserved is constant */
6897                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6898                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6899                                        PL_tokenbuf);
6900                                 GCC_DIAG_RESTORE;
6901                             }
6902                         }
6903                     }
6904                 }
6905                 op_free(rv2cv_op);
6906
6907             safe_bareword:
6908                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6909                  && saw_infix_sigil) {
6910                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6911                                      "Operator or semicolon missing before %c%"UTF8f,
6912                                      lastchar,
6913                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
6914                                               PL_tokenbuf));
6915                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6916                                      "Ambiguous use of %c resolved as operator %c",
6917                                      lastchar, lastchar);
6918                 }
6919                 TOKEN(WORD);
6920             }
6921
6922         case KEY___FILE__:
6923             FUN0OP(
6924                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6925             );
6926
6927         case KEY___LINE__:
6928             FUN0OP(
6929                 (OP*)newSVOP(OP_CONST, 0,
6930                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6931             );
6932
6933         case KEY___PACKAGE__:
6934             FUN0OP(
6935                 (OP*)newSVOP(OP_CONST, 0,
6936                                         (PL_curstash
6937                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6938                                          : &PL_sv_undef))
6939             );
6940
6941         case KEY___DATA__:
6942         case KEY___END__: {
6943             GV *gv;
6944             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6945                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6946                                         ? PL_curstash
6947                                         : PL_defstash;
6948                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6949                 if (!isGV(gv))
6950                     gv_init(gv,stash,"DATA",4,0);
6951                 GvMULTI_on(gv);
6952                 if (!GvIO(gv))
6953                     GvIOp(gv) = newIO();
6954                 IoIFP(GvIOp(gv)) = PL_rsfp;
6955 #if defined(HAS_FCNTL) && defined(F_SETFD)
6956                 {
6957                     const int fd = PerlIO_fileno(PL_rsfp);
6958                     fcntl(fd,F_SETFD,fd >= 3);
6959                 }
6960 #endif
6961                 /* Mark this internal pseudo-handle as clean */
6962                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6963                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6964                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6965                 else
6966                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6967 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6968                 /* if the script was opened in binmode, we need to revert
6969                  * it to text mode for compatibility; but only iff it has CRs
6970                  * XXX this is a questionable hack at best. */
6971                 if (PL_bufend-PL_bufptr > 2
6972                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6973                 {
6974                     Off_t loc = 0;
6975                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6976                         loc = PerlIO_tell(PL_rsfp);
6977                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6978                     }
6979 #ifdef NETWARE
6980                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6981 #else
6982                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6983 #endif  /* NETWARE */
6984                         if (loc > 0)
6985                             PerlIO_seek(PL_rsfp, loc, 0);
6986                     }
6987                 }
6988 #endif
6989 #ifdef PERLIO_LAYERS
6990                 if (!IN_BYTES) {
6991                     if (UTF)
6992                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6993                     else if (IN_ENCODING) {
6994                         SV *name;
6995                         dSP;
6996                         ENTER;
6997                         SAVETMPS;
6998                         PUSHMARK(sp);
6999                         XPUSHs(_get_encoding());
7000                         PUTBACK;
7001                         call_method("name", G_SCALAR);
7002                         SPAGAIN;
7003                         name = POPs;
7004                         PUTBACK;
7005                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7006                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7007                                                       SVfARG(name)));
7008                         FREETMPS;
7009                         LEAVE;
7010                     }
7011                 }
7012 #endif
7013                 PL_rsfp = NULL;
7014             }
7015             goto fake_eof;
7016         }
7017
7018         case KEY___SUB__:
7019             FUN0OP(CvCLONE(PL_compcv)
7020                         ? newOP(OP_RUNCV, 0)
7021                         : newPVOP(OP_RUNCV,0,NULL));
7022
7023         case KEY_AUTOLOAD:
7024         case KEY_DESTROY:
7025         case KEY_BEGIN:
7026         case KEY_UNITCHECK:
7027         case KEY_CHECK:
7028         case KEY_INIT:
7029         case KEY_END:
7030             if (PL_expect == XSTATE) {
7031                 s = PL_bufptr;
7032                 goto really_sub;
7033             }
7034             goto just_a_word;
7035
7036         case_KEY_CORE:
7037             {
7038                 STRLEN olen = len;
7039                 d = s;
7040                 s += 2;
7041                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7042                 if ((*s == ':' && s[1] == ':')
7043                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7044                 {
7045                     s = d;
7046                     len = olen;
7047                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7048                     goto just_a_word;
7049                 }
7050                 if (!tmp)
7051                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7052                                       UTF8fARG(UTF, len, PL_tokenbuf));
7053                 if (tmp < 0)
7054                     tmp = -tmp;
7055                 else if (tmp == KEY_require || tmp == KEY_do
7056                       || tmp == KEY_glob)
7057                     /* that's a way to remember we saw "CORE::" */
7058                     orig_keyword = tmp;
7059                 goto reserved_word;
7060             }
7061
7062         case KEY_abs:
7063             UNI(OP_ABS);
7064
7065         case KEY_alarm:
7066             UNI(OP_ALARM);
7067
7068         case KEY_accept:
7069             LOP(OP_ACCEPT,XTERM);
7070
7071         case KEY_and:
7072             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7073                 return REPORT(0);
7074             OPERATOR(ANDOP);
7075
7076         case KEY_atan2:
7077             LOP(OP_ATAN2,XTERM);
7078
7079         case KEY_bind:
7080             LOP(OP_BIND,XTERM);
7081
7082         case KEY_binmode:
7083             LOP(OP_BINMODE,XTERM);
7084
7085         case KEY_bless:
7086             LOP(OP_BLESS,XTERM);
7087
7088         case KEY_break:
7089             FUN0(OP_BREAK);
7090
7091         case KEY_chop:
7092             UNI(OP_CHOP);
7093
7094         case KEY_continue:
7095                     /* We have to disambiguate the two senses of
7096                       "continue". If the next token is a '{' then
7097                       treat it as the start of a continue block;
7098                       otherwise treat it as a control operator.
7099                      */
7100                     s = skipspace(s);
7101                     if (*s == '{')
7102             PREBLOCK(CONTINUE);
7103                     else
7104                         FUN0(OP_CONTINUE);
7105
7106         case KEY_chdir:
7107             /* may use HOME */
7108             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7109             UNI(OP_CHDIR);
7110
7111         case KEY_close:
7112             UNI(OP_CLOSE);
7113
7114         case KEY_closedir:
7115             UNI(OP_CLOSEDIR);
7116
7117         case KEY_cmp:
7118             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7119                 return REPORT(0);
7120             Eop(OP_SCMP);
7121
7122         case KEY_caller:
7123             UNI(OP_CALLER);
7124
7125         case KEY_crypt:
7126 #ifdef FCRYPT
7127             if (!PL_cryptseen) {
7128                 PL_cryptseen = TRUE;
7129                 init_des();
7130             }
7131 #endif
7132             LOP(OP_CRYPT,XTERM);
7133
7134         case KEY_chmod:
7135             LOP(OP_CHMOD,XTERM);
7136
7137         case KEY_chown:
7138             LOP(OP_CHOWN,XTERM);
7139
7140         case KEY_connect:
7141             LOP(OP_CONNECT,XTERM);
7142
7143         case KEY_chr:
7144             UNI(OP_CHR);
7145
7146         case KEY_cos:
7147             UNI(OP_COS);
7148
7149         case KEY_chroot:
7150             UNI(OP_CHROOT);
7151
7152         case KEY_default:
7153             PREBLOCK(DEFAULT);
7154
7155         case KEY_do:
7156             s = skipspace(s);
7157             if (*s == '{')
7158                 PRETERMBLOCK(DO);
7159             if (*s != '\'') {
7160                 *PL_tokenbuf = '&';
7161                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7162                               1, &len);
7163                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7164                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7165                     d = skipspace(d);
7166                     if (*d == '(') {
7167                         force_ident_maybe_lex('&');
7168                         s = d;
7169                     }
7170                 }
7171             }
7172             if (orig_keyword == KEY_do) {
7173                 orig_keyword = 0;
7174                 pl_yylval.ival = 1;
7175             }
7176             else
7177                 pl_yylval.ival = 0;
7178             OPERATOR(DO);
7179
7180         case KEY_die:
7181             PL_hints |= HINT_BLOCK_SCOPE;
7182             LOP(OP_DIE,XTERM);
7183
7184         case KEY_defined:
7185             UNI(OP_DEFINED);
7186
7187         case KEY_delete:
7188             UNI(OP_DELETE);
7189
7190         case KEY_dbmopen:
7191             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7192                               STR_WITH_LEN("NDBM_File::"),
7193                               STR_WITH_LEN("DB_File::"),
7194                               STR_WITH_LEN("GDBM_File::"),
7195                               STR_WITH_LEN("SDBM_File::"),
7196                               STR_WITH_LEN("ODBM_File::"),
7197                               NULL);
7198             LOP(OP_DBMOPEN,XTERM);
7199
7200         case KEY_dbmclose:
7201             UNI(OP_DBMCLOSE);
7202
7203         case KEY_dump:
7204             LOOPX(OP_DUMP);
7205
7206         case KEY_else:
7207             PREBLOCK(ELSE);
7208
7209         case KEY_elsif:
7210             pl_yylval.ival = CopLINE(PL_curcop);
7211             OPERATOR(ELSIF);
7212
7213         case KEY_eq:
7214             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7215                 return REPORT(0);
7216             Eop(OP_SEQ);
7217
7218         case KEY_exists:
7219             UNI(OP_EXISTS);
7220
7221         case KEY_exit:
7222             UNI(OP_EXIT);
7223
7224         case KEY_eval:
7225             s = skipspace(s);
7226             if (*s == '{') { /* block eval */
7227                 PL_expect = XTERMBLOCK;
7228                 UNIBRACK(OP_ENTERTRY);
7229             }
7230             else { /* string eval */
7231                 PL_expect = XTERM;
7232                 UNIBRACK(OP_ENTEREVAL);
7233             }
7234
7235         case KEY_evalbytes:
7236             PL_expect = XTERM;
7237             UNIBRACK(-OP_ENTEREVAL);
7238
7239         case KEY_eof:
7240             UNI(OP_EOF);
7241
7242         case KEY_exp:
7243             UNI(OP_EXP);
7244
7245         case KEY_each:
7246             UNI(OP_EACH);
7247
7248         case KEY_exec:
7249             LOP(OP_EXEC,XREF);
7250
7251         case KEY_endhostent:
7252             FUN0(OP_EHOSTENT);
7253
7254         case KEY_endnetent:
7255             FUN0(OP_ENETENT);
7256
7257         case KEY_endservent:
7258             FUN0(OP_ESERVENT);
7259
7260         case KEY_endprotoent:
7261             FUN0(OP_EPROTOENT);
7262
7263         case KEY_endpwent:
7264             FUN0(OP_EPWENT);
7265
7266         case KEY_endgrent:
7267             FUN0(OP_EGRENT);
7268
7269         case KEY_for:
7270         case KEY_foreach:
7271             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7272                 return REPORT(0);
7273             pl_yylval.ival = CopLINE(PL_curcop);
7274             s = skipspace(s);
7275             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7276                 char *p = s;
7277
7278                 if ((PL_bufend - p) >= 3 &&
7279                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7280                     p += 2;
7281                 else if ((PL_bufend - p) >= 4 &&
7282                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7283                     p += 3;
7284                 p = skipspace(p);
7285                 /* skip optional package name, as in "for my abc $x (..)" */
7286                 if (isIDFIRST_lazy_if(p,UTF)) {
7287                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7288                     p = skipspace(p);
7289                 }
7290                 if (*p != '$')
7291                     Perl_croak(aTHX_ "Missing $ on loop variable");
7292             }
7293             OPERATOR(FOR);
7294
7295         case KEY_formline:
7296             LOP(OP_FORMLINE,XTERM);
7297
7298         case KEY_fork:
7299             FUN0(OP_FORK);
7300
7301         case KEY_fc:
7302             UNI(OP_FC);
7303
7304         case KEY_fcntl:
7305             LOP(OP_FCNTL,XTERM);
7306
7307         case KEY_fileno:
7308             UNI(OP_FILENO);
7309
7310         case KEY_flock:
7311             LOP(OP_FLOCK,XTERM);
7312
7313         case KEY_gt:
7314             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7315                 return REPORT(0);
7316             Rop(OP_SGT);
7317
7318         case KEY_ge:
7319             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7320                 return REPORT(0);
7321             Rop(OP_SGE);
7322
7323         case KEY_grep:
7324             LOP(OP_GREPSTART, XREF);
7325
7326         case KEY_goto:
7327             LOOPX(OP_GOTO);
7328
7329         case KEY_gmtime:
7330             UNI(OP_GMTIME);
7331
7332         case KEY_getc:
7333             UNIDOR(OP_GETC);
7334
7335         case KEY_getppid:
7336             FUN0(OP_GETPPID);
7337
7338         case KEY_getpgrp:
7339             UNI(OP_GETPGRP);
7340
7341         case KEY_getpriority:
7342             LOP(OP_GETPRIORITY,XTERM);
7343
7344         case KEY_getprotobyname:
7345             UNI(OP_GPBYNAME);
7346
7347         case KEY_getprotobynumber:
7348             LOP(OP_GPBYNUMBER,XTERM);
7349
7350         case KEY_getprotoent:
7351             FUN0(OP_GPROTOENT);
7352
7353         case KEY_getpwent:
7354             FUN0(OP_GPWENT);
7355
7356         case KEY_getpwnam:
7357             UNI(OP_GPWNAM);
7358
7359         case KEY_getpwuid:
7360             UNI(OP_GPWUID);
7361
7362         case KEY_getpeername:
7363             UNI(OP_GETPEERNAME);
7364
7365         case KEY_gethostbyname:
7366             UNI(OP_GHBYNAME);
7367
7368         case KEY_gethostbyaddr:
7369             LOP(OP_GHBYADDR,XTERM);
7370
7371         case KEY_gethostent:
7372             FUN0(OP_GHOSTENT);
7373
7374         case KEY_getnetbyname:
7375             UNI(OP_GNBYNAME);
7376
7377         case KEY_getnetbyaddr:
7378             LOP(OP_GNBYADDR,XTERM);
7379
7380         case KEY_getnetent:
7381             FUN0(OP_GNETENT);
7382
7383         case KEY_getservbyname:
7384             LOP(OP_GSBYNAME,XTERM);
7385
7386         case KEY_getservbyport:
7387             LOP(OP_GSBYPORT,XTERM);
7388
7389         case KEY_getservent:
7390             FUN0(OP_GSERVENT);
7391
7392         case KEY_getsockname:
7393             UNI(OP_GETSOCKNAME);
7394
7395         case KEY_getsockopt:
7396             LOP(OP_GSOCKOPT,XTERM);
7397
7398         case KEY_getgrent:
7399             FUN0(OP_GGRENT);
7400
7401         case KEY_getgrnam:
7402             UNI(OP_GGRNAM);
7403
7404         case KEY_getgrgid:
7405             UNI(OP_GGRGID);
7406
7407         case KEY_getlogin:
7408             FUN0(OP_GETLOGIN);
7409
7410         case KEY_given:
7411             pl_yylval.ival = CopLINE(PL_curcop);
7412             Perl_ck_warner_d(aTHX_
7413                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7414                 "given is experimental");
7415             OPERATOR(GIVEN);
7416
7417         case KEY_glob:
7418             LOP(
7419              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7420              XTERM
7421             );
7422
7423         case KEY_hex:
7424             UNI(OP_HEX);
7425
7426         case KEY_if:
7427             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7428                 return REPORT(0);
7429             pl_yylval.ival = CopLINE(PL_curcop);
7430             OPERATOR(IF);
7431
7432         case KEY_index:
7433             LOP(OP_INDEX,XTERM);
7434
7435         case KEY_int:
7436             UNI(OP_INT);
7437
7438         case KEY_ioctl:
7439             LOP(OP_IOCTL,XTERM);
7440
7441         case KEY_join:
7442             LOP(OP_JOIN,XTERM);
7443
7444         case KEY_keys:
7445             UNI(OP_KEYS);
7446
7447         case KEY_kill:
7448             LOP(OP_KILL,XTERM);
7449
7450         case KEY_last:
7451             LOOPX(OP_LAST);
7452
7453         case KEY_lc:
7454             UNI(OP_LC);
7455
7456         case KEY_lcfirst:
7457             UNI(OP_LCFIRST);
7458
7459         case KEY_local:
7460             pl_yylval.ival = 0;
7461             OPERATOR(LOCAL);
7462
7463         case KEY_length:
7464             UNI(OP_LENGTH);
7465
7466         case KEY_lt:
7467             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7468                 return REPORT(0);
7469             Rop(OP_SLT);
7470
7471         case KEY_le:
7472             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7473                 return REPORT(0);
7474             Rop(OP_SLE);
7475
7476         case KEY_localtime:
7477             UNI(OP_LOCALTIME);
7478
7479         case KEY_log:
7480             UNI(OP_LOG);
7481
7482         case KEY_link:
7483             LOP(OP_LINK,XTERM);
7484
7485         case KEY_listen:
7486             LOP(OP_LISTEN,XTERM);
7487
7488         case KEY_lock:
7489             UNI(OP_LOCK);
7490
7491         case KEY_lstat:
7492             UNI(OP_LSTAT);
7493
7494         case KEY_m:
7495             s = scan_pat(s,OP_MATCH);
7496             TERM(sublex_start());
7497
7498         case KEY_map:
7499             LOP(OP_MAPSTART, XREF);
7500
7501         case KEY_mkdir:
7502             LOP(OP_MKDIR,XTERM);
7503
7504         case KEY_msgctl:
7505             LOP(OP_MSGCTL,XTERM);
7506
7507         case KEY_msgget:
7508             LOP(OP_MSGGET,XTERM);
7509
7510         case KEY_msgrcv:
7511             LOP(OP_MSGRCV,XTERM);
7512
7513         case KEY_msgsnd:
7514             LOP(OP_MSGSND,XTERM);
7515
7516         case KEY_our:
7517         case KEY_my:
7518         case KEY_state:
7519             PL_in_my = (U16)tmp;
7520             s = skipspace(s);
7521             if (isIDFIRST_lazy_if(s,UTF)) {
7522                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7523                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7524                 {
7525                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7526                         Perl_croak(aTHX_
7527                                   "Experimental \"%s\" subs not enabled",
7528                                    tmp == KEY_my    ? "my"    :
7529                                    tmp == KEY_state ? "state" : "our");
7530                     Perl_ck_warner_d(aTHX_
7531                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7532                         "The lexical_subs feature is experimental");
7533                     goto really_sub;
7534                 }
7535                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7536                 if (!PL_in_my_stash) {
7537                     char tmpbuf[1024];
7538                     int len;
7539                     PL_bufptr = s;
7540                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7541                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7542                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7543                 }
7544             }
7545             pl_yylval.ival = 1;
7546             OPERATOR(MY);
7547
7548         case KEY_next:
7549             LOOPX(OP_NEXT);
7550
7551         case KEY_ne:
7552             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7553                 return REPORT(0);
7554             Eop(OP_SNE);
7555
7556         case KEY_no:
7557             s = tokenize_use(0, s);
7558             TOKEN(USE);
7559
7560         case KEY_not:
7561             if (*s == '(' || (s = skipspace(s), *s == '('))
7562                 FUN1(OP_NOT);
7563             else {
7564                 if (!PL_lex_allbrackets &&
7565                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7566                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7567                 OPERATOR(NOTOP);
7568             }
7569
7570         case KEY_open:
7571             s = skipspace(s);
7572             if (isIDFIRST_lazy_if(s,UTF)) {
7573           const char *t;
7574           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7575               &len);
7576                 for (t=d; isSPACE(*t);)
7577                     t++;
7578                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7579                     /* [perl #16184] */
7580                     && !(t[0] == '=' && t[1] == '>')
7581                     && !(t[0] == ':' && t[1] == ':')
7582                     && !keyword(s, d-s, 0)
7583                 ) {
7584                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7585                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7586                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7587                 }
7588             }
7589             LOP(OP_OPEN,XTERM);
7590
7591         case KEY_or:
7592             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7593                 return REPORT(0);
7594             pl_yylval.ival = OP_OR;
7595             OPERATOR(OROP);
7596
7597         case KEY_ord:
7598             UNI(OP_ORD);
7599
7600         case KEY_oct:
7601             UNI(OP_OCT);
7602
7603         case KEY_opendir:
7604             LOP(OP_OPEN_DIR,XTERM);
7605
7606         case KEY_print:
7607             checkcomma(s,PL_tokenbuf,"filehandle");
7608             LOP(OP_PRINT,XREF);
7609
7610         case KEY_printf:
7611             checkcomma(s,PL_tokenbuf,"filehandle");
7612             LOP(OP_PRTF,XREF);
7613
7614         case KEY_prototype:
7615             UNI(OP_PROTOTYPE);
7616
7617         case KEY_push:
7618             LOP(OP_PUSH,XTERM);
7619
7620         case KEY_pop:
7621             UNIDOR(OP_POP);
7622
7623         case KEY_pos:
7624             UNIDOR(OP_POS);
7625
7626         case KEY_pack:
7627             LOP(OP_PACK,XTERM);
7628
7629         case KEY_package:
7630             s = force_word(s,WORD,FALSE,TRUE);
7631             s = skipspace(s);
7632             s = force_strict_version(s);
7633             PREBLOCK(PACKAGE);
7634
7635         case KEY_pipe:
7636             LOP(OP_PIPE_OP,XTERM);
7637
7638         case KEY_q:
7639             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7640             if (!s)
7641                 missingterm(NULL);
7642             COPLINE_SET_FROM_MULTI_END;
7643             pl_yylval.ival = OP_CONST;
7644             TERM(sublex_start());
7645
7646         case KEY_quotemeta:
7647             UNI(OP_QUOTEMETA);
7648
7649         case KEY_qw: {
7650             OP *words = NULL;
7651             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7652             if (!s)
7653                 missingterm(NULL);
7654             COPLINE_SET_FROM_MULTI_END;
7655             PL_expect = XOPERATOR;
7656             if (SvCUR(PL_lex_stuff)) {
7657                 int warned_comma = !ckWARN(WARN_QW);
7658                 int warned_comment = warned_comma;
7659                 d = SvPV_force(PL_lex_stuff, len);
7660                 while (len) {
7661                     for (; isSPACE(*d) && len; --len, ++d)
7662                         /**/;
7663                     if (len) {
7664                         SV *sv;
7665                         const char *b = d;
7666                         if (!warned_comma || !warned_comment) {
7667                             for (; !isSPACE(*d) && len; --len, ++d) {
7668                                 if (!warned_comma && *d == ',') {
7669                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7670                                         "Possible attempt to separate words with commas");
7671                                     ++warned_comma;
7672                                 }
7673                                 else if (!warned_comment && *d == '#') {
7674                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7675                                         "Possible attempt to put comments in qw() list");
7676                                     ++warned_comment;
7677                                 }
7678                             }
7679                         }
7680                         else {
7681                             for (; !isSPACE(*d) && len; --len, ++d)
7682                                 /**/;
7683                         }
7684                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7685                         words = op_append_elem(OP_LIST, words,
7686                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7687                     }
7688                 }
7689             }
7690             if (!words)
7691                 words = newNULLLIST();
7692             SvREFCNT_dec_NN(PL_lex_stuff);
7693             PL_lex_stuff = NULL;
7694             PL_expect = XOPERATOR;
7695             pl_yylval.opval = sawparens(words);
7696             TOKEN(QWLIST);
7697         }
7698
7699         case KEY_qq:
7700             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7701             if (!s)
7702                 missingterm(NULL);
7703             pl_yylval.ival = OP_STRINGIFY;
7704             if (SvIVX(PL_lex_stuff) == '\'')
7705                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7706             TERM(sublex_start());
7707
7708         case KEY_qr:
7709             s = scan_pat(s,OP_QR);
7710             TERM(sublex_start());
7711
7712         case KEY_qx:
7713             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7714             if (!s)
7715                 missingterm(NULL);
7716             pl_yylval.ival = OP_BACKTICK;
7717             TERM(sublex_start());
7718
7719         case KEY_return:
7720             OLDLOP(OP_RETURN);
7721
7722         case KEY_require:
7723             s = skipspace(s);
7724             if (isDIGIT(*s)) {
7725                 s = force_version(s, FALSE);
7726             }
7727             else if (*s != 'v' || !isDIGIT(s[1])
7728                     || (s = force_version(s, TRUE), *s == 'v'))
7729             {
7730                 *PL_tokenbuf = '\0';
7731                 s = force_word(s,WORD,TRUE,TRUE);
7732                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7733                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7734                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7735                 else if (*s == '<')
7736                     yyerror("<> at require-statement should be quotes");
7737             }
7738             if (orig_keyword == KEY_require) {
7739                 orig_keyword = 0;
7740                 pl_yylval.ival = 1;
7741             }
7742             else
7743                 pl_yylval.ival = 0;
7744             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7745             PL_bufptr = s;
7746             PL_last_uni = PL_oldbufptr;
7747             PL_last_lop_op = OP_REQUIRE;
7748             s = skipspace(s);
7749             return REPORT( (int)REQUIRE );
7750
7751         case KEY_reset:
7752             UNI(OP_RESET);
7753
7754         case KEY_redo:
7755             LOOPX(OP_REDO);
7756
7757         case KEY_rename:
7758             LOP(OP_RENAME,XTERM);
7759
7760         case KEY_rand:
7761             UNI(OP_RAND);
7762
7763         case KEY_rmdir:
7764             UNI(OP_RMDIR);
7765
7766         case KEY_rindex:
7767             LOP(OP_RINDEX,XTERM);
7768
7769         case KEY_read:
7770             LOP(OP_READ,XTERM);
7771
7772         case KEY_readdir:
7773             UNI(OP_READDIR);
7774
7775         case KEY_readline:
7776             UNIDOR(OP_READLINE);
7777
7778         case KEY_readpipe:
7779             UNIDOR(OP_BACKTICK);
7780
7781         case KEY_rewinddir:
7782             UNI(OP_REWINDDIR);
7783
7784         case KEY_recv:
7785             LOP(OP_RECV,XTERM);
7786
7787         case KEY_reverse:
7788             LOP(OP_REVERSE,XTERM);
7789
7790         case KEY_readlink:
7791             UNIDOR(OP_READLINK);
7792
7793         case KEY_ref:
7794             UNI(OP_REF);
7795
7796         case KEY_s:
7797             s = scan_subst(s);
7798             if (pl_yylval.opval)
7799                 TERM(sublex_start());
7800             else
7801                 TOKEN(1);       /* force error */
7802
7803         case KEY_say:
7804             checkcomma(s,PL_tokenbuf,"filehandle");
7805             LOP(OP_SAY,XREF);
7806
7807         case KEY_chomp:
7808             UNI(OP_CHOMP);
7809
7810         case KEY_scalar:
7811             UNI(OP_SCALAR);
7812
7813         case KEY_select:
7814             LOP(OP_SELECT,XTERM);
7815
7816         case KEY_seek:
7817             LOP(OP_SEEK,XTERM);
7818
7819         case KEY_semctl:
7820             LOP(OP_SEMCTL,XTERM);
7821
7822         case KEY_semget:
7823             LOP(OP_SEMGET,XTERM);
7824
7825         case KEY_semop:
7826             LOP(OP_SEMOP,XTERM);
7827
7828         case KEY_send:
7829             LOP(OP_SEND,XTERM);
7830
7831         case KEY_setpgrp:
7832             LOP(OP_SETPGRP,XTERM);
7833
7834         case KEY_setpriority:
7835             LOP(OP_SETPRIORITY,XTERM);
7836
7837         case KEY_sethostent:
7838             UNI(OP_SHOSTENT);
7839
7840         case KEY_setnetent:
7841             UNI(OP_SNETENT);
7842
7843         case KEY_setservent:
7844             UNI(OP_SSERVENT);
7845
7846         case KEY_setprotoent:
7847             UNI(OP_SPROTOENT);
7848
7849         case KEY_setpwent:
7850             FUN0(OP_SPWENT);
7851
7852         case KEY_setgrent:
7853             FUN0(OP_SGRENT);
7854
7855         case KEY_seekdir:
7856             LOP(OP_SEEKDIR,XTERM);
7857
7858         case KEY_setsockopt:
7859             LOP(OP_SSOCKOPT,XTERM);
7860
7861         case KEY_shift:
7862             UNIDOR(OP_SHIFT);
7863
7864         case KEY_shmctl:
7865             LOP(OP_SHMCTL,XTERM);
7866
7867         case KEY_shmget:
7868             LOP(OP_SHMGET,XTERM);
7869
7870         case KEY_shmread:
7871             LOP(OP_SHMREAD,XTERM);
7872
7873         case KEY_shmwrite:
7874             LOP(OP_SHMWRITE,XTERM);
7875
7876         case KEY_shutdown:
7877             LOP(OP_SHUTDOWN,XTERM);
7878
7879         case KEY_sin:
7880             UNI(OP_SIN);
7881
7882         case KEY_sleep:
7883             UNI(OP_SLEEP);
7884
7885         case KEY_socket:
7886             LOP(OP_SOCKET,XTERM);
7887
7888         case KEY_socketpair:
7889             LOP(OP_SOCKPAIR,XTERM);
7890
7891         case KEY_sort:
7892             checkcomma(s,PL_tokenbuf,"subroutine name");
7893             s = skipspace(s);
7894             PL_expect = XTERM;
7895             s = force_word(s,WORD,TRUE,TRUE);
7896             LOP(OP_SORT,XREF);
7897
7898         case KEY_split:
7899             LOP(OP_SPLIT,XTERM);
7900
7901         case KEY_sprintf:
7902             LOP(OP_SPRINTF,XTERM);
7903
7904         case KEY_splice:
7905             LOP(OP_SPLICE,XTERM);
7906
7907         case KEY_sqrt:
7908             UNI(OP_SQRT);
7909
7910         case KEY_srand:
7911             UNI(OP_SRAND);
7912
7913         case KEY_stat:
7914             UNI(OP_STAT);
7915
7916         case KEY_study:
7917             UNI(OP_STUDY);
7918
7919         case KEY_substr:
7920             LOP(OP_SUBSTR,XTERM);
7921
7922         case KEY_format:
7923         case KEY_sub:
7924           really_sub:
7925             {
7926                 char * const tmpbuf = PL_tokenbuf + 1;
7927                 expectation attrful;
7928                 bool have_name, have_proto;
7929                 const int key = tmp;
7930                 SV *format_name = NULL;
7931
7932                 d = s;
7933                 s = skipspace(s);
7934
7935                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7936                     (*s == ':' && s[1] == ':'))
7937                 {
7938
7939                     PL_expect = XBLOCK;
7940                     attrful = XATTRBLOCK;
7941                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7942                                   &len);
7943                     if (key == KEY_format)
7944                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7945                     *PL_tokenbuf = '&';
7946                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
7947                      || pad_findmy_pvn(
7948                             PL_tokenbuf, len + 1, 0
7949                         ) != NOT_IN_PAD)
7950                         sv_setpvn(PL_subname, tmpbuf, len);
7951                     else {
7952                         sv_setsv(PL_subname,PL_curstname);
7953                         sv_catpvs(PL_subname,"::");
7954                         sv_catpvn(PL_subname,tmpbuf,len);
7955                     }
7956                     if (SvUTF8(PL_linestr))
7957                         SvUTF8_on(PL_subname);
7958                     have_name = TRUE;
7959
7960
7961                     s = skipspace(d);
7962                 }
7963                 else {
7964                     if (key == KEY_my || key == KEY_our || key==KEY_state)
7965                     {
7966                         *d = '\0';
7967                         /* diag_listed_as: Missing name in "%s sub" */
7968                         Perl_croak(aTHX_
7969                                   "Missing name in \"%s\"", PL_bufptr);
7970                     }
7971                     PL_expect = XTERMBLOCK;
7972                     attrful = XATTRTERM;
7973                     sv_setpvs(PL_subname,"?");
7974                     have_name = FALSE;
7975                 }
7976
7977                 if (key == KEY_format) {
7978                     if (format_name) {
7979                         NEXTVAL_NEXTTOKE.opval
7980                             = (OP*)newSVOP(OP_CONST,0, format_name);
7981                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7982                         force_next(WORD);
7983                     }
7984                     PREBLOCK(FORMAT);
7985                 }
7986
7987                 /* Look for a prototype */
7988                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
7989                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7990                     COPLINE_SET_FROM_MULTI_END;
7991                     if (!s)
7992                         Perl_croak(aTHX_ "Prototype not terminated");
7993                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
7994                     have_proto = TRUE;
7995
7996                     s = skipspace(s);
7997                 }
7998                 else
7999                     have_proto = FALSE;
8000
8001                 if (*s == ':' && s[1] != ':')
8002                     PL_expect = attrful;
8003                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8004                     if (!have_name)
8005                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8006                     else if (*s != ';' && *s != '}')
8007                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8008                 }
8009
8010                 if (have_proto) {
8011                     NEXTVAL_NEXTTOKE.opval =
8012                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8013                     PL_lex_stuff = NULL;
8014                     force_next(THING);
8015                 }
8016                 if (!have_name) {
8017                     if (PL_curstash)
8018                         sv_setpvs(PL_subname, "__ANON__");
8019                     else
8020                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8021                     TOKEN(ANONSUB);
8022                 }
8023                 force_ident_maybe_lex('&');
8024                 TOKEN(SUB);
8025             }
8026
8027         case KEY_system:
8028             LOP(OP_SYSTEM,XREF);
8029
8030         case KEY_symlink:
8031             LOP(OP_SYMLINK,XTERM);
8032
8033         case KEY_syscall:
8034             LOP(OP_SYSCALL,XTERM);
8035
8036         case KEY_sysopen:
8037             LOP(OP_SYSOPEN,XTERM);
8038
8039         case KEY_sysseek:
8040             LOP(OP_SYSSEEK,XTERM);
8041
8042         case KEY_sysread:
8043             LOP(OP_SYSREAD,XTERM);
8044
8045         case KEY_syswrite:
8046             LOP(OP_SYSWRITE,XTERM);
8047
8048         case KEY_tr:
8049         case KEY_y:
8050             s = scan_trans(s);
8051             TERM(sublex_start());
8052
8053         case KEY_tell:
8054             UNI(OP_TELL);
8055
8056         case KEY_telldir:
8057             UNI(OP_TELLDIR);
8058
8059         case KEY_tie:
8060             LOP(OP_TIE,XTERM);
8061
8062         case KEY_tied:
8063             UNI(OP_TIED);
8064
8065         case KEY_time:
8066             FUN0(OP_TIME);
8067
8068         case KEY_times:
8069             FUN0(OP_TMS);
8070
8071         case KEY_truncate:
8072             LOP(OP_TRUNCATE,XTERM);
8073
8074         case KEY_uc:
8075             UNI(OP_UC);
8076
8077         case KEY_ucfirst:
8078             UNI(OP_UCFIRST);
8079
8080         case KEY_untie:
8081             UNI(OP_UNTIE);
8082
8083         case KEY_until:
8084             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8085                 return REPORT(0);
8086             pl_yylval.ival = CopLINE(PL_curcop);
8087             OPERATOR(UNTIL);
8088
8089         case KEY_unless:
8090             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8091                 return REPORT(0);
8092             pl_yylval.ival = CopLINE(PL_curcop);
8093             OPERATOR(UNLESS);
8094
8095         case KEY_unlink:
8096             LOP(OP_UNLINK,XTERM);
8097
8098         case KEY_undef:
8099             UNIDOR(OP_UNDEF);
8100
8101         case KEY_unpack:
8102             LOP(OP_UNPACK,XTERM);
8103
8104         case KEY_utime:
8105             LOP(OP_UTIME,XTERM);
8106
8107         case KEY_umask:
8108             UNIDOR(OP_UMASK);
8109
8110         case KEY_unshift:
8111             LOP(OP_UNSHIFT,XTERM);
8112
8113         case KEY_use:
8114             s = tokenize_use(1, s);
8115             TOKEN(USE);
8116
8117         case KEY_values:
8118             UNI(OP_VALUES);
8119
8120         case KEY_vec:
8121             LOP(OP_VEC,XTERM);
8122
8123         case KEY_when:
8124             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8125                 return REPORT(0);
8126             pl_yylval.ival = CopLINE(PL_curcop);
8127             Perl_ck_warner_d(aTHX_
8128                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8129                 "when is experimental");
8130             OPERATOR(WHEN);
8131
8132         case KEY_while:
8133             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8134                 return REPORT(0);
8135             pl_yylval.ival = CopLINE(PL_curcop);
8136             OPERATOR(WHILE);
8137
8138         case KEY_warn:
8139             PL_hints |= HINT_BLOCK_SCOPE;
8140             LOP(OP_WARN,XTERM);
8141
8142         case KEY_wait:
8143             FUN0(OP_WAIT);
8144
8145         case KEY_waitpid:
8146             LOP(OP_WAITPID,XTERM);
8147
8148         case KEY_wantarray:
8149             FUN0(OP_WANTARRAY);
8150
8151         case KEY_write:
8152             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8153              * we use the same number on EBCDIC */
8154             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8155             UNI(OP_ENTERWRITE);
8156
8157         case KEY_x:
8158             if (PL_expect == XOPERATOR) {
8159                 if (*s == '=' && !PL_lex_allbrackets &&
8160                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8161                     return REPORT(0);
8162                 Mop(OP_REPEAT);
8163             }
8164             check_uni();
8165             goto just_a_word;
8166
8167         case KEY_xor:
8168             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8169                 return REPORT(0);
8170             pl_yylval.ival = OP_XOR;
8171             OPERATOR(OROP);
8172         }
8173     }}
8174 }
8175
8176 /*
8177   S_pending_ident
8178
8179   Looks up an identifier in the pad or in a package
8180
8181   Returns:
8182     PRIVATEREF if this is a lexical name.
8183     WORD       if this belongs to a package.
8184
8185   Structure:
8186       if we're in a my declaration
8187           croak if they tried to say my($foo::bar)
8188           build the ops for a my() declaration
8189       if it's an access to a my() variable
8190           build ops for access to a my() variable
8191       if in a dq string, and they've said @foo and we can't find @foo
8192           warn
8193       build ops for a bareword
8194 */
8195
8196 static int
8197 S_pending_ident(pTHX)
8198 {
8199     PADOFFSET tmp = 0;
8200     const char pit = (char)pl_yylval.ival;
8201     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8202     /* All routes through this function want to know if there is a colon.  */
8203     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8204
8205     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8206           "### Pending identifier '%s'\n", PL_tokenbuf); });
8207
8208     /* if we're in a my(), we can't allow dynamics here.
8209        $foo'bar has already been turned into $foo::bar, so
8210        just check for colons.
8211
8212        if it's a legal name, the OP is a PADANY.
8213     */
8214     if (PL_in_my) {
8215         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8216             if (has_colon)
8217                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8218                                   "variable %s in \"our\"",
8219                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8220             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8221         }
8222         else {
8223             if (has_colon) {
8224                 /* "my" variable %s can't be in a package */
8225                 /* PL_no_myglob is constant */
8226                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8227                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8228                             PL_in_my == KEY_my ? "my" : "state",
8229                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8230                             PL_tokenbuf),
8231                             UTF ? SVf_UTF8 : 0);
8232                 GCC_DIAG_RESTORE;
8233             }
8234
8235             pl_yylval.opval = newOP(OP_PADANY, 0);
8236             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8237                                                         UTF ? SVf_UTF8 : 0);
8238             return PRIVATEREF;
8239         }
8240     }
8241
8242     /*
8243        build the ops for accesses to a my() variable.
8244     */
8245
8246     if (!has_colon) {
8247         if (!PL_in_my)
8248             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8249                                  0);
8250         if (tmp != NOT_IN_PAD) {
8251             /* might be an "our" variable" */
8252             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8253                 /* build ops for a bareword */
8254                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8255                 HEK * const stashname = HvNAME_HEK(stash);
8256                 SV *  const sym = newSVhek(stashname);
8257                 sv_catpvs(sym, "::");
8258                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8259                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8260                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8261                 if (pit != '&')
8262                   gv_fetchsv(sym,
8263                     GV_ADDMULTI,
8264                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8265                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8266                      : SVt_PVHV));
8267                 return WORD;
8268             }
8269
8270             pl_yylval.opval = newOP(OP_PADANY, 0);
8271             pl_yylval.opval->op_targ = tmp;
8272             return PRIVATEREF;
8273         }
8274     }
8275
8276     /*
8277        Whine if they've said @foo in a doublequoted string,
8278        and @foo isn't a variable we can find in the symbol
8279        table.
8280     */
8281     if (ckWARN(WARN_AMBIGUOUS) &&
8282         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8283         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8284                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8285         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8286                 /* DO NOT warn for @- and @+ */
8287                 && !( PL_tokenbuf[2] == '\0' &&
8288                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8289            )
8290         {
8291             /* Downgraded from fatal to warning 20000522 mjd */
8292             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8293                         "Possible unintended interpolation of %"UTF8f
8294                         " in string",
8295                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8296         }
8297     }
8298
8299     /* build ops for a bareword */
8300     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8301                                    newSVpvn_flags(PL_tokenbuf + 1,
8302                                                       tokenbuf_len - 1,
8303                                                       UTF ? SVf_UTF8 : 0 ));
8304     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8305     if (pit != '&')
8306         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8307                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8308                      | ( UTF ? SVf_UTF8 : 0 ),
8309                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8310                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8311                       : SVt_PVHV));
8312     return WORD;
8313 }
8314
8315 STATIC void
8316 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8317 {
8318     PERL_ARGS_ASSERT_CHECKCOMMA;
8319
8320     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8321         if (ckWARN(WARN_SYNTAX)) {
8322             int level = 1;
8323             const char *w;
8324             for (w = s+2; *w && level; w++) {
8325                 if (*w == '(')
8326                     ++level;
8327                 else if (*w == ')')
8328                     --level;
8329             }
8330             while (isSPACE(*w))
8331                 ++w;
8332             /* the list of chars below is for end of statements or
8333              * block / parens, boolean operators (&&, ||, //) and branch
8334              * constructs (or, and, if, until, unless, while, err, for).
8335              * Not a very solid hack... */
8336             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8337                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8338                             "%s (...) interpreted as function",name);
8339         }
8340     }
8341     while (s < PL_bufend && isSPACE(*s))
8342         s++;
8343     if (*s == '(')
8344         s++;
8345     while (s < PL_bufend && isSPACE(*s))
8346         s++;
8347     if (isIDFIRST_lazy_if(s,UTF)) {
8348         const char * const w = s;
8349         s += UTF ? UTF8SKIP(s) : 1;
8350         while (isWORDCHAR_lazy_if(s,UTF))
8351             s += UTF ? UTF8SKIP(s) : 1;
8352         while (s < PL_bufend && isSPACE(*s))
8353             s++;
8354         if (*s == ',') {
8355             GV* gv;
8356             PADOFFSET off;
8357             if (keyword(w, s - w, 0))
8358                 return;
8359
8360             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8361             if (gv && GvCVu(gv))
8362                 return;
8363             if (s - w <= 254) {
8364                 char tmpbuf[256];
8365                 Copy(w, tmpbuf+1, s - w, char);
8366                 *tmpbuf = '&';
8367                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8368                 if (off != NOT_IN_PAD) return;
8369             }
8370             Perl_croak(aTHX_ "No comma allowed after %s", what);
8371         }
8372     }
8373 }
8374
8375 /* S_new_constant(): do any overload::constant lookup.
8376
8377    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8378    Best used as sv=new_constant(..., sv, ...).
8379    If s, pv are NULL, calls subroutine with one argument,
8380    and <type> is used with error messages only.
8381    <type> is assumed to be well formed UTF-8 */
8382
8383 STATIC SV *
8384 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8385                SV *sv, SV *pv, const char *type, STRLEN typelen)
8386 {
8387     dSP;
8388     HV * table = GvHV(PL_hintgv);                /* ^H */
8389     SV *res;
8390     SV *errsv = NULL;
8391     SV **cvp;
8392     SV *cv, *typesv;
8393     const char *why1 = "", *why2 = "", *why3 = "";
8394
8395     PERL_ARGS_ASSERT_NEW_CONSTANT;
8396     /* We assume that this is true: */
8397     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8398     assert(type || s);
8399
8400     /* charnames doesn't work well if there have been errors found */
8401     if (PL_error_count > 0 && *key == 'c')
8402     {
8403         SvREFCNT_dec_NN(sv);
8404         return &PL_sv_undef;
8405     }
8406
8407     sv_2mortal(sv);                     /* Parent created it permanently */
8408     if (!table
8409         || ! (PL_hints & HINT_LOCALIZE_HH)
8410         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8411         || ! SvOK(*cvp))
8412     {
8413         char *msg;
8414
8415         /* Here haven't found what we're looking for.  If it is charnames,
8416          * perhaps it needs to be loaded.  Try doing that before giving up */
8417         if (*key == 'c') {
8418             Perl_load_module(aTHX_
8419                             0,
8420                             newSVpvs("_charnames"),
8421                              /* version parameter; no need to specify it, as if
8422                               * we get too early a version, will fail anyway,
8423                               * not being able to find '_charnames' */
8424                             NULL,
8425                             newSVpvs(":full"),
8426                             newSVpvs(":short"),
8427                             NULL);
8428             assert(sp == PL_stack_sp);
8429             table = GvHV(PL_hintgv);
8430             if (table
8431                 && (PL_hints & HINT_LOCALIZE_HH)
8432                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8433                 && SvOK(*cvp))
8434             {
8435                 goto now_ok;
8436             }
8437         }
8438         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8439             msg = Perl_form(aTHX_
8440                                "Constant(%.*s) unknown",
8441                                 (int)(type ? typelen : len),
8442                                 (type ? type: s));
8443         }
8444         else {
8445             why1 = "$^H{";
8446             why2 = key;
8447             why3 = "} is not defined";
8448         report:
8449             if (*key == 'c') {
8450                 msg = Perl_form(aTHX_
8451                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8452                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8453                       );
8454             }
8455             else {
8456                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8457                                     (int)(type ? typelen : len),
8458                                     (type ? type: s), why1, why2, why3);
8459             }
8460         }
8461         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8462         return SvREFCNT_inc_simple_NN(sv);
8463     }
8464   now_ok:
8465     cv = *cvp;
8466     if (!pv && s)
8467         pv = newSVpvn_flags(s, len, SVs_TEMP);
8468     if (type && pv)
8469         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8470     else
8471         typesv = &PL_sv_undef;
8472
8473     PUSHSTACKi(PERLSI_OVERLOAD);
8474     ENTER ;
8475     SAVETMPS;
8476
8477     PUSHMARK(SP) ;
8478     EXTEND(sp, 3);
8479     if (pv)
8480         PUSHs(pv);
8481     PUSHs(sv);
8482     if (pv)
8483         PUSHs(typesv);
8484     PUTBACK;
8485     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8486
8487     SPAGAIN ;
8488
8489     /* Check the eval first */
8490     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8491         STRLEN errlen;
8492         const char * errstr;
8493         sv_catpvs(errsv, "Propagated");
8494         errstr = SvPV_const(errsv, errlen);
8495         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8496         (void)POPs;
8497         res = SvREFCNT_inc_simple_NN(sv);
8498     }
8499     else {
8500         res = POPs;
8501         SvREFCNT_inc_simple_void_NN(res);
8502     }
8503
8504     PUTBACK ;
8505     FREETMPS ;
8506     LEAVE ;
8507     POPSTACK;
8508
8509     if (!SvOK(res)) {
8510         why1 = "Call to &{$^H{";
8511         why2 = key;
8512         why3 = "}} did not return a defined value";
8513         sv = res;
8514         (void)sv_2mortal(sv);
8515         goto report;
8516     }
8517
8518     return res;
8519 }
8520
8521 PERL_STATIC_INLINE void
8522 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8523     PERL_ARGS_ASSERT_PARSE_IDENT;
8524
8525     for (;;) {
8526         if (*d >= e)
8527             Perl_croak(aTHX_ "%s", ident_too_long);
8528         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8529              /* The UTF-8 case must come first, otherwise things
8530              * like c\N{COMBINING TILDE} would start failing, as the
8531              * isWORDCHAR_A case below would gobble the 'c' up.
8532              */
8533
8534             char *t = *s + UTF8SKIP(*s);
8535             while (isIDCONT_utf8((U8*)t))
8536                 t += UTF8SKIP(t);
8537             if (*d + (t - *s) > e)
8538                 Perl_croak(aTHX_ "%s", ident_too_long);
8539             Copy(*s, *d, t - *s, char);
8540             *d += t - *s;
8541             *s = t;
8542         }
8543         else if ( isWORDCHAR_A(**s) ) {
8544             do {
8545                 *(*d)++ = *(*s)++;
8546             } while (isWORDCHAR_A(**s) && *d < e);
8547         }
8548         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8549             *(*d)++ = ':';
8550             *(*d)++ = ':';
8551             (*s)++;
8552         }
8553         else if (allow_package && **s == ':' && (*s)[1] == ':'
8554            /* Disallow things like Foo::$bar. For the curious, this is
8555             * the code path that triggers the "Bad name after" warning
8556             * when looking for barewords.
8557             */
8558            && (*s)[2] != '$') {
8559             *(*d)++ = *(*s)++;
8560             *(*d)++ = *(*s)++;
8561         }
8562         else
8563             break;
8564     }
8565     return;
8566 }
8567
8568 /* Returns a NUL terminated string, with the length of the string written to
8569    *slp
8570    */
8571 STATIC char *
8572 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8573 {
8574     char *d = dest;
8575     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8576     bool is_utf8 = cBOOL(UTF);
8577
8578     PERL_ARGS_ASSERT_SCAN_WORD;
8579
8580     parse_ident(&s, &d, e, allow_package, is_utf8);
8581     *d = '\0';
8582     *slp = d - dest;
8583     return s;
8584 }
8585
8586 STATIC char *
8587 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8588 {
8589     I32 herelines = PL_parser->herelines;
8590     SSize_t bracket = -1;
8591     char funny = *s++;
8592     char *d = dest;
8593     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8594     bool is_utf8 = cBOOL(UTF);
8595     I32 orig_copline = 0, tmp_copline = 0;
8596
8597     PERL_ARGS_ASSERT_SCAN_IDENT;
8598
8599     if (isSPACE(*s) || !*s)
8600         s = skipspace(s);
8601     if (isDIGIT(*s)) {
8602         while (isDIGIT(*s)) {
8603             if (d >= e)
8604                 Perl_croak(aTHX_ "%s", ident_too_long);
8605             *d++ = *s++;
8606         }
8607     }
8608     else {
8609         parse_ident(&s, &d, e, 1, is_utf8);
8610     }
8611     *d = '\0';
8612     d = dest;
8613     if (*d) {
8614         /* Either a digit variable, or parse_ident() found an identifier
8615            (anything valid as a bareword), so job done and return.  */
8616         if (PL_lex_state != LEX_NORMAL)
8617             PL_lex_state = LEX_INTERPENDMAYBE;
8618         return s;
8619     }
8620     if (*s == '$' && s[1] &&
8621       (isIDFIRST_lazy_if(s+1,is_utf8)
8622          || isDIGIT_A((U8)s[1])
8623          || s[1] == '$'
8624          || s[1] == '{'
8625          || strnEQ(s+1,"::",2)) )
8626     {
8627         /* Dereferencing a value in a scalar variable.
8628            The alternatives are different syntaxes for a scalar variable.
8629            Using ' as a leading package separator isn't allowed. :: is.   */
8630         return s;
8631     }
8632     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8633     if (*s == '{') {
8634         bracket = s - SvPVX(PL_linestr);
8635         s++;
8636         orig_copline = CopLINE(PL_curcop);
8637         if (s < PL_bufend && isSPACE(*s)) {
8638             s = skipspace(s);
8639         }
8640     }
8641
8642 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8643  * iff Unicode semantics are to be used.  The legal ones are any of:
8644  *  a) all ASCII characters except:
8645  *          1) space-type ones, like \t and SPACE;
8646             2) NUL;
8647  *          3) '{'
8648  *     The final case currently doesn't get this far in the program, so we
8649  *     don't test for it.  If that were to change, it would be ok to allow it.
8650  *  c) When not under Unicode rules, any upper Latin1 character
8651  *  d) Otherwise, when unicode rules are used, all XIDS characters.
8652  *
8653  *      Because all ASCII characters have the same representation whether
8654  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8655  *      '{' without knowing if is UTF-8 or not.
8656  * EBCDIC already uses the rules that ASCII platforms will use after the
8657  * deprecation cycle; see comment below about the deprecation. */
8658 #ifdef EBCDIC
8659 #   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
8660     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8661                          ? isIDFIRST_utf8((U8*) (s))                          \
8662                          : (isGRAPH_L1(*s)                                    \
8663                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8664 #else
8665 #   define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
8666                                             && LIKELY(*(s) != '\0')           \
8667                                             && (! is_utf8                     \
8668                                                 || isASCII_utf8((U8*) (s))    \
8669                                                 || isIDFIRST_utf8((U8*) (s))))
8670 #endif
8671     if ((s <= PL_bufend - (is_utf8)
8672                           ? UTF8SKIP(s)
8673                           : 1)
8674         && VALID_LEN_ONE_IDENT(s, is_utf8))
8675     {
8676         /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
8677          * because often it has no graphic representation.  (We can't get to
8678          * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8679          * test for it.) */
8680         if ((is_utf8)
8681             ? ! isGRAPH_utf8( (U8*) s)
8682             : (! isGRAPH_L1( (U8) *s)
8683                || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
8684         {
8685             /* Split messages for back compat */
8686             if (isCNTRL_A( (U8) *s)) {
8687                 deprecate("literal control characters in variable names");
8688             }
8689             else {
8690                 deprecate("literal non-graphic characters in variable names");
8691             }
8692         }
8693
8694         if (is_utf8) {
8695             const STRLEN skip = UTF8SKIP(s);
8696             STRLEN i;
8697             d[skip] = '\0';
8698             for ( i = 0; i < skip; i++ )
8699                 d[i] = *s++;
8700         }
8701         else {
8702             *d = *s++;
8703             d[1] = '\0';
8704         }
8705     }
8706     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8707     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8708         *d = toCTRL(*s);
8709         s++;
8710     }
8711     /* Warn about ambiguous code after unary operators if {...} notation isn't
8712        used.  There's no difference in ambiguity; it's merely a heuristic
8713        about when not to warn.  */
8714     else if (ck_uni && bracket == -1)
8715         check_uni();
8716     if (bracket != -1) {
8717         /* If we were processing {...} notation then...  */
8718         if (isIDFIRST_lazy_if(d,is_utf8)) {
8719             /* if it starts as a valid identifier, assume that it is one.
8720                (the later check for } being at the expected point will trap
8721                cases where this doesn't pan out.)  */
8722         d += is_utf8 ? UTF8SKIP(d) : 1;
8723         parse_ident(&s, &d, e, 1, is_utf8);
8724             *d = '\0';
8725             tmp_copline = CopLINE(PL_curcop);
8726             if (s < PL_bufend && isSPACE(*s)) {
8727                 s = skipspace(s);
8728             }
8729             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8730                 /* ${foo[0]} and ${foo{bar}} notation.  */
8731                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8732                     const char * const brack =
8733                         (const char *)
8734                         ((*s == '[') ? "[...]" : "{...}");
8735                     orig_copline = CopLINE(PL_curcop);
8736                     CopLINE_set(PL_curcop, tmp_copline);
8737    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8738                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8739                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8740                         funny, dest, brack, funny, dest, brack);
8741                     CopLINE_set(PL_curcop, orig_copline);
8742                 }
8743                 bracket++;
8744                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8745                 PL_lex_allbrackets++;
8746                 return s;
8747             }
8748         }
8749         /* Handle extended ${^Foo} variables
8750          * 1999-02-27 mjd-perl-patch@plover.com */
8751         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8752                  && isWORDCHAR(*s))
8753         {
8754             d++;
8755             while (isWORDCHAR(*s) && d < e) {
8756                 *d++ = *s++;
8757             }
8758             if (d >= e)
8759                 Perl_croak(aTHX_ "%s", ident_too_long);
8760             *d = '\0';
8761         }
8762
8763         if ( !tmp_copline )
8764             tmp_copline = CopLINE(PL_curcop);
8765         if (s < PL_bufend && isSPACE(*s)) {
8766             s = skipspace(s);
8767         }
8768
8769         /* Expect to find a closing } after consuming any trailing whitespace.
8770          */
8771         if (*s == '}') {
8772             s++;
8773             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8774                 PL_lex_state = LEX_INTERPEND;
8775                 PL_expect = XREF;
8776             }
8777             if (PL_lex_state == LEX_NORMAL) {
8778                 if (ckWARN(WARN_AMBIGUOUS) &&
8779                     (keyword(dest, d - dest, 0)
8780                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8781                 {
8782                     SV *tmp = newSVpvn_flags( dest, d - dest,
8783                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8784                     if (funny == '#')
8785                         funny = '@';
8786                     orig_copline = CopLINE(PL_curcop);
8787                     CopLINE_set(PL_curcop, tmp_copline);
8788                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8789                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8790                         funny, SVfARG(tmp), funny, SVfARG(tmp));
8791                     CopLINE_set(PL_curcop, orig_copline);
8792                 }
8793             }
8794         }
8795         else {
8796             /* Didn't find the closing } at the point we expected, so restore
8797                state such that the next thing to process is the opening { and */
8798             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8799             CopLINE_set(PL_curcop, orig_copline);
8800             PL_parser->herelines = herelines;
8801             *dest = '\0';
8802         }
8803     }
8804     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8805         PL_lex_state = LEX_INTERPEND;
8806     return s;
8807 }
8808
8809 static bool
8810 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
8811
8812     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
8813      * found in the parse starting at 's', based on the subset that are valid
8814      * in this context input to this routine in 'valid_flags'. Advances s.
8815      * Returns TRUE if the input should be treated as a valid flag, so the next
8816      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
8817      * upon first call on the current regex.  This routine will set it to any
8818      * charset modifier found.  The caller shouldn't change it.  This way,
8819      * another charset modifier encountered in the parse can be detected as an
8820      * error, as we have decided to allow only one */
8821
8822     const char c = **s;
8823     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8824
8825     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8826         if (isWORDCHAR_lazy_if(*s, UTF)) {
8827             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8828                        UTF ? SVf_UTF8 : 0);
8829             (*s) += charlen;
8830             /* Pretend that it worked, so will continue processing before
8831              * dieing */
8832             return TRUE;
8833         }
8834         return FALSE;
8835     }
8836
8837     switch (c) {
8838
8839         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
8840         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8841         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8842         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8843         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8844         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8845         case LOCALE_PAT_MOD:
8846             if (*charset) {
8847                 goto multiple_charsets;
8848             }
8849             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8850             *charset = c;
8851             break;
8852         case UNICODE_PAT_MOD:
8853             if (*charset) {
8854                 goto multiple_charsets;
8855             }
8856             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8857             *charset = c;
8858             break;
8859         case ASCII_RESTRICT_PAT_MOD:
8860             if (! *charset) {
8861                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8862             }
8863             else {
8864
8865                 /* Error if previous modifier wasn't an 'a', but if it was, see
8866                  * if, and accept, a second occurrence (only) */
8867                 if (*charset != 'a'
8868                     || get_regex_charset(*pmfl)
8869                         != REGEX_ASCII_RESTRICTED_CHARSET)
8870                 {
8871                         goto multiple_charsets;
8872                 }
8873                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8874             }
8875             *charset = c;
8876             break;
8877         case DEPENDS_PAT_MOD:
8878             if (*charset) {
8879                 goto multiple_charsets;
8880             }
8881             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8882             *charset = c;
8883             break;
8884     }
8885
8886     (*s)++;
8887     return TRUE;
8888
8889     multiple_charsets:
8890         if (*charset != c) {
8891             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8892         }
8893         else if (c == 'a') {
8894   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8895             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8896         }
8897         else {
8898             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8899         }
8900
8901         /* Pretend that it worked, so will continue processing before dieing */
8902         (*s)++;
8903         return TRUE;
8904 }
8905
8906 STATIC char *
8907 S_scan_pat(pTHX_ char *start, I32 type)
8908 {
8909     PMOP *pm;
8910     char *s;
8911     const char * const valid_flags =
8912         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8913     char charset = '\0';    /* character set modifier */
8914     unsigned int x_mod_count = 0;
8915
8916     PERL_ARGS_ASSERT_SCAN_PAT;
8917
8918     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8919     if (!s)
8920         Perl_croak(aTHX_ "Search pattern not terminated");
8921
8922     pm = (PMOP*)newPMOP(type, 0);
8923     if (PL_multi_open == '?') {
8924         /* This is the only point in the code that sets PMf_ONCE:  */
8925         pm->op_pmflags |= PMf_ONCE;
8926
8927         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8928            allows us to restrict the list needed by reset to just the ??
8929            matches.  */
8930         assert(type != OP_TRANS);
8931         if (PL_curstash) {
8932             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8933             U32 elements;
8934             if (!mg) {
8935                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8936                                  0);
8937             }
8938             elements = mg->mg_len / sizeof(PMOP**);
8939             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8940             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8941             mg->mg_len = elements * sizeof(PMOP**);
8942             PmopSTASH_set(pm,PL_curstash);
8943         }
8944     }
8945
8946     /* if qr/...(?{..}).../, then need to parse the pattern within a new
8947      * anon CV. False positives like qr/[(?{]/ are harmless */
8948
8949     if (type == OP_QR) {
8950         STRLEN len;
8951         char *e, *p = SvPV(PL_lex_stuff, len);
8952         e = p + len;
8953         for (; p < e; p++) {
8954             if (p[0] == '(' && p[1] == '?'
8955                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8956             {
8957                 pm->op_pmflags |= PMf_HAS_CV;
8958                 break;
8959             }
8960         }
8961         pm->op_pmflags |= PMf_IS_QR;
8962     }
8963
8964     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
8965                                 &s, &charset, &x_mod_count))
8966     {};
8967     /* issue a warning if /c is specified,but /g is not */
8968     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8969     {
8970         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8971                        "Use of /c modifier is meaningless without /g" );
8972     }
8973
8974     STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
8975
8976     PL_lex_op = (OP*)pm;
8977     pl_yylval.ival = OP_MATCH;
8978     return s;
8979 }
8980
8981 STATIC char *
8982 S_scan_subst(pTHX_ char *start)
8983 {
8984     char *s;
8985     PMOP *pm;
8986     I32 first_start;
8987     line_t first_line;
8988     I32 es = 0;
8989     char charset = '\0';    /* character set modifier */
8990     unsigned int x_mod_count = 0;
8991     char *t;
8992
8993     PERL_ARGS_ASSERT_SCAN_SUBST;
8994
8995     pl_yylval.ival = OP_NULL;
8996
8997     s = scan_str(start, TRUE, FALSE, FALSE, &t);
8998
8999     if (!s)
9000         Perl_croak(aTHX_ "Substitution pattern not terminated");
9001
9002     s = t;
9003
9004     first_start = PL_multi_start;
9005     first_line = CopLINE(PL_curcop);
9006     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9007     if (!s) {
9008         SvREFCNT_dec_NN(PL_lex_stuff);
9009         PL_lex_stuff = NULL;
9010         Perl_croak(aTHX_ "Substitution replacement not terminated");
9011     }
9012     PL_multi_start = first_start;       /* so whole substitution is taken together */
9013
9014     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9015
9016
9017     while (*s) {
9018         if (*s == EXEC_PAT_MOD) {
9019             s++;
9020             es++;
9021         }
9022         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9023                                   &s, &charset, &x_mod_count))
9024         {
9025             break;
9026         }
9027     }
9028
9029     STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9030
9031     if ((pm->op_pmflags & PMf_CONTINUE)) {
9032         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9033     }
9034
9035     if (es) {
9036         SV * const repl = newSVpvs("");
9037
9038         PL_multi_end = 0;
9039         pm->op_pmflags |= PMf_EVAL;
9040         while (es-- > 0) {
9041             if (es)
9042                 sv_catpvs(repl, "eval ");
9043             else
9044                 sv_catpvs(repl, "do ");
9045         }
9046         sv_catpvs(repl, "{");
9047         sv_catsv(repl, PL_sublex_info.repl);
9048         sv_catpvs(repl, "}");
9049         SvEVALED_on(repl);
9050         SvREFCNT_dec(PL_sublex_info.repl);
9051         PL_sublex_info.repl = repl;
9052     }
9053     if (CopLINE(PL_curcop) != first_line) {
9054         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9055         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9056             CopLINE(PL_curcop) - first_line;
9057         CopLINE_set(PL_curcop, first_line);
9058     }
9059
9060     PL_lex_op = (OP*)pm;
9061     pl_yylval.ival = OP_SUBST;
9062     return s;
9063 }
9064
9065 STATIC char *
9066 S_scan_trans(pTHX_ char *start)
9067 {
9068     char* s;
9069     OP *o;
9070     U8 squash;
9071     U8 del;
9072     U8 complement;
9073     bool nondestruct = 0;
9074     char *t;
9075
9076     PERL_ARGS_ASSERT_SCAN_TRANS;
9077
9078     pl_yylval.ival = OP_NULL;
9079
9080     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9081     if (!s)
9082         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9083
9084     s = t;
9085
9086     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9087     if (!s) {
9088         SvREFCNT_dec_NN(PL_lex_stuff);
9089         PL_lex_stuff = NULL;
9090         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9091     }
9092
9093     complement = del = squash = 0;
9094     while (1) {
9095         switch (*s) {
9096         case 'c':
9097             complement = OPpTRANS_COMPLEMENT;
9098             break;
9099         case 'd':
9100             del = OPpTRANS_DELETE;
9101             break;
9102         case 's':
9103             squash = OPpTRANS_SQUASH;
9104             break;
9105         case 'r':
9106             nondestruct = 1;
9107             break;
9108         default:
9109             goto no_more;
9110         }
9111         s++;
9112     }
9113   no_more:
9114
9115     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9116     o->op_private &= ~OPpTRANS_ALL;
9117     o->op_private |= del|squash|complement|
9118       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9119       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9120
9121     PL_lex_op = o;
9122     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9123
9124
9125     return s;
9126 }
9127
9128 /* scan_heredoc
9129    Takes a pointer to the first < in <<FOO.
9130    Returns a pointer to the byte following <<FOO.
9131
9132    This function scans a heredoc, which involves different methods
9133    depending on whether we are in a string eval, quoted construct, etc.
9134    This is because PL_linestr could containing a single line of input, or
9135    a whole string being evalled, or the contents of the current quote-
9136    like operator.
9137
9138    The two basic methods are:
9139     - Steal lines from the input stream
9140     - Scan the heredoc in PL_linestr and remove it therefrom
9141
9142    In a file scope or filtered eval, the first method is used; in a
9143    string eval, the second.
9144
9145    In a quote-like operator, we have to choose between the two,
9146    depending on where we can find a newline.  We peek into outer lex-
9147    ing scopes until we find one with a newline in it.  If we reach the
9148    outermost lexing scope and it is a file, we use the stream method.
9149    Otherwise it is treated as an eval.
9150 */
9151
9152 STATIC char *
9153 S_scan_heredoc(pTHX_ char *s)
9154 {
9155     I32 op_type = OP_SCALAR;
9156     I32 len;
9157     SV *tmpstr;
9158     char term;
9159     char *d;
9160     char *e;
9161     char *peek;
9162     const bool infile = PL_rsfp || PL_parser->filtered;
9163     const line_t origline = CopLINE(PL_curcop);
9164     LEXSHARED *shared = PL_parser->lex_shared;
9165
9166     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9167
9168     s += 2;
9169     d = PL_tokenbuf + 1;
9170     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9171     *PL_tokenbuf = '\n';
9172     peek = s;
9173     while (SPACE_OR_TAB(*peek))
9174         peek++;
9175     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9176         s = peek;
9177         term = *s++;
9178         s = delimcpy(d, e, s, PL_bufend, term, &len);
9179         if (s == PL_bufend)
9180             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9181         d += len;
9182         s++;
9183     }
9184     else {
9185         if (*s == '\\')
9186             /* <<\FOO is equivalent to <<'FOO' */
9187             s++, term = '\'';
9188         else
9189             term = '"';
9190         if (!isWORDCHAR_lazy_if(s,UTF))
9191             deprecate("bare << to mean <<\"\"");
9192         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9193             if (d < e)
9194                 *d++ = *s;
9195         }
9196     }
9197     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9198         Perl_croak(aTHX_ "Delimiter for here document is too long");
9199     *d++ = '\n';
9200     *d = '\0';
9201     len = d - PL_tokenbuf;
9202
9203 #ifndef PERL_STRICT_CR
9204     d = strchr(s, '\r');
9205     if (d) {
9206         char * const olds = s;
9207         s = d;
9208         while (s < PL_bufend) {
9209             if (*s == '\r') {
9210                 *d++ = '\n';
9211                 if (*++s == '\n')
9212                     s++;
9213             }
9214             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9215                 *d++ = *s++;
9216                 s++;
9217             }
9218             else
9219                 *d++ = *s++;
9220         }
9221         *d = '\0';
9222         PL_bufend = d;
9223         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9224         s = olds;
9225     }
9226 #endif
9227
9228     tmpstr = newSV_type(SVt_PVIV);
9229     SvGROW(tmpstr, 80);
9230     if (term == '\'') {
9231         op_type = OP_CONST;
9232         SvIV_set(tmpstr, -1);
9233     }
9234     else if (term == '`') {
9235         op_type = OP_BACKTICK;
9236         SvIV_set(tmpstr, '\\');
9237     }
9238
9239     PL_multi_start = origline + 1 + PL_parser->herelines;
9240     PL_multi_open = PL_multi_close = '<';
9241     /* inside a string eval or quote-like operator */
9242     if (!infile || PL_lex_inwhat) {
9243         SV *linestr;
9244         char *bufend;
9245         char * const olds = s;
9246         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9247         /* These two fields are not set until an inner lexing scope is
9248            entered.  But we need them set here. */
9249         shared->ls_bufptr  = s;
9250         shared->ls_linestr = PL_linestr;
9251         if (PL_lex_inwhat)
9252           /* Look for a newline.  If the current buffer does not have one,
9253              peek into the line buffer of the parent lexing scope, going
9254              up as many levels as necessary to find one with a newline
9255              after bufptr.
9256            */
9257           while (!(s = (char *)memchr(
9258                     (void *)shared->ls_bufptr, '\n',
9259                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9260                 ))) {
9261             shared = shared->ls_prev;
9262             /* shared is only null if we have gone beyond the outermost
9263                lexing scope.  In a file, we will have broken out of the
9264                loop in the previous iteration.  In an eval, the string buf-
9265                fer ends with "\n;", so the while condition above will have
9266                evaluated to false.  So shared can never be null. */
9267             assert(shared);
9268             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9269                most lexing scope.  In a file, shared->ls_linestr at that
9270                level is just one line, so there is no body to steal. */
9271             if (infile && !shared->ls_prev) {
9272                 s = olds;
9273                 goto streaming;
9274             }
9275           }
9276         else {  /* eval */
9277             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9278             assert(s);
9279         }
9280         linestr = shared->ls_linestr;
9281         bufend = SvEND(linestr);
9282         d = s;
9283         while (s < bufend - len + 1 &&
9284           memNE(s,PL_tokenbuf,len) ) {
9285             if (*s++ == '\n')
9286                 ++PL_parser->herelines;
9287         }
9288         if (s >= bufend - len + 1) {
9289             goto interminable;
9290         }
9291         sv_setpvn(tmpstr,d+1,s-d);
9292         s += len - 1;
9293         /* the preceding stmt passes a newline */
9294         PL_parser->herelines++;
9295
9296         /* s now points to the newline after the heredoc terminator.
9297            d points to the newline before the body of the heredoc.
9298          */
9299
9300         /* We are going to modify linestr in place here, so set
9301            aside copies of the string if necessary for re-evals or
9302            (caller $n)[6]. */
9303         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9304            check shared->re_eval_str. */
9305         if (shared->re_eval_start || shared->re_eval_str) {
9306             /* Set aside the rest of the regexp */
9307             if (!shared->re_eval_str)
9308                 shared->re_eval_str =
9309                        newSVpvn(shared->re_eval_start,
9310                                 bufend - shared->re_eval_start);
9311             shared->re_eval_start -= s-d;
9312         }
9313         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9314             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9315             cx->blk_eval.cur_text == linestr)
9316         {
9317             cx->blk_eval.cur_text = newSVsv(linestr);
9318             SvSCREAM_on(cx->blk_eval.cur_text);
9319         }
9320         /* Copy everything from s onwards back to d. */
9321         Move(s,d,bufend-s + 1,char);
9322         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9323         /* Setting PL_bufend only applies when we have not dug deeper
9324            into other scopes, because sublex_done sets PL_bufend to
9325            SvEND(PL_linestr). */
9326         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9327         s = olds;
9328     }
9329     else
9330     {
9331       SV *linestr_save;
9332      streaming:
9333       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9334       term = PL_tokenbuf[1];
9335       len--;
9336       linestr_save = PL_linestr; /* must restore this afterwards */
9337       d = s;                     /* and this */
9338       PL_linestr = newSVpvs("");
9339       PL_bufend = SvPVX(PL_linestr);
9340       while (1) {
9341         PL_bufptr = PL_bufend;
9342         CopLINE_set(PL_curcop,
9343                     origline + 1 + PL_parser->herelines);
9344         if (!lex_next_chunk(LEX_NO_TERM)
9345          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9346             /* Simply freeing linestr_save might seem simpler here, as it
9347                does not matter what PL_linestr points to, since we are
9348                about to croak; but in a quote-like op, linestr_save
9349                will have been prospectively freed already, via
9350                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9351                restore PL_linestr. */
9352             SvREFCNT_dec_NN(PL_linestr);
9353             PL_linestr = linestr_save;
9354             goto interminable;
9355         }
9356         CopLINE_set(PL_curcop, origline);
9357         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9358             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9359             /* ^That should be enough to avoid this needing to grow:  */
9360             sv_catpvs(PL_linestr, "\n\0");
9361             assert(s == SvPVX(PL_linestr));
9362             PL_bufend = SvEND(PL_linestr);
9363         }
9364         s = PL_bufptr;
9365         PL_parser->herelines++;
9366         PL_last_lop = PL_last_uni = NULL;
9367 #ifndef PERL_STRICT_CR
9368         if (PL_bufend - PL_linestart >= 2) {
9369             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9370                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9371             {
9372                 PL_bufend[-2] = '\n';
9373                 PL_bufend--;
9374                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9375             }
9376             else if (PL_bufend[-1] == '\r')
9377                 PL_bufend[-1] = '\n';
9378         }
9379         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9380             PL_bufend[-1] = '\n';
9381 #endif
9382         if (*s == term && PL_bufend-s >= len
9383          && memEQ(s,PL_tokenbuf + 1,len)) {
9384             SvREFCNT_dec(PL_linestr);
9385             PL_linestr = linestr_save;
9386             PL_linestart = SvPVX(linestr_save);
9387             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9388             s = d;
9389             break;
9390         }
9391         else {
9392             sv_catsv(tmpstr,PL_linestr);
9393         }
9394       }
9395     }
9396     PL_multi_end = origline + PL_parser->herelines;
9397     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9398         SvPV_shrink_to_cur(tmpstr);
9399     }
9400     if (!IN_BYTES) {
9401         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9402             SvUTF8_on(tmpstr);
9403         else if (IN_ENCODING)
9404             sv_recode_to_utf8(tmpstr, _get_encoding());
9405     }
9406     PL_lex_stuff = tmpstr;
9407     pl_yylval.ival = op_type;
9408     return s;
9409
9410   interminable:
9411     SvREFCNT_dec(tmpstr);
9412     CopLINE_set(PL_curcop, origline);
9413     missingterm(PL_tokenbuf + 1);
9414 }
9415
9416 /* scan_inputsymbol
9417    takes: current position in input buffer
9418    returns: new position in input buffer
9419    side-effects: pl_yylval and lex_op are set.
9420
9421    This code handles:
9422
9423    <>           read from ARGV
9424    <<>>         read from ARGV without magic open
9425    <FH>         read from filehandle
9426    <pkg::FH>    read from package qualified filehandle
9427    <pkg'FH>     read from package qualified filehandle
9428    <$fh>        read from filehandle in $fh
9429    <*.h>        filename glob
9430
9431 */
9432
9433 STATIC char *
9434 S_scan_inputsymbol(pTHX_ char *start)
9435 {
9436     char *s = start;            /* current position in buffer */
9437     char *end;
9438     I32 len;
9439     bool nomagicopen = FALSE;
9440     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9441     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9442
9443     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9444
9445     end = strchr(s, '\n');
9446     if (!end)
9447         end = PL_bufend;
9448     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9449         nomagicopen = TRUE;
9450         *d = '\0';
9451         len = 0;
9452         s += 3;
9453     }
9454     else
9455         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
9456
9457     /* die if we didn't have space for the contents of the <>,
9458        or if it didn't end, or if we see a newline
9459     */
9460
9461     if (len >= (I32)sizeof PL_tokenbuf)
9462         Perl_croak(aTHX_ "Excessively long <> operator");
9463     if (s >= end)
9464         Perl_croak(aTHX_ "Unterminated <> operator");
9465
9466     s++;
9467
9468     /* check for <$fh>
9469        Remember, only scalar variables are interpreted as filehandles by
9470        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9471        treated as a glob() call.
9472        This code makes use of the fact that except for the $ at the front,
9473        a scalar variable and a filehandle look the same.
9474     */
9475     if (*d == '$' && d[1]) d++;
9476
9477     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9478     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9479         d += UTF ? UTF8SKIP(d) : 1;
9480
9481     /* If we've tried to read what we allow filehandles to look like, and
9482        there's still text left, then it must be a glob() and not a getline.
9483        Use scan_str to pull out the stuff between the <> and treat it
9484        as nothing more than a string.
9485     */
9486
9487     if (d - PL_tokenbuf != len) {
9488         pl_yylval.ival = OP_GLOB;
9489         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9490         if (!s)
9491            Perl_croak(aTHX_ "Glob not terminated");
9492         return s;
9493     }
9494     else {
9495         bool readline_overriden = FALSE;
9496         GV *gv_readline;
9497         /* we're in a filehandle read situation */
9498         d = PL_tokenbuf;
9499
9500         /* turn <> into <ARGV> */
9501         if (!len)
9502             Copy("ARGV",d,5,char);
9503
9504         /* Check whether readline() is overriden */
9505         if ((gv_readline = gv_override("readline",8)))
9506             readline_overriden = TRUE;
9507
9508         /* if <$fh>, create the ops to turn the variable into a
9509            filehandle
9510         */
9511         if (*d == '$') {
9512             /* try to find it in the pad for this block, otherwise find
9513                add symbol table ops
9514             */
9515             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9516             if (tmp != NOT_IN_PAD) {
9517                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9518                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9519                     HEK * const stashname = HvNAME_HEK(stash);
9520                     SV * const sym = sv_2mortal(newSVhek(stashname));
9521                     sv_catpvs(sym, "::");
9522                     sv_catpv(sym, d+1);
9523                     d = SvPVX(sym);
9524                     goto intro_sym;
9525                 }
9526                 else {
9527                     OP * const o = newOP(OP_PADSV, 0);
9528                     o->op_targ = tmp;
9529                     PL_lex_op = readline_overriden
9530                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9531                                 op_append_elem(OP_LIST, o,
9532                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9533                         : (OP*)newUNOP(OP_READLINE, 0, o);
9534                 }
9535             }
9536             else {
9537                 GV *gv;
9538                 ++d;
9539               intro_sym:
9540                 gv = gv_fetchpv(d,
9541                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9542                                 SVt_PV);
9543                 PL_lex_op = readline_overriden
9544                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9545                             op_append_elem(OP_LIST,
9546                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9547                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9548                     : (OP*)newUNOP(OP_READLINE, 0,
9549                             newUNOP(OP_RV2SV, 0,
9550                                 newGVOP(OP_GV, 0, gv)));
9551             }
9552             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9553             pl_yylval.ival = OP_NULL;
9554         }
9555
9556         /* If it's none of the above, it must be a literal filehandle
9557            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9558         else {
9559             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9560             PL_lex_op = readline_overriden
9561                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9562                         op_append_elem(OP_LIST,
9563                             newGVOP(OP_GV, 0, gv),
9564                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9565                 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9566             pl_yylval.ival = OP_NULL;
9567         }
9568     }
9569
9570     return s;
9571 }
9572
9573
9574 /* scan_str
9575    takes:
9576         start                   position in buffer
9577         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9578                                 only if they are of the open/close form
9579         keep_delims             preserve the delimiters around the string
9580         re_reparse              compiling a run-time /(?{})/:
9581                                    collapse // to /,  and skip encoding src
9582         delimp                  if non-null, this is set to the position of
9583                                 the closing delimiter, or just after it if
9584                                 the closing and opening delimiters differ
9585                                 (i.e., the opening delimiter of a substitu-
9586                                 tion replacement)
9587    returns: position to continue reading from buffer
9588    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9589         updates the read buffer.
9590
9591    This subroutine pulls a string out of the input.  It is called for:
9592         q               single quotes           q(literal text)
9593         '               single quotes           'literal text'
9594         qq              double quotes           qq(interpolate $here please)
9595         "               double quotes           "interpolate $here please"
9596         qx              backticks               qx(/bin/ls -l)
9597         `               backticks               `/bin/ls -l`
9598         qw              quote words             @EXPORT_OK = qw( func() $spam )
9599         m//             regexp match            m/this/
9600         s///            regexp substitute       s/this/that/
9601         tr///           string transliterate    tr/this/that/
9602         y///            string transliterate    y/this/that/
9603         ($*@)           sub prototypes          sub foo ($)
9604         (stuff)         sub attr parameters     sub foo : attr(stuff)
9605         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9606
9607    In most of these cases (all but <>, patterns and transliterate)
9608    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9609    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9610    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9611    calls scan_str().
9612
9613    It skips whitespace before the string starts, and treats the first
9614    character as the delimiter.  If the delimiter is one of ([{< then
9615    the corresponding "close" character )]}> is used as the closing
9616    delimiter.  It allows quoting of delimiters, and if the string has
9617    balanced delimiters ([{<>}]) it allows nesting.
9618
9619    On success, the SV with the resulting string is put into lex_stuff or,
9620    if that is already non-NULL, into lex_repl. The second case occurs only
9621    when parsing the RHS of the special constructs s/// and tr/// (y///).
9622    For convenience, the terminating delimiter character is stuffed into
9623    SvIVX of the SV.
9624 */
9625
9626 STATIC char *
9627 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9628                  char **delimp
9629     )
9630 {
9631     SV *sv;                     /* scalar value: string */
9632     const char *tmps;           /* temp string, used for delimiter matching */
9633     char *s = start;            /* current position in the buffer */
9634     char term;                  /* terminating character */
9635     char *to;                   /* current position in the sv's data */
9636     I32 brackets = 1;           /* bracket nesting level */
9637     bool has_utf8 = FALSE;      /* is there any utf8 content? */
9638     I32 termcode;               /* terminating char. code */
9639     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
9640     STRLEN termlen;             /* length of terminating string */
9641     int last_off = 0;           /* last position for nesting bracket */
9642     line_t herelines;
9643
9644     PERL_ARGS_ASSERT_SCAN_STR;
9645
9646     /* skip space before the delimiter */
9647     if (isSPACE(*s)) {
9648         s = skipspace(s);
9649     }
9650
9651     /* mark where we are, in case we need to report errors */
9652     CLINE;
9653
9654     /* after skipping whitespace, the next character is the terminator */
9655     term = *s;
9656     if (!UTF) {
9657         termcode = termstr[0] = term;
9658         termlen = 1;
9659     }
9660     else {
9661         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9662         Copy(s, termstr, termlen, U8);
9663         if (!UTF8_IS_INVARIANT(term))
9664             has_utf8 = TRUE;
9665     }
9666
9667     /* mark where we are */
9668     PL_multi_start = CopLINE(PL_curcop);
9669     PL_multi_open = term;
9670     herelines = PL_parser->herelines;
9671
9672     /* find corresponding closing delimiter */
9673     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9674         termcode = termstr[0] = term = tmps[5];
9675
9676     PL_multi_close = term;
9677
9678     if (PL_multi_open == PL_multi_close) {
9679         keep_bracketed_quoted = FALSE;
9680     }
9681
9682     /* create a new SV to hold the contents.  79 is the SV's initial length.
9683        What a random number. */
9684     sv = newSV_type(SVt_PVIV);
9685     SvGROW(sv, 80);
9686     SvIV_set(sv, termcode);
9687     (void)SvPOK_only(sv);               /* validate pointer */
9688
9689     /* move past delimiter and try to read a complete string */
9690     if (keep_delims)
9691         sv_catpvn(sv, s, termlen);
9692     s += termlen;
9693     for (;;) {
9694         if (IN_ENCODING && !UTF && !re_reparse) {
9695             bool cont = TRUE;
9696
9697             while (cont) {
9698                 int offset = s - SvPVX_const(PL_linestr);
9699                 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
9700                                            &offset, (char*)termstr, termlen);
9701                 const char *ns;
9702                 char *svlast;
9703
9704                 if (SvIsCOW(PL_linestr)) {
9705                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9706                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9707                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9708                     char *buf = SvPVX(PL_linestr);
9709                     bufend_pos = PL_parser->bufend - buf;
9710                     bufptr_pos = PL_parser->bufptr - buf;
9711                     oldbufptr_pos = PL_parser->oldbufptr - buf;
9712                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9713                     linestart_pos = PL_parser->linestart - buf;
9714                     last_uni_pos = PL_parser->last_uni
9715                         ? PL_parser->last_uni - buf
9716                         : 0;
9717                     last_lop_pos = PL_parser->last_lop
9718                         ? PL_parser->last_lop - buf
9719                         : 0;
9720                     re_eval_start_pos =
9721                         PL_parser->lex_shared->re_eval_start ?
9722                             PL_parser->lex_shared->re_eval_start - buf : 0;
9723                     s_pos = s - buf;
9724
9725                     sv_force_normal(PL_linestr);
9726
9727                     buf = SvPVX(PL_linestr);
9728                     PL_parser->bufend = buf + bufend_pos;
9729                     PL_parser->bufptr = buf + bufptr_pos;
9730                     PL_parser->oldbufptr = buf + oldbufptr_pos;
9731                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9732                     PL_parser->linestart = buf + linestart_pos;
9733                     if (PL_parser->last_uni)
9734                         PL_parser->last_uni = buf + last_uni_pos;
9735                     if (PL_parser->last_lop)
9736                         PL_parser->last_lop = buf + last_lop_pos;
9737                     if (PL_parser->lex_shared->re_eval_start)
9738                         PL_parser->lex_shared->re_eval_start  =
9739                             buf + re_eval_start_pos;
9740                     s = buf + s_pos;
9741                 }
9742                 ns = SvPVX_const(PL_linestr) + offset;
9743                 svlast = SvEND(sv) - 1;
9744
9745                 for (; s < ns; s++) {
9746                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9747                         COPLINE_INC_WITH_HERELINES;
9748                 }
9749                 if (!found)
9750                     goto read_more_line;
9751                 else {
9752                     /* handle quoted delimiters */
9753                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9754                         const char *t;
9755                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9756                             t--;
9757                         if ((svlast-1 - t) % 2) {
9758                             if (!keep_bracketed_quoted) {
9759                                 *(svlast-1) = term;
9760                                 *svlast = '\0';
9761                                 SvCUR_set(sv, SvCUR(sv) - 1);
9762                             }
9763                             continue;
9764                         }
9765                     }
9766                     if (PL_multi_open == PL_multi_close) {
9767                         cont = FALSE;
9768                     }
9769                     else {
9770                         const char *t;
9771                         char *w;
9772                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9773                             /* At here, all closes are "was quoted" one,
9774                                so we don't check PL_multi_close. */
9775                             if (*t == '\\') {
9776                                 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9777                                     t++;
9778                                 else
9779                                     *w++ = *t++;
9780                             }
9781                             else if (*t == PL_multi_open)
9782                                 brackets++;
9783
9784                             *w = *t;
9785                         }
9786                         if (w < t) {
9787                             *w++ = term;
9788                             *w = '\0';
9789                             SvCUR_set(sv, w - SvPVX_const(sv));
9790                         }
9791                         last_off = w - SvPVX(sv);
9792                         if (--brackets <= 0)
9793                             cont = FALSE;
9794                     }
9795                 }
9796             }
9797             if (!keep_delims) {
9798                 SvCUR_set(sv, SvCUR(sv) - 1);
9799                 *SvEND(sv) = '\0';
9800             }
9801             break;
9802         }
9803
9804         /* extend sv if need be */
9805         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9806         /* set 'to' to the next character in the sv's string */
9807         to = SvPVX(sv)+SvCUR(sv);
9808
9809         /* if open delimiter is the close delimiter read unbridle */
9810         if (PL_multi_open == PL_multi_close) {
9811             for (; s < PL_bufend; s++,to++) {
9812                 /* embedded newlines increment the current line number */
9813                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9814                     COPLINE_INC_WITH_HERELINES;
9815                 /* handle quoted delimiters */
9816                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9817                     if (!keep_bracketed_quoted
9818                         && (s[1] == term
9819                             || (re_reparse && s[1] == '\\'))
9820                     )
9821                         s++;
9822                     else /* any other quotes are simply copied straight through */
9823                         *to++ = *s++;
9824                 }
9825                 /* terminate when run out of buffer (the for() condition), or
9826                    have found the terminator */
9827                 else if (*s == term) {
9828                     if (termlen == 1)
9829                         break;
9830                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9831                         break;
9832                 }
9833                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9834                     has_utf8 = TRUE;
9835                 *to = *s;
9836             }
9837         }
9838
9839         /* if the terminator isn't the same as the start character (e.g.,
9840            matched brackets), we have to allow more in the quoting, and
9841            be prepared for nested brackets.
9842         */
9843         else {
9844             /* read until we run out of string, or we find the terminator */
9845             for (; s < PL_bufend; s++,to++) {
9846                 /* embedded newlines increment the line count */
9847                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9848                     COPLINE_INC_WITH_HERELINES;
9849                 /* backslashes can escape the open or closing characters */
9850                 if (*s == '\\' && s+1 < PL_bufend) {
9851                     if (!keep_bracketed_quoted &&
9852                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9853                     {
9854                         s++;
9855                     }
9856                     else
9857                         *to++ = *s++;
9858                 }
9859                 /* allow nested opens and closes */
9860                 else if (*s == PL_multi_close && --brackets <= 0)
9861                     break;
9862                 else if (*s == PL_multi_open)
9863                     brackets++;
9864                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9865                     has_utf8 = TRUE;
9866                 *to = *s;
9867             }
9868         }
9869         /* terminate the copied string and update the sv's end-of-string */
9870         *to = '\0';
9871         SvCUR_set(sv, to - SvPVX_const(sv));
9872
9873         /*
9874          * this next chunk reads more into the buffer if we're not done yet
9875          */
9876
9877         if (s < PL_bufend)
9878             break;              /* handle case where we are done yet :-) */
9879
9880 #ifndef PERL_STRICT_CR
9881         if (to - SvPVX_const(sv) >= 2) {
9882             if ((to[-2] == '\r' && to[-1] == '\n') ||
9883                 (to[-2] == '\n' && to[-1] == '\r'))
9884             {
9885                 to[-2] = '\n';
9886                 to--;
9887                 SvCUR_set(sv, to - SvPVX_const(sv));
9888             }
9889             else if (to[-1] == '\r')
9890                 to[-1] = '\n';
9891         }
9892         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9893             to[-1] = '\n';
9894 #endif
9895
9896      read_more_line:
9897         /* if we're out of file, or a read fails, bail and reset the current
9898            line marker so we can report where the unterminated string began
9899         */
9900         COPLINE_INC_WITH_HERELINES;
9901         PL_bufptr = PL_bufend;
9902         if (!lex_next_chunk(0)) {
9903             sv_free(sv);
9904             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9905             return NULL;
9906         }
9907         s = PL_bufptr;
9908     }
9909
9910     /* at this point, we have successfully read the delimited string */
9911
9912     if (!IN_ENCODING || UTF || re_reparse) {
9913
9914         if (keep_delims)
9915             sv_catpvn(sv, s, termlen);
9916         s += termlen;
9917     }
9918     if (has_utf8 || (IN_ENCODING && !re_reparse))
9919         SvUTF8_on(sv);
9920
9921     PL_multi_end = CopLINE(PL_curcop);
9922     CopLINE_set(PL_curcop, PL_multi_start);
9923     PL_parser->herelines = herelines;
9924
9925     /* if we allocated too much space, give some back */
9926     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9927         SvLEN_set(sv, SvCUR(sv) + 1);
9928         SvPV_renew(sv, SvLEN(sv));
9929     }
9930
9931     /* decide whether this is the first or second quoted string we've read
9932        for this op
9933     */
9934
9935     if (PL_lex_stuff)
9936         PL_sublex_info.repl = sv;
9937     else
9938         PL_lex_stuff = sv;
9939     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9940     return s;
9941 }
9942
9943 /*
9944   scan_num
9945   takes: pointer to position in buffer
9946   returns: pointer to new position in buffer
9947   side-effects: builds ops for the constant in pl_yylval.op
9948
9949   Read a number in any of the formats that Perl accepts:
9950
9951   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9952   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9953   0b[01](_?[01])*                                       binary integers
9954   0[0-7](_?[0-7])*                                      octal integers
9955   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
9956   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
9957
9958   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9959   thing it reads.
9960
9961   If it reads a number without a decimal point or an exponent, it will
9962   try converting the number to an integer and see if it can do so
9963   without loss of precision.
9964 */
9965
9966 char *
9967 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9968 {
9969     const char *s = start;      /* current position in buffer */
9970     char *d;                    /* destination in temp buffer */
9971     char *e;                    /* end of temp buffer */
9972     NV nv;                              /* number read, as a double */
9973     SV *sv = NULL;                      /* place to put the converted number */
9974     bool floatit;                       /* boolean: int or float? */
9975     const char *lastub = NULL;          /* position of last underbar */
9976     static const char* const number_too_long = "Number too long";
9977     /* Hexadecimal floating point.
9978      *
9979      * In many places (where we have quads and NV is IEEE 754 double)
9980      * we can fit the mantissa bits of a NV into an unsigned quad.
9981      * (Note that UVs might not be quads even when we have quads.)
9982      * This will not work everywhere, though (either no quads, or
9983      * using long doubles), in which case we have to resort to NV,
9984      * which will probably mean horrible loss of precision due to
9985      * multiple fp operations. */
9986     bool hexfp = FALSE;
9987     int total_bits = 0;
9988 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
9989 #  define HEXFP_UQUAD
9990     Uquad_t hexfp_uquad = 0;
9991     int hexfp_frac_bits = 0;
9992 #else
9993 #  define HEXFP_NV
9994     NV hexfp_nv = 0.0;
9995 #endif
9996     NV hexfp_mult = 1.0;
9997     UV high_non_zero = 0; /* highest digit */
9998
9999     PERL_ARGS_ASSERT_SCAN_NUM;
10000
10001     /* We use the first character to decide what type of number this is */
10002
10003     switch (*s) {
10004     default:
10005         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10006
10007     /* if it starts with a 0, it could be an octal number, a decimal in
10008        0.13 disguise, or a hexadecimal number, or a binary number. */
10009     case '0':
10010         {
10011           /* variables:
10012              u          holds the "number so far"
10013              shift      the power of 2 of the base
10014                         (hex == 4, octal == 3, binary == 1)
10015              overflowed was the number more than we can hold?
10016
10017              Shift is used when we add a digit.  It also serves as an "are
10018              we in octal/hex/binary?" indicator to disallow hex characters
10019              when in octal mode.
10020            */
10021             NV n = 0.0;
10022             UV u = 0;
10023             I32 shift;
10024             bool overflowed = FALSE;
10025             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10026             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10027             static const char* const bases[5] =
10028               { "", "binary", "", "octal", "hexadecimal" };
10029             static const char* const Bases[5] =
10030               { "", "Binary", "", "Octal", "Hexadecimal" };
10031             static const char* const maxima[5] =
10032               { "",
10033                 "0b11111111111111111111111111111111",
10034                 "",
10035                 "037777777777",
10036                 "0xffffffff" };
10037             const char *base, *Base, *max;
10038
10039             /* check for hex */
10040             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10041                 shift = 4;
10042                 s += 2;
10043                 just_zero = FALSE;
10044             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10045                 shift = 1;
10046                 s += 2;
10047                 just_zero = FALSE;
10048             }
10049             /* check for a decimal in disguise */
10050             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10051                 goto decimal;
10052             /* so it must be octal */
10053             else {
10054                 shift = 3;
10055                 s++;
10056             }
10057
10058             if (*s == '_') {
10059                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10060                                "Misplaced _ in number");
10061                lastub = s++;
10062             }
10063
10064             base = bases[shift];
10065             Base = Bases[shift];
10066             max  = maxima[shift];
10067
10068             /* read the rest of the number */
10069             for (;;) {
10070                 /* x is used in the overflow test,
10071                    b is the digit we're adding on. */
10072                 UV x, b;
10073
10074                 switch (*s) {
10075
10076                 /* if we don't mention it, we're done */
10077                 default:
10078                     goto out;
10079
10080                 /* _ are ignored -- but warned about if consecutive */
10081                 case '_':
10082                     if (lastub && s == lastub + 1)
10083                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10084                                        "Misplaced _ in number");
10085                     lastub = s++;
10086                     break;
10087
10088                 /* 8 and 9 are not octal */
10089                 case '8': case '9':
10090                     if (shift == 3)
10091                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10092                     /* FALLTHROUGH */
10093
10094                 /* octal digits */
10095                 case '2': case '3': case '4':
10096                 case '5': case '6': case '7':
10097                     if (shift == 1)
10098                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10099                     /* FALLTHROUGH */
10100
10101                 case '0': case '1':
10102                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10103                     goto digit;
10104
10105                 /* hex digits */
10106                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10107                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10108                     /* make sure they said 0x */
10109                     if (shift != 4)
10110                         goto out;
10111                     b = (*s++ & 7) + 9;
10112
10113                     /* Prepare to put the digit we have onto the end
10114                        of the number so far.  We check for overflows.
10115                     */
10116
10117                   digit:
10118                     just_zero = FALSE;
10119                     if (!overflowed) {
10120                         x = u << shift; /* make room for the digit */
10121
10122                         total_bits += shift;
10123
10124                         if ((x >> shift) != u
10125                             && !(PL_hints & HINT_NEW_BINARY)) {
10126                             overflowed = TRUE;
10127                             n = (NV) u;
10128                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10129                                              "Integer overflow in %s number",
10130                                              base);
10131                         } else
10132                             u = x | b;          /* add the digit to the end */
10133                     }
10134                     if (overflowed) {
10135                         n *= nvshift[shift];
10136                         /* If an NV has not enough bits in its
10137                          * mantissa to represent an UV this summing of
10138                          * small low-order numbers is a waste of time
10139                          * (because the NV cannot preserve the
10140                          * low-order bits anyway): we could just
10141                          * remember when did we overflow and in the
10142                          * end just multiply n by the right
10143                          * amount. */
10144                         n += (NV) b;
10145                     }
10146
10147                     if (high_non_zero == 0 && b > 0)
10148                         high_non_zero = b;
10149
10150                     /* this could be hexfp, but peek ahead
10151                      * to avoid matching ".." */
10152                     if (UNLIKELY(HEXFP_PEEK(s))) {
10153                         goto out;
10154                     }
10155
10156                     break;
10157                 }
10158             }
10159
10160           /* if we get here, we had success: make a scalar value from
10161              the number.
10162           */
10163           out:
10164
10165             /* final misplaced underbar check */
10166             if (s[-1] == '_') {
10167                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10168             }
10169
10170             if (UNLIKELY(HEXFP_PEEK(s))) {
10171                 /* Do sloppy (on the underbars) but quick detection
10172                  * (and value construction) for hexfp, the decimal
10173                  * detection will shortly be more thorough with the
10174                  * underbar checks. */
10175                 const char* h = s;
10176 #ifdef HEXFP_UQUAD
10177                 hexfp_uquad = u;
10178 #else /* HEXFP_NV */
10179                 hexfp_nv = u;
10180 #endif
10181                 if (*h == '.') {
10182 #ifdef HEXFP_NV
10183                     NV mult = 1 / 16.0;
10184 #endif
10185                     h++;
10186                     while (isXDIGIT(*h) || *h == '_') {
10187                         if (isXDIGIT(*h)) {
10188                             U8 b = XDIGIT_VALUE(*h);
10189                             total_bits += shift;
10190 #ifdef HEXFP_UQUAD
10191                             hexfp_uquad <<= shift;
10192                             hexfp_uquad |= b;
10193                             hexfp_frac_bits += shift;
10194 #else /* HEXFP_NV */
10195                             hexfp_nv += b * mult;
10196                             mult /= 16.0;
10197 #endif
10198                         }
10199                         h++;
10200                     }
10201                 }
10202
10203                 if (total_bits >= 4) {
10204                     if (high_non_zero < 0x8)
10205                         total_bits--;
10206                     if (high_non_zero < 0x4)
10207                         total_bits--;
10208                     if (high_non_zero < 0x2)
10209                         total_bits--;
10210                 }
10211
10212                 if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
10213                     bool negexp = FALSE;
10214                     h++;
10215                     if (*h == '+')
10216                         h++;
10217                     else if (*h == '-') {
10218                         negexp = TRUE;
10219                         h++;
10220                     }
10221                     if (isDIGIT(*h)) {
10222                         I32 hexfp_exp = 0;
10223                         while (isDIGIT(*h) || *h == '_') {
10224                             if (isDIGIT(*h)) {
10225                                 hexfp_exp *= 10;
10226                                 hexfp_exp += *h - '0';
10227 #ifdef NV_MIN_EXP
10228                                 if (negexp &&
10229                                     -hexfp_exp < NV_MIN_EXP - 1) {
10230                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10231                                                    "Hexadecimal float: exponent underflow");
10232 #endif
10233                                     break;
10234                                 }
10235                                 else {
10236 #ifdef NV_MAX_EXP
10237                                     if (!negexp &&
10238                                         hexfp_exp > NV_MAX_EXP - 1) {
10239                                         Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10240                                                    "Hexadecimal float: exponent overflow");
10241                                         break;
10242                                     }
10243 #endif
10244                                 }
10245                             }
10246                             h++;
10247                         }
10248                         if (negexp)
10249                             hexfp_exp = -hexfp_exp;
10250 #ifdef HEXFP_UQUAD
10251                         hexfp_exp -= hexfp_frac_bits;
10252 #endif
10253                         hexfp_mult = pow(2.0, hexfp_exp);
10254                         hexfp = TRUE;
10255                         goto decimal;
10256                     }
10257                 }
10258             }
10259
10260             if (overflowed) {
10261                 if (n > 4294967295.0)
10262                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10263                                    "%s number > %s non-portable",
10264                                    Base, max);
10265                 sv = newSVnv(n);
10266             }
10267             else {
10268 #if UVSIZE > 4
10269                 if (u > 0xffffffff)
10270                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10271                                    "%s number > %s non-portable",
10272                                    Base, max);
10273 #endif
10274                 sv = newSVuv(u);
10275             }
10276             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10277                 sv = new_constant(start, s - start, "integer",
10278                                   sv, NULL, NULL, 0);
10279             else if (PL_hints & HINT_NEW_BINARY)
10280                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10281         }
10282         break;
10283
10284     /*
10285       handle decimal numbers.
10286       we're also sent here when we read a 0 as the first digit
10287     */
10288     case '1': case '2': case '3': case '4': case '5':
10289     case '6': case '7': case '8': case '9': case '.':
10290       decimal:
10291         d = PL_tokenbuf;
10292         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10293         floatit = FALSE;
10294         if (hexfp) {
10295             floatit = TRUE;
10296             *d++ = '0';
10297             *d++ = 'x';
10298             s = start + 2;
10299         }
10300
10301         /* read next group of digits and _ and copy into d */
10302         while (isDIGIT(*s) || *s == '_' ||
10303                UNLIKELY(hexfp && isXDIGIT(*s))) {
10304             /* skip underscores, checking for misplaced ones
10305                if -w is on
10306             */
10307             if (*s == '_') {
10308                 if (lastub && s == lastub + 1)
10309                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10310                                    "Misplaced _ in number");
10311                 lastub = s++;
10312             }
10313             else {
10314                 /* check for end of fixed-length buffer */
10315                 if (d >= e)
10316                     Perl_croak(aTHX_ "%s", number_too_long);
10317                 /* if we're ok, copy the character */
10318                 *d++ = *s++;
10319             }
10320         }
10321
10322         /* final misplaced underbar check */
10323         if (lastub && s == lastub + 1) {
10324             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10325         }
10326
10327         /* read a decimal portion if there is one.  avoid
10328            3..5 being interpreted as the number 3. followed
10329            by .5
10330         */
10331         if (*s == '.' && s[1] != '.') {
10332             floatit = TRUE;
10333             *d++ = *s++;
10334
10335             if (*s == '_') {
10336                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10337                                "Misplaced _ in number");
10338                 lastub = s;
10339             }
10340
10341             /* copy, ignoring underbars, until we run out of digits.
10342             */
10343             for (; isDIGIT(*s) || *s == '_' ||
10344                      UNLIKELY(hexfp && isXDIGIT(*s));
10345                  s++) {
10346                 /* fixed length buffer check */
10347                 if (d >= e)
10348                     Perl_croak(aTHX_ "%s", number_too_long);
10349                 if (*s == '_') {
10350                    if (lastub && s == lastub + 1)
10351                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10352                                       "Misplaced _ in number");
10353                    lastub = s;
10354                 }
10355                 else
10356                     *d++ = *s;
10357             }
10358             /* fractional part ending in underbar? */
10359             if (s[-1] == '_') {
10360                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10361                                "Misplaced _ in number");
10362             }
10363             if (*s == '.' && isDIGIT(s[1])) {
10364                 /* oops, it's really a v-string, but without the "v" */
10365                 s = start;
10366                 goto vstring;
10367             }
10368         }
10369
10370         /* read exponent part, if present */
10371         if ((isALPHA_FOLD_EQ(*s, 'e')
10372               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10373             && strchr("+-0123456789_", s[1]))
10374         {
10375             floatit = TRUE;
10376
10377             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10378                ditto for p (hexfloats) */
10379             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10380                 /* At least some Mach atof()s don't grok 'E' */
10381                 *d++ = 'e';
10382             }
10383             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10384                 *d++ = 'p';
10385             }
10386
10387             s++;
10388
10389
10390             /* stray preinitial _ */
10391             if (*s == '_') {
10392                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10393                                "Misplaced _ in number");
10394                 lastub = s++;
10395             }
10396
10397             /* allow positive or negative exponent */
10398             if (*s == '+' || *s == '-')
10399                 *d++ = *s++;
10400
10401             /* stray initial _ */
10402             if (*s == '_') {
10403                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10404                                "Misplaced _ in number");
10405                 lastub = s++;
10406             }
10407
10408             /* read digits of exponent */
10409             while (isDIGIT(*s) || *s == '_') {
10410                 if (isDIGIT(*s)) {
10411                     if (d >= e)
10412                         Perl_croak(aTHX_ "%s", number_too_long);
10413                     *d++ = *s++;
10414                 }
10415                 else {
10416                    if (((lastub && s == lastub + 1) ||
10417                         (!isDIGIT(s[1]) && s[1] != '_')))
10418                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10419                                       "Misplaced _ in number");
10420                    lastub = s++;
10421                 }
10422             }
10423         }
10424
10425
10426         /*
10427            We try to do an integer conversion first if no characters
10428            indicating "float" have been found.
10429          */
10430
10431         if (!floatit) {
10432             UV uv;
10433             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10434
10435             if (flags == IS_NUMBER_IN_UV) {
10436               if (uv <= IV_MAX)
10437                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10438               else
10439                 sv = newSVuv(uv);
10440             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10441               if (uv <= (UV) IV_MIN)
10442                 sv = newSViv(-(IV)uv);
10443               else
10444                 floatit = TRUE;
10445             } else
10446               floatit = TRUE;
10447         }
10448         if (floatit) {
10449             STORE_NUMERIC_LOCAL_SET_STANDARD();
10450             /* terminate the string */
10451             *d = '\0';
10452             if (UNLIKELY(hexfp)) {
10453 #  ifdef NV_MANT_DIG
10454                 if (total_bits > NV_MANT_DIG)
10455                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10456                                    "Hexadecimal float: mantissa overflow");
10457 #  endif
10458 #ifdef HEXFP_UQUAD
10459                 nv = hexfp_uquad * hexfp_mult;
10460 #else /* HEXFP_NV */
10461                 nv = hexfp_nv * hexfp_mult;
10462 #endif
10463             } else {
10464                 nv = Atof(PL_tokenbuf);
10465             }
10466             RESTORE_NUMERIC_LOCAL();
10467             sv = newSVnv(nv);
10468         }
10469
10470         if ( floatit
10471              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10472             const char *const key = floatit ? "float" : "integer";
10473             const STRLEN keylen = floatit ? 5 : 7;
10474             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10475                                 key, keylen, sv, NULL, NULL, 0);
10476         }
10477         break;
10478
10479     /* if it starts with a v, it could be a v-string */
10480     case 'v':
10481     vstring:
10482                 sv = newSV(5); /* preallocate storage space */
10483                 ENTER_with_name("scan_vstring");
10484                 SAVEFREESV(sv);
10485                 s = scan_vstring(s, PL_bufend, sv);
10486                 SvREFCNT_inc_simple_void_NN(sv);
10487                 LEAVE_with_name("scan_vstring");
10488         break;
10489     }
10490
10491     /* make the op for the constant and return */
10492
10493     if (sv)
10494         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10495     else
10496         lvalp->opval = NULL;
10497
10498     return (char *)s;
10499 }
10500
10501 STATIC char *
10502 S_scan_formline(pTHX_ char *s)
10503 {
10504     char *eol;
10505     char *t;
10506     SV * const stuff = newSVpvs("");
10507     bool needargs = FALSE;
10508     bool eofmt = FALSE;
10509
10510     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10511
10512     while (!needargs) {
10513         if (*s == '.') {
10514             t = s+1;
10515 #ifdef PERL_STRICT_CR
10516             while (SPACE_OR_TAB(*t))
10517                 t++;
10518 #else
10519             while (SPACE_OR_TAB(*t) || *t == '\r')
10520                 t++;
10521 #endif
10522             if (*t == '\n' || t == PL_bufend) {
10523                 eofmt = TRUE;
10524                 break;
10525             }
10526         }
10527         eol = (char *) memchr(s,'\n',PL_bufend-s);
10528         if (!eol++)
10529                 eol = PL_bufend;
10530         if (*s != '#') {
10531             for (t = s; t < eol; t++) {
10532                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10533                     needargs = FALSE;
10534                     goto enough;        /* ~~ must be first line in formline */
10535                 }
10536                 if (*t == '@' || *t == '^')
10537                     needargs = TRUE;
10538             }
10539             if (eol > s) {
10540                 sv_catpvn(stuff, s, eol-s);
10541 #ifndef PERL_STRICT_CR
10542                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10543                     char *end = SvPVX(stuff) + SvCUR(stuff);
10544                     end[-2] = '\n';
10545                     end[-1] = '\0';
10546                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10547                 }
10548 #endif
10549             }
10550             else
10551               break;
10552         }
10553         s = (char*)eol;
10554         if ((PL_rsfp || PL_parser->filtered)
10555          && PL_parser->form_lex_state == LEX_NORMAL) {
10556             bool got_some;
10557             PL_bufptr = PL_bufend;
10558             COPLINE_INC_WITH_HERELINES;
10559             got_some = lex_next_chunk(0);
10560             CopLINE_dec(PL_curcop);
10561             s = PL_bufptr;
10562             if (!got_some)
10563                 break;
10564         }
10565         incline(s);
10566     }
10567   enough:
10568     if (!SvCUR(stuff) || needargs)
10569         PL_lex_state = PL_parser->form_lex_state;
10570     if (SvCUR(stuff)) {
10571         PL_expect = XSTATE;
10572         if (needargs) {
10573             const char *s2 = s;
10574             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10575                 || *s2 == '\v')
10576                 s2++;
10577             if (*s2 == '{') {
10578                 PL_expect = XTERMBLOCK;
10579                 NEXTVAL_NEXTTOKE.ival = 0;
10580                 force_next(DO);
10581             }
10582             NEXTVAL_NEXTTOKE.ival = 0;
10583             force_next(FORMLBRACK);
10584         }
10585         if (!IN_BYTES) {
10586             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10587                 SvUTF8_on(stuff);
10588             else if (IN_ENCODING)
10589                 sv_recode_to_utf8(stuff, _get_encoding());
10590         }
10591         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10592         force_next(THING);
10593     }
10594     else {
10595         SvREFCNT_dec(stuff);
10596         if (eofmt)
10597             PL_lex_formbrack = 0;
10598     }
10599     return s;
10600 }
10601
10602 I32
10603 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10604 {
10605     const I32 oldsavestack_ix = PL_savestack_ix;
10606     CV* const outsidecv = PL_compcv;
10607
10608     SAVEI32(PL_subline);
10609     save_item(PL_subname);
10610     SAVESPTR(PL_compcv);
10611
10612     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10613     CvFLAGS(PL_compcv) |= flags;
10614
10615     PL_subline = CopLINE(PL_curcop);
10616     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10617     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10618     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10619     if (outsidecv && CvPADLIST(outsidecv))
10620         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10621
10622     return oldsavestack_ix;
10623 }
10624
10625 static int
10626 S_yywarn(pTHX_ const char *const s, U32 flags)
10627 {
10628     PERL_ARGS_ASSERT_YYWARN;
10629
10630     PL_in_eval |= EVAL_WARNONLY;
10631     yyerror_pv(s, flags);
10632     return 0;
10633 }
10634
10635 int
10636 Perl_yyerror(pTHX_ const char *const s)
10637 {
10638     PERL_ARGS_ASSERT_YYERROR;
10639     return yyerror_pvn(s, strlen(s), 0);
10640 }
10641
10642 int
10643 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10644 {
10645     PERL_ARGS_ASSERT_YYERROR_PV;
10646     return yyerror_pvn(s, strlen(s), flags);
10647 }
10648
10649 int
10650 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10651 {
10652     const char *context = NULL;
10653     int contlen = -1;
10654     SV *msg;
10655     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10656     int yychar  = PL_parser->yychar;
10657
10658     PERL_ARGS_ASSERT_YYERROR_PVN;
10659
10660     if (!yychar || (yychar == ';' && !PL_rsfp))
10661         sv_catpvs(where_sv, "at EOF");
10662     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10663       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10664       PL_oldbufptr != PL_bufptr) {
10665         /*
10666                 Only for NetWare:
10667                 The code below is removed for NetWare because it abends/crashes on NetWare
10668                 when the script has error such as not having the closing quotes like:
10669                     if ($var eq "value)
10670                 Checking of white spaces is anyway done in NetWare code.
10671         */
10672 #ifndef NETWARE
10673         while (isSPACE(*PL_oldoldbufptr))
10674             PL_oldoldbufptr++;
10675 #endif
10676         context = PL_oldoldbufptr;
10677         contlen = PL_bufptr - PL_oldoldbufptr;
10678     }
10679     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10680       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10681         /*
10682                 Only for NetWare:
10683                 The code below is removed for NetWare because it abends/crashes on NetWare
10684                 when the script has error such as not having the closing quotes like:
10685                     if ($var eq "value)
10686                 Checking of white spaces is anyway done in NetWare code.
10687         */
10688 #ifndef NETWARE
10689         while (isSPACE(*PL_oldbufptr))
10690             PL_oldbufptr++;
10691 #endif
10692         context = PL_oldbufptr;
10693         contlen = PL_bufptr - PL_oldbufptr;
10694     }
10695     else if (yychar > 255)
10696         sv_catpvs(where_sv, "next token ???");
10697     else if (yychar == YYEMPTY) {
10698         if (PL_lex_state == LEX_NORMAL ||
10699            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10700             sv_catpvs(where_sv, "at end of line");
10701         else if (PL_lex_inpat)
10702             sv_catpvs(where_sv, "within pattern");
10703         else
10704             sv_catpvs(where_sv, "within string");
10705     }
10706     else {
10707         sv_catpvs(where_sv, "next char ");
10708         if (yychar < 32)
10709             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10710         else if (isPRINT_LC(yychar)) {
10711             const char string = yychar;
10712             sv_catpvn(where_sv, &string, 1);
10713         }
10714         else
10715             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10716     }
10717     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10718     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10719         OutCopFILE(PL_curcop),
10720         (IV)(PL_parser->preambling == NOLINE
10721                ? CopLINE(PL_curcop)
10722                : PL_parser->preambling));
10723     if (context)
10724         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10725                              UTF8fARG(UTF, contlen, context));
10726     else
10727         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10728     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10729         Perl_sv_catpvf(aTHX_ msg,
10730         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10731                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10732         PL_multi_end = 0;
10733     }
10734     if (PL_in_eval & EVAL_WARNONLY) {
10735         PL_in_eval &= ~EVAL_WARNONLY;
10736         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10737     }
10738     else
10739         qerror(msg);
10740     if (PL_error_count >= 10) {
10741         SV * errsv;
10742         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10743             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10744                        SVfARG(errsv), OutCopFILE(PL_curcop));
10745         else
10746             Perl_croak(aTHX_ "%s has too many errors.\n",
10747             OutCopFILE(PL_curcop));
10748     }
10749     PL_in_my = 0;
10750     PL_in_my_stash = NULL;
10751     return 0;
10752 }
10753
10754 STATIC char*
10755 S_swallow_bom(pTHX_ U8 *s)
10756 {
10757     const STRLEN slen = SvCUR(PL_linestr);
10758
10759     PERL_ARGS_ASSERT_SWALLOW_BOM;
10760
10761     switch (s[0]) {
10762     case 0xFF:
10763         if (s[1] == 0xFE) {
10764             /* UTF-16 little-endian? (or UTF-32LE?) */
10765             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10766                 /* diag_listed_as: Unsupported script encoding %s */
10767                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10768 #ifndef PERL_NO_UTF16_FILTER
10769             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10770             s += 2;
10771             if (PL_bufend > (char*)s) {
10772                 s = add_utf16_textfilter(s, TRUE);
10773             }
10774 #else
10775             /* diag_listed_as: Unsupported script encoding %s */
10776             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10777 #endif
10778         }
10779         break;
10780     case 0xFE:
10781         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10782 #ifndef PERL_NO_UTF16_FILTER
10783             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10784             s += 2;
10785             if (PL_bufend > (char *)s) {
10786                 s = add_utf16_textfilter(s, FALSE);
10787             }
10788 #else
10789             /* diag_listed_as: Unsupported script encoding %s */
10790             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10791 #endif
10792         }
10793         break;
10794     case BOM_UTF8_FIRST_BYTE: {
10795         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10796         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10797             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10798             s += len + 1;                      /* UTF-8 */
10799         }
10800         break;
10801     }
10802     case 0:
10803         if (slen > 3) {
10804              if (s[1] == 0) {
10805                   if (s[2] == 0xFE && s[3] == 0xFF) {
10806                        /* UTF-32 big-endian */
10807                        /* diag_listed_as: Unsupported script encoding %s */
10808                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10809                   }
10810              }
10811              else if (s[2] == 0 && s[3] != 0) {
10812                   /* Leading bytes
10813                    * 00 xx 00 xx
10814                    * are a good indicator of UTF-16BE. */
10815 #ifndef PERL_NO_UTF16_FILTER
10816                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10817                   s = add_utf16_textfilter(s, FALSE);
10818 #else
10819                   /* diag_listed_as: Unsupported script encoding %s */
10820                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10821 #endif
10822              }
10823         }
10824         break;
10825
10826     default:
10827          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10828                   /* Leading bytes
10829                    * xx 00 xx 00
10830                    * are a good indicator of UTF-16LE. */
10831 #ifndef PERL_NO_UTF16_FILTER
10832               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10833               s = add_utf16_textfilter(s, TRUE);
10834 #else
10835               /* diag_listed_as: Unsupported script encoding %s */
10836               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10837 #endif
10838          }
10839     }
10840     return (char*)s;
10841 }
10842
10843
10844 #ifndef PERL_NO_UTF16_FILTER
10845 static I32
10846 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10847 {
10848     SV *const filter = FILTER_DATA(idx);
10849     /* We re-use this each time round, throwing the contents away before we
10850        return.  */
10851     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10852     SV *const utf8_buffer = filter;
10853     IV status = IoPAGE(filter);
10854     const bool reverse = cBOOL(IoLINES(filter));
10855     I32 retval;
10856
10857     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10858
10859     /* As we're automatically added, at the lowest level, and hence only called
10860        from this file, we can be sure that we're not called in block mode. Hence
10861        don't bother writing code to deal with block mode.  */
10862     if (maxlen) {
10863         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10864     }
10865     if (status < 0) {
10866         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10867     }
10868     DEBUG_P(PerlIO_printf(Perl_debug_log,
10869                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10870                           FPTR2DPTR(void *, S_utf16_textfilter),
10871                           reverse ? 'l' : 'b', idx, maxlen, status,
10872                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10873
10874     while (1) {
10875         STRLEN chars;
10876         STRLEN have;
10877         I32 newlen;
10878         U8 *end;
10879         /* First, look in our buffer of existing UTF-8 data:  */
10880         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10881
10882         if (nl) {
10883             ++nl;
10884         } else if (status == 0) {
10885             /* EOF */
10886             IoPAGE(filter) = 0;
10887             nl = SvEND(utf8_buffer);
10888         }
10889         if (nl) {
10890             STRLEN got = nl - SvPVX(utf8_buffer);
10891             /* Did we have anything to append?  */
10892             retval = got != 0;
10893             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10894             /* Everything else in this code works just fine if SVp_POK isn't
10895                set.  This, however, needs it, and we need it to work, else
10896                we loop infinitely because the buffer is never consumed.  */
10897             sv_chop(utf8_buffer, nl);
10898             break;
10899         }
10900
10901         /* OK, not a complete line there, so need to read some more UTF-16.
10902            Read an extra octect if the buffer currently has an odd number. */
10903         while (1) {
10904             if (status <= 0)
10905                 break;
10906             if (SvCUR(utf16_buffer) >= 2) {
10907                 /* Location of the high octet of the last complete code point.
10908                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10909                    *coupled* with all the benefits of partial reads and
10910                    endianness.  */
10911                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10912                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10913
10914                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10915                     break;
10916                 }
10917
10918                 /* We have the first half of a surrogate. Read more.  */
10919                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10920             }
10921
10922             status = FILTER_READ(idx + 1, utf16_buffer,
10923                                  160 + (SvCUR(utf16_buffer) & 1));
10924             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10925             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10926             if (status < 0) {
10927                 /* Error */
10928                 IoPAGE(filter) = status;
10929                 return status;
10930             }
10931         }
10932
10933         chars = SvCUR(utf16_buffer) >> 1;
10934         have = SvCUR(utf8_buffer);
10935         SvGROW(utf8_buffer, have + chars * 3 + 1);
10936
10937         if (reverse) {
10938             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10939                                          (U8*)SvPVX_const(utf8_buffer) + have,
10940                                          chars * 2, &newlen);
10941         } else {
10942             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10943                                 (U8*)SvPVX_const(utf8_buffer) + have,
10944                                 chars * 2, &newlen);
10945         }
10946         SvCUR_set(utf8_buffer, have + newlen);
10947         *end = '\0';
10948
10949         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10950            it's private to us, and utf16_to_utf8{,reversed} take a
10951            (pointer,length) pair, rather than a NUL-terminated string.  */
10952         if(SvCUR(utf16_buffer) & 1) {
10953             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10954             SvCUR_set(utf16_buffer, 1);
10955         } else {
10956             SvCUR_set(utf16_buffer, 0);
10957         }
10958     }
10959     DEBUG_P(PerlIO_printf(Perl_debug_log,
10960                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10961                           status,
10962                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10963     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10964     return retval;
10965 }
10966
10967 static U8 *
10968 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10969 {
10970     SV *filter = filter_add(S_utf16_textfilter, NULL);
10971
10972     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10973
10974     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10975     sv_setpvs(filter, "");
10976     IoLINES(filter) = reversed;
10977     IoPAGE(filter) = 1; /* Not EOF */
10978
10979     /* Sadly, we have to return a valid pointer, come what may, so we have to
10980        ignore any error return from this.  */
10981     SvCUR_set(PL_linestr, 0);
10982     if (FILTER_READ(0, PL_linestr, 0)) {
10983         SvUTF8_on(PL_linestr);
10984     } else {
10985         SvUTF8_on(PL_linestr);
10986     }
10987     PL_bufend = SvEND(PL_linestr);
10988     return (U8*)SvPVX(PL_linestr);
10989 }
10990 #endif
10991
10992 /*
10993 Returns a pointer to the next character after the parsed
10994 vstring, as well as updating the passed in sv.
10995
10996 Function must be called like
10997
10998         sv = sv_2mortal(newSV(5));
10999         s = scan_vstring(s,e,sv);
11000
11001 where s and e are the start and end of the string.
11002 The sv should already be large enough to store the vstring
11003 passed in, for performance reasons.
11004
11005 This function may croak if fatal warnings are enabled in the
11006 calling scope, hence the sv_2mortal in the example (to prevent
11007 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11008 sv_2mortal.
11009
11010 */
11011
11012 char *
11013 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11014 {
11015     const char *pos = s;
11016     const char *start = s;
11017
11018     PERL_ARGS_ASSERT_SCAN_VSTRING;
11019
11020     if (*pos == 'v') pos++;  /* get past 'v' */
11021     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11022         pos++;
11023     if ( *pos != '.') {
11024         /* this may not be a v-string if followed by => */
11025         const char *next = pos;
11026         while (next < e && isSPACE(*next))
11027             ++next;
11028         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11029             /* return string not v-string */
11030             sv_setpvn(sv,(char *)s,pos-s);
11031             return (char *)pos;
11032         }
11033     }
11034
11035     if (!isALPHA(*pos)) {
11036         U8 tmpbuf[UTF8_MAXBYTES+1];
11037
11038         if (*s == 'v')
11039             s++;  /* get past 'v' */
11040
11041         sv_setpvs(sv, "");
11042
11043         for (;;) {
11044             /* this is atoi() that tolerates underscores */
11045             U8 *tmpend;
11046             UV rev = 0;
11047             const char *end = pos;
11048             UV mult = 1;
11049             while (--end >= s) {
11050                 if (*end != '_') {
11051                     const UV orev = rev;
11052                     rev += (*end - '0') * mult;
11053                     mult *= 10;
11054                     if (orev > rev)
11055                         /* diag_listed_as: Integer overflow in %s number */
11056                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11057                                          "Integer overflow in decimal number");
11058                 }
11059             }
11060 #ifdef EBCDIC
11061             if (rev > 0x7FFFFFFF)
11062                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11063 #endif
11064             /* Append native character for the rev point */
11065             tmpend = uvchr_to_utf8(tmpbuf, rev);
11066             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11067             if (!UVCHR_IS_INVARIANT(rev))
11068                  SvUTF8_on(sv);
11069             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11070                  s = ++pos;
11071             else {
11072                  s = pos;
11073                  break;
11074             }
11075             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11076                  pos++;
11077         }
11078         SvPOK_on(sv);
11079         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11080         SvRMAGICAL_on(sv);
11081     }
11082     return (char *)s;
11083 }
11084
11085 int
11086 Perl_keyword_plugin_standard(pTHX_
11087         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11088 {
11089     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11090     PERL_UNUSED_CONTEXT;
11091     PERL_UNUSED_ARG(keyword_ptr);
11092     PERL_UNUSED_ARG(keyword_len);
11093     PERL_UNUSED_ARG(op_ptr);
11094     return KEYWORD_PLUGIN_DECLINE;
11095 }
11096
11097 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11098 static void
11099 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11100 {
11101     SAVEI32(PL_lex_brackets);
11102     if (PL_lex_brackets > 100)
11103         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11104     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11105     SAVEI32(PL_lex_allbrackets);
11106     PL_lex_allbrackets = 0;
11107     SAVEI8(PL_lex_fakeeof);
11108     PL_lex_fakeeof = (U8)fakeeof;
11109     if(yyparse(gramtype) && !PL_parser->error_count)
11110         qerror(Perl_mess(aTHX_ "Parse error"));
11111 }
11112
11113 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11114 static OP *
11115 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11116 {
11117     OP *o;
11118     ENTER;
11119     SAVEVPTR(PL_eval_root);
11120     PL_eval_root = NULL;
11121     parse_recdescent(gramtype, fakeeof);
11122     o = PL_eval_root;
11123     LEAVE;
11124     return o;
11125 }
11126
11127 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11128 static OP *
11129 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11130 {
11131     OP *exprop;
11132     if (flags & ~PARSE_OPTIONAL)
11133         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11134     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11135     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11136         if (!PL_parser->error_count)
11137             qerror(Perl_mess(aTHX_ "Parse error"));
11138         exprop = newOP(OP_NULL, 0);
11139     }
11140     return exprop;
11141 }
11142
11143 /*
11144 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11145
11146 Parse a Perl arithmetic expression.  This may contain operators of precedence
11147 down to the bit shift operators.  The expression must be followed (and thus
11148 terminated) either by a comparison or lower-precedence operator or by
11149 something that would normally terminate an expression such as semicolon.
11150 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11151 otherwise it is mandatory.  It is up to the caller to ensure that the
11152 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11153 the source of the code to be parsed and the lexical context for the
11154 expression.
11155
11156 The op tree representing the expression is returned.  If an optional
11157 expression is absent, a null pointer is returned, otherwise the pointer
11158 will be non-null.
11159
11160 If an error occurs in parsing or compilation, in most cases a valid op
11161 tree is returned anyway.  The error is reflected in the parser state,
11162 normally resulting in a single exception at the top level of parsing
11163 which covers all the compilation errors that occurred.  Some compilation
11164 errors, however, will throw an exception immediately.
11165
11166 =cut
11167 */
11168
11169 OP *
11170 Perl_parse_arithexpr(pTHX_ U32 flags)
11171 {
11172     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11173 }
11174
11175 /*
11176 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11177
11178 Parse a Perl term expression.  This may contain operators of precedence
11179 down to the assignment operators.  The expression must be followed (and thus
11180 terminated) either by a comma or lower-precedence operator or by
11181 something that would normally terminate an expression such as semicolon.
11182 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11183 otherwise it is mandatory.  It is up to the caller to ensure that the
11184 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11185 the source of the code to be parsed and the lexical context for the
11186 expression.
11187
11188 The op tree representing the expression is returned.  If an optional
11189 expression is absent, a null pointer is returned, otherwise the pointer
11190 will be non-null.
11191
11192 If an error occurs in parsing or compilation, in most cases a valid op
11193 tree is returned anyway.  The error is reflected in the parser state,
11194 normally resulting in a single exception at the top level of parsing
11195 which covers all the compilation errors that occurred.  Some compilation
11196 errors, however, will throw an exception immediately.
11197
11198 =cut
11199 */
11200
11201 OP *
11202 Perl_parse_termexpr(pTHX_ U32 flags)
11203 {
11204     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11205 }
11206
11207 /*
11208 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11209
11210 Parse a Perl list expression.  This may contain operators of precedence
11211 down to the comma operator.  The expression must be followed (and thus
11212 terminated) either by a low-precedence logic operator such as C<or> or by
11213 something that would normally terminate an expression such as semicolon.
11214 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11215 otherwise it is mandatory.  It is up to the caller to ensure that the
11216 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11217 the source of the code to be parsed and the lexical context for the
11218 expression.
11219
11220 The op tree representing the expression is returned.  If an optional
11221 expression is absent, a null pointer is returned, otherwise the pointer
11222 will be non-null.
11223
11224 If an error occurs in parsing or compilation, in most cases a valid op
11225 tree is returned anyway.  The error is reflected in the parser state,
11226 normally resulting in a single exception at the top level of parsing
11227 which covers all the compilation errors that occurred.  Some compilation
11228 errors, however, will throw an exception immediately.
11229
11230 =cut
11231 */
11232
11233 OP *
11234 Perl_parse_listexpr(pTHX_ U32 flags)
11235 {
11236     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11237 }
11238
11239 /*
11240 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11241
11242 Parse a single complete Perl expression.  This allows the full
11243 expression grammar, including the lowest-precedence operators such
11244 as C<or>.  The expression must be followed (and thus terminated) by a
11245 token that an expression would normally be terminated by: end-of-file,
11246 closing bracketing punctuation, semicolon, or one of the keywords that
11247 signals a postfix expression-statement modifier.  If I<flags> includes
11248 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11249 mandatory.  It is up to the caller to ensure that the dynamic parser
11250 state (L</PL_parser> et al) is correctly set to reflect the source of
11251 the code to be parsed and the lexical context for the expression.
11252
11253 The op tree representing the expression is returned.  If an optional
11254 expression is absent, a null pointer is returned, otherwise the pointer
11255 will be non-null.
11256
11257 If an error occurs in parsing or compilation, in most cases a valid op
11258 tree is returned anyway.  The error is reflected in the parser state,
11259 normally resulting in a single exception at the top level of parsing
11260 which covers all the compilation errors that occurred.  Some compilation
11261 errors, however, will throw an exception immediately.
11262
11263 =cut
11264 */
11265
11266 OP *
11267 Perl_parse_fullexpr(pTHX_ U32 flags)
11268 {
11269     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11270 }
11271
11272 /*
11273 =for apidoc Amx|OP *|parse_block|U32 flags
11274
11275 Parse a single complete Perl code block.  This consists of an opening
11276 brace, a sequence of statements, and a closing brace.  The block
11277 constitutes a lexical scope, so C<my> variables and various compile-time
11278 effects can be contained within it.  It is up to the caller to ensure
11279 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11280 reflect the source of the code to be parsed and the lexical context for
11281 the statement.
11282
11283 The op tree representing the code block is returned.  This is always a
11284 real op, never a null pointer.  It will normally be a C<lineseq> list,
11285 including C<nextstate> or equivalent ops.  No ops to construct any kind
11286 of runtime scope are included by virtue of it being a block.
11287
11288 If an error occurs in parsing or compilation, in most cases a valid op
11289 tree (most likely null) is returned anyway.  The error is reflected in
11290 the parser state, normally resulting in a single exception at the top
11291 level of parsing which covers all the compilation errors that occurred.
11292 Some compilation errors, however, will throw an exception immediately.
11293
11294 The I<flags> parameter is reserved for future use, and must always
11295 be zero.
11296
11297 =cut
11298 */
11299
11300 OP *
11301 Perl_parse_block(pTHX_ U32 flags)
11302 {
11303     if (flags)
11304         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11305     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11306 }
11307
11308 /*
11309 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11310
11311 Parse a single unadorned Perl statement.  This may be a normal imperative
11312 statement or a declaration that has compile-time effect.  It does not
11313 include any label or other affixture.  It is up to the caller to ensure
11314 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11315 reflect the source of the code to be parsed and the lexical context for
11316 the statement.
11317
11318 The op tree representing the statement is returned.  This may be a
11319 null pointer if the statement is null, for example if it was actually
11320 a subroutine definition (which has compile-time side effects).  If not
11321 null, it will be ops directly implementing the statement, suitable to
11322 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11323 equivalent op (except for those embedded in a scope contained entirely
11324 within the statement).
11325
11326 If an error occurs in parsing or compilation, in most cases a valid op
11327 tree (most likely null) is returned anyway.  The error is reflected in
11328 the parser state, normally resulting in a single exception at the top
11329 level of parsing which covers all the compilation errors that occurred.
11330 Some compilation errors, however, will throw an exception immediately.
11331
11332 The I<flags> parameter is reserved for future use, and must always
11333 be zero.
11334
11335 =cut
11336 */
11337
11338 OP *
11339 Perl_parse_barestmt(pTHX_ U32 flags)
11340 {
11341     if (flags)
11342         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11343     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11344 }
11345
11346 /*
11347 =for apidoc Amx|SV *|parse_label|U32 flags
11348
11349 Parse a single label, possibly optional, of the type that may prefix a
11350 Perl statement.  It is up to the caller to ensure that the dynamic parser
11351 state (L</PL_parser> et al) is correctly set to reflect the source of
11352 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11353 label is optional, otherwise it is mandatory.
11354
11355 The name of the label is returned in the form of a fresh scalar.  If an
11356 optional label is absent, a null pointer is returned.
11357
11358 If an error occurs in parsing, which can only occur if the label is
11359 mandatory, a valid label is returned anyway.  The error is reflected in
11360 the parser state, normally resulting in a single exception at the top
11361 level of parsing which covers all the compilation errors that occurred.
11362
11363 =cut
11364 */
11365
11366 SV *
11367 Perl_parse_label(pTHX_ U32 flags)
11368 {
11369     if (flags & ~PARSE_OPTIONAL)
11370         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11371     if (PL_lex_state == LEX_KNOWNEXT) {
11372         PL_parser->yychar = yylex();
11373         if (PL_parser->yychar == LABEL) {
11374             char * const lpv = pl_yylval.pval;
11375             STRLEN llen = strlen(lpv);
11376             PL_parser->yychar = YYEMPTY;
11377             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11378         } else {
11379             yyunlex();
11380             goto no_label;
11381         }
11382     } else {
11383         char *s, *t;
11384         STRLEN wlen, bufptr_pos;
11385         lex_read_space(0);
11386         t = s = PL_bufptr;
11387         if (!isIDFIRST_lazy_if(s, UTF))
11388             goto no_label;
11389         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11390         if (word_takes_any_delimeter(s, wlen))
11391             goto no_label;
11392         bufptr_pos = s - SvPVX(PL_linestr);
11393         PL_bufptr = t;
11394         lex_read_space(LEX_KEEP_PREVIOUS);
11395         t = PL_bufptr;
11396         s = SvPVX(PL_linestr) + bufptr_pos;
11397         if (t[0] == ':' && t[1] != ':') {
11398             PL_oldoldbufptr = PL_oldbufptr;
11399             PL_oldbufptr = s;
11400             PL_bufptr = t+1;
11401             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11402         } else {
11403             PL_bufptr = s;
11404             no_label:
11405             if (flags & PARSE_OPTIONAL) {
11406                 return NULL;
11407             } else {
11408                 qerror(Perl_mess(aTHX_ "Parse error"));
11409                 return newSVpvs("x");
11410             }
11411         }
11412     }
11413 }
11414
11415 /*
11416 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11417
11418 Parse a single complete Perl statement.  This may be a normal imperative
11419 statement or a declaration that has compile-time effect, and may include
11420 optional labels.  It is up to the caller to ensure that the dynamic
11421 parser state (L</PL_parser> et al) is correctly set to reflect the source
11422 of the code to be parsed and the lexical context for the statement.
11423
11424 The op tree representing the statement is returned.  This may be a
11425 null pointer if the statement is null, for example if it was actually
11426 a subroutine definition (which has compile-time side effects).  If not
11427 null, it will be the result of a L</newSTATEOP> call, normally including
11428 a C<nextstate> or equivalent op.
11429
11430 If an error occurs in parsing or compilation, in most cases a valid op
11431 tree (most likely null) is returned anyway.  The error is reflected in
11432 the parser state, normally resulting in a single exception at the top
11433 level of parsing which covers all the compilation errors that occurred.
11434 Some compilation errors, however, will throw an exception immediately.
11435
11436 The I<flags> parameter is reserved for future use, and must always
11437 be zero.
11438
11439 =cut
11440 */
11441
11442 OP *
11443 Perl_parse_fullstmt(pTHX_ U32 flags)
11444 {
11445     if (flags)
11446         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11447     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11448 }
11449
11450 /*
11451 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11452
11453 Parse a sequence of zero or more Perl statements.  These may be normal
11454 imperative statements, including optional labels, or declarations
11455 that have compile-time effect, or any mixture thereof.  The statement
11456 sequence ends when a closing brace or end-of-file is encountered in a
11457 place where a new statement could have validly started.  It is up to
11458 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11459 is correctly set to reflect the source of the code to be parsed and the
11460 lexical context for the statements.
11461
11462 The op tree representing the statement sequence is returned.  This may
11463 be a null pointer if the statements were all null, for example if there
11464 were no statements or if there were only subroutine definitions (which
11465 have compile-time side effects).  If not null, it will be a C<lineseq>
11466 list, normally including C<nextstate> or equivalent ops.
11467
11468 If an error occurs in parsing or compilation, in most cases a valid op
11469 tree is returned anyway.  The error is reflected in the parser state,
11470 normally resulting in a single exception at the top level of parsing
11471 which covers all the compilation errors that occurred.  Some compilation
11472 errors, however, will throw an exception immediately.
11473
11474 The I<flags> parameter is reserved for future use, and must always
11475 be zero.
11476
11477 =cut
11478 */
11479
11480 OP *
11481 Perl_parse_stmtseq(pTHX_ U32 flags)
11482 {
11483     OP *stmtseqop;
11484     I32 c;
11485     if (flags)
11486         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11487     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11488     c = lex_peek_unichar(0);
11489     if (c != -1 && c != /*{*/'}')
11490         qerror(Perl_mess(aTHX_ "Parse error"));
11491     return stmtseqop;
11492 }
11493
11494 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11495 static void
11496 S_lex_token_boundary(pTHX)
11497 {
11498     PL_oldoldbufptr = PL_oldbufptr;
11499     PL_oldbufptr = PL_bufptr;
11500 }
11501
11502 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11503 static OP *
11504 S_parse_opt_lexvar(pTHX)
11505 {
11506     I32 sigil, c;
11507     char *s, *d;
11508     OP *var;
11509     lex_token_boundary();
11510     sigil = lex_read_unichar(0);
11511     if (lex_peek_unichar(0) == '#') {
11512         qerror(Perl_mess(aTHX_ "Parse error"));
11513         return NULL;
11514     }
11515     lex_read_space(0);
11516     c = lex_peek_unichar(0);
11517     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11518         return NULL;
11519     s = PL_bufptr;
11520     d = PL_tokenbuf + 1;
11521     PL_tokenbuf[0] = (char)sigil;
11522     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11523     PL_bufptr = s;
11524     if (d == PL_tokenbuf+1)
11525         return NULL;
11526     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11527                 OPf_MOD | (OPpLVAL_INTRO<<8));
11528     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11529     return var;
11530 }
11531
11532 OP *
11533 Perl_parse_subsignature(pTHX)
11534 {
11535     I32 c;
11536     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11537     OP *initops = NULL;
11538     lex_read_space(0);
11539     c = lex_peek_unichar(0);
11540     while (c != /*(*/')') {
11541         switch (c) {
11542             case '$': {
11543                 OP *var, *expr;
11544                 if (prev_type == 2)
11545                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11546                 var = parse_opt_lexvar();
11547                 expr = var ?
11548                     newBINOP(OP_AELEM, 0,
11549                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11550                             OP_RV2AV),
11551                         newSVOP(OP_CONST, 0, newSViv(pos))) :
11552                     NULL;
11553                 lex_read_space(0);
11554                 c = lex_peek_unichar(0);
11555                 if (c == '=') {
11556                     lex_token_boundary();
11557                     lex_read_unichar(0);
11558                     lex_read_space(0);
11559                     c = lex_peek_unichar(0);
11560                     if (c == ',' || c == /*(*/')') {
11561                         if (var)
11562                             qerror(Perl_mess(aTHX_ "Optional parameter "
11563                                     "lacks default expression"));
11564                     } else {
11565                         OP *defexpr = parse_termexpr(0);
11566                         if (defexpr->op_type == OP_UNDEF &&
11567                                 !(defexpr->op_flags & OPf_KIDS)) {
11568                             op_free(defexpr);
11569                         } else {
11570                             OP *ifop =
11571                                 newBINOP(OP_GE, 0,
11572                                     scalar(newUNOP(OP_RV2AV, 0,
11573                                             newGVOP(OP_GV, 0, PL_defgv))),
11574                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
11575                             expr = var ?
11576                                 newCONDOP(0, ifop, expr, defexpr) :
11577                                 newLOGOP(OP_OR, 0, ifop, defexpr);
11578                         }
11579                     }
11580                     prev_type = 1;
11581                 } else {
11582                     if (prev_type == 1)
11583                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
11584                                 "follows optional parameter"));
11585                     prev_type = 0;
11586                     min_arity = pos + 1;
11587                 }
11588                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11589                 if (expr)
11590                     initops = op_append_list(OP_LINESEQ, initops,
11591                                 newSTATEOP(0, NULL, expr));
11592                 max_arity = ++pos;
11593             } break;
11594             case '@':
11595             case '%': {
11596                 OP *var;
11597                 if (prev_type == 2)
11598                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11599                 var = parse_opt_lexvar();
11600                 if (c == '%') {
11601                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11602                             newBINOP(OP_BIT_AND, 0,
11603                                 scalar(newUNOP(OP_RV2AV, 0,
11604                                     newGVOP(OP_GV, 0, PL_defgv))),
11605                                 newSVOP(OP_CONST, 0, newSViv(1))),
11606                             op_convert_list(OP_DIE, 0,
11607                                 op_convert_list(OP_SPRINTF, 0,
11608                                     op_append_list(OP_LIST,
11609                                         newSVOP(OP_CONST, 0,
11610                                             newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11611                                         newSLICEOP(0,
11612                                             op_append_list(OP_LIST,
11613                                                 newSVOP(OP_CONST, 0, newSViv(1)),
11614                                                 newSVOP(OP_CONST, 0, newSViv(2))),
11615                                             newOP(OP_CALLER, 0))))));
11616                     if (pos != min_arity)
11617                         chkop = newLOGOP(OP_AND, 0,
11618                                     newBINOP(OP_GT, 0,
11619                                         scalar(newUNOP(OP_RV2AV, 0,
11620                                             newGVOP(OP_GV, 0, PL_defgv))),
11621                                         newSVOP(OP_CONST, 0, newSViv(pos))),
11622                                     chkop);
11623                     initops = op_append_list(OP_LINESEQ,
11624                                 newSTATEOP(0, NULL, chkop),
11625                                 initops);
11626                 }
11627                 if (var) {
11628                     OP *slice = pos ?
11629                         op_prepend_elem(OP_ASLICE,
11630                             newOP(OP_PUSHMARK, 0),
11631                             newLISTOP(OP_ASLICE, 0,
11632                                 list(newRANGE(0,
11633                                     newSVOP(OP_CONST, 0, newSViv(pos)),
11634                                     newUNOP(OP_AV2ARYLEN, 0,
11635                                         ref(newUNOP(OP_RV2AV, 0,
11636                                                 newGVOP(OP_GV, 0, PL_defgv)),
11637                                             OP_AV2ARYLEN)))),
11638                                 ref(newUNOP(OP_RV2AV, 0,
11639                                         newGVOP(OP_GV, 0, PL_defgv)),
11640                                     OP_ASLICE))) :
11641                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11642                     initops = op_append_list(OP_LINESEQ, initops,
11643                         newSTATEOP(0, NULL,
11644                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
11645                 }
11646                 prev_type = 2;
11647                 max_arity = -1;
11648             } break;
11649             default:
11650                 parse_error:
11651                 qerror(Perl_mess(aTHX_ "Parse error"));
11652                 return NULL;
11653         }
11654         lex_read_space(0);
11655         c = lex_peek_unichar(0);
11656         switch (c) {
11657             case /*(*/')': break;
11658             case ',':
11659                 do {
11660                     lex_token_boundary();
11661                     lex_read_unichar(0);
11662                     lex_read_space(0);
11663                     c = lex_peek_unichar(0);
11664                 } while (c == ',');
11665                 break;
11666             default:
11667                 goto parse_error;
11668         }
11669     }
11670     if (min_arity != 0) {
11671         initops = op_append_list(OP_LINESEQ,
11672             newSTATEOP(0, NULL,
11673                 newLOGOP(OP_OR, 0,
11674                     newBINOP(OP_GE, 0,
11675                         scalar(newUNOP(OP_RV2AV, 0,
11676                             newGVOP(OP_GV, 0, PL_defgv))),
11677                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
11678                     op_convert_list(OP_DIE, 0,
11679                         op_convert_list(OP_SPRINTF, 0,
11680                             op_append_list(OP_LIST,
11681                                 newSVOP(OP_CONST, 0,
11682                                     newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
11683                                 newSLICEOP(0,
11684                                     op_append_list(OP_LIST,
11685                                         newSVOP(OP_CONST, 0, newSViv(1)),
11686                                         newSVOP(OP_CONST, 0, newSViv(2))),
11687                                     newOP(OP_CALLER, 0))))))),
11688             initops);
11689     }
11690     if (max_arity != -1) {
11691         initops = op_append_list(OP_LINESEQ,
11692             newSTATEOP(0, NULL,
11693                 newLOGOP(OP_OR, 0,
11694                     newBINOP(OP_LE, 0,
11695                         scalar(newUNOP(OP_RV2AV, 0,
11696                             newGVOP(OP_GV, 0, PL_defgv))),
11697                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
11698                     op_convert_list(OP_DIE, 0,
11699                         op_convert_list(OP_SPRINTF, 0,
11700                             op_append_list(OP_LIST,
11701                                 newSVOP(OP_CONST, 0,
11702                                     newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
11703                                 newSLICEOP(0,
11704                                     op_append_list(OP_LIST,
11705                                         newSVOP(OP_CONST, 0, newSViv(1)),
11706                                         newSVOP(OP_CONST, 0, newSViv(2))),
11707                                     newOP(OP_CALLER, 0))))))),
11708             initops);
11709     }
11710     return initops;
11711 }
11712
11713 /*
11714  * Local variables:
11715  * c-indentation-style: bsd
11716  * c-basic-offset: 4
11717  * indent-tabs-mode: nil
11718  * End:
11719  *
11720  * ex: set ts=8 sts=4 sw=4 et:
11721  */