toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42 #include "invlist_inline.h"
  43
  44 #define new_constant(a,b,c,d,e,f,g, h)  \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_preambled            (PL_parser->preambled)
  68 #define PL_linestr              (PL_parser->linestr)
  69 #define PL_expect               (PL_parser->expect)
  70 #define PL_copline              (PL_parser->copline)
  71 #define PL_bufptr               (PL_parser->bufptr)
  72 #define PL_oldbufptr            (PL_parser->oldbufptr)
  73 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  74 #define PL_linestart            (PL_parser->linestart)
  75 #define PL_bufend               (PL_parser->bufend)
  76 #define PL_last_uni             (PL_parser->last_uni)
  77 #define PL_last_lop             (PL_parser->last_lop)
  78 #define PL_last_lop_op          (PL_parser->last_lop_op)
  79 #define PL_lex_state            (PL_parser->lex_state)
  80 #define PL_rsfp                 (PL_parser->rsfp)
  81 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  82 #define PL_in_my                (PL_parser->in_my)
  83 #define PL_in_my_stash          (PL_parser->in_my_stash)
  84 #define PL_tokenbuf             (PL_parser->tokenbuf)
  85 #define PL_multi_end            (PL_parser->multi_end)
  86 #define PL_error_count          (PL_parser->error_count)
  87
  88 #  define PL_nexttoke           (PL_parser->nexttoke)
  89 #  define PL_nexttype           (PL_parser->nexttype)
  90 #  define PL_nextval            (PL_parser->nextval)
  91
  92
  93 #define SvEVALED(sv) \
  94     (SvTYPE(sv) >= SVt_PVNV \
  95     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  96
  97 static const char* const ident_too_long = "Identifier too long";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150
 151 #ifdef DEBUGGING
 152 static const char* const lex_state_names[] = {
 153     "KNOWNEXT",
 154     "FORMLINE",
 155     "INTERPCONST",
 156     "INTERPCONCAT",
 157     "INTERPENDMAYBE",
 158     "INTERPEND",
 159     "INTERPSTART",
 160     "INTERPPUSH",
 161     "INTERPCASEMOD",
 162     "INTERPNORMAL",
 163     "NORMAL"
 164 };
 165 #endif
 166
 167 #include "keywords.h"
 168
 169 /* CLINE is a macro that ensures PL_copline has a sane value */
 170
 171 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 172
 173 /*
 174  * Convenience functions to return different tokens and prime the
 175  * lexer for the next token.  They all take an argument.
 176  *
 177  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 178  * OPERATOR     : generic operator
 179  * AOPERATOR    : assignment operator
 180  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 181  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 182  * PREREF       : *EXPR where EXPR is not a simple identifier
 183  * TERM         : expression term
 184  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 185  * LOOPX        : loop exiting command (goto, last, dump, etc)
 186  * FTST         : file test operator
 187  * FUN0         : zero-argument function
 188  * FUN0OP       : zero-argument function, with its op created in this file
 189  * FUN1         : not used, except for not, which isn't a UNIOP
 190  * BOop         : bitwise or or xor
 191  * BAop         : bitwise and
 192  * BCop         : bitwise complement
 193  * SHop         : shift operator
 194  * PWop         : power operator
 195  * PMop         : pattern-matching operator
 196  * Aop          : addition-level operator
 197  * AopNOASSIGN  : addition-level operator that is never part of .=
 198  * Mop          : multiplication-level operator
 199  * Eop          : equality-testing operator
 200  * Rop          : relational operator <= != gt
 201  *
 202  * Also see LOP and lop() below.
 203  */
 204
 205 #ifdef DEBUGGING /* Serve -DT. */
 206 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 207 #else
 208 #   define REPORT(retval) (retval)
 209 #endif
 210
 211 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 212 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 213 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 214 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 215 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 216 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 217 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 218 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 219 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 220                          pl_yylval.ival=f, \
 221                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 222                          REPORT((int)LOOPEX))
 223 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 224 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 225 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 226 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 227 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 228 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 229 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 230                        REPORT('~')
 231 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 232 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 233 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 234 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 235 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 236 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 237 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 238 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 239
 240 /* This bit of chicanery makes a unary function followed by
 241  * a parenthesis into a function with one argument, highest precedence.
 242  * The UNIDOR macro is for unary functions that can be followed by the //
 243  * operator (such as C<shift // 0>).
 244  */
 245 #define UNI3(f,x,have_x) { \
 246         pl_yylval.ival = f; \
 247         if (have_x) PL_expect = x; \
 248         PL_bufptr = s; \
 249         PL_last_uni = PL_oldbufptr; \
 250         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 251         if (*s == '(') \
 252             return REPORT( (int)FUNC1 ); \
 253         s = skipspace(s); \
 254         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 255         }
 256 #define UNI(f)    UNI3(f,XTERM,1)
 257 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 258 #define UNIPROTO(f,optional) { \
 259         if (optional) PL_last_uni = PL_oldbufptr; \
 260         OPERATOR(f); \
 261         }
 262
 263 #define UNIBRACK(f) UNI3(f,0,0)
 264
 265 /* grandfather return to old style */
 266 #define OLDLOP(f) \
 267         do { \
 268             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 269                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 270             pl_yylval.ival = (f); \
 271             PL_expect = XTERM; \
 272             PL_bufptr = s; \
 273             return (int)LSTOP; \
 274         } while(0)
 275
 276 #define COPLINE_INC_WITH_HERELINES                  \
 277     STMT_START {                                     \
 278         CopLINE_inc(PL_curcop);                       \
 279         if (PL_parser->herelines)                      \
 280             CopLINE(PL_curcop) += PL_parser->herelines, \
 281             PL_parser->herelines = 0;                    \
 282     } STMT_END
 283 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 284  * is no sublex_push to follow. */
 285 #define COPLINE_SET_FROM_MULTI_END            \
 286     STMT_START {                               \
 287         CopLINE_set(PL_curcop, PL_multi_end);   \
 288         if (PL_multi_end != PL_multi_start)      \
 289             PL_parser->herelines = 0;             \
 290     } STMT_END
 291
 292
 293 #ifdef DEBUGGING
 294
 295 /* how to interpret the pl_yylval associated with the token */
 296 enum token_type {
 297     TOKENTYPE_NONE,
 298     TOKENTYPE_IVAL,
 299     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 300     TOKENTYPE_PVAL,
 301     TOKENTYPE_OPVAL
 302 };
 303
 304 static struct debug_tokens {
 305     const int token;
 306     enum token_type type;
 307     const char *name;
 308 } const debug_tokens[] =
 309 {
 310     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 311     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 312     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 313     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 314     { ANON_SIGSUB,      TOKENTYPE_IVAL,         "ANON_SIGSUB" },
 315     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 316     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 317     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 318     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 319     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 320     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 321     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 322     { DO,               TOKENTYPE_NONE,         "DO" },
 323     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 324     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 325     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 326     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 327     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 328     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 329     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 330     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 331     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 332     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 333     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 334     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 335     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 336     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 337     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 338     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 339     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 340     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 341     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 342     { IF,               TOKENTYPE_IVAL,         "IF" },
 343     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 344     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 345     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 346     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 347     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 348     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 349     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 350     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 351     { MY,               TOKENTYPE_IVAL,         "MY" },
 352     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 353     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 354     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 355     { OROR,             TOKENTYPE_NONE,         "OROR" },
 356     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 357     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 358     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 359     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 360     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 361     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 362     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 363     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 364     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 365     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 366     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 367     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 368     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 369     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 370     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 371     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 372     { SIGSUB,           TOKENTYPE_NONE,         "SIGSUB" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     PERL_ARGS_ASSERT_TOKEREPORT;
 394
 395     if (DEBUG_T_TEST) {
 396         const char *name = NULL;
 397         enum token_type type = TOKENTYPE_NONE;
 398         const struct debug_tokens *p;
 399         SV* const report = newSVpvs("<== ");
 400
 401         for (p = debug_tokens; p->token; p++) {
 402             if (p->token == (int)rv) {
 403                 name = p->name;
 404                 type = p->type;
 405                 break;
 406             }
 407         }
 408         if (name)
 409             Perl_sv_catpv(aTHX_ report, name);
 410         else if (isGRAPH(rv))
 411         {
 412             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 413             if ((char)rv == 'p')
 414                 sv_catpvs(report, " (pending identifier)");
 415         }
 416         else if (!rv)
 417             sv_catpvs(report, "EOF");
 418         else
 419             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 420         switch (type) {
 421         case TOKENTYPE_NONE:
 422             break;
 423         case TOKENTYPE_IVAL:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 425             break;
 426         case TOKENTYPE_OPNUM:
 427             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 428                                     PL_op_name[lvalp->ival]);
 429             break;
 430         case TOKENTYPE_PVAL:
 431             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 432             break;
 433         case TOKENTYPE_OPVAL:
 434             if (lvalp->opval) {
 435                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 436                                     PL_op_name[lvalp->opval->op_type]);
 437                 if (lvalp->opval->op_type == OP_CONST) {
 438                     Perl_sv_catpvf(aTHX_ report, " %s",
 439                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 440                 }
 441
 442             }
 443             else
 444                 sv_catpvs(report, "(opval=null)");
 445             break;
 446         }
 447         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 448     };
 449     return (int)rv;
 450 }
 451
 452
 453 /* print the buffer with suitable escapes */
 454
 455 STATIC void
 456 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 457 {
 458     SV* const tmp = newSVpvs("");
 459
 460     PERL_ARGS_ASSERT_PRINTBUF;
 461
 462     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 463     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 464     GCC_DIAG_RESTORE_STMT;
 465     SvREFCNT_dec(tmp);
 466 }
 467
 468 #endif
 469
 470 /*
 471  * S_ao
 472  *
 473  * This subroutine looks for an '=' next to the operator that has just been
 474  * parsed and turns it into an ASSIGNOP if it finds one.
 475  */
 476
 477 STATIC int
 478 S_ao(pTHX_ int toketype)
 479 {
 480     if (*PL_bufptr == '=') {
 481         PL_bufptr++;
 482         if (toketype == ANDAND)
 483             pl_yylval.ival = OP_ANDASSIGN;
 484         else if (toketype == OROR)
 485             pl_yylval.ival = OP_ORASSIGN;
 486         else if (toketype == DORDOR)
 487             pl_yylval.ival = OP_DORASSIGN;
 488         toketype = ASSIGNOP;
 489     }
 490     return REPORT(toketype);
 491 }
 492
 493 /*
 494  * S_no_op
 495  * When Perl expects an operator and finds something else, no_op
 496  * prints the warning.  It always prints "<something> found where
 497  * operator expected.  It prints "Missing semicolon on previous line?"
 498  * if the surprise occurs at the start of the line.  "do you need to
 499  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 500  * where the compiler doesn't know if foo is a method call or a function.
 501  * It prints "Missing operator before end of line" if there's nothing
 502  * after the missing operator, or "... before <...>" if there is something
 503  * after the missing operator.
 504  *
 505  * PL_bufptr is expected to point to the start of the thing that was found,
 506  * and s after the next token or partial token.
 507  */
 508
 509 STATIC void
 510 S_no_op(pTHX_ const char *const what, char *s)
 511 {
 512     char * const oldbp = PL_bufptr;
 513     const bool is_first = (PL_oldbufptr == PL_linestart);
 514
 515     PERL_ARGS_ASSERT_NO_OP;
 516
 517     if (!s)
 518         s = oldbp;
 519     else
 520         PL_bufptr = s;
 521     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 522     if (ckWARN_d(WARN_SYNTAX)) {
 523         if (is_first)
 524             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 525                     "\t(Missing semicolon on previous line?)\n");
 526         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 527                                                            PL_bufend,
 528                                                            UTF))
 529         {
 530             const char *t;
 531             for (t = PL_oldoldbufptr;
 532                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 533                  t += UTF ? UTF8SKIP(t) : 1)
 534             {
 535                 NOOP;
 536             }
 537             if (t < PL_bufptr && isSPACE(*t))
 538                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 540                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 541         }
 542         else {
 543             assert(s >= oldbp);
 544             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 545                     "\t(Missing operator before %" UTF8f "?)\n",
 546                      UTF8fARG(UTF, s - oldbp, oldbp));
 547         }
 548     }
 549     PL_bufptr = oldbp;
 550 }
 551
 552 /*
 553  * S_missingterm
 554  * Complain about missing quote/regexp/heredoc terminator.
 555  * If it's called with NULL then it cauterizes the line buffer.
 556  * If we're in a delimited string and the delimiter is a control
 557  * character, it's reformatted into a two-char sequence like ^C.
 558  * This is fatal.
 559  */
 560
 561 STATIC void
 562 S_missingterm(pTHX_ char *s, STRLEN len)
 563 {
 564     char tmpbuf[UTF8_MAXBYTES + 1];
 565     char q;
 566     bool uni = FALSE;
 567     SV *sv;
 568     if (s) {
 569         char * const nl = (char *) my_memrchr(s, '\n', len);
 570         if (nl) {
 571             *nl = '\0';
 572             len = nl - s;
 573         }
 574         uni = UTF;
 575     }
 576     else if (PL_multi_close < 32) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581         len = 2;
 582     }
 583     else {
 584         if (LIKELY(PL_multi_close < 256)) {
 585             *tmpbuf = (char)PL_multi_close;
 586             tmpbuf[1] = '\0';
 587             len = 1;
 588         }
 589         else {
 590             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 591             *end = '\0';
 592             len = end - tmpbuf;
 593             uni = TRUE;
 594         }
 595         s = tmpbuf;
 596     }
 597     q = memchr(s, '"', len) ? '\'' : '"';
 598     sv = sv_2mortal(newSVpvn(s, len));
 599     if (uni)
 600         SvUTF8_on(sv);
 601     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 602                      " anywhere before EOF", q, SVfARG(sv), q);
 603 }
 604
 605 #include "feature.h"
 606
 607 /*
 608  * Check whether the named feature is enabled.
 609  */
 610 bool
 611 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 612 {
 613     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 614
 615     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 616
 617     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 618
 619     if (namelen > MAX_FEATURE_LEN)
 620         return FALSE;
 621     memcpy(&he_name[8], name, namelen);
 622
 623     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 624                                      REFCOUNTED_HE_EXISTS));
 625 }
 626
 627 /*
 628  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 629  * utf16-to-utf8-reversed.
 630  */
 631
 632 #ifdef PERL_CR_FILTER
 633 static void
 634 strip_return(SV *sv)
 635 {
 636     const char *s = SvPVX_const(sv);
 637     const char * const e = s + SvCUR(sv);
 638
 639     PERL_ARGS_ASSERT_STRIP_RETURN;
 640
 641     /* outer loop optimized to do nothing if there are no CR-LFs */
 642     while (s < e) {
 643         if (*s++ == '\r' && *s == '\n') {
 644             /* hit a CR-LF, need to copy the rest */
 645             char *d = s - 1;
 646             *d++ = *s++;
 647             while (s < e) {
 648                 if (*s == '\r' && s[1] == '\n')
 649                     s++;
 650                 *d++ = *s++;
 651             }
 652             SvCUR(sv) -= s - d;
 653             return;
 654         }
 655     }
 656 }
 657
 658 STATIC I32
 659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 660 {
 661     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 662     if (count > 0 && !maxlen)
 663         strip_return(sv);
 664     return count;
 665 }
 666 #endif
 667
 668 /*
 669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 670
 671 Creates and initialises a new lexer/parser state object, supplying
 672 a context in which to lex and parse from a new source of Perl code.
 673 A pointer to the new state object is placed in L</PL_parser>.  An entry
 674 is made on the save stack so that upon unwinding, the new state object
 675 will be destroyed and the former value of L</PL_parser> will be restored.
 676 Nothing else need be done to clean up the parsing context.
 677
 678 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 679 non-null, provides a string (in SV form) containing code to be parsed.
 680 A copy of the string is made, so subsequent modification of C<line>
 681 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 682 from which code will be read to be parsed.  If both are non-null, the
 683 code in C<line> comes first and must consist of complete lines of input,
 684 and C<rsfp> supplies the remainder of the source.
 685
 686 The C<flags> parameter is reserved for future use.  Currently it is only
 687 used by perl internally, so extensions should always pass zero.
 688
 689 =cut
 690 */
 691
 692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 693    can share filters with the current parser.
 694    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 695    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 696    destruction. This is used to handle the case of defaulting to reading the
 697    script from the standard input because no filename was given on the command
 698    line (without getting confused by situation where STDIN has been closed, so
 699    the script handle is opened on fd 0)  */
 700
 701 void
 702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 703 {
 704     const char *s = NULL;
 705     yy_parser *parser, *oparser;
 706
 707     if (flags && flags & ~LEX_START_FLAGS)
 708         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 709
 710     /* create and initialise a parser */
 711
 712     Newxz(parser, 1, yy_parser);
 713     parser->old_parser = oparser = PL_parser;
 714     PL_parser = parser;
 715
 716     parser->stack = NULL;
 717     parser->stack_max1 = NULL;
 718     parser->ps = NULL;
 719
 720     /* on scope exit, free this parser and restore any outer one */
 721     SAVEPARSER(parser);
 722     parser->saved_curcop = PL_curcop;
 723
 724     /* initialise lexer state */
 725
 726     parser->nexttoke = 0;
 727     parser->error_count = oparser ? oparser->error_count : 0;
 728     parser->copline = parser->preambling = NOLINE;
 729     parser->lex_state = LEX_NORMAL;
 730     parser->expect = XSTATE;
 731     parser->rsfp = rsfp;
 732     parser->recheck_utf8_validity = FALSE;
 733     parser->rsfp_filters =
 734       !(flags & LEX_START_SAME_FILTER) || !oparser
 735         ? NULL
 736         : MUTABLE_AV(SvREFCNT_inc(
 737             oparser->rsfp_filters
 738              ? oparser->rsfp_filters
 739              : (oparser->rsfp_filters = newAV())
 740           ));
 741
 742     Newx(parser->lex_brackstack, 120, char);
 743     Newx(parser->lex_casestack, 12, char);
 744     *parser->lex_casestack = '\0';
 745     Newxz(parser->lex_shared, 1, LEXSHARED);
 746
 747     if (line) {
 748         STRLEN len;
 749         const U8* first_bad_char_loc;
 750
 751         s = SvPV_const(line, len);
 752
 753         if (   SvUTF8(line)
 754             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 755                                              SvCUR(line),
 756                                              &first_bad_char_loc)))
 757         {
 758             _force_out_malformed_utf8_message(first_bad_char_loc,
 759                                               (U8 *) s + SvCUR(line),
 760                                               0,
 761                                               1 /* 1 means die */ );
 762             NOT_REACHED; /* NOTREACHED */
 763         }
 764
 765         parser->linestr = flags & LEX_START_COPIED
 766                             ? SvREFCNT_inc_simple_NN(line)
 767                             : newSVpvn_flags(s, len, SvUTF8(line));
 768         if (!rsfp)
 769             sv_catpvs(parser->linestr, "\n;");
 770     } else {
 771         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 772     }
 773
 774     parser->oldoldbufptr =
 775         parser->oldbufptr =
 776         parser->bufptr =
 777         parser->linestart = SvPVX(parser->linestr);
 778     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 779     parser->last_lop = parser->last_uni = NULL;
 780
 781     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 782                                                         |LEX_DONT_CLOSE_RSFP));
 783     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 784                                                         |LEX_DONT_CLOSE_RSFP));
 785
 786     parser->in_pod = parser->filtered = 0;
 787 }
 788
 789
 790 /* delete a parser object */
 791
 792 void
 793 Perl_parser_free(pTHX_  const yy_parser *parser)
 794 {
 795     PERL_ARGS_ASSERT_PARSER_FREE;
 796
 797     PL_curcop = parser->saved_curcop;
 798     SvREFCNT_dec(parser->linestr);
 799
 800     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 801         PerlIO_clearerr(parser->rsfp);
 802     else if (parser->rsfp && (!parser->old_parser
 803           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 804         PerlIO_close(parser->rsfp);
 805     SvREFCNT_dec(parser->rsfp_filters);
 806     SvREFCNT_dec(parser->lex_stuff);
 807     SvREFCNT_dec(parser->lex_sub_repl);
 808
 809     Safefree(parser->lex_brackstack);
 810     Safefree(parser->lex_casestack);
 811     Safefree(parser->lex_shared);
 812     PL_parser = parser->old_parser;
 813     Safefree(parser);
 814 }
 815
 816 void
 817 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 818 {
 819     I32 nexttoke = parser->nexttoke;
 820     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 821     while (nexttoke--) {
 822         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 823          && parser->nextval[nexttoke].opval
 824          && parser->nextval[nexttoke].opval->op_slabbed
 825          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 826             op_free(parser->nextval[nexttoke].opval);
 827             parser->nextval[nexttoke].opval = NULL;
 828         }
 829     }
 830 }
 831
 832
 833 /*
 834 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 835
 836 Buffer scalar containing the chunk currently under consideration of the
 837 text currently being lexed.  This is always a plain string scalar (for
 838 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 839 normal scalar means; instead refer to the buffer directly by the pointer
 840 variables described below.
 841
 842 The lexer maintains various C<char*> pointers to things in the
 843 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 844 reallocated, all of these pointers must be updated.  Don't attempt to
 845 do this manually, but rather use L</lex_grow_linestr> if you need to
 846 reallocate the buffer.
 847
 848 The content of the text chunk in the buffer is commonly exactly one
 849 complete line of input, up to and including a newline terminator,
 850 but there are situations where it is otherwise.  The octets of the
 851 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 852 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 853 flag on this scalar, which may disagree with it.
 854
 855 For direct examination of the buffer, the variable
 856 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 857 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 858 of these pointers is usually preferable to examination of the scalar
 859 through normal scalar means.
 860
 861 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 862
 863 Direct pointer to the end of the chunk of text currently being lexed, the
 864 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 865 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 866 always located at the end of the buffer, and does not count as part of
 867 the buffer's contents.
 868
 869 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 870
 871 Points to the current position of lexing inside the lexer buffer.
 872 Characters around this point may be freely examined, within
 873 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 874 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 875 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 876
 877 Lexing code (whether in the Perl core or not) moves this pointer past
 878 the characters that it consumes.  It is also expected to perform some
 879 bookkeeping whenever a newline character is consumed.  This movement
 880 can be more conveniently performed by the function L</lex_read_to>,
 881 which handles newlines appropriately.
 882
 883 Interpretation of the buffer's octets can be abstracted out by
 884 using the slightly higher-level functions L</lex_peek_unichar> and
 885 L</lex_read_unichar>.
 886
 887 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 888
 889 Points to the start of the current line inside the lexer buffer.
 890 This is useful for indicating at which column an error occurred, and
 891 not much else.  This must be updated by any lexing code that consumes
 892 a newline; the function L</lex_read_to> handles this detail.
 893
 894 =cut
 895 */
 896
 897 /*
 898 =for apidoc Amx|bool|lex_bufutf8
 899
 900 Indicates whether the octets in the lexer buffer
 901 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 902 of Unicode characters.  If not, they should be interpreted as Latin-1
 903 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 904
 905 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 906 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 907 encoding.
 908
 909 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 910 is significant, but not the whole story regarding the input character
 911 encoding.  Normally, when a file is being read, the scalar contains octets
 912 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 913 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 914 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 915 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 916 is in effect.  This logic may change in the future; use this function
 917 instead of implementing the logic yourself.
 918
 919 =cut
 920 */
 921
 922 bool
 923 Perl_lex_bufutf8(pTHX)
 924 {
 925     return UTF;
 926 }
 927
 928 /*
 929 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 930
 931 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 932 at least C<len> octets (including terminating C<NUL>).  Returns a
 933 pointer to the reallocated buffer.  This is necessary before making
 934 any direct modification of the buffer that would increase its length.
 935 L</lex_stuff_pvn> provides a more convenient way to insert text into
 936 the buffer.
 937
 938 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 939 this function updates all of the lexer's variables that point directly
 940 into the buffer.
 941
 942 =cut
 943 */
 944
 945 char *
 946 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 947 {
 948     SV *linestr;
 949     char *buf;
 950     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 951     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 952     bool current;
 953
 954     linestr = PL_parser->linestr;
 955     buf = SvPVX(linestr);
 956     if (len <= SvLEN(linestr))
 957         return buf;
 958
 959     /* Is the lex_shared linestr SV the same as the current linestr SV?
 960      * Only in this case does re_eval_start need adjusting, since it
 961      * points within lex_shared->ls_linestr's buffer */
 962     current = (   !PL_parser->lex_shared->ls_linestr
 963                || linestr == PL_parser->lex_shared->ls_linestr);
 964
 965     bufend_pos = PL_parser->bufend - buf;
 966     bufptr_pos = PL_parser->bufptr - buf;
 967     oldbufptr_pos = PL_parser->oldbufptr - buf;
 968     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 969     linestart_pos = PL_parser->linestart - buf;
 970     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 971     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 972     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 973                             PL_parser->lex_shared->re_eval_start - buf : 0;
 974
 975     buf = sv_grow(linestr, len);
 976
 977     PL_parser->bufend = buf + bufend_pos;
 978     PL_parser->bufptr = buf + bufptr_pos;
 979     PL_parser->oldbufptr = buf + oldbufptr_pos;
 980     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 981     PL_parser->linestart = buf + linestart_pos;
 982     if (PL_parser->last_uni)
 983         PL_parser->last_uni = buf + last_uni_pos;
 984     if (PL_parser->last_lop)
 985         PL_parser->last_lop = buf + last_lop_pos;
 986     if (current && PL_parser->lex_shared->re_eval_start)
 987         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 988     return buf;
 989 }
 990
 991 /*
 992 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 993
 994 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 995 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 996 reallocating the buffer if necessary.  This means that lexing code that
 997 runs later will see the characters as if they had appeared in the input.
 998 It is not recommended to do this as part of normal parsing, and most
 999 uses of this facility run the risk of the inserted characters being
1000 interpreted in an unintended manner.
1001
1002 The string to be inserted is represented by C<len> octets starting
1003 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1004 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1005 The characters are recoded for the lexer buffer, according to how the
1006 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1007 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1008 function is more convenient.
1009
1010 =cut
1011 */
1012
1013 void
1014 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1015 {
1016     dVAR;
1017     char *bufptr;
1018     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1019     if (flags & ~(LEX_STUFF_UTF8))
1020         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1021     if (UTF) {
1022         if (flags & LEX_STUFF_UTF8) {
1023             goto plain_copy;
1024         } else {
1025             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1026                                                        (U8 *) pv + len);
1027             const char *p, *e = pv+len;;
1028             if (!highhalf)
1029                 goto plain_copy;
1030             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1031             bufptr = PL_parser->bufptr;
1032             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1033             SvCUR_set(PL_parser->linestr,
1034                 SvCUR(PL_parser->linestr) + len+highhalf);
1035             PL_parser->bufend += len+highhalf;
1036             for (p = pv; p != e; p++) {
1037                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1038             }
1039         }
1040     } else {
1041         if (flags & LEX_STUFF_UTF8) {
1042             STRLEN highhalf = 0;
1043             const char *p, *e = pv+len;
1044             for (p = pv; p != e; p++) {
1045                 U8 c = (U8)*p;
1046                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1047                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1048                                 "non-Latin-1 character into Latin-1 input");
1049                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1050                     p++;
1051                     highhalf++;
1052                 } else assert(UTF8_IS_INVARIANT(c));
1053             }
1054             if (!highhalf)
1055                 goto plain_copy;
1056             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1057             bufptr = PL_parser->bufptr;
1058             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1059             SvCUR_set(PL_parser->linestr,
1060                 SvCUR(PL_parser->linestr) + len-highhalf);
1061             PL_parser->bufend += len-highhalf;
1062             p = pv;
1063             while (p < e) {
1064                 if (UTF8_IS_INVARIANT(*p)) {
1065                     *bufptr++ = *p;
1066                     p++;
1067                 }
1068                 else {
1069                     assert(p < e -1 );
1070                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1071                     p += 2;
1072                 }
1073             }
1074         } else {
1075           plain_copy:
1076             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1077             bufptr = PL_parser->bufptr;
1078             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1079             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1080             PL_parser->bufend += len;
1081             Copy(pv, bufptr, len, char);
1082         }
1083     }
1084 }
1085
1086 /*
1087 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1088
1089 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1090 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1091 reallocating the buffer if necessary.  This means that lexing code that
1092 runs later will see the characters as if they had appeared in the input.
1093 It is not recommended to do this as part of normal parsing, and most
1094 uses of this facility run the risk of the inserted characters being
1095 interpreted in an unintended manner.
1096
1097 The string to be inserted is represented by octets starting at C<pv>
1098 and continuing to the first nul.  These octets are interpreted as either
1099 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1100 in C<flags>.  The characters are recoded for the lexer buffer, according
1101 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1102 If it is not convenient to nul-terminate a string to be inserted, the
1103 L</lex_stuff_pvn> function is more appropriate.
1104
1105 =cut
1106 */
1107
1108 void
1109 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1110 {
1111     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1112     lex_stuff_pvn(pv, strlen(pv), flags);
1113 }
1114
1115 /*
1116 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1117
1118 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1119 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1120 reallocating the buffer if necessary.  This means that lexing code that
1121 runs later will see the characters as if they had appeared in the input.
1122 It is not recommended to do this as part of normal parsing, and most
1123 uses of this facility run the risk of the inserted characters being
1124 interpreted in an unintended manner.
1125
1126 The string to be inserted is the string value of C<sv>.  The characters
1127 are recoded for the lexer buffer, according to how the buffer is currently
1128 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1129 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1130 need to construct a scalar.
1131
1132 =cut
1133 */
1134
1135 void
1136 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1137 {
1138     char *pv;
1139     STRLEN len;
1140     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1141     if (flags)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1143     pv = SvPV(sv, len);
1144     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_unstuff|char *ptr
1149
1150 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1151 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1152 This hides the discarded text from any lexing code that runs later,
1153 as if the text had never appeared.
1154
1155 This is not the normal way to consume lexed text.  For that, use
1156 L</lex_read_to>.
1157
1158 =cut
1159 */
1160
1161 void
1162 Perl_lex_unstuff(pTHX_ char *ptr)
1163 {
1164     char *buf, *bufend;
1165     STRLEN unstuff_len;
1166     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1167     buf = PL_parser->bufptr;
1168     if (ptr < buf)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1170     if (ptr == buf)
1171         return;
1172     bufend = PL_parser->bufend;
1173     if (ptr > bufend)
1174         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1175     unstuff_len = ptr - buf;
1176     Move(ptr, buf, bufend+1-ptr, char);
1177     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1178     PL_parser->bufend = bufend - unstuff_len;
1179 }
1180
1181 /*
1182 =for apidoc Amx|void|lex_read_to|char *ptr
1183
1184 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1185 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1186 performing the correct bookkeeping whenever a newline character is passed.
1187 This is the normal way to consume lexed text.
1188
1189 Interpretation of the buffer's octets can be abstracted out by
1190 using the slightly higher-level functions L</lex_peek_unichar> and
1191 L</lex_read_unichar>.
1192
1193 =cut
1194 */
1195
1196 void
1197 Perl_lex_read_to(pTHX_ char *ptr)
1198 {
1199     char *s;
1200     PERL_ARGS_ASSERT_LEX_READ_TO;
1201     s = PL_parser->bufptr;
1202     if (ptr < s || ptr > PL_parser->bufend)
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1204     for (; s != ptr; s++)
1205         if (*s == '\n') {
1206             COPLINE_INC_WITH_HERELINES;
1207             PL_parser->linestart = s+1;
1208         }
1209     PL_parser->bufptr = ptr;
1210 }
1211
1212 /*
1213 =for apidoc Amx|void|lex_discard_to|char *ptr
1214
1215 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1216 up to C<ptr>.  The remaining content of the buffer will be moved, and
1217 all pointers into the buffer updated appropriately.  C<ptr> must not
1218 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1219 it is not permitted to discard text that has yet to be lexed.
1220
1221 Normally it is not necessarily to do this directly, because it suffices to
1222 use the implicit discarding behaviour of L</lex_next_chunk> and things
1223 based on it.  However, if a token stretches across multiple lines,
1224 and the lexing code has kept multiple lines of text in the buffer for
1225 that purpose, then after completion of the token it would be wise to
1226 explicitly discard the now-unneeded earlier lines, to avoid future
1227 multi-line tokens growing the buffer without bound.
1228
1229 =cut
1230 */
1231
1232 void
1233 Perl_lex_discard_to(pTHX_ char *ptr)
1234 {
1235     char *buf;
1236     STRLEN discard_len;
1237     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1238     buf = SvPVX(PL_parser->linestr);
1239     if (ptr < buf)
1240         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1241     if (ptr == buf)
1242         return;
1243     if (ptr > PL_parser->bufptr)
1244         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1245     discard_len = ptr - buf;
1246     if (PL_parser->oldbufptr < ptr)
1247         PL_parser->oldbufptr = ptr;
1248     if (PL_parser->oldoldbufptr < ptr)
1249         PL_parser->oldoldbufptr = ptr;
1250     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1251         PL_parser->last_uni = NULL;
1252     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1253         PL_parser->last_lop = NULL;
1254     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1255     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1256     PL_parser->bufend -= discard_len;
1257     PL_parser->bufptr -= discard_len;
1258     PL_parser->oldbufptr -= discard_len;
1259     PL_parser->oldoldbufptr -= discard_len;
1260     if (PL_parser->last_uni)
1261         PL_parser->last_uni -= discard_len;
1262     if (PL_parser->last_lop)
1263         PL_parser->last_lop -= discard_len;
1264 }
1265
1266 void
1267 Perl_notify_parser_that_changed_to_utf8(pTHX)
1268 {
1269     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1270      * off to on.  At compile time, this has the effect of entering a 'use
1271      * utf8' section.  This means that any input was not previously checked for
1272      * UTF-8 (because it was off), but now we do need to check it, or our
1273      * assumptions about the input being sane could be wrong, and we could
1274      * segfault.  This routine just sets a flag so that the next time we look
1275      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1276      * proper phase, there may not be a parser object, but if there is, setting
1277      * the flag is harmless */
1278
1279     if (PL_parser) {
1280         PL_parser->recheck_utf8_validity = TRUE;
1281     }
1282 }
1283
1284 /*
1285 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1286
1287 Reads in the next chunk of text to be lexed, appending it to
1288 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1289 looked to the end of the current chunk and wants to know more.  It is
1290 usual, but not necessary, for lexing to have consumed the entirety of
1291 the current chunk at this time.
1292
1293 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1294 chunk (i.e., the current chunk has been entirely consumed), normally the
1295 current chunk will be discarded at the same time that the new chunk is
1296 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1297 will not be discarded.  If the current chunk has not been entirely
1298 consumed, then it will not be discarded regardless of the flag.
1299
1300 Returns true if some new text was added to the buffer, or false if the
1301 buffer has reached the end of the input text.
1302
1303 =cut
1304 */
1305
1306 #define LEX_FAKE_EOF 0x80000000
1307 #define LEX_NO_TERM  0x40000000 /* here-doc */
1308
1309 bool
1310 Perl_lex_next_chunk(pTHX_ U32 flags)
1311 {
1312     SV *linestr;
1313     char *buf;
1314     STRLEN old_bufend_pos, new_bufend_pos;
1315     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1316     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1317     bool got_some_for_debugger = 0;
1318     bool got_some;
1319
1320     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1321         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1322     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1323         return FALSE;
1324     linestr = PL_parser->linestr;
1325     buf = SvPVX(linestr);
1326     if (!(flags & LEX_KEEP_PREVIOUS)
1327           && PL_parser->bufptr == PL_parser->bufend)
1328     {
1329         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1330         linestart_pos = 0;
1331         if (PL_parser->last_uni != PL_parser->bufend)
1332             PL_parser->last_uni = NULL;
1333         if (PL_parser->last_lop != PL_parser->bufend)
1334             PL_parser->last_lop = NULL;
1335         last_uni_pos = last_lop_pos = 0;
1336         *buf = 0;
1337         SvCUR(linestr) = 0;
1338     } else {
1339         old_bufend_pos = PL_parser->bufend - buf;
1340         bufptr_pos = PL_parser->bufptr - buf;
1341         oldbufptr_pos = PL_parser->oldbufptr - buf;
1342         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1343         linestart_pos = PL_parser->linestart - buf;
1344         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1345         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1346     }
1347     if (flags & LEX_FAKE_EOF) {
1348         goto eof;
1349     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1350         got_some = 0;
1351     } else if (filter_gets(linestr, old_bufend_pos)) {
1352         got_some = 1;
1353         got_some_for_debugger = 1;
1354     } else if (flags & LEX_NO_TERM) {
1355         got_some = 0;
1356     } else {
1357         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1358             SvPVCLEAR(linestr);
1359         eof:
1360         /* End of real input.  Close filehandle (unless it was STDIN),
1361          * then add implicit termination.
1362          */
1363         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1364             PerlIO_clearerr(PL_parser->rsfp);
1365         else if (PL_parser->rsfp)
1366             (void)PerlIO_close(PL_parser->rsfp);
1367         PL_parser->rsfp = NULL;
1368         PL_parser->in_pod = PL_parser->filtered = 0;
1369         if (!PL_in_eval && PL_minus_p) {
1370             sv_catpvs(linestr,
1371                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1372             PL_minus_n = PL_minus_p = 0;
1373         } else if (!PL_in_eval && PL_minus_n) {
1374             sv_catpvs(linestr, /*{*/";}");
1375             PL_minus_n = 0;
1376         } else
1377             sv_catpvs(linestr, ";");
1378         got_some = 1;
1379     }
1380     buf = SvPVX(linestr);
1381     new_bufend_pos = SvCUR(linestr);
1382     PL_parser->bufend = buf + new_bufend_pos;
1383     PL_parser->bufptr = buf + bufptr_pos;
1384
1385     if (UTF) {
1386         const U8* first_bad_char_loc;
1387         if (UNLIKELY(! is_utf8_string_loc(
1388                             (U8 *) PL_parser->bufptr,
1389                                    PL_parser->bufend - PL_parser->bufptr,
1390                                    &first_bad_char_loc)))
1391         {
1392             _force_out_malformed_utf8_message(first_bad_char_loc,
1393                                               (U8 *) PL_parser->bufend,
1394                                               0,
1395                                               1 /* 1 means die */ );
1396             NOT_REACHED; /* NOTREACHED */
1397         }
1398     }
1399
1400     PL_parser->oldbufptr = buf + oldbufptr_pos;
1401     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1402     PL_parser->linestart = buf + linestart_pos;
1403     if (PL_parser->last_uni)
1404         PL_parser->last_uni = buf + last_uni_pos;
1405     if (PL_parser->last_lop)
1406         PL_parser->last_lop = buf + last_lop_pos;
1407     if (PL_parser->preambling != NOLINE) {
1408         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1409         PL_parser->preambling = NOLINE;
1410     }
1411     if (   got_some_for_debugger
1412         && PERLDB_LINE_OR_SAVESRC
1413         && PL_curstash != PL_debstash)
1414     {
1415         /* debugger active and we're not compiling the debugger code,
1416          * so store the line into the debugger's array of lines
1417          */
1418         update_debugger_info(NULL, buf+old_bufend_pos,
1419             new_bufend_pos-old_bufend_pos);
1420     }
1421     return got_some;
1422 }
1423
1424 /*
1425 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1426
1427 Looks ahead one (Unicode) character in the text currently being lexed.
1428 Returns the codepoint (unsigned integer value) of the next character,
1429 or -1 if lexing has reached the end of the input text.  To consume the
1430 peeked character, use L</lex_read_unichar>.
1431
1432 If the next character is in (or extends into) the next chunk of input
1433 text, the next chunk will be read in.  Normally the current chunk will be
1434 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1435 bit set, then the current chunk will not be discarded.
1436
1437 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1438 is encountered, an exception is generated.
1439
1440 =cut
1441 */
1442
1443 I32
1444 Perl_lex_peek_unichar(pTHX_ U32 flags)
1445 {
1446     dVAR;
1447     char *s, *bufend;
1448     if (flags & ~(LEX_KEEP_PREVIOUS))
1449         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1450     s = PL_parser->bufptr;
1451     bufend = PL_parser->bufend;
1452     if (UTF) {
1453         U8 head;
1454         I32 unichar;
1455         STRLEN len, retlen;
1456         if (s == bufend) {
1457             if (!lex_next_chunk(flags))
1458                 return -1;
1459             s = PL_parser->bufptr;
1460             bufend = PL_parser->bufend;
1461         }
1462         head = (U8)*s;
1463         if (UTF8_IS_INVARIANT(head))
1464             return head;
1465         if (UTF8_IS_START(head)) {
1466             len = UTF8SKIP(&head);
1467             while ((STRLEN)(bufend-s) < len) {
1468                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1469                     break;
1470                 s = PL_parser->bufptr;
1471                 bufend = PL_parser->bufend;
1472             }
1473         }
1474         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1475         if (retlen == (STRLEN)-1) {
1476             _force_out_malformed_utf8_message((U8 *) s,
1477                                               (U8 *) bufend,
1478                                               0,
1479                                               1 /* 1 means die */ );
1480             NOT_REACHED; /* NOTREACHED */
1481         }
1482         return unichar;
1483     } else {
1484         if (s == bufend) {
1485             if (!lex_next_chunk(flags))
1486                 return -1;
1487             s = PL_parser->bufptr;
1488         }
1489         return (U8)*s;
1490     }
1491 }
1492
1493 /*
1494 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1495
1496 Reads the next (Unicode) character in the text currently being lexed.
1497 Returns the codepoint (unsigned integer value) of the character read,
1498 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1499 if lexing has reached the end of the input text.  To non-destructively
1500 examine the next character, use L</lex_peek_unichar> instead.
1501
1502 If the next character is in (or extends into) the next chunk of input
1503 text, the next chunk will be read in.  Normally the current chunk will be
1504 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1505 bit set, then the current chunk will not be discarded.
1506
1507 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1508 is encountered, an exception is generated.
1509
1510 =cut
1511 */
1512
1513 I32
1514 Perl_lex_read_unichar(pTHX_ U32 flags)
1515 {
1516     I32 c;
1517     if (flags & ~(LEX_KEEP_PREVIOUS))
1518         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1519     c = lex_peek_unichar(flags);
1520     if (c != -1) {
1521         if (c == '\n')
1522             COPLINE_INC_WITH_HERELINES;
1523         if (UTF)
1524             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1525         else
1526             ++(PL_parser->bufptr);
1527     }
1528     return c;
1529 }
1530
1531 /*
1532 =for apidoc Amx|void|lex_read_space|U32 flags
1533
1534 Reads optional spaces, in Perl style, in the text currently being
1535 lexed.  The spaces may include ordinary whitespace characters and
1536 Perl-style comments.  C<#line> directives are processed if encountered.
1537 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1538 at a non-space character (or the end of the input text).
1539
1540 If spaces extend into the next chunk of input text, the next chunk will
1541 be read in.  Normally the current chunk will be discarded at the same
1542 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1543 chunk will not be discarded.
1544
1545 =cut
1546 */
1547
1548 #define LEX_NO_INCLINE    0x40000000
1549 #define LEX_NO_NEXT_CHUNK 0x80000000
1550
1551 void
1552 Perl_lex_read_space(pTHX_ U32 flags)
1553 {
1554     char *s, *bufend;
1555     const bool can_incline = !(flags & LEX_NO_INCLINE);
1556     bool need_incline = 0;
1557     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1558         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1559     s = PL_parser->bufptr;
1560     bufend = PL_parser->bufend;
1561     while (1) {
1562         char c = *s;
1563         if (c == '#') {
1564             do {
1565                 c = *++s;
1566             } while (!(c == '\n' || (c == 0 && s == bufend)));
1567         } else if (c == '\n') {
1568             s++;
1569             if (can_incline) {
1570                 PL_parser->linestart = s;
1571                 if (s == bufend)
1572                     need_incline = 1;
1573                 else
1574                     incline(s, bufend);
1575             }
1576         } else if (isSPACE(c)) {
1577             s++;
1578         } else if (c == 0 && s == bufend) {
1579             bool got_more;
1580             line_t l;
1581             if (flags & LEX_NO_NEXT_CHUNK)
1582                 break;
1583             PL_parser->bufptr = s;
1584             l = CopLINE(PL_curcop);
1585             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1586             got_more = lex_next_chunk(flags);
1587             CopLINE_set(PL_curcop, l);
1588             s = PL_parser->bufptr;
1589             bufend = PL_parser->bufend;
1590             if (!got_more)
1591                 break;
1592             if (can_incline && need_incline && PL_parser->rsfp) {
1593                 incline(s, bufend);
1594                 need_incline = 0;
1595             }
1596         } else if (!c) {
1597             s++;
1598         } else {
1599             break;
1600         }
1601     }
1602     PL_parser->bufptr = s;
1603 }
1604
1605 /*
1606
1607 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1608
1609 This function performs syntax checking on a prototype, C<proto>.
1610 If C<warn> is true, any illegal characters or mismatched brackets
1611 will trigger illegalproto warnings, declaring that they were
1612 detected in the prototype for C<name>.
1613
1614 The return value is C<true> if this is a valid prototype, and
1615 C<false> if it is not, regardless of whether C<warn> was C<true> or
1616 C<false>.
1617
1618 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1619
1620 =cut
1621
1622  */
1623
1624 bool
1625 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1626 {
1627     STRLEN len, origlen;
1628     char *p;
1629     bool bad_proto = FALSE;
1630     bool in_brackets = FALSE;
1631     bool after_slash = FALSE;
1632     char greedy_proto = ' ';
1633     bool proto_after_greedy_proto = FALSE;
1634     bool must_be_last = FALSE;
1635     bool underscore = FALSE;
1636     bool bad_proto_after_underscore = FALSE;
1637
1638     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1639
1640     if (!proto)
1641         return TRUE;
1642
1643     p = SvPV(proto, len);
1644     origlen = len;
1645     for (; len--; p++) {
1646         if (!isSPACE(*p)) {
1647             if (must_be_last)
1648                 proto_after_greedy_proto = TRUE;
1649             if (underscore) {
1650                 if (!strchr(";@%", *p))
1651                     bad_proto_after_underscore = TRUE;
1652                 underscore = FALSE;
1653             }
1654             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1655                 bad_proto = TRUE;
1656             }
1657             else {
1658                 if (*p == '[')
1659                     in_brackets = TRUE;
1660                 else if (*p == ']')
1661                     in_brackets = FALSE;
1662                 else if ((*p == '@' || *p == '%')
1663                          && !after_slash
1664                          && !in_brackets )
1665                 {
1666                     must_be_last = TRUE;
1667                     greedy_proto = *p;
1668                 }
1669                 else if (*p == '_')
1670                     underscore = TRUE;
1671             }
1672             if (*p == '\\')
1673                 after_slash = TRUE;
1674             else
1675                 after_slash = FALSE;
1676         }
1677     }
1678
1679     if (warn) {
1680         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1681         p -= origlen;
1682         p = SvUTF8(proto)
1683             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1684                              origlen, UNI_DISPLAY_ISPRINT)
1685             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1686
1687         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1688             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1689             sv_catpvs(name2, "::");
1690             sv_catsv(name2, (SV *)name);
1691             name = name2;
1692         }
1693
1694         if (proto_after_greedy_proto)
1695             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1696                         "Prototype after '%c' for %" SVf " : %s",
1697                         greedy_proto, SVfARG(name), p);
1698         if (in_brackets)
1699             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1700                         "Missing ']' in prototype for %" SVf " : %s",
1701                         SVfARG(name), p);
1702         if (bad_proto)
1703             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1704                         "Illegal character in prototype for %" SVf " : %s",
1705                         SVfARG(name), p);
1706         if (bad_proto_after_underscore)
1707             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1708                         "Illegal character after '_' in prototype for %" SVf " : %s",
1709                         SVfARG(name), p);
1710     }
1711
1712     return (! (proto_after_greedy_proto || bad_proto) );
1713 }
1714
1715 /*
1716  * S_incline
1717  * This subroutine has nothing to do with tilting, whether at windmills
1718  * or pinball tables.  Its name is short for "increment line".  It
1719  * increments the current line number in CopLINE(PL_curcop) and checks
1720  * to see whether the line starts with a comment of the form
1721  *    # line 500 "foo.pm"
1722  * If so, it sets the current line number and file to the values in the comment.
1723  */
1724
1725 STATIC void
1726 S_incline(pTHX_ const char *s, const char *end)
1727 {
1728     const char *t;
1729     const char *n;
1730     const char *e;
1731     line_t line_num;
1732     UV uv;
1733
1734     PERL_ARGS_ASSERT_INCLINE;
1735
1736     assert(end >= s);
1737
1738     COPLINE_INC_WITH_HERELINES;
1739     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1740      && s+1 == PL_bufend && *s == ';') {
1741         /* fake newline in string eval */
1742         CopLINE_dec(PL_curcop);
1743         return;
1744     }
1745     if (*s++ != '#')
1746         return;
1747     while (SPACE_OR_TAB(*s))
1748         s++;
1749     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1750         s += sizeof("line") - 1;
1751     else
1752         return;
1753     if (SPACE_OR_TAB(*s))
1754         s++;
1755     else
1756         return;
1757     while (SPACE_OR_TAB(*s))
1758         s++;
1759     if (!isDIGIT(*s))
1760         return;
1761
1762     n = s;
1763     while (isDIGIT(*s))
1764         s++;
1765     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1766         return;
1767     while (SPACE_OR_TAB(*s))
1768         s++;
1769     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1770         s++;
1771         e = t + 1;
1772     }
1773     else {
1774         t = s;
1775         while (*t && !isSPACE(*t))
1776             t++;
1777         e = t;
1778     }
1779     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1780         e++;
1781     if (*e != '\n' && *e != '\0')
1782         return;         /* false alarm */
1783
1784     if (!grok_atoUV(n, &uv, &e))
1785         return;
1786     line_num = ((line_t)uv) - 1;
1787
1788     if (t - s > 0) {
1789         const STRLEN len = t - s;
1790
1791         if (!PL_rsfp && !PL_parser->filtered) {
1792             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1793              * to *{"::_<newfilename"} */
1794             /* However, the long form of evals is only turned on by the
1795                debugger - usually they're "(eval %lu)" */
1796             GV * const cfgv = CopFILEGV(PL_curcop);
1797             if (cfgv) {
1798                 char smallbuf[128];
1799                 STRLEN tmplen2 = len;
1800                 char *tmpbuf2;
1801                 GV *gv2;
1802
1803                 if (tmplen2 + 2 <= sizeof smallbuf)
1804                     tmpbuf2 = smallbuf;
1805                 else
1806                     Newx(tmpbuf2, tmplen2 + 2, char);
1807
1808                 tmpbuf2[0] = '_';
1809                 tmpbuf2[1] = '<';
1810
1811                 memcpy(tmpbuf2 + 2, s, tmplen2);
1812                 tmplen2 += 2;
1813
1814                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1815                 if (!isGV(gv2)) {
1816                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1817                     /* adjust ${"::_<newfilename"} to store the new file name */
1818                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1819                     /* The line number may differ. If that is the case,
1820                        alias the saved lines that are in the array.
1821                        Otherwise alias the whole array. */
1822                     if (CopLINE(PL_curcop) == line_num) {
1823                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1824                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1825                     }
1826                     else if (GvAV(cfgv)) {
1827                         AV * const av = GvAV(cfgv);
1828                         const line_t start = CopLINE(PL_curcop)+1;
1829                         SSize_t items = AvFILLp(av) - start;
1830                         if (items > 0) {
1831                             AV * const av2 = GvAVn(gv2);
1832                             SV **svp = AvARRAY(av) + start;
1833                             Size_t l = line_num+1;
1834                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1835                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1836                         }
1837                     }
1838                 }
1839
1840                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1841             }
1842         }
1843         CopFILE_free(PL_curcop);
1844         CopFILE_setn(PL_curcop, s, len);
1845     }
1846     CopLINE_set(PL_curcop, line_num);
1847 }
1848
1849 STATIC void
1850 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1851 {
1852     AV *av = CopFILEAVx(PL_curcop);
1853     if (av) {
1854         SV * sv;
1855         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1856         else {
1857             sv = *av_fetch(av, 0, 1);
1858             SvUPGRADE(sv, SVt_PVMG);
1859         }
1860         if (!SvPOK(sv)) SvPVCLEAR(sv);
1861         if (orig_sv)
1862             sv_catsv(sv, orig_sv);
1863         else
1864             sv_catpvn(sv, buf, len);
1865         if (!SvIOK(sv)) {
1866             (void)SvIOK_on(sv);
1867             SvIV_set(sv, 0);
1868         }
1869         if (PL_parser->preambling == NOLINE)
1870             av_store(av, CopLINE(PL_curcop), sv);
1871     }
1872 }
1873
1874 /*
1875  * skipspace
1876  * Called to gobble the appropriate amount and type of whitespace.
1877  * Skips comments as well.
1878  * Returns the next character after the whitespace that is skipped.
1879  *
1880  * peekspace
1881  * Same thing, but look ahead without incrementing line numbers or
1882  * adjusting PL_linestart.
1883  */
1884
1885 #define skipspace(s) skipspace_flags(s, 0)
1886 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1887
1888 STATIC char *
1889 S_skipspace_flags(pTHX_ char *s, U32 flags)
1890 {
1891     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1892     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1893         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1894             s++;
1895     } else {
1896         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1897         PL_bufptr = s;
1898         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1899                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1900                     LEX_NO_NEXT_CHUNK : 0));
1901         s = PL_bufptr;
1902         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1903         if (PL_linestart > PL_bufptr)
1904             PL_bufptr = PL_linestart;
1905         return s;
1906     }
1907     return s;
1908 }
1909
1910 /*
1911  * S_check_uni
1912  * Check the unary operators to ensure there's no ambiguity in how they're
1913  * used.  An ambiguous piece of code would be:
1914  *     rand + 5
1915  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1916  * the +5 is its argument.
1917  */
1918
1919 STATIC void
1920 S_check_uni(pTHX)
1921 {
1922     const char *s;
1923
1924     if (PL_oldoldbufptr != PL_last_uni)
1925         return;
1926     while (isSPACE(*PL_last_uni))
1927         PL_last_uni++;
1928     s = PL_last_uni;
1929     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1930         s += UTF ? UTF8SKIP(s) : 1;
1931     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1932         return;
1933
1934     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1935                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1936                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1937 }
1938
1939 /*
1940  * LOP : macro to build a list operator.  Its behaviour has been replaced
1941  * with a subroutine, S_lop() for which LOP is just another name.
1942  */
1943
1944 #define LOP(f,x) return lop(f,x,s)
1945
1946 /*
1947  * S_lop
1948  * Build a list operator (or something that might be one).  The rules:
1949  *  - if we have a next token, then it's a list operator (no parens) for
1950  *    which the next token has already been parsed; e.g.,
1951  *       sort foo @args
1952  *       sort foo (@args)
1953  *  - if the next thing is an opening paren, then it's a function
1954  *  - else it's a list operator
1955  */
1956
1957 STATIC I32
1958 S_lop(pTHX_ I32 f, U8 x, char *s)
1959 {
1960     PERL_ARGS_ASSERT_LOP;
1961
1962     pl_yylval.ival = f;
1963     CLINE;
1964     PL_bufptr = s;
1965     PL_last_lop = PL_oldbufptr;
1966     PL_last_lop_op = (OPCODE)f;
1967     if (PL_nexttoke)
1968         goto lstop;
1969     PL_expect = x;
1970     if (*s == '(')
1971         return REPORT(FUNC);
1972     s = skipspace(s);
1973     if (*s == '(')
1974         return REPORT(FUNC);
1975     else {
1976         lstop:
1977         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1978             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1979         return REPORT(LSTOP);
1980     }
1981 }
1982
1983 /*
1984  * S_force_next
1985  * When the lexer realizes it knows the next token (for instance,
1986  * it is reordering tokens for the parser) then it can call S_force_next
1987  * to know what token to return the next time the lexer is called.  Caller
1988  * will need to set PL_nextval[] and possibly PL_expect to ensure
1989  * the lexer handles the token correctly.
1990  */
1991
1992 STATIC void
1993 S_force_next(pTHX_ I32 type)
1994 {
1995 #ifdef DEBUGGING
1996     if (DEBUG_T_TEST) {
1997         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1998         tokereport(type, &NEXTVAL_NEXTTOKE);
1999     }
2000 #endif
2001     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2002     PL_nexttype[PL_nexttoke] = type;
2003     PL_nexttoke++;
2004 }
2005
2006 /*
2007  * S_postderef
2008  *
2009  * This subroutine handles postfix deref syntax after the arrow has already
2010  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2011  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2012  * only the first, leaving yylex to find the next.
2013  */
2014
2015 static int
2016 S_postderef(pTHX_ int const funny, char const next)
2017 {
2018     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2019     if (next == '*') {
2020         PL_expect = XOPERATOR;
2021         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2022             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2023             PL_lex_state = LEX_INTERPEND;
2024             if ('@' == funny)
2025                 force_next(POSTJOIN);
2026         }
2027         force_next(next);
2028         PL_bufptr+=2;
2029     }
2030     else {
2031         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2032          && !PL_lex_brackets)
2033             PL_lex_dojoin = 2;
2034         PL_expect = XOPERATOR;
2035         PL_bufptr++;
2036     }
2037     return funny;
2038 }
2039
2040 void
2041 Perl_yyunlex(pTHX)
2042 {
2043     int yyc = PL_parser->yychar;
2044     if (yyc != YYEMPTY) {
2045         if (yyc) {
2046             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2047             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2048                 PL_lex_allbrackets--;
2049                 PL_lex_brackets--;
2050                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2051             } else if (yyc == '('/*)*/) {
2052                 PL_lex_allbrackets--;
2053                 yyc |= (2<<24);
2054             }
2055             force_next(yyc);
2056         }
2057         PL_parser->yychar = YYEMPTY;
2058     }
2059 }
2060
2061 STATIC SV *
2062 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2063 {
2064     SV * const sv = newSVpvn_utf8(start, len,
2065                     ! IN_BYTES
2066                   &&  UTF
2067                   &&  len != 0
2068                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2069     return sv;
2070 }
2071
2072 /*
2073  * S_force_word
2074  * When the lexer knows the next thing is a word (for instance, it has
2075  * just seen -> and it knows that the next char is a word char, then
2076  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2077  * lookahead.
2078  *
2079  * Arguments:
2080  *   char *start : buffer position (must be within PL_linestr)
2081  *   int token   : PL_next* will be this type of bare word
2082  *                 (e.g., METHOD,BAREWORD)
2083  *   int check_keyword : if true, Perl checks to make sure the word isn't
2084  *       a keyword (do this if the word is a label, e.g. goto FOO)
2085  *   int allow_pack : if true, : characters will also be allowed (require,
2086  *       use, etc. do this)
2087  */
2088
2089 STATIC char *
2090 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2091 {
2092     char *s;
2093     STRLEN len;
2094
2095     PERL_ARGS_ASSERT_FORCE_WORD;
2096
2097     start = skipspace(start);
2098     s = start;
2099     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2100         || (allow_pack && *s == ':' && s[1] == ':') )
2101     {
2102         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2103         if (check_keyword) {
2104           char *s2 = PL_tokenbuf;
2105           STRLEN len2 = len;
2106           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2107             s2 += sizeof("CORE::") - 1;
2108             len2 -= sizeof("CORE::") - 1;
2109           }
2110           if (keyword(s2, len2, 0))
2111             return start;
2112         }
2113         if (token == METHOD) {
2114             s = skipspace(s);
2115             if (*s == '(')
2116                 PL_expect = XTERM;
2117             else {
2118                 PL_expect = XOPERATOR;
2119             }
2120         }
2121         NEXTVAL_NEXTTOKE.opval
2122             = newSVOP(OP_CONST,0,
2123                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2124         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2125         force_next(token);
2126     }
2127     return s;
2128 }
2129
2130 /*
2131  * S_force_ident
2132  * Called when the lexer wants $foo *foo &foo etc, but the program
2133  * text only contains the "foo" portion.  The first argument is a pointer
2134  * to the "foo", and the second argument is the type symbol to prefix.
2135  * Forces the next token to be a "BAREWORD".
2136  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2137  */
2138
2139 STATIC void
2140 S_force_ident(pTHX_ const char *s, int kind)
2141 {
2142     PERL_ARGS_ASSERT_FORCE_IDENT;
2143
2144     if (s[0]) {
2145         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2146         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2147                                                                 UTF ? SVf_UTF8 : 0));
2148         NEXTVAL_NEXTTOKE.opval = o;
2149         force_next(BAREWORD);
2150         if (kind) {
2151             o->op_private = OPpCONST_ENTERED;
2152             /* XXX see note in pp_entereval() for why we forgo typo
2153                warnings if the symbol must be introduced in an eval.
2154                GSAR 96-10-12 */
2155             gv_fetchpvn_flags(s, len,
2156                               (PL_in_eval ? GV_ADDMULTI
2157                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2158                               kind == '$' ? SVt_PV :
2159                               kind == '@' ? SVt_PVAV :
2160                               kind == '%' ? SVt_PVHV :
2161                               SVt_PVGV
2162                               );
2163         }
2164     }
2165 }
2166
2167 static void
2168 S_force_ident_maybe_lex(pTHX_ char pit)
2169 {
2170     NEXTVAL_NEXTTOKE.ival = pit;
2171     force_next('p');
2172 }
2173
2174 NV
2175 Perl_str_to_version(pTHX_ SV *sv)
2176 {
2177     NV retval = 0.0;
2178     NV nshift = 1.0;
2179     STRLEN len;
2180     const char *start = SvPV_const(sv,len);
2181     const char * const end = start + len;
2182     const bool utf = cBOOL(SvUTF8(sv));
2183
2184     PERL_ARGS_ASSERT_STR_TO_VERSION;
2185
2186     while (start < end) {
2187         STRLEN skip;
2188         UV n;
2189         if (utf)
2190             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2191         else {
2192             n = *(U8*)start;
2193             skip = 1;
2194         }
2195         retval += ((NV)n)/nshift;
2196         start += skip;
2197         nshift *= 1000;
2198     }
2199     return retval;
2200 }
2201
2202 /*
2203  * S_force_version
2204  * Forces the next token to be a version number.
2205  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2206  * and if "guessing" is TRUE, then no new token is created (and the caller
2207  * must use an alternative parsing method).
2208  */
2209
2210 STATIC char *
2211 S_force_version(pTHX_ char *s, int guessing)
2212 {
2213     OP *version = NULL;
2214     char *d;
2215
2216     PERL_ARGS_ASSERT_FORCE_VERSION;
2217
2218     s = skipspace(s);
2219
2220     d = s;
2221     if (*d == 'v')
2222         d++;
2223     if (isDIGIT(*d)) {
2224         while (isDIGIT(*d) || *d == '_' || *d == '.')
2225             d++;
2226         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2227             SV *ver;
2228             s = scan_num(s, &pl_yylval);
2229             version = pl_yylval.opval;
2230             ver = cSVOPx(version)->op_sv;
2231             if (SvPOK(ver) && !SvNIOK(ver)) {
2232                 SvUPGRADE(ver, SVt_PVNV);
2233                 SvNV_set(ver, str_to_version(ver));
2234                 SvNOK_on(ver);          /* hint that it is a version */
2235             }
2236         }
2237         else if (guessing) {
2238             return s;
2239         }
2240     }
2241
2242     /* NOTE: The parser sees the package name and the VERSION swapped */
2243     NEXTVAL_NEXTTOKE.opval = version;
2244     force_next(BAREWORD);
2245
2246     return s;
2247 }
2248
2249 /*
2250  * S_force_strict_version
2251  * Forces the next token to be a version number using strict syntax rules.
2252  */
2253
2254 STATIC char *
2255 S_force_strict_version(pTHX_ char *s)
2256 {
2257     OP *version = NULL;
2258     const char *errstr = NULL;
2259
2260     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2261
2262     while (isSPACE(*s)) /* leading whitespace */
2263         s++;
2264
2265     if (is_STRICT_VERSION(s,&errstr)) {
2266         SV *ver = newSV(0);
2267         s = (char *)scan_version(s, ver, 0);
2268         version = newSVOP(OP_CONST, 0, ver);
2269     }
2270     else if ((*s != ';' && *s != '{' && *s != '}' )
2271              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2272     {
2273         PL_bufptr = s;
2274         if (errstr)
2275             yyerror(errstr); /* version required */
2276         return s;
2277     }
2278
2279     /* NOTE: The parser sees the package name and the VERSION swapped */
2280     NEXTVAL_NEXTTOKE.opval = version;
2281     force_next(BAREWORD);
2282
2283     return s;
2284 }
2285
2286 /*
2287  * S_tokeq
2288  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2289  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2290  * unchanged, and a new SV containing the modified input is returned.
2291  */
2292
2293 STATIC SV *
2294 S_tokeq(pTHX_ SV *sv)
2295 {
2296     char *s;
2297     char *send;
2298     char *d;
2299     SV *pv = sv;
2300
2301     PERL_ARGS_ASSERT_TOKEQ;
2302
2303     assert (SvPOK(sv));
2304     assert (SvLEN(sv));
2305     assert (!SvIsCOW(sv));
2306     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2307         goto finish;
2308     s = SvPVX(sv);
2309     send = SvEND(sv);
2310     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2311     while (s < send && !(*s == '\\' && s[1] == '\\'))
2312         s++;
2313     if (s == send)
2314         goto finish;
2315     d = s;
2316     if ( PL_hints & HINT_NEW_STRING ) {
2317         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2318                             SVs_TEMP | SvUTF8(sv));
2319     }
2320     while (s < send) {
2321         if (*s == '\\') {
2322             if (s + 1 < send && (s[1] == '\\'))
2323                 s++;            /* all that, just for this */
2324         }
2325         *d++ = *s++;
2326     }
2327     *d = '\0';
2328     SvCUR_set(sv, d - SvPVX_const(sv));
2329   finish:
2330     if ( PL_hints & HINT_NEW_STRING )
2331        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2332     return sv;
2333 }
2334
2335 /*
2336  * Now come three functions related to double-quote context,
2337  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2338  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2339  * interact with PL_lex_state, and create fake ( ... ) argument lists
2340  * to handle functions and concatenation.
2341  * For example,
2342  *   "foo\lbar"
2343  * is tokenised as
2344  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2345  */
2346
2347 /*
2348  * S_sublex_start
2349  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2350  *
2351  * Pattern matching will set PL_lex_op to the pattern-matching op to
2352  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2353  *
2354  * OP_CONST is easy--just make the new op and return.
2355  *
2356  * Everything else becomes a FUNC.
2357  *
2358  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2359  * had an OP_CONST.  This just sets us up for a
2360  * call to S_sublex_push().
2361  */
2362
2363 STATIC I32
2364 S_sublex_start(pTHX)
2365 {
2366     const I32 op_type = pl_yylval.ival;
2367
2368     if (op_type == OP_NULL) {
2369         pl_yylval.opval = PL_lex_op;
2370         PL_lex_op = NULL;
2371         return THING;
2372     }
2373     if (op_type == OP_CONST) {
2374         SV *sv = PL_lex_stuff;
2375         PL_lex_stuff = NULL;
2376         sv = tokeq(sv);
2377
2378         if (SvTYPE(sv) == SVt_PVIV) {
2379             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2380             STRLEN len;
2381             const char * const p = SvPV_const(sv, len);
2382             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2383             SvREFCNT_dec(sv);
2384             sv = nsv;
2385         }
2386         pl_yylval.opval = newSVOP(op_type, 0, sv);
2387         return THING;
2388     }
2389
2390     PL_parser->lex_super_state = PL_lex_state;
2391     PL_parser->lex_sub_inwhat = (U16)op_type;
2392     PL_parser->lex_sub_op = PL_lex_op;
2393     PL_parser->sub_no_recover = FALSE;
2394     PL_parser->sub_error_count = PL_error_count;
2395     PL_lex_state = LEX_INTERPPUSH;
2396
2397     PL_expect = XTERM;
2398     if (PL_lex_op) {
2399         pl_yylval.opval = PL_lex_op;
2400         PL_lex_op = NULL;
2401         return PMFUNC;
2402     }
2403     else
2404         return FUNC;
2405 }
2406
2407 /*
2408  * S_sublex_push
2409  * Create a new scope to save the lexing state.  The scope will be
2410  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2411  * to the uc, lc, etc. found before.
2412  * Sets PL_lex_state to LEX_INTERPCONCAT.
2413  */
2414
2415 STATIC I32
2416 S_sublex_push(pTHX)
2417 {
2418     LEXSHARED *shared;
2419     const bool is_heredoc = PL_multi_close == '<';
2420     ENTER;
2421
2422     PL_lex_state = PL_parser->lex_super_state;
2423     SAVEI8(PL_lex_dojoin);
2424     SAVEI32(PL_lex_brackets);
2425     SAVEI32(PL_lex_allbrackets);
2426     SAVEI32(PL_lex_formbrack);
2427     SAVEI8(PL_lex_fakeeof);
2428     SAVEI32(PL_lex_casemods);
2429     SAVEI32(PL_lex_starts);
2430     SAVEI8(PL_lex_state);
2431     SAVESPTR(PL_lex_repl);
2432     SAVEVPTR(PL_lex_inpat);
2433     SAVEI16(PL_lex_inwhat);
2434     if (is_heredoc)
2435     {
2436         SAVECOPLINE(PL_curcop);
2437         SAVEI32(PL_multi_end);
2438         SAVEI32(PL_parser->herelines);
2439         PL_parser->herelines = 0;
2440     }
2441     SAVEIV(PL_multi_close);
2442     SAVEPPTR(PL_bufptr);
2443     SAVEPPTR(PL_bufend);
2444     SAVEPPTR(PL_oldbufptr);
2445     SAVEPPTR(PL_oldoldbufptr);
2446     SAVEPPTR(PL_last_lop);
2447     SAVEPPTR(PL_last_uni);
2448     SAVEPPTR(PL_linestart);
2449     SAVESPTR(PL_linestr);
2450     SAVEGENERICPV(PL_lex_brackstack);
2451     SAVEGENERICPV(PL_lex_casestack);
2452     SAVEGENERICPV(PL_parser->lex_shared);
2453     SAVEBOOL(PL_parser->lex_re_reparsing);
2454     SAVEI32(PL_copline);
2455
2456     /* The here-doc parser needs to be able to peek into outer lexing
2457        scopes to find the body of the here-doc.  So we put PL_linestr and
2458        PL_bufptr into lex_shared, to ‘share’ those values.
2459      */
2460     PL_parser->lex_shared->ls_linestr = PL_linestr;
2461     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2462
2463     PL_linestr = PL_lex_stuff;
2464     PL_lex_repl = PL_parser->lex_sub_repl;
2465     PL_lex_stuff = NULL;
2466     PL_parser->lex_sub_repl = NULL;
2467
2468     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2469        set for an inner quote-like operator and then an error causes scope-
2470        popping.  We must not have a PL_lex_stuff value left dangling, as
2471        that breaks assumptions elsewhere.  See bug #123617.  */
2472     SAVEGENERICSV(PL_lex_stuff);
2473     SAVEGENERICSV(PL_parser->lex_sub_repl);
2474
2475     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2476         = SvPVX(PL_linestr);
2477     PL_bufend += SvCUR(PL_linestr);
2478     PL_last_lop = PL_last_uni = NULL;
2479     SAVEFREESV(PL_linestr);
2480     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2481
2482     PL_lex_dojoin = FALSE;
2483     PL_lex_brackets = PL_lex_formbrack = 0;
2484     PL_lex_allbrackets = 0;
2485     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2486     Newx(PL_lex_brackstack, 120, char);
2487     Newx(PL_lex_casestack, 12, char);
2488     PL_lex_casemods = 0;
2489     *PL_lex_casestack = '\0';
2490     PL_lex_starts = 0;
2491     PL_lex_state = LEX_INTERPCONCAT;
2492     if (is_heredoc)
2493         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2494     PL_copline = NOLINE;
2495
2496     Newxz(shared, 1, LEXSHARED);
2497     shared->ls_prev = PL_parser->lex_shared;
2498     PL_parser->lex_shared = shared;
2499
2500     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2501     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2502     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2503         PL_lex_inpat = PL_parser->lex_sub_op;
2504     else
2505         PL_lex_inpat = NULL;
2506
2507     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2508     PL_in_eval &= ~EVAL_RE_REPARSING;
2509
2510     return '(';
2511 }
2512
2513 /*
2514  * S_sublex_done
2515  * Restores lexer state after a S_sublex_push.
2516  */
2517
2518 STATIC I32
2519 S_sublex_done(pTHX)
2520 {
2521     if (!PL_lex_starts++) {
2522         SV * const sv = newSVpvs("");
2523         if (SvUTF8(PL_linestr))
2524             SvUTF8_on(sv);
2525         PL_expect = XOPERATOR;
2526         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2527         return THING;
2528     }
2529
2530     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2531         PL_lex_state = LEX_INTERPCASEMOD;
2532         return yylex();
2533     }
2534
2535     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2536     assert(PL_lex_inwhat != OP_TRANSR);
2537     if (PL_lex_repl) {
2538         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2539         PL_linestr = PL_lex_repl;
2540         PL_lex_inpat = 0;
2541         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2542         PL_bufend += SvCUR(PL_linestr);
2543         PL_last_lop = PL_last_uni = NULL;
2544         PL_lex_dojoin = FALSE;
2545         PL_lex_brackets = 0;
2546         PL_lex_allbrackets = 0;
2547         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2548         PL_lex_casemods = 0;
2549         *PL_lex_casestack = '\0';
2550         PL_lex_starts = 0;
2551         if (SvEVALED(PL_lex_repl)) {
2552             PL_lex_state = LEX_INTERPNORMAL;
2553             PL_lex_starts++;
2554             /*  we don't clear PL_lex_repl here, so that we can check later
2555                 whether this is an evalled subst; that means we rely on the
2556                 logic to ensure sublex_done() is called again only via the
2557                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2558         }
2559         else {
2560             PL_lex_state = LEX_INTERPCONCAT;
2561             PL_lex_repl = NULL;
2562         }
2563         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2564             CopLINE(PL_curcop) +=
2565                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2566                  + PL_parser->herelines;
2567             PL_parser->herelines = 0;
2568         }
2569         return '/';
2570     }
2571     else {
2572         const line_t l = CopLINE(PL_curcop);
2573         LEAVE;
2574         if (PL_parser->sub_error_count != PL_error_count) {
2575             if (PL_parser->sub_no_recover) {
2576                 yyquit();
2577                 NOT_REACHED;
2578             }
2579         }
2580         if (PL_multi_close == '<')
2581             PL_parser->herelines += l - PL_multi_end;
2582         PL_bufend = SvPVX(PL_linestr);
2583         PL_bufend += SvCUR(PL_linestr);
2584         PL_expect = XOPERATOR;
2585         return ')';
2586     }
2587 }
2588
2589 STATIC SV*
2590 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2591 {
2592     /* This justs wraps get_and_check_backslash_N_name() to output any error
2593      * message it returns. */
2594
2595     const char * error_msg = NULL;
2596     SV * result;
2597
2598     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2599
2600     /* charnames doesn't work well if there have been errors found */
2601     if (PL_error_count > 0) {
2602         return NULL;
2603     }
2604
2605     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2606
2607     if (error_msg) {
2608         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2609     }
2610
2611     return result;
2612 }
2613
2614 SV*
2615 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2616                                           const char* const e,
2617                                           const bool is_utf8,
2618                                           const char ** error_msg)
2619 {
2620     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2621      * interior, hence to the "}".  Finds what the name resolves to, returning
2622      * an SV* containing it; NULL if no valid one found.
2623      *
2624      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2625      * doesn't have to be. */
2626
2627     SV* res;
2628     HV * table;
2629     SV **cvp;
2630     SV *cv;
2631     SV *rv;
2632     HV *stash;
2633     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2634     dVAR;
2635
2636     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2637
2638     assert(e >= s);
2639     assert(s > (char *) 3);
2640
2641     res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2642
2643     if (!SvCUR(res)) {
2644         SvREFCNT_dec_NN(res);
2645         /* diag_listed_as: Unknown charname '%s' */
2646         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2647         return NULL;
2648     }
2649
2650     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2651                         /* include the <}> */
2652                         e - backslash_ptr + 1, error_msg);
2653     if (! SvPOK(res)) {
2654         SvREFCNT_dec_NN(res);
2655         return NULL;
2656     }
2657
2658     /* See if the charnames handler is the Perl core's, and if so, we can skip
2659      * the validation needed for a user-supplied one, as Perl's does its own
2660      * validation. */
2661     table = GvHV(PL_hintgv);             /* ^H */
2662     cvp = hv_fetchs(table, "charnames", FALSE);
2663     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2664         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2665     {
2666         const char * const name = HvNAME(stash);
2667          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2668            return res;
2669        }
2670     }
2671
2672     /* Here, it isn't Perl's charname handler.  We can't rely on a
2673      * user-supplied handler to validate the input name.  For non-ut8 input,
2674      * look to see that the first character is legal.  Then loop through the
2675      * rest checking that each is a continuation */
2676
2677     /* This code makes the reasonable assumption that the only Latin1-range
2678      * characters that begin a character name alias are alphabetic, otherwise
2679      * would have to create a isCHARNAME_BEGIN macro */
2680
2681     if (! is_utf8) {
2682         if (! isALPHAU(*s)) {
2683             goto bad_charname;
2684         }
2685         s++;
2686         while (s < e) {
2687             if (! isCHARNAME_CONT(*s)) {
2688                 goto bad_charname;
2689             }
2690             if (*s == ' ' && *(s-1) == ' ') {
2691                 goto multi_spaces;
2692             }
2693             s++;
2694         }
2695     }
2696     else {
2697         /* Similarly for utf8.  For invariants can check directly; for other
2698          * Latin1, can calculate their code point and check; otherwise  use a
2699          * swash */
2700         if (UTF8_IS_INVARIANT(*s)) {
2701             if (! isALPHAU(*s)) {
2702                 goto bad_charname;
2703             }
2704             s++;
2705         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2706             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2707                 goto bad_charname;
2708             }
2709             s += 2;
2710         }
2711         else {
2712             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2713                                        utf8_to_uvchr_buf((U8 *) s,
2714                                                          (U8 *) e,
2715                                                          NULL)))
2716             {
2717                 goto bad_charname;
2718             }
2719             s += UTF8SKIP(s);
2720         }
2721
2722         while (s < e) {
2723             if (UTF8_IS_INVARIANT(*s)) {
2724                 if (! isCHARNAME_CONT(*s)) {
2725                     goto bad_charname;
2726                 }
2727                 if (*s == ' ' && *(s-1) == ' ') {
2728                     goto multi_spaces;
2729                 }
2730                 s++;
2731             }
2732             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2733                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2734                 {
2735                     goto bad_charname;
2736                 }
2737                 s += 2;
2738             }
2739             else {
2740                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2741                                            utf8_to_uvchr_buf((U8 *) s,
2742                                                              (U8 *) e,
2743                                                              NULL)))
2744                 {
2745                     goto bad_charname;
2746                 }
2747                 s += UTF8SKIP(s);
2748             }
2749         }
2750     }
2751     if (*(s-1) == ' ') {
2752         /* diag_listed_as: charnames alias definitions may not contain
2753                            trailing white-space; marked by <-- HERE in %s
2754          */
2755         *error_msg = Perl_form(aTHX_
2756             "charnames alias definitions may not contain trailing "
2757             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2758             (int)(s - backslash_ptr + 1), backslash_ptr,
2759             (int)(e - s + 1), s + 1);
2760         return NULL;
2761     }
2762
2763     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2764         const U8* first_bad_char_loc;
2765         STRLEN len;
2766         const char* const str = SvPV_const(res, len);
2767         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2768                                           &first_bad_char_loc)))
2769         {
2770             _force_out_malformed_utf8_message(first_bad_char_loc,
2771                                               (U8 *) PL_parser->bufend,
2772                                               0,
2773                                               0 /* 0 means don't die */ );
2774             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2775                                immediately after '%s' */
2776             *error_msg = Perl_form(aTHX_
2777                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2778                  (int) (e - backslash_ptr + 1), backslash_ptr,
2779                  (int) ((char *) first_bad_char_loc - str), str);
2780             return NULL;
2781         }
2782     }
2783
2784     return res;
2785
2786   bad_charname: {
2787
2788         /* The final %.*s makes sure that should the trailing NUL be missing
2789          * that this print won't run off the end of the string */
2790         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2791                            in \N{%s} */
2792         *error_msg = Perl_form(aTHX_
2793             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2794             (int)(s - backslash_ptr + 1), backslash_ptr,
2795             (int)(e - s + 1), s + 1);
2796         return NULL;
2797     }
2798
2799   multi_spaces:
2800         /* diag_listed_as: charnames alias definitions may not contain a
2801                            sequence of multiple spaces; marked by <-- HERE
2802                            in %s */
2803         *error_msg = Perl_form(aTHX_
2804             "charnames alias definitions may not contain a sequence of "
2805             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2806             (int)(s - backslash_ptr + 1), backslash_ptr,
2807             (int)(e - s + 1), s + 1);
2808         return NULL;
2809 }
2810
2811 /*
2812   scan_const
2813
2814   Extracts the next constant part of a pattern, double-quoted string,
2815   or transliteration.  This is terrifying code.
2816
2817   For example, in parsing the double-quoted string "ab\x63$d", it would
2818   stop at the '$' and return an OP_CONST containing 'abc'.
2819
2820   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2821   processing a pattern (PL_lex_inpat is true), a transliteration
2822   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2823
2824   Returns a pointer to the character scanned up to. If this is
2825   advanced from the start pointer supplied (i.e. if anything was
2826   successfully parsed), will leave an OP_CONST for the substring scanned
2827   in pl_yylval. Caller must intuit reason for not parsing further
2828   by looking at the next characters herself.
2829
2830   In patterns:
2831     expand:
2832       \N{FOO}  => \N{U+hex_for_character_FOO}
2833       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2834
2835     pass through:
2836         all other \-char, including \N and \N{ apart from \N{ABC}
2837
2838     stops on:
2839         @ and $ where it appears to be a var, but not for $ as tail anchor
2840         \l \L \u \U \Q \E
2841         (?{  or  (??{
2842
2843   In transliterations:
2844     characters are VERY literal, except for - not at the start or end
2845     of the string, which indicates a range.  However some backslash sequences
2846     are recognized: \r, \n, and the like
2847                     \007 \o{}, \x{}, \N{}
2848     If all elements in the transliteration are below 256,
2849     scan_const expands the range to the full set of intermediate
2850     characters. If the range is in utf8, the hyphen is replaced with
2851     a certain range mark which will be handled by pmtrans() in op.c.
2852
2853   In double-quoted strings:
2854     backslashes:
2855       all those recognized in transliterations
2856       deprecated backrefs: \1 (in substitution replacements)
2857       case and quoting: \U \Q \E
2858     stops on @ and $
2859
2860   scan_const does *not* construct ops to handle interpolated strings.
2861   It stops processing as soon as it finds an embedded $ or @ variable
2862   and leaves it to the caller to work out what's going on.
2863
2864   embedded arrays (whether in pattern or not) could be:
2865       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2866
2867   $ in double-quoted strings must be the symbol of an embedded scalar.
2868
2869   $ in pattern could be $foo or could be tail anchor.  Assumption:
2870   it's a tail anchor if $ is the last thing in the string, or if it's
2871   followed by one of "()| \r\n\t"
2872
2873   \1 (backreferences) are turned into $1 in substitutions
2874
2875   The structure of the code is
2876       while (there's a character to process) {
2877           handle transliteration ranges
2878           skip regexp comments /(?#comment)/ and codes /(?{code})/
2879           skip #-initiated comments in //x patterns
2880           check for embedded arrays
2881           check for embedded scalars
2882           if (backslash) {
2883               deprecate \1 in substitution replacements
2884               handle string-changing backslashes \l \U \Q \E, etc.
2885               switch (what was escaped) {
2886                   handle \- in a transliteration (becomes a literal -)
2887                   if a pattern and not \N{, go treat as regular character
2888                   handle \132 (octal characters)
2889                   handle \x15 and \x{1234} (hex characters)
2890                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2891                   handle \cV (control characters)
2892                   handle printf-style backslashes (\f, \r, \n, etc)
2893               } (end switch)
2894               continue
2895           } (end if backslash)
2896           handle regular character
2897     } (end while character to read)
2898
2899 */
2900
2901 STATIC char *
2902 S_scan_const(pTHX_ char *start)
2903 {
2904     char *send = PL_bufend;             /* end of the constant */
2905     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2906                                            on sizing. */
2907     char *s = start;                    /* start of the constant */
2908     char *d = SvPVX(sv);                /* destination for copies */
2909     bool dorange = FALSE;               /* are we in a translit range? */
2910     bool didrange = FALSE;              /* did we just finish a range? */
2911     bool in_charclass = FALSE;          /* within /[...]/ */
2912     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2913     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2914                                            UTF8?  But, this can show as true
2915                                            when the source isn't utf8, as for
2916                                            example when it is entirely composed
2917                                            of hex constants */
2918     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2919                                            number of characters found so far
2920                                            that will expand (into 2 bytes)
2921                                            should we have to convert to
2922                                            UTF-8) */
2923     SV *res;                            /* result from charnames */
2924     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
2925                                    high-end character is temporarily placed */
2926
2927     /* Does something require special handling in tr/// ?  This avoids extra
2928      * work in a less likely case.  As such, khw didn't feel it was worth
2929      * adding any branches to the more mainline code to handle this, which
2930      * means that this doesn't get set in some circumstances when things like
2931      * \x{100} get expanded out.  As a result there needs to be extra testing
2932      * done in the tr code */
2933     bool has_above_latin1 = FALSE;
2934
2935     /* Note on sizing:  The scanned constant is placed into sv, which is
2936      * initialized by newSV() assuming one byte of output for every byte of
2937      * input.  This routine expects newSV() to allocate an extra byte for a
2938      * trailing NUL, which this routine will append if it gets to the end of
2939      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2940      * CAPITAL LETTER A}), or more output than input if the constant ends up
2941      * recoded to utf8, but each time a construct is found that might increase
2942      * the needed size, SvGROW() is called.  Its size parameter each time is
2943      * based on the best guess estimate at the time, namely the length used so
2944      * far, plus the length the current construct will occupy, plus room for
2945      * the trailing NUL, plus one byte for every input byte still unscanned */
2946
2947     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2948                        before set */
2949 #ifdef EBCDIC
2950     int backslash_N = 0;            /* ? was the character from \N{} */
2951     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2952                                        platform-specific like \x65 */
2953 #endif
2954
2955     PERL_ARGS_ASSERT_SCAN_CONST;
2956
2957     assert(PL_lex_inwhat != OP_TRANSR);
2958     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2959         /* If we are doing a trans and we know we want UTF8 set expectation */
2960         has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2961         this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2962     }
2963
2964     /* Protect sv from errors and fatal warnings. */
2965     ENTER_with_name("scan_const");
2966     SAVEFREESV(sv);
2967
2968     while (s < send
2969            || dorange   /* Handle tr/// range at right edge of input */
2970     ) {
2971
2972         /* get transliterations out of the way (they're most literal) */
2973         if (PL_lex_inwhat == OP_TRANS) {
2974
2975             /* But there isn't any special handling necessary unless there is a
2976              * range, so for most cases we just drop down and handle the value
2977              * as any other.  There are two exceptions.
2978              *
2979              * 1.  A hyphen indicates that we are actually going to have a
2980              *     range.  In this case, skip the '-', set a flag, then drop
2981              *     down to handle what should be the end range value.
2982              * 2.  After we've handled that value, the next time through, that
2983              *     flag is set and we fix up the range.
2984              *
2985              * Ranges entirely within Latin1 are expanded out entirely, in
2986              * order to make the transliteration a simple table look-up.
2987              * Ranges that extend above Latin1 have to be done differently, so
2988              * there is no advantage to expanding them here, so they are
2989              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2990              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2991              * machines, if the range is expressed as Unicode, the Latin1
2992              * portion is expanded out even if the range extends above
2993              * Latin1.  This is because each code point in it has to be
2994              * processed here individually to get its native translation */
2995
2996             if (! dorange) {
2997
2998                 /* Here, we don't think we're in a range.  If the new character
2999                  * is not a hyphen; or if it is a hyphen, but it's too close to
3000                  * either edge to indicate a range, or if we haven't output any
3001                  * characters yet then it's a regular character. */
3002                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
3003
3004                     /* A regular character.  Process like any other, but first
3005                      * clear any flags */
3006                     didrange = FALSE;
3007                     dorange = FALSE;
3008 #ifdef EBCDIC
3009                     non_portable_endpoint = 0;
3010                     backslash_N = 0;
3011 #endif
3012                     /* The tests here for being above Latin1 and similar ones
3013                      * in the following 'else' suffice to find all such
3014                      * occurences in the constant, except those added by a
3015                      * backslash escape sequence, like \x{100}.  Mostly, those
3016                      * set 'has_above_latin1' as appropriate */
3017                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3018                         has_above_latin1 = TRUE;
3019                     }
3020
3021                     /* Drops down to generic code to process current byte */
3022                 }
3023                 else {  /* Is a '-' in the context where it means a range */
3024                     if (didrange) { /* Something like y/A-C-Z// */
3025                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3026                                          " operator");
3027                     }
3028
3029                     dorange = TRUE;
3030
3031                     s++;    /* Skip past the hyphen */
3032
3033                     /* d now points to where the end-range character will be
3034                      * placed.  Save it so won't have to go finding it later,
3035                      * and drop down to get that character.  (Actually we
3036                      * instead save the offset, to handle the case where a
3037                      * realloc in the meantime could change the actual
3038                      * pointer).  We'll finish processing the range the next
3039                      * time through the loop */
3040                     offset_to_max = d - SvPVX_const(sv);
3041
3042                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3043                         has_above_latin1 = TRUE;
3044                     }
3045
3046                     /* Drops down to generic code to process current byte */
3047                 }
3048             }  /* End of not a range */
3049             else {
3050                 /* Here we have parsed a range.  Now must handle it.  At this
3051                  * point:
3052                  * 'sv' is a SV* that contains the output string we are
3053                  *      constructing.  The final two characters in that string
3054                  *      are the range start and range end, in order.
3055                  * 'd'  points to just beyond the range end in the 'sv' string,
3056                  *      where we would next place something
3057                  * 'offset_to_max' is the offset in 'sv' at which the character
3058                  *      (the range's maximum end point) before 'd'  begins.
3059                  */
3060                 char * max_ptr = SvPVX(sv) + offset_to_max;
3061                 char * min_ptr;
3062                 IV range_min;
3063                 IV range_max;   /* last character in range */
3064                 STRLEN grow;
3065                 Size_t offset_to_min = 0;
3066                 Size_t extras = 0;
3067 #ifdef EBCDIC
3068                 bool convert_unicode;
3069                 IV real_range_max = 0;
3070 #endif
3071                 /* Get the code point values of the range ends. */
3072                 if (has_utf8) {
3073                     /* We know the utf8 is valid, because we just constructed
3074                      * it ourselves in previous loop iterations */
3075                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3076                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3077                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3078
3079                     /* This compensates for not all code setting
3080                      * 'has_above_latin1', so that we don't skip stuff that
3081                      * should be executed */
3082                     if (range_max > 255) {
3083                         has_above_latin1 = TRUE;
3084                     }
3085                 }
3086                 else {
3087                     min_ptr = max_ptr - 1;
3088                     range_min = * (U8*) min_ptr;
3089                     range_max = * (U8*) max_ptr;
3090                 }
3091
3092                 /* If the range is just a single code point, like tr/a-a/.../,
3093                  * that code point is already in the output, twice.  We can
3094                  * just back up over the second instance and avoid all the rest
3095                  * of the work.  But if it is a variant character, it's been
3096                  * counted twice, so decrement.  (This unlikely scenario is
3097                  * special cased, like the one for a range of 2 code points
3098                  * below, only because the main-line code below needs a range
3099                  * of 3 or more to work without special casing.  Might as well
3100                  * get it out of the way now.) */
3101                 if (UNLIKELY(range_max == range_min)) {
3102                     d = max_ptr;
3103                     if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3104                         utf8_variant_count--;
3105                     }
3106                     goto range_done;
3107                 }
3108
3109 #ifdef EBCDIC
3110                 /* On EBCDIC platforms, we may have to deal with portable
3111                  * ranges.  These happen if at least one range endpoint is a
3112                  * Unicode value (\N{...}), or if the range is a subset of
3113                  * [A-Z] or [a-z], and both ends are literal characters,
3114                  * like 'A', and not like \x{C1} */
3115                 convert_unicode =
3116                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3117                                                        hence portable range */
3118                     || (     ! non_portable_endpoint
3119                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3120                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3121                 if (convert_unicode) {
3122
3123                     /* Special handling is needed for these portable ranges.
3124                      * They are defined to be in Unicode terms, which includes
3125                      * all the Unicode code points between the end points.
3126                      * Convert to Unicode to get the Unicode range.  Later we
3127                      * will convert each code point in the range back to
3128                      * native.  */
3129                     range_min = NATIVE_TO_UNI(range_min);
3130                     range_max = NATIVE_TO_UNI(range_max);
3131                 }
3132 #endif
3133
3134                 if (range_min > range_max) {
3135 #ifdef EBCDIC
3136                     if (convert_unicode) {
3137                         /* Need to convert back to native for meaningful
3138                          * messages for this platform */
3139                         range_min = UNI_TO_NATIVE(range_min);
3140                         range_max = UNI_TO_NATIVE(range_max);
3141                     }
3142 #endif
3143                     /* Use the characters themselves for the error message if
3144                      * ASCII printables; otherwise some visible representation
3145                      * of them */
3146                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3147                         Perl_croak(aTHX_
3148                          "Invalid range \"%c-%c\" in transliteration operator",
3149                          (char)range_min, (char)range_max);
3150                     }
3151 #ifdef EBCDIC
3152                     else if (convert_unicode) {
3153         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3154                         Perl_croak(aTHX_
3155                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3156                            UVXf "}\" in transliteration operator",
3157                            range_min, range_max);
3158                     }
3159 #endif
3160                     else {
3161         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3162                         Perl_croak(aTHX_
3163                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3164                            " in transliteration operator",
3165                            range_min, range_max);
3166                     }
3167                 }
3168
3169                 /* If the range is exactly two code points long, they are
3170                  * already both in the output */
3171                 if (UNLIKELY(range_min + 1 == range_max)) {
3172                     goto range_done;
3173                 }
3174
3175                 /* Here the range contains at least 3 code points */
3176
3177                 if (has_utf8) {
3178
3179                     /* If everything in the transliteration is below 256, we
3180                      * can avoid special handling later.  A translation table
3181                      * for each of those bytes is created by op.c.  So we
3182                      * expand out all ranges to their constituent code points.
3183                      * But if we've encountered something above 255, the
3184                      * expanding won't help, so skip doing that.  But if it's
3185                      * EBCDIC, we may have to look at each character below 256
3186                      * if we have to convert to/from Unicode values */
3187                     if (   has_above_latin1
3188 #ifdef EBCDIC
3189                         && (range_min > 255 || ! convert_unicode)
3190 #endif
3191                     ) {
3192                         /* Move the high character one byte to the right; then
3193                          * insert between it and the range begin, an illegal
3194                          * byte which serves to indicate this is a range (using
3195                          * a '-' would be ambiguous). */
3196                         char *e = d++;
3197                         while (e-- > max_ptr) {
3198                             *(e + 1) = *e;
3199                         }
3200                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3201                         goto range_done;
3202                     }
3203
3204                     /* Here, we're going to expand out the range.  For EBCDIC
3205                      * the range can extend above 255 (not so in ASCII), so
3206                      * for EBCDIC, split it into the parts above and below
3207                      * 255/256 */
3208 #ifdef EBCDIC
3209                     if (range_max > 255) {
3210                         real_range_max = range_max;
3211                         range_max = 255;
3212                     }
3213 #endif
3214                 }
3215
3216                 /* Here we need to expand out the string to contain each
3217                  * character in the range.  Grow the output to handle this.
3218                  * For non-UTF8, we need a byte for each code point in the
3219                  * range, minus the three that we've already allocated for: the
3220                  * hyphen, the min, and the max.  For UTF-8, we need this
3221                  * plus an extra byte for each code point that occupies two
3222                  * bytes (is variant) when in UTF-8 (except we've already
3223                  * allocated for the end points, including if they are
3224                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3225                  * platforms, it's easy to calculate a precise number.  To
3226                  * start, we count the variants in the range, which we need
3227                  * elsewhere in this function anyway.  (For the case where it
3228                  * isn't easy to calculate, 'extras' has been initialized to 0,
3229                  * and the calculation is done in a loop further down.) */
3230 #ifdef EBCDIC
3231                 if (convert_unicode)
3232 #endif
3233                 {
3234                     /* This is executed unconditionally on ASCII, and for
3235                      * Unicode ranges on EBCDIC.  Under these conditions, all
3236                      * code points above a certain value are variant; and none
3237                      * under that value are.  We just need to find out how much
3238                      * of the range is above that value.  We don't count the
3239                      * end points here, as they will already have been counted
3240                      * as they were parsed. */
3241                     if (range_min >= UTF_CONTINUATION_MARK) {
3242
3243                         /* The whole range is made up of variants */
3244                         extras = (range_max - 1) - (range_min + 1) + 1;
3245                     }
3246                     else if (range_max >= UTF_CONTINUATION_MARK) {
3247
3248                         /* Only the higher portion of the range is variants */
3249                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3250                     }
3251
3252                     utf8_variant_count += extras;
3253                 }
3254
3255                 /* The base growth is the number of code points in the range,
3256                  * not including the endpoints, which have already been sized
3257                  * for (and output).  We don't subtract for the hyphen, as it
3258                  * has been parsed but not output, and the SvGROW below is
3259                  * based only on what's been output plus what's left to parse.
3260                  * */
3261                 grow = (range_max - 1) - (range_min + 1) + 1;
3262
3263                 if (has_utf8) {
3264 #ifdef EBCDIC
3265                     /* In some cases in EBCDIC, we haven't yet calculated a
3266                      * precise amount needed for the UTF-8 variants.  Just
3267                      * assume the worst case, that everything will expand by a
3268                      * byte */
3269                     if (! convert_unicode) {
3270                         grow *= 2;
3271                     }
3272                     else
3273 #endif
3274                     {
3275                         /* Otherwise we know exactly how many variants there
3276                          * are in the range. */
3277                         grow += extras;
3278                     }
3279                 }
3280
3281                 /* Grow, but position the output to overwrite the range min end
3282                  * point, because in some cases we overwrite that */
3283                 SvCUR_set(sv, d - SvPVX_const(sv));
3284                 offset_to_min = min_ptr - SvPVX_const(sv);
3285
3286                 /* See Note on sizing above. */
3287                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3288                                              + (send - s)
3289                                              + grow
3290                                              + 1 /* Trailing NUL */ );
3291
3292                 /* Now, we can expand out the range. */
3293 #ifdef EBCDIC
3294                 if (convert_unicode) {
3295                     SSize_t i;
3296
3297                     /* Recall that the min and max are now in Unicode terms, so
3298                      * we have to convert each character to its native
3299                      * equivalent */
3300                     if (has_utf8) {
3301                         for (i = range_min; i <= range_max; i++) {
3302                             append_utf8_from_native_byte(
3303                                                     LATIN1_TO_NATIVE((U8) i),
3304                                                     (U8 **) &d);
3305                         }
3306                     }
3307                     else {
3308                         for (i = range_min; i <= range_max; i++) {
3309                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3310                         }
3311                     }
3312                 }
3313                 else
3314 #endif
3315                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3316                 {
3317                     /* Here, no conversions are necessary, which means that the
3318                      * first character in the range is already in 'd' and
3319                      * valid, so we can skip overwriting it */
3320                     if (has_utf8) {
3321                         SSize_t i;
3322                         d += UTF8SKIP(d);
3323                         for (i = range_min + 1; i <= range_max; i++) {
3324                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3325                         }
3326                     }
3327                     else {
3328                         SSize_t i;
3329                         d++;
3330                         assert(range_min + 1 <= range_max);
3331                         for (i = range_min + 1; i < range_max; i++) {
3332 #ifdef EBCDIC
3333                             /* In this case on EBCDIC, we haven't calculated
3334                              * the variants.  Do it here, as we go along */
3335                             if (! UVCHR_IS_INVARIANT(i)) {
3336                                 utf8_variant_count++;
3337                             }
3338 #endif
3339                             *d++ = (char)i;
3340                         }
3341
3342                         /* The range_max is done outside the loop so as to
3343                          * avoid having to special case not incrementing
3344                          * 'utf8_variant_count' on EBCDIC (it's already been
3345                          * counted when originally parsed) */
3346                         *d++ = (char) range_max;
3347                     }
3348                 }
3349
3350 #ifdef EBCDIC
3351                 /* If the original range extended above 255, add in that
3352                  * portion. */
3353                 if (real_range_max) {
3354                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3355                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3356                     if (real_range_max > 0x100) {
3357                         if (real_range_max > 0x101) {
3358                             *d++ = (char) ILLEGAL_UTF8_BYTE;
3359                         }
3360                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3361                     }
3362                 }
3363 #endif
3364
3365               range_done:
3366                 /* mark the range as done, and continue */
3367                 didrange = TRUE;
3368                 dorange = FALSE;
3369 #ifdef EBCDIC
3370                 non_portable_endpoint = 0;
3371                 backslash_N = 0;
3372 #endif
3373                 continue;
3374             } /* End of is a range */
3375         } /* End of transliteration.  Joins main code after these else's */
3376         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3377             char *s1 = s-1;
3378             int esc = 0;
3379             while (s1 >= start && *s1-- == '\\')
3380                 esc = !esc;
3381             if (!esc)
3382                 in_charclass = TRUE;
3383         }
3384         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3385             char *s1 = s-1;
3386             int esc = 0;
3387             while (s1 >= start && *s1-- == '\\')
3388                 esc = !esc;
3389             if (!esc)
3390                 in_charclass = FALSE;
3391         }
3392             /* skip for regexp comments /(?#comment)/, except for the last
3393              * char, which will be done separately.  Stop on (?{..}) and
3394              * friends */
3395         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3396             if (s[2] == '#') {
3397                 while (s+1 < send && *s != ')')
3398                     *d++ = *s++;
3399             }
3400             else if (!PL_lex_casemods
3401                      && (    s[2] == '{' /* This should match regcomp.c */
3402                          || (s[2] == '?' && s[3] == '{')))
3403             {
3404                 break;
3405             }
3406         }
3407             /* likewise skip #-initiated comments in //x patterns */
3408         else if (*s == '#'
3409                  && PL_lex_inpat
3410                  && !in_charclass
3411                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3412         {
3413             while (s < send && *s != '\n')
3414                 *d++ = *s++;
3415         }
3416             /* no further processing of single-quoted regex */
3417         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3418             goto default_action;
3419
3420             /* check for embedded arrays
3421              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3422              */
3423         else if (*s == '@' && s[1]) {
3424             if (UTF
3425                ? isIDFIRST_utf8_safe(s+1, send)
3426                : isWORDCHAR_A(s[1]))
3427             {
3428                 break;
3429             }
3430             if (strchr(":'{$", s[1]))
3431                 break;
3432             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3433                 break; /* in regexp, neither @+ nor @- are interpolated */
3434         }
3435             /* check for embedded scalars.  only stop if we're sure it's a
3436              * variable.  */
3437         else if (*s == '$') {
3438             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3439                 break;
3440             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3441                 if (s[1] == '\\') {
3442                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3443                                    "Possible unintended interpolation of $\\ in regex");
3444                 }
3445                 break;          /* in regexp, $ might be tail anchor */
3446             }
3447         }
3448
3449         /* End of else if chain - OP_TRANS rejoin rest */
3450
3451         if (UNLIKELY(s >= send)) {
3452             assert(s == send);
3453             break;
3454         }
3455
3456         /* backslashes */
3457         if (*s == '\\' && s+1 < send) {
3458             char* e;    /* Can be used for ending '}', etc. */
3459
3460             s++;
3461
3462             /* warn on \1 - \9 in substitution replacements, but note that \11
3463              * is an octal; and \19 is \1 followed by '9' */
3464             if (PL_lex_inwhat == OP_SUBST
3465                 && !PL_lex_inpat
3466                 && isDIGIT(*s)
3467                 && *s != '0'
3468                 && !isDIGIT(s[1]))
3469             {
3470                 /* diag_listed_as: \%d better written as $%d */
3471                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3472                 *--s = '$';
3473                 break;
3474             }
3475
3476             /* string-change backslash escapes */
3477             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3478                 --s;
3479                 break;
3480             }
3481             /* In a pattern, process \N, but skip any other backslash escapes.
3482              * This is because we don't want to translate an escape sequence
3483              * into a meta symbol and have the regex compiler use the meta
3484              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3485              * in spite of this, we do have to process \N here while the proper
3486              * charnames handler is in scope.  See bugs #56444 and #62056.
3487              *
3488              * There is a complication because \N in a pattern may also stand
3489              * for 'match a non-nl', and not mean a charname, in which case its
3490              * processing should be deferred to the regex compiler.  To be a
3491              * charname it must be followed immediately by a '{', and not look
3492              * like \N followed by a curly quantifier, i.e., not something like
3493              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3494              * quantifier */
3495             else if (PL_lex_inpat
3496                     && (*s != 'N'
3497                         || s[1] != '{'
3498                         || regcurly(s + 1)))
3499             {
3500                 *d++ = '\\';
3501                 goto default_action;
3502             }
3503
3504             switch (*s) {
3505             default:
3506                 {
3507                     if ((isALPHANUMERIC(*s)))
3508                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3509                                        "Unrecognized escape \\%c passed through",
3510                                        *s);
3511                     /* default action is to copy the quoted character */
3512                     goto default_action;
3513                 }
3514
3515             /* eg. \132 indicates the octal constant 0132 */
3516             case '0': case '1': case '2': case '3':
3517             case '4': case '5': case '6': case '7':
3518                 {
3519                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3520                     STRLEN len = 3;
3521                     uv = grok_oct(s, &len, &flags, NULL);
3522                     s += len;
3523                     if (len < 3 && s < send && isDIGIT(*s)
3524                         && ckWARN(WARN_MISC))
3525                     {
3526                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3527                                     "%s", form_short_octal_warning(s, len));
3528                     }
3529                 }
3530                 goto NUM_ESCAPE_INSERT;
3531
3532             /* eg. \o{24} indicates the octal constant \024 */
3533             case 'o':
3534                 {
3535                     const char* error;
3536
3537                     bool valid = grok_bslash_o(&s, PL_bufend,
3538                                                &uv, &error,
3539                                                TRUE, /* Output warning */
3540                                                FALSE, /* Not strict */
3541                                                TRUE, /* Output warnings for
3542                                                          non-portables */
3543                                                UTF);
3544                     if (! valid) {
3545                         yyerror(error);
3546                         uv = 0; /* drop through to ensure range ends are set */
3547                     }
3548                     goto NUM_ESCAPE_INSERT;
3549                 }
3550
3551             /* eg. \x24 indicates the hex constant 0x24 */
3552             case 'x':
3553                 {
3554                     const char* error;
3555
3556                     bool valid = grok_bslash_x(&s, PL_bufend,
3557                                                &uv, &error,
3558                                                TRUE, /* Output warning */
3559                                                FALSE, /* Not strict */
3560                                                TRUE,  /* Output warnings for
3561                                                          non-portables */
3562                                                UTF);
3563                     if (! valid) {
3564                         yyerror(error);
3565                         uv = 0; /* drop through to ensure range ends are set */
3566                     }
3567                 }
3568
3569               NUM_ESCAPE_INSERT:
3570                 /* Insert oct or hex escaped character. */
3571
3572                 /* Here uv is the ordinal of the next character being added */
3573                 if (UVCHR_IS_INVARIANT(uv)) {
3574                     *d++ = (char) uv;
3575                 }
3576                 else {
3577                     if (!has_utf8 && uv > 255) {
3578
3579                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3580                          * If we've only seen invariants so far, all we have to
3581                          * do is turn on the flag */
3582                         if (utf8_variant_count == 0) {
3583                             SvUTF8_on(sv);
3584                         }
3585                         else {
3586                             SvCUR_set(sv, d - SvPVX_const(sv));
3587                             SvPOK_on(sv);
3588                             *d = '\0';
3589
3590                             sv_utf8_upgrade_flags_grow(
3591                                            sv,
3592                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3593
3594                                            /* Since we're having to grow here,
3595                                             * make sure we have enough room for
3596                                             * this escape and a NUL, so the
3597                                             * code immediately below won't have
3598                                             * to actually grow again */
3599                                           UVCHR_SKIP(uv)
3600                                         + (STRLEN)(send - s) + 1);
3601                             d = SvPVX(sv) + SvCUR(sv);
3602                         }
3603
3604                         has_above_latin1 = TRUE;
3605                         has_utf8 = TRUE;
3606                     }
3607
3608                     if (! has_utf8) {
3609                         *d++ = (char)uv;
3610                         utf8_variant_count++;
3611                     }
3612                     else {
3613                        /* Usually, there will already be enough room in 'sv'
3614                         * since such escapes are likely longer than any UTF-8
3615                         * sequence they can end up as.  This isn't the case on
3616                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3617                         * UTF-8 for it contains 14.  And, we have to allow for
3618                         * a trailing NUL.  It probably can't happen on ASCII
3619                         * platforms, but be safe.  See Note on sizing above. */
3620                         const STRLEN needed = d - SvPVX(sv)
3621                                             + UVCHR_SKIP(uv)
3622                                             + (send - s)
3623                                             + 1;
3624                         if (UNLIKELY(needed > SvLEN(sv))) {
3625                             SvCUR_set(sv, d - SvPVX_const(sv));
3626                             d = SvCUR(sv) + SvGROW(sv, needed);
3627                         }
3628
3629                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3630                         if (PL_lex_inwhat == OP_TRANS
3631                             && PL_parser->lex_sub_op)
3632                         {
3633                             PL_parser->lex_sub_op->op_private |=
3634                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3635                                              : OPpTRANS_TO_UTF);
3636                         }
3637                     }
3638                 }
3639 #ifdef EBCDIC
3640                 non_portable_endpoint++;
3641 #endif
3642                 continue;
3643
3644             case 'N':
3645                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3646                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3647                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3648                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3649                  * convenience all three forms are referred to as "named
3650                  * characters" below.
3651                  *
3652                  * For patterns, \N also can mean to match a non-newline.  Code
3653                  * before this 'switch' statement should already have handled
3654                  * this situation, and hence this code only has to deal with
3655                  * the named character cases.
3656                  *
3657                  * For non-patterns, the named characters are converted to
3658                  * their string equivalents.  In patterns, named characters are
3659                  * not converted to their ultimate forms for the same reasons
3660                  * that other escapes aren't (mainly that the ultimate
3661                  * character could be considered a meta-symbol by the regex
3662                  * compiler).  Instead, they are converted to the \N{U+...}
3663                  * form to get the value from the charnames that is in effect
3664                  * right now, while preserving the fact that it was a named
3665                  * character, so that the regex compiler knows this.
3666                  *
3667                  * The structure of this section of code (besides checking for
3668                  * errors and upgrading to utf8) is:
3669                  *    If the named character is of the form \N{U+...}, pass it
3670                  *      through if a pattern; otherwise convert the code point
3671                  *      to utf8
3672                  *    Otherwise must be some \N{NAME}: convert to
3673                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3674                  *
3675                  * Transliteration is an exception.  The conversion to utf8 is
3676                  * only done if the code point requires it to be representable.
3677                  *
3678                  * Here, 's' points to the 'N'; the test below is guaranteed to
3679                  * succeed if we are being called on a pattern, as we already
3680                  * know from a test above that the next character is a '{'.  A
3681                  * non-pattern \N must mean 'named character', which requires
3682                  * braces */
3683                 s++;
3684                 if (*s != '{') {
3685                     yyerror("Missing braces on \\N{}");
3686                     *d++ = '\0';
3687                     continue;
3688                 }
3689                 s++;
3690
3691                 /* If there is no matching '}', it is an error. */
3692                 if (! (e = (char *) memchr(s, '}', send - s))) {
3693                     if (! PL_lex_inpat) {
3694                         yyerror("Missing right brace on \\N{}");
3695                     } else {
3696                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3697                     }
3698                     yyquit(); /* Have exhausted the input. */
3699                 }
3700
3701                 /* Here it looks like a named character */
3702
3703                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3704                     s += 2;         /* Skip to next char after the 'U+' */
3705                     if (PL_lex_inpat) {
3706
3707                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3708                         /* Check the syntax.  */
3709                         const char *orig_s;
3710                         orig_s = s - 5;
3711                         if (!isXDIGIT(*s)) {
3712                           bad_NU:
3713                             yyerror(
3714                                 "Invalid hexadecimal number in \\N{U+...}"
3715                             );
3716                             s = e + 1;
3717                             *d++ = '\0';
3718                             continue;
3719                         }
3720                         while (++s < e) {
3721                             if (isXDIGIT(*s))
3722                                 continue;
3723                             else if ((*s == '.' || *s == '_')
3724                                   && isXDIGIT(s[1]))
3725                                 continue;
3726                             goto bad_NU;
3727                         }
3728
3729                         /* Pass everything through unchanged.
3730                          * +1 is for the '}' */
3731                         Copy(orig_s, d, e - orig_s + 1, char);
3732                         d += e - orig_s + 1;
3733                     }
3734                     else {  /* Not a pattern: convert the hex to string */
3735                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3736                                 | PERL_SCAN_SILENT_ILLDIGIT
3737                                 | PERL_SCAN_DISALLOW_PREFIX;
3738                         STRLEN len = e - s;
3739                         uv = grok_hex(s, &len, &flags, NULL);
3740                         if (len == 0 || (len != (STRLEN)(e - s)))
3741                             goto bad_NU;
3742
3743                          /* For non-tr///, if the destination is not in utf8,
3744                           * unconditionally recode it to be so.  This is
3745                           * because \N{} implies Unicode semantics, and scalars
3746                           * have to be in utf8 to guarantee those semantics.
3747                           * tr/// doesn't care about Unicode rules, so no need
3748                           * there to upgrade to UTF-8 for small enough code
3749                           * points */
3750                         if (! has_utf8 && (   uv > 0xFF
3751                                            || PL_lex_inwhat != OP_TRANS))
3752                         {
3753                             /* See Note on sizing above.  */
3754                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3755
3756                             SvCUR_set(sv, d - SvPVX_const(sv));
3757                             SvPOK_on(sv);
3758                             *d = '\0';
3759
3760                             if (utf8_variant_count == 0) {
3761                                 SvUTF8_on(sv);
3762                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3763                             }
3764                             else {
3765                                 sv_utf8_upgrade_flags_grow(
3766                                                sv,
3767                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3768                                                extra);
3769                                 d = SvPVX(sv) + SvCUR(sv);
3770                             }
3771
3772                             has_utf8 = TRUE;
3773                             has_above_latin1 = TRUE;
3774                         }
3775
3776                         /* Add the (Unicode) code point to the output. */
3777                         if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3778                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3779                         }
3780                         else {
3781                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3782                         }
3783                     }
3784                 }
3785                 else /* Here is \N{NAME} but not \N{U+...}. */
3786                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3787                 {   /* Failed.  We should die eventually, but for now use a NUL
3788                        to keep parsing */
3789                     *d++ = '\0';
3790                 }
3791                 else {  /* Successfully evaluated the name */
3792                     STRLEN len;
3793                     const char *str = SvPV_const(res, len);
3794                     if (PL_lex_inpat) {
3795
3796                         if (! len) { /* The name resolved to an empty string */
3797                             const char empty_N[] = "\\N{_}";
3798                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
3799                             d += sizeof(empty_N) - 1;
3800                         }
3801                         else {
3802                             /* In order to not lose information for the regex
3803                             * compiler, pass the result in the specially made
3804                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3805                             * the code points in hex of each character
3806                             * returned by charnames */
3807
3808                             const char *str_end = str + len;
3809                             const STRLEN off = d - SvPVX_const(sv);
3810
3811                             if (! SvUTF8(res)) {
3812                                 /* For the non-UTF-8 case, we can determine the
3813                                  * exact length needed without having to parse
3814                                  * through the string.  Each character takes up
3815                                  * 2 hex digits plus either a trailing dot or
3816                                  * the "}" */
3817                                 const char initial_text[] = "\\N{U+";
3818                                 const STRLEN initial_len = sizeof(initial_text)
3819                                                            - 1;
3820                                 d = off + SvGROW(sv, off
3821                                                     + 3 * len
3822
3823                                                     /* +1 for trailing NUL */
3824                                                     + initial_len + 1
3825
3826                                                     + (STRLEN)(send - e));
3827                                 Copy(initial_text, d, initial_len, char);
3828                                 d += initial_len;
3829                                 while (str < str_end) {
3830                                     char hex_string[4];
3831                                     int len =
3832                                         my_snprintf(hex_string,
3833                                                   sizeof(hex_string),
3834                                                   "%02X.",
3835
3836                                                   /* The regex compiler is
3837                                                    * expecting Unicode, not
3838                                                    * native */
3839                                                   NATIVE_TO_LATIN1(*str));
3840                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3841                                                            sizeof(hex_string));
3842                                     Copy(hex_string, d, 3, char);
3843                                     d += 3;
3844                                     str++;
3845                                 }
3846                                 d--;    /* Below, we will overwrite the final
3847                                            dot with a right brace */
3848                             }
3849                             else {
3850                                 STRLEN char_length; /* cur char's byte length */
3851
3852                                 /* and the number of bytes after this is
3853                                  * translated into hex digits */
3854                                 STRLEN output_length;
3855
3856                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3857                                  * for max('U+', '.'); and 1 for NUL */
3858                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3859
3860                                 /* Get the first character of the result. */
3861                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3862                                                         len,
3863                                                         &char_length,
3864                                                         UTF8_ALLOW_ANYUV);
3865                                 /* Convert first code point to Unicode hex,
3866                                  * including the boiler plate before it. */
3867                                 output_length =
3868                                     my_snprintf(hex_string, sizeof(hex_string),
3869                                              "\\N{U+%X",
3870                                              (unsigned int) NATIVE_TO_UNI(uv));
3871
3872                                 /* Make sure there is enough space to hold it */
3873                                 d = off + SvGROW(sv, off
3874                                                     + output_length
3875                                                     + (STRLEN)(send - e)
3876                                                     + 2);       /* '}' + NUL */
3877                                 /* And output it */
3878                                 Copy(hex_string, d, output_length, char);
3879                                 d += output_length;
3880
3881                                 /* For each subsequent character, append dot and
3882                                 * its Unicode code point in hex */
3883                                 while ((str += char_length) < str_end) {
3884                                     const STRLEN off = d - SvPVX_const(sv);
3885                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3886                                                             str_end - str,
3887                                                             &char_length,
3888                                                             UTF8_ALLOW_ANYUV);
3889                                     output_length =
3890                                         my_snprintf(hex_string,
3891                                              sizeof(hex_string),
3892                                              ".%X",
3893                                              (unsigned int) NATIVE_TO_UNI(uv));
3894
3895                                     d = off + SvGROW(sv, off
3896                                                         + output_length
3897                                                         + (STRLEN)(send - e)
3898                                                         + 2);   /* '}' +  NUL */
3899                                     Copy(hex_string, d, output_length, char);
3900                                     d += output_length;
3901                                 }
3902                             }
3903
3904                             *d++ = '}'; /* Done.  Add the trailing brace */
3905                         }
3906                     }
3907                     else { /* Here, not in a pattern.  Convert the name to a
3908                             * string. */
3909
3910                         if (PL_lex_inwhat == OP_TRANS) {
3911                             str = SvPV_const(res, len);
3912                             if (len > ((SvUTF8(res))
3913                                        ? UTF8SKIP(str)
3914                                        : 1U))
3915                             {
3916                                 yyerror(Perl_form(aTHX_
3917                                     "%.*s must not be a named sequence"
3918                                     " in transliteration operator",
3919                                         /*  +1 to include the "}" */
3920                                     (int) (e + 1 - start), start));
3921                                 *d++ = '\0';
3922                                 goto end_backslash_N;
3923                             }
3924
3925                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3926                                 has_above_latin1 = TRUE;
3927                             }
3928
3929                         }
3930                         else if (! SvUTF8(res)) {
3931                             /* Make sure \N{} return is UTF-8.  This is because
3932                              * \N{} implies Unicode semantics, and scalars have
3933                              * to be in utf8 to guarantee those semantics; but
3934                              * not needed in tr/// */
3935                             sv_utf8_upgrade_flags(res, 0);
3936                             str = SvPV_const(res, len);
3937                         }
3938
3939                          /* Upgrade destination to be utf8 if this new
3940                           * component is */
3941                         if (! has_utf8 && SvUTF8(res)) {
3942                             /* See Note on sizing above.  */
3943                             const STRLEN extra = len + (send - s) + 1;
3944
3945                             SvCUR_set(sv, d - SvPVX_const(sv));
3946                             SvPOK_on(sv);
3947                             *d = '\0';
3948
3949                             if (utf8_variant_count == 0) {
3950                                 SvUTF8_on(sv);
3951                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3952                             }
3953                             else {
3954                                 sv_utf8_upgrade_flags_grow(sv,
3955                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3956                                                 extra);
3957                                 d = SvPVX(sv) + SvCUR(sv);
3958                             }
3959                             has_utf8 = TRUE;
3960                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3961
3962                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3963                              * set correctly here). */
3964                             const STRLEN extra = len + (send - e) + 1;
3965                             const STRLEN off = d - SvPVX_const(sv);
3966                             d = off + SvGROW(sv, off + extra);
3967                         }
3968                         Copy(str, d, len, char);
3969                         d += len;
3970                     }
3971
3972                     SvREFCNT_dec(res);
3973
3974                 } /* End \N{NAME} */
3975
3976               end_backslash_N:
3977 #ifdef EBCDIC
3978                 backslash_N++; /* \N{} is defined to be Unicode */
3979 #endif
3980                 s = e + 1;  /* Point to just after the '}' */
3981                 continue;
3982
3983             /* \c is a control character */
3984             case 'c':
3985                 s++;
3986                 if (s < send) {
3987                     *d++ = grok_bslash_c(*s, 1);
3988                 }
3989                 else {
3990                     yyerror("Missing control char name in \\c");
3991                     yyquit();   /* Are at end of input, no sense continuing */
3992                 }
3993 #ifdef EBCDIC
3994                 non_portable_endpoint++;
3995 #endif
3996                 break;
3997
3998             /* printf-style backslashes, formfeeds, newlines, etc */
3999             case 'b':
4000                 *d++ = '\b';
4001                 break;
4002             case 'n':
4003                 *d++ = '\n';
4004                 break;
4005             case 'r':
4006                 *d++ = '\r';
4007                 break;
4008             case 'f':
4009                 *d++ = '\f';
4010                 break;
4011             case 't':
4012                 *d++ = '\t';
4013                 break;
4014             case 'e':
4015                 *d++ = ESC_NATIVE;
4016                 break;
4017             case 'a':
4018                 *d++ = '\a';
4019                 break;
4020             } /* end switch */
4021
4022             s++;
4023             continue;
4024         } /* end if (backslash) */
4025
4026     default_action:
4027         /* Just copy the input to the output, though we may have to convert
4028          * to/from UTF-8.
4029          *
4030          * If the input has the same representation in UTF-8 as not, it will be
4031          * a single byte, and we don't care about UTF8ness; just copy the byte */
4032         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4033             *d++ = *s++;
4034         }
4035         else if (! this_utf8 && ! has_utf8) {
4036             /* If neither source nor output is UTF-8, is also a single byte,
4037              * just copy it; but this byte counts should we later have to
4038              * convert to UTF-8 */
4039             *d++ = *s++;
4040             utf8_variant_count++;
4041         }
4042         else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
4043             const STRLEN len = UTF8SKIP(s);
4044
4045             /* We expect the source to have already been checked for
4046              * malformedness */
4047             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4048
4049             Copy(s, d, len, U8);
4050             d += len;
4051             s += len;
4052         }
4053         else { /* UTF8ness matters and doesn't match, need to convert */
4054             STRLEN len = 1;
4055             const UV nextuv   = (this_utf8)
4056                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
4057                                 : (UV) ((U8) *s);
4058             STRLEN need = UVCHR_SKIP(nextuv);
4059
4060             if (!has_utf8) {
4061                 SvCUR_set(sv, d - SvPVX_const(sv));
4062                 SvPOK_on(sv);
4063                 *d = '\0';
4064
4065                 /* See Note on sizing above. */
4066                 need += (STRLEN)(send - s) + 1;
4067
4068                 if (utf8_variant_count == 0) {
4069                     SvUTF8_on(sv);
4070                     d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4071                 }
4072                 else {
4073                     sv_utf8_upgrade_flags_grow(sv,
4074                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4075                                                need);
4076                     d = SvPVX(sv) + SvCUR(sv);
4077                 }
4078                 has_utf8 = TRUE;
4079             } else if (need > len) {
4080                 /* encoded value larger than old, may need extra space (NOTE:
4081                  * SvCUR() is not set correctly here).   See Note on sizing
4082                  * above.  */
4083                 const STRLEN extra = need + (send - s) + 1;
4084                 const STRLEN off = d - SvPVX_const(sv);
4085                 d = off + SvGROW(sv, off + extra);
4086             }
4087             s += len;
4088
4089             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
4090         }
4091     } /* while loop to process each character */
4092
4093     /* terminate the string and set up the sv */
4094     *d = '\0';
4095     SvCUR_set(sv, d - SvPVX_const(sv));
4096     if (SvCUR(sv) >= SvLEN(sv))
4097         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4098                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4099
4100     SvPOK_on(sv);
4101     if (has_utf8) {
4102         SvUTF8_on(sv);
4103         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4104             PL_parser->lex_sub_op->op_private |=
4105                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4106         }
4107     }
4108
4109     /* shrink the sv if we allocated more than we used */
4110     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4111         SvPV_shrink_to_cur(sv);
4112     }
4113
4114     /* return the substring (via pl_yylval) only if we parsed anything */
4115     if (s > start) {
4116         char *s2 = start;
4117         for (; s2 < s; s2++) {
4118             if (*s2 == '\n')
4119                 COPLINE_INC_WITH_HERELINES;
4120         }
4121         SvREFCNT_inc_simple_void_NN(sv);
4122         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4123             && ! PL_parser->lex_re_reparsing)
4124         {
4125             const char *const key = PL_lex_inpat ? "qr" : "q";
4126             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4127             const char *type;
4128             STRLEN typelen;
4129
4130             if (PL_lex_inwhat == OP_TRANS) {
4131                 type = "tr";
4132                 typelen = 2;
4133             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4134                 type = "s";
4135                 typelen = 1;
4136             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4137                 type = "q";
4138                 typelen = 1;
4139             } else  {
4140                 type = "qq";
4141                 typelen = 2;
4142             }
4143
4144             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4145                                 type, typelen, NULL);
4146         }
4147         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4148     }
4149     LEAVE_with_name("scan_const");
4150     return s;
4151 }
4152
4153 /* S_intuit_more
4154  * Returns TRUE if there's more to the expression (e.g., a subscript),
4155  * FALSE otherwise.
4156  *
4157  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4158  *
4159  * ->[ and ->{ return TRUE
4160  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4161  * { and [ outside a pattern are always subscripts, so return TRUE
4162  * if we're outside a pattern and it's not { or [, then return FALSE
4163  * if we're in a pattern and the first char is a {
4164  *   {4,5} (any digits around the comma) returns FALSE
4165  * if we're in a pattern and the first char is a [
4166  *   [] returns FALSE
4167  *   [SOMETHING] has a funky algorithm to decide whether it's a
4168  *      character class or not.  It has to deal with things like
4169  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4170  * anything else returns TRUE
4171  */
4172
4173 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4174
4175 STATIC int
4176 S_intuit_more(pTHX_ char *s, char *e)
4177 {
4178     PERL_ARGS_ASSERT_INTUIT_MORE;
4179
4180     if (PL_lex_brackets)
4181         return TRUE;
4182     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4183         return TRUE;
4184     if (*s == '-' && s[1] == '>'
4185      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4186      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4187         ||(s[2] == '@' && strchr("*[{",s[3])) ))
4188         return TRUE;
4189     if (*s != '{' && *s != '[')
4190         return FALSE;
4191     PL_parser->sub_no_recover = TRUE;
4192     if (!PL_lex_inpat)
4193         return TRUE;
4194
4195     /* In a pattern, so maybe we have {n,m}. */
4196     if (*s == '{') {
4197         if (regcurly(s)) {
4198             return FALSE;
4199         }
4200         return TRUE;
4201     }
4202
4203     /* On the other hand, maybe we have a character class */
4204
4205     s++;
4206     if (*s == ']' || *s == '^')
4207         return FALSE;
4208     else {
4209         /* this is terrifying, and it works */
4210         int weight;
4211         char seen[256];
4212         const char * const send = (char *) memchr(s, ']', e - s);
4213         unsigned char un_char, last_un_char;
4214         char tmpbuf[sizeof PL_tokenbuf * 4];
4215
4216         if (!send)              /* has to be an expression */
4217             return TRUE;
4218         weight = 2;             /* let's weigh the evidence */
4219
4220         if (*s == '$')
4221             weight -= 3;
4222         else if (isDIGIT(*s)) {
4223             if (s[1] != ']') {
4224                 if (isDIGIT(s[1]) && s[2] == ']')
4225                     weight -= 10;
4226             }
4227             else
4228                 weight -= 100;
4229         }
4230         Zero(seen,256,char);
4231         un_char = 255;
4232         for (; s < send; s++) {
4233             last_un_char = un_char;
4234             un_char = (unsigned char)*s;
4235             switch (*s) {
4236             case '@':
4237             case '&':
4238             case '$':
4239                 weight -= seen[un_char] * 10;
4240                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4241                     int len;
4242                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4243                     len = (int)strlen(tmpbuf);
4244                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4245                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4246                         weight -= 100;
4247                     else
4248                         weight -= 10;
4249                 }
4250                 else if (*s == '$'
4251                          && s[1]
4252                          && strchr("[#!%*<>()-=",s[1]))
4253                 {
4254                     if (/*{*/ strchr("])} =",s[2]))
4255                         weight -= 10;
4256                     else
4257                         weight -= 1;
4258                 }
4259                 break;
4260             case '\\':
4261                 un_char = 254;
4262                 if (s[1]) {
4263                     if (strchr("wds]",s[1]))
4264                         weight += 100;
4265                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4266                         weight += 1;
4267                     else if (strchr("rnftbxcav",s[1]))
4268                         weight += 40;
4269                     else if (isDIGIT(s[1])) {
4270                         weight += 40;
4271                         while (s[1] && isDIGIT(s[1]))
4272                             s++;
4273                     }
4274                 }
4275                 else
4276                     weight += 100;
4277                 break;
4278             case '-':
4279                 if (s[1] == '\\')
4280                     weight += 50;
4281                 if (strchr("aA01! ",last_un_char))
4282                     weight += 30;
4283                 if (strchr("zZ79~",s[1]))
4284                     weight += 30;
4285                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4286                     weight -= 5;        /* cope with negative subscript */
4287                 break;
4288             default:
4289                 if (!isWORDCHAR(last_un_char)
4290                     && !(last_un_char == '$' || last_un_char == '@'
4291                          || last_un_char == '&')
4292                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4293                     char *d = s;
4294                     while (isALPHA(*s))
4295                         s++;
4296                     if (keyword(d, s - d, 0))
4297                         weight -= 150;
4298                 }
4299                 if (un_char == last_un_char + 1)
4300                     weight += 5;
4301                 weight -= seen[un_char];
4302                 break;
4303             }
4304             seen[un_char]++;
4305         }
4306         if (weight >= 0)        /* probably a character class */
4307             return FALSE;
4308     }
4309
4310     return TRUE;
4311 }
4312
4313 /*
4314  * S_intuit_method
4315  *
4316  * Does all the checking to disambiguate
4317  *   foo bar
4318  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4319  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4320  *
4321  * First argument is the stuff after the first token, e.g. "bar".
4322  *
4323  * Not a method if foo is a filehandle.
4324  * Not a method if foo is a subroutine prototyped to take a filehandle.
4325  * Not a method if it's really "Foo $bar"
4326  * Method if it's "foo $bar"
4327  * Not a method if it's really "print foo $bar"
4328  * Method if it's really "foo package::" (interpreted as package->foo)
4329  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4330  * Not a method if bar is a filehandle or package, but is quoted with
4331  *   =>
4332  */
4333
4334 STATIC int
4335 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4336 {
4337     char *s = start + (*start == '$');
4338     char tmpbuf[sizeof PL_tokenbuf];
4339     STRLEN len;
4340     GV* indirgv;
4341         /* Mustn't actually add anything to a symbol table.
4342            But also don't want to "initialise" any placeholder
4343            constants that might already be there into full
4344            blown PVGVs with attached PVCV.  */
4345     GV * const gv =
4346         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4347
4348     PERL_ARGS_ASSERT_INTUIT_METHOD;
4349
4350     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4351             return 0;
4352     if (cv && SvPOK(cv)) {
4353         const char *proto = CvPROTO(cv);
4354         if (proto) {
4355             while (*proto && (isSPACE(*proto) || *proto == ';'))
4356                 proto++;
4357             if (*proto == '*')
4358                 return 0;
4359         }
4360     }
4361
4362     if (*start == '$') {
4363         SSize_t start_off = start - SvPVX(PL_linestr);
4364         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4365             || isUPPER(*PL_tokenbuf))
4366             return 0;
4367         /* this could be $# */
4368         if (isSPACE(*s))
4369             s = skipspace(s);
4370         PL_bufptr = SvPVX(PL_linestr) + start_off;
4371         PL_expect = XREF;
4372         return *s == '(' ? FUNCMETH : METHOD;
4373     }
4374
4375     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4376     /* start is the beginning of the possible filehandle/object,
4377      * and s is the end of it
4378      * tmpbuf is a copy of it (but with single quotes as double colons)
4379      */
4380
4381     if (!keyword(tmpbuf, len, 0)) {
4382         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4383             len -= 2;
4384             tmpbuf[len] = '\0';
4385             goto bare_package;
4386         }
4387         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4388                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4389                                     SVt_PVCV);
4390         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4391          && (!isGV(indirgv) || GvCVu(indirgv)))
4392             return 0;
4393         /* filehandle or package name makes it a method */
4394         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4395             s = skipspace(s);
4396             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4397                 return 0;       /* no assumptions -- "=>" quotes bareword */
4398       bare_package:
4399             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4400                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4401             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4402             PL_expect = XTERM;
4403             force_next(BAREWORD);
4404             PL_bufptr = s;
4405             return *s == '(' ? FUNCMETH : METHOD;
4406         }
4407     }
4408     return 0;
4409 }
4410
4411 /* Encoded script support. filter_add() effectively inserts a
4412  * 'pre-processing' function into the current source input stream.
4413  * Note that the filter function only applies to the current source file
4414  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4415  *
4416  * The datasv parameter (which may be NULL) can be used to pass
4417  * private data to this instance of the filter. The filter function
4418  * can recover the SV using the FILTER_DATA macro and use it to
4419  * store private buffers and state information.
4420  *
4421  * The supplied datasv parameter is upgraded to a PVIO type
4422  * and the IoDIRP/IoANY field is used to store the function pointer,
4423  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4424  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4425  * private use must be set using malloc'd pointers.
4426  */
4427
4428 SV *
4429 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4430 {
4431     if (!funcp)
4432         return NULL;
4433
4434     if (!PL_parser)
4435         return NULL;
4436
4437     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4438         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4439
4440     if (!PL_rsfp_filters)
4441         PL_rsfp_filters = newAV();
4442     if (!datasv)
4443         datasv = newSV(0);
4444     SvUPGRADE(datasv, SVt_PVIO);
4445     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4446     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4447     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4448                           FPTR2DPTR(void *, IoANY(datasv)),
4449                           SvPV_nolen(datasv)));
4450     av_unshift(PL_rsfp_filters, 1);
4451     av_store(PL_rsfp_filters, 0, datasv) ;
4452     if (
4453         !PL_parser->filtered
4454      && PL_parser->lex_flags & LEX_EVALBYTES
4455      && PL_bufptr < PL_bufend
4456     ) {
4457         const char *s = PL_bufptr;
4458         while (s < PL_bufend) {
4459             if (*s == '\n') {
4460                 SV *linestr = PL_parser->linestr;
4461                 char *buf = SvPVX(linestr);
4462                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4463                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4464                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4465                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4466                 STRLEN const last_uni_pos =
4467                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4468                 STRLEN const last_lop_pos =
4469                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4470                 av_push(PL_rsfp_filters, linestr);
4471                 PL_parser->linestr =
4472                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4473                 buf = SvPVX(PL_parser->linestr);
4474                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4475                 PL_parser->bufptr = buf + bufptr_pos;
4476                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4477                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4478                 PL_parser->linestart = buf + linestart_pos;
4479                 if (PL_parser->last_uni)
4480                     PL_parser->last_uni = buf + last_uni_pos;
4481                 if (PL_parser->last_lop)
4482                     PL_parser->last_lop = buf + last_lop_pos;
4483                 SvLEN_set(linestr, SvCUR(linestr));
4484                 SvCUR_set(linestr, s - SvPVX(linestr));
4485                 PL_parser->filtered = 1;
4486                 break;
4487             }
4488             s++;
4489         }
4490     }
4491     return(datasv);
4492 }
4493
4494
4495 /* Delete most recently added instance of this filter function. */
4496 void
4497 Perl_filter_del(pTHX_ filter_t funcp)
4498 {
4499     SV *datasv;
4500
4501     PERL_ARGS_ASSERT_FILTER_DEL;
4502
4503 #ifdef DEBUGGING
4504     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4505                           FPTR2DPTR(void*, funcp)));
4506 #endif
4507     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4508         return;
4509     /* if filter is on top of stack (usual case) just pop it off */
4510     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4511     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4512         sv_free(av_pop(PL_rsfp_filters));
4513
4514         return;
4515     }
4516     /* we need to search for the correct entry and clear it     */
4517     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4518 }
4519
4520
4521 /* Invoke the idxth filter function for the current rsfp.        */
4522 /* maxlen 0 = read one text line */
4523 I32
4524 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4525 {
4526     filter_t funcp;
4527     I32 ret;
4528     SV *datasv = NULL;
4529     /* This API is bad. It should have been using unsigned int for maxlen.
4530        Not sure if we want to change the API, but if not we should sanity
4531        check the value here.  */
4532     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4533
4534     PERL_ARGS_ASSERT_FILTER_READ;
4535
4536     if (!PL_parser || !PL_rsfp_filters)
4537         return -1;
4538     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4539         /* Provide a default input filter to make life easy.    */
4540         /* Note that we append to the line. This is handy.      */
4541         DEBUG_P(PerlIO_printf(Perl_debug_log,
4542                               "filter_read %d: from rsfp\n", idx));
4543         if (correct_length) {
4544             /* Want a block */
4545             int len ;
4546             const int old_len = SvCUR(buf_sv);
4547
4548             /* ensure buf_sv is large enough */
4549             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4550             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4551                                    correct_length)) <= 0) {
4552                 if (PerlIO_error(PL_rsfp))
4553                     return -1;          /* error */
4554                 else
4555                     return 0 ;          /* end of file */
4556             }
4557             SvCUR_set(buf_sv, old_len + len) ;
4558             SvPVX(buf_sv)[old_len + len] = '\0';
4559         } else {
4560             /* Want a line */
4561             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4562                 if (PerlIO_error(PL_rsfp))
4563                     return -1;          /* error */
4564                 else
4565                     return 0 ;          /* end of file */
4566             }
4567         }
4568         return SvCUR(buf_sv);
4569     }
4570     /* Skip this filter slot if filter has been deleted */
4571     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4572         DEBUG_P(PerlIO_printf(Perl_debug_log,
4573                               "filter_read %d: skipped (filter deleted)\n",
4574                               idx));
4575         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4576     }
4577     if (SvTYPE(datasv) != SVt_PVIO) {
4578         if (correct_length) {
4579             /* Want a block */
4580             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4581             if (!remainder) return 0; /* eof */
4582             if (correct_length > remainder) correct_length = remainder;
4583             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4584             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4585         } else {
4586             /* Want a line */
4587             const char *s = SvEND(datasv);
4588             const char *send = SvPVX(datasv) + SvLEN(datasv);
4589             while (s < send) {
4590                 if (*s == '\n') {
4591                     s++;
4592                     break;
4593                 }
4594                 s++;
4595             }
4596             if (s == send) return 0; /* eof */
4597             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4598             SvCUR_set(datasv, s-SvPVX(datasv));
4599         }
4600         return SvCUR(buf_sv);
4601     }
4602     /* Get function pointer hidden within datasv        */
4603     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4604     DEBUG_P(PerlIO_printf(Perl_debug_log,
4605                           "filter_read %d: via function %p (%s)\n",
4606                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4607     /* Call function. The function is expected to       */
4608     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4609     /* Return: <0:error, =0:eof, >0:not eof             */
4610     ENTER;
4611     save_scalar(PL_errgv);
4612     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4613     LEAVE;
4614     return ret;
4615 }
4616
4617 STATIC char *
4618 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4619 {
4620     PERL_ARGS_ASSERT_FILTER_GETS;
4621
4622 #ifdef PERL_CR_FILTER
4623     if (!PL_rsfp_filters) {
4624         filter_add(S_cr_textfilter,NULL);
4625     }
4626 #endif
4627     if (PL_rsfp_filters) {
4628         if (!append)
4629             SvCUR_set(sv, 0);   /* start with empty line        */
4630         if (FILTER_READ(0, sv, 0) > 0)
4631             return ( SvPVX(sv) ) ;
4632         else
4633             return NULL ;
4634     }
4635     else
4636         return (sv_gets(sv, PL_rsfp, append));
4637 }
4638
4639 STATIC HV *
4640 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4641 {
4642     GV *gv;
4643
4644     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4645
4646     if (memEQs(pkgname, len, "__PACKAGE__"))
4647         return PL_curstash;
4648
4649     if (len > 2
4650         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4651         && (gv = gv_fetchpvn_flags(pkgname,
4652                                    len,
4653                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4654     {
4655         return GvHV(gv);                        /* Foo:: */
4656     }
4657
4658     /* use constant CLASS => 'MyClass' */
4659     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4660     if (gv && GvCV(gv)) {
4661         SV * const sv = cv_const_sv(GvCV(gv));
4662         if (sv)
4663             return gv_stashsv(sv, 0);
4664     }
4665
4666     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4667 }
4668
4669
4670 STATIC char *
4671 S_tokenize_use(pTHX_ int is_use, char *s) {
4672     PERL_ARGS_ASSERT_TOKENIZE_USE;
4673
4674     if (PL_expect != XSTATE)
4675         /* diag_listed_as: "use" not allowed in expression */
4676         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4677                     is_use ? "use" : "no"));
4678     PL_expect = XTERM;
4679     s = skipspace(s);
4680     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4681         s = force_version(s, TRUE);
4682         if (*s == ';' || *s == '}'
4683                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4684             NEXTVAL_NEXTTOKE.opval = NULL;
4685             force_next(BAREWORD);
4686         }
4687         else if (*s == 'v') {
4688             s = force_word(s,BAREWORD,FALSE,TRUE);
4689             s = force_version(s, FALSE);
4690         }
4691     }
4692     else {
4693         s = force_word(s,BAREWORD,FALSE,TRUE);
4694         s = force_version(s, FALSE);
4695     }
4696     pl_yylval.ival = is_use;
4697     return s;
4698 }
4699 #ifdef DEBUGGING
4700     static const char* const exp_name[] =
4701         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4702           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4703           "SIGVAR", "TERMORDORDOR"
4704         };
4705 #endif
4706
4707 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4708 STATIC bool
4709 S_word_takes_any_delimiter(char *p, STRLEN len)
4710 {
4711     return (len == 1 && strchr("msyq", p[0]))
4712             || (len == 2
4713                 && ((p[0] == 't' && p[1] == 'r')
4714                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4715 }
4716
4717 static void
4718 S_check_scalar_slice(pTHX_ char *s)
4719 {
4720     s++;
4721     while (SPACE_OR_TAB(*s)) s++;
4722     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4723                                                              PL_bufend,
4724                                                              UTF))
4725     {
4726         return;
4727     }
4728     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4729            || (*s && strchr(" \t$#+-'\"", *s)))
4730     {
4731         s += UTF ? UTF8SKIP(s) : 1;
4732     }
4733     if (*s == '}' || *s == ']')
4734         pl_yylval.ival = OPpSLICEWARNING;
4735 }
4736
4737 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4738 static void
4739 S_lex_token_boundary(pTHX)
4740 {
4741     PL_oldoldbufptr = PL_oldbufptr;
4742     PL_oldbufptr = PL_bufptr;
4743 }
4744
4745 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4746 static char *
4747 S_vcs_conflict_marker(pTHX_ char *s)
4748 {
4749     lex_token_boundary();
4750     PL_bufptr = s;
4751     yyerror("Version control conflict marker");
4752     while (s < PL_bufend && *s != '\n')
4753         s++;
4754     return s;
4755 }
4756
4757 /*
4758   yylex
4759
4760   Works out what to call the token just pulled out of the input
4761   stream.  The yacc parser takes care of taking the ops we return and
4762   stitching them into a tree.
4763
4764   Returns:
4765     The type of the next token
4766
4767   Structure:
4768       Check if we have already built the token; if so, use it.
4769       Switch based on the current state:
4770           - if we have a case modifier in a string, deal with that
4771           - handle other cases of interpolation inside a string
4772           - scan the next line if we are inside a format
4773       In the normal state, switch on the next character:
4774           - default:
4775             if alphabetic, go to key lookup
4776             unrecognized character - croak
4777           - 0/4/26: handle end-of-line or EOF
4778           - cases for whitespace
4779           - \n and #: handle comments and line numbers
4780           - various operators, brackets and sigils
4781           - numbers
4782           - quotes
4783           - 'v': vstrings (or go to key lookup)
4784           - 'x' repetition operator (or go to key lookup)
4785           - other ASCII alphanumerics (key lookup begins here):
4786               word before => ?
4787               keyword plugin
4788               scan built-in keyword (but do nothing with it yet)
4789               check for statement label
4790               check for lexical subs
4791                   goto just_a_word if there is one
4792               see whether built-in keyword is overridden
4793               switch on keyword number:
4794                   - default: just_a_word:
4795                       not a built-in keyword; handle bareword lookup
4796                       disambiguate between method and sub call
4797                       fall back to bareword
4798                   - cases for built-in keywords
4799 */
4800
4801
4802 int
4803 Perl_yylex(pTHX)
4804 {
4805     dVAR;
4806     char *s = PL_bufptr;
4807     char *d;
4808     STRLEN len;
4809     bool bof = FALSE;
4810     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4811     U8 formbrack = 0;
4812     U32 fake_eof = 0;
4813
4814     /* orig_keyword, gvp, and gv are initialized here because
4815      * jump to the label just_a_word_zero can bypass their
4816      * initialization later. */
4817     I32 orig_keyword = 0;
4818     GV *gv = NULL;
4819     GV **gvp = NULL;
4820
4821     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4822         const U8* first_bad_char_loc;
4823         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4824                                                         PL_bufend - PL_bufptr,
4825                                                         &first_bad_char_loc)))
4826         {
4827             _force_out_malformed_utf8_message(first_bad_char_loc,
4828                                               (U8 *) PL_bufend,
4829                                               0,
4830                                               1 /* 1 means die */ );
4831             NOT_REACHED; /* NOTREACHED */
4832         }
4833         PL_parser->recheck_utf8_validity = FALSE;
4834     }
4835     DEBUG_T( {
4836         SV* tmp = newSVpvs("");
4837         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4838             (IV)CopLINE(PL_curcop),
4839             lex_state_names[PL_lex_state],
4840             exp_name[PL_expect],
4841             pv_display(tmp, s, strlen(s), 0, 60));
4842         SvREFCNT_dec(tmp);
4843     } );
4844
4845     /* when we've already built the next token, just pull it out of the queue */
4846     if (PL_nexttoke) {
4847         PL_nexttoke--;
4848         pl_yylval = PL_nextval[PL_nexttoke];
4849         {
4850             I32 next_type;
4851             next_type = PL_nexttype[PL_nexttoke];
4852             if (next_type & (7<<24)) {
4853                 if (next_type & (1<<24)) {
4854                     if (PL_lex_brackets > 100)
4855                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4856                     PL_lex_brackstack[PL_lex_brackets++] =
4857                         (char) ((next_type >> 16) & 0xff);
4858                 }
4859                 if (next_type & (2<<24))
4860                     PL_lex_allbrackets++;
4861                 if (next_type & (4<<24))
4862                     PL_lex_allbrackets--;
4863                 next_type &= 0xffff;
4864             }
4865             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4866         }
4867     }
4868
4869     switch (PL_lex_state) {
4870     case LEX_NORMAL:
4871     case LEX_INTERPNORMAL:
4872         break;
4873
4874     /* interpolated case modifiers like \L \U, including \Q and \E.
4875        when we get here, PL_bufptr is at the \
4876     */
4877     case LEX_INTERPCASEMOD:
4878 #ifdef DEBUGGING
4879         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4880             Perl_croak(aTHX_
4881                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4882                        PL_bufptr, PL_bufend, *PL_bufptr);
4883 #endif
4884         /* handle \E or end of string */
4885         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4886             /* if at a \E */
4887             if (PL_lex_casemods) {
4888                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4889                 PL_lex_casestack[PL_lex_casemods] = '\0';
4890
4891                 if (PL_bufptr != PL_bufend
4892                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4893                         || oldmod == 'F')) {
4894                     PL_bufptr += 2;
4895                     PL_lex_state = LEX_INTERPCONCAT;
4896                 }
4897                 PL_lex_allbrackets--;
4898                 return REPORT(')');
4899             }
4900             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4901                /* Got an unpaired \E */
4902                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4903                         "Useless use of \\E");
4904             }
4905             if (PL_bufptr != PL_bufend)
4906                 PL_bufptr += 2;
4907             PL_lex_state = LEX_INTERPCONCAT;
4908             return yylex();
4909         }
4910         else {
4911             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4912               "### Saw case modifier\n"); });
4913             s = PL_bufptr + 1;
4914             if (s[1] == '\\' && s[2] == 'E') {
4915                 PL_bufptr = s + 3;
4916                 PL_lex_state = LEX_INTERPCONCAT;
4917                 return yylex();
4918             }
4919             else {
4920                 I32 tmp;
4921                 if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
4922                     || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
4923                 {
4924                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4925                 }
4926                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4927                     && (strpbrk(PL_lex_casestack, "LUF")))
4928                 {
4929                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4930                     PL_lex_allbrackets--;
4931                     return REPORT(')');
4932                 }
4933                 if (PL_lex_casemods > 10)
4934                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4935                 PL_lex_casestack[PL_lex_casemods++] = *s;
4936                 PL_lex_casestack[PL_lex_casemods] = '\0';
4937                 PL_lex_state = LEX_INTERPCONCAT;
4938                 NEXTVAL_NEXTTOKE.ival = 0;
4939                 force_next((2<<24)|'(');
4940                 if (*s == 'l')
4941                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4942                 else if (*s == 'u')
4943                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4944                 else if (*s == 'L')
4945                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4946                 else if (*s == 'U')
4947                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4948                 else if (*s == 'Q')
4949                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4950                 else if (*s == 'F')
4951                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4952                 else
4953                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4954                 PL_bufptr = s + 1;
4955             }
4956             force_next(FUNC);
4957             if (PL_lex_starts) {
4958                 s = PL_bufptr;
4959                 PL_lex_starts = 0;
4960                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4961                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4962                     TOKEN(',');
4963                 else
4964                     AopNOASSIGN(OP_CONCAT);
4965             }
4966             else
4967                 return yylex();
4968         }
4969
4970     case LEX_INTERPPUSH:
4971         return REPORT(sublex_push());
4972
4973     case LEX_INTERPSTART:
4974         if (PL_bufptr == PL_bufend)
4975             return REPORT(sublex_done());
4976         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4977               "### Interpolated variable\n"); });
4978         PL_expect = XTERM;
4979         /* for /@a/, we leave the joining for the regex engine to do
4980          * (unless we're within \Q etc) */
4981         PL_lex_dojoin = (*PL_bufptr == '@'
4982                             && (!PL_lex_inpat || PL_lex_casemods));
4983         PL_lex_state = LEX_INTERPNORMAL;
4984         if (PL_lex_dojoin) {
4985             NEXTVAL_NEXTTOKE.ival = 0;
4986             force_next(',');
4987             force_ident("\"", '$');
4988             NEXTVAL_NEXTTOKE.ival = 0;
4989             force_next('$');
4990             NEXTVAL_NEXTTOKE.ival = 0;
4991             force_next((2<<24)|'(');
4992             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4993             force_next(FUNC);
4994         }
4995         /* Convert (?{...}) and friends to 'do {...}' */
4996         if (PL_lex_inpat && *PL_bufptr == '(') {
4997             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4998             PL_bufptr += 2;
4999             if (*PL_bufptr != '{')
5000                 PL_bufptr++;
5001             PL_expect = XTERMBLOCK;
5002             force_next(DO);
5003         }
5004
5005         if (PL_lex_starts++) {
5006             s = PL_bufptr;
5007             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5008             if (!PL_lex_casemods && PL_lex_inpat)
5009                 TOKEN(',');
5010             else
5011                 AopNOASSIGN(OP_CONCAT);
5012         }
5013         return yylex();
5014
5015     case LEX_INTERPENDMAYBE:
5016         if (intuit_more(PL_bufptr, PL_bufend)) {
5017             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5018             break;
5019         }
5020         /* FALLTHROUGH */
5021
5022     case LEX_INTERPEND:
5023         if (PL_lex_dojoin) {
5024             const U8 dojoin_was = PL_lex_dojoin;
5025             PL_lex_dojoin = FALSE;
5026             PL_lex_state = LEX_INTERPCONCAT;
5027             PL_lex_allbrackets--;
5028             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
5029         }
5030         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5031             && SvEVALED(PL_lex_repl))
5032         {
5033             if (PL_bufptr != PL_bufend)
5034                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5035             PL_lex_repl = NULL;
5036         }
5037         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5038            re_eval_str.  If the here-doc body’s length equals the previous
5039            value of re_eval_start, re_eval_start will now be null.  So
5040            check re_eval_str as well. */
5041         if (PL_parser->lex_shared->re_eval_start
5042          || PL_parser->lex_shared->re_eval_str) {
5043             SV *sv;
5044             if (*PL_bufptr != ')')
5045                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5046             PL_bufptr++;
5047             /* having compiled a (?{..}) expression, return the original
5048              * text too, as a const */
5049             if (PL_parser->lex_shared->re_eval_str) {
5050                 sv = PL_parser->lex_shared->re_eval_str;
5051                 PL_parser->lex_shared->re_eval_str = NULL;
5052                 SvCUR_set(sv,
5053                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5054                 SvPV_shrink_to_cur(sv);
5055             }
5056             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5057                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5058             NEXTVAL_NEXTTOKE.opval =
5059                     newSVOP(OP_CONST, 0,
5060                                  sv);
5061             force_next(THING);
5062             PL_parser->lex_shared->re_eval_start = NULL;
5063             PL_expect = XTERM;
5064             return REPORT(',');
5065         }
5066
5067         /* FALLTHROUGH */
5068     case LEX_INTERPCONCAT:
5069 #ifdef DEBUGGING
5070         if (PL_lex_brackets)
5071             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5072                        (long) PL_lex_brackets);
5073 #endif
5074         if (PL_bufptr == PL_bufend)
5075             return REPORT(sublex_done());
5076
5077         /* m'foo' still needs to be parsed for possible (?{...}) */
5078         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5079             SV *sv = newSVsv(PL_linestr);
5080             sv = tokeq(sv);
5081             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5082             s = PL_bufend;
5083         }
5084         else {
5085             int save_error_count = PL_error_count;
5086
5087             s = scan_const(PL_bufptr);
5088
5089             /* Set flag if this was a pattern and there were errors.  op.c will
5090              * refuse to compile a pattern with this flag set.  Otherwise, we
5091              * could get segfaults, etc. */
5092             if (PL_lex_inpat && PL_error_count > save_error_count) {
5093                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5094             }
5095             if (*s == '\\')
5096                 PL_lex_state = LEX_INTERPCASEMOD;
5097             else
5098                 PL_lex_state = LEX_INTERPSTART;
5099         }
5100
5101         if (s != PL_bufptr) {
5102             NEXTVAL_NEXTTOKE = pl_yylval;
5103             PL_expect = XTERM;
5104             force_next(THING);
5105             if (PL_lex_starts++) {
5106                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5107                 if (!PL_lex_casemods && PL_lex_inpat)
5108                     TOKEN(',');
5109                 else
5110                     AopNOASSIGN(OP_CONCAT);
5111             }
5112             else {
5113                 PL_bufptr = s;
5114                 return yylex();
5115             }
5116         }
5117
5118         return yylex();
5119     case LEX_FORMLINE:
5120         if (PL_parser->sub_error_count != PL_error_count) {
5121             /* There was an error parsing a formline, which tends to
5122                mess up the parser.
5123                Unlike interpolated sub-parsing, we can't treat any of
5124                these as recoverable, so no need to check sub_no_recover.
5125             */
5126             yyquit();
5127         }
5128         assert(PL_lex_formbrack);
5129         s = scan_formline(PL_bufptr);
5130         if (!PL_lex_formbrack)
5131         {
5132             formbrack = 1;
5133             goto rightbracket;
5134         }
5135         PL_bufptr = s;
5136         return yylex();
5137     }
5138
5139     /* We really do *not* want PL_linestr ever becoming a COW. */
5140     assert (!SvIsCOW(PL_linestr));
5141     s = PL_bufptr;
5142     PL_oldoldbufptr = PL_oldbufptr;
5143     PL_oldbufptr = s;
5144     PL_parser->saw_infix_sigil = 0;
5145
5146     if (PL_in_my == KEY_sigvar) {
5147         /* we expect the sigil and optional var name part of a
5148          * signature element here. Since a '$' is not necessarily
5149          * followed by a var name, handle it specially here; the general
5150          * yylex code would otherwise try to interpret whatever follows
5151          * as a var; e.g. ($, ...) would be seen as the var '$,'
5152          */
5153
5154         U8 sigil;
5155
5156         s = skipspace(s);
5157         sigil = *s++;
5158         PL_bufptr = s; /* for error reporting */
5159         switch (sigil) {
5160         case '$':
5161         case '@':
5162         case '%':
5163             /* spot stuff that looks like an prototype */
5164             if (strchr("$:@%&*;\\[]", *s)) {
5165                 yyerror("Illegal character following sigil in a subroutine signature");
5166                 break;
5167             }
5168             /* '$#' is banned, while '$ # comment' isn't */
5169             if (*s == '#') {
5170                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5171                 break;
5172             }
5173             s = skipspace(s);
5174             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5175                 char *dest = PL_tokenbuf + 1;
5176                 /* read var name, including sigil, into PL_tokenbuf */
5177                 PL_tokenbuf[0] = sigil;
5178                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5179                     0, cBOOL(UTF), FALSE, FALSE);
5180                 *dest = '\0';
5181                 assert(PL_tokenbuf[1]); /* we have a variable name */
5182             }
5183             else {
5184                 *PL_tokenbuf = 0;
5185                 PL_in_my = 0;
5186             }
5187
5188             s = skipspace(s);
5189             /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5190              * as the ASSIGNOP, and exclude other tokens that start with =
5191              */
5192             if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
5193                 /* save now to report with the same context as we did when
5194                  * all ASSIGNOPS were accepted */
5195                 PL_oldbufptr = s;
5196
5197                 ++s;
5198                 NEXTVAL_NEXTTOKE.ival = 0;
5199                 force_next(ASSIGNOP);
5200                 PL_expect = XTERM;
5201             }
5202             else if (*s == ',' || *s == ')') {
5203                 PL_expect = XOPERATOR;
5204             }
5205             else {
5206                 /* make sure the context shows the unexpected character and
5207                  * hopefully a bit more */
5208                 if (*s) ++s;
5209                 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5210                     s++;
5211                 PL_bufptr = s; /* for error reporting */
5212                 yyerror("Illegal operator following parameter in a subroutine signature");
5213                 PL_in_my = 0;
5214             }
5215             if (*PL_tokenbuf) {
5216                 NEXTVAL_NEXTTOKE.ival = sigil;
5217                 force_next('p'); /* force a signature pending identifier */
5218             }
5219             break;
5220
5221         case ')':
5222             PL_expect = XBLOCK;
5223             break;
5224         case ',': /* handle ($a,,$b) */
5225             break;
5226
5227         default:
5228             PL_in_my = 0;
5229             yyerror("A signature parameter must start with '$', '@' or '%'");
5230             /* very crude error recovery: skip to likely next signature
5231              * element */
5232             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5233                 s++;
5234             break;
5235         }
5236         TOKEN(sigil);
5237     }
5238
5239   retry:
5240     switch (*s) {
5241     default:
5242         if (UTF) {
5243             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5244                 goto keylookup;
5245             }
5246         }
5247         else if (isALNUMC(*s)) {
5248             goto keylookup;
5249         }
5250     {
5251         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5252         const char *c;
5253         if (UTF) {
5254             STRLEN skiplen = UTF8SKIP(s);
5255             STRLEN stravail = PL_bufend - s;
5256             c = sv_uni_display(dsv, newSVpvn_flags(s,
5257                                                    skiplen > stravail ? stravail : skiplen,
5258                                                    SVs_TEMP | SVf_UTF8),
5259                                10, UNI_DISPLAY_ISPRINT);
5260         }
5261         else {
5262             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5263         }
5264
5265         if (s >= PL_linestart) {
5266             d = PL_linestart;
5267         }
5268         else {
5269             /* somehow (probably due to a parse failure), PL_linestart has advanced
5270              * pass PL_bufptr, get a reasonable beginning of line
5271              */
5272             d = s;
5273             while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
5274                 --d;
5275         }
5276         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
5277         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5278             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
5279         }
5280
5281         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5282                           UTF8fARG(UTF, (s - d), d),
5283                          (int) len + 1);
5284     }
5285     case 4:
5286     case 26:
5287         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5288     case 0:
5289         if ((!PL_rsfp || PL_lex_inwhat)
5290          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5291             PL_last_uni = 0;
5292             PL_last_lop = 0;
5293             if (PL_lex_brackets
5294                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5295             {
5296                 yyerror((const char *)
5297                         (PL_lex_formbrack
5298                          ? "Format not terminated"
5299                          : "Missing right curly or square bracket"));
5300             }
5301             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5302                         "### Tokener got EOF\n");
5303             } );
5304             TOKEN(0);
5305         }
5306         if (s++ < PL_bufend)
5307             goto retry;                 /* ignore stray nulls */
5308         PL_last_uni = 0;
5309         PL_last_lop = 0;
5310         if (!PL_in_eval && !PL_preambled) {
5311             PL_preambled = TRUE;
5312             if (PL_perldb) {
5313                 /* Generate a string of Perl code to load the debugger.
5314                  * If PERL5DB is set, it will return the contents of that,
5315                  * otherwise a compile-time require of perl5db.pl.  */
5316
5317                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5318
5319                 if (pdb) {
5320                     sv_setpv(PL_linestr, pdb);
5321                     sv_catpvs(PL_linestr,";");
5322                 } else {
5323                     SETERRNO(0,SS_NORMAL);
5324                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5325                 }
5326                 PL_parser->preambling = CopLINE(PL_curcop);
5327             } else
5328                 SvPVCLEAR(PL_linestr);
5329             if (PL_preambleav) {
5330                 SV **svp = AvARRAY(PL_preambleav);
5331                 SV **const end = svp + AvFILLp(PL_preambleav);
5332                 while(svp <= end) {
5333                     sv_catsv(PL_linestr, *svp);
5334                     ++svp;
5335                     sv_catpvs(PL_linestr, ";");
5336                 }
5337                 sv_free(MUTABLE_SV(PL_preambleav));
5338                 PL_preambleav = NULL;
5339             }
5340             if (PL_minus_E)
5341                 sv_catpvs(PL_linestr,
5342                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5343             if (PL_minus_n || PL_minus_p) {
5344                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5345                 if (PL_minus_l)
5346                     sv_catpvs(PL_linestr,"chomp;");
5347                 if (PL_minus_a) {
5348                     if (PL_minus_F) {
5349                         if (   (   *PL_splitstr == '/'
5350                                 || *PL_splitstr == '\''
5351                                 || *PL_splitstr == '"')
5352                             && strchr(PL_splitstr + 1, *PL_splitstr))
5353                         {
5354                             /* strchr is ok, because -F pattern can't contain
5355                              * embeddded NULs */
5356                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5357                         }
5358                         else {
5359                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5360                                bytes can be used as quoting characters.  :-) */
5361                             const char *splits = PL_splitstr;
5362                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5363                             do {
5364                                 /* Need to \ \s  */
5365                                 if (*splits == '\\')
5366                                     sv_catpvn(PL_linestr, splits, 1);
5367                                 sv_catpvn(PL_linestr, splits, 1);
5368                             } while (*splits++);
5369                             /* This loop will embed the trailing NUL of
5370                                PL_linestr as the last thing it does before
5371                                terminating.  */
5372                             sv_catpvs(PL_linestr, ");");
5373                         }
5374                     }
5375                     else
5376                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5377                 }
5378             }
5379             sv_catpvs(PL_linestr, "\n");
5380             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5381             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5382             PL_last_lop = PL_last_uni = NULL;
5383             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5384                 update_debugger_info(PL_linestr, NULL, 0);
5385             goto retry;
5386         }
5387         do {
5388             fake_eof = 0;
5389             bof = cBOOL(PL_rsfp);
5390             if (0) {
5391               fake_eof:
5392                 fake_eof = LEX_FAKE_EOF;
5393             }
5394             PL_bufptr = PL_bufend;
5395             COPLINE_INC_WITH_HERELINES;
5396             if (!lex_next_chunk(fake_eof)) {
5397                 CopLINE_dec(PL_curcop);
5398                 s = PL_bufptr;
5399                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5400             }
5401             CopLINE_dec(PL_curcop);
5402             s = PL_bufptr;
5403             /* If it looks like the start of a BOM or raw UTF-16,
5404              * check if it in fact is. */
5405             if (bof && PL_rsfp
5406                 && (   *s == 0
5407                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5408                     || *(U8*)s >= 0xFE
5409                     || s[1] == 0))
5410             {
5411                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5412                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5413 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5414                 /* offset may include swallowed CR */
5415                 if (!bof)
5416                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5417 #endif
5418                 if (bof) {
5419                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5420                     s = swallow_bom((U8*)s);
5421                 }
5422             }
5423             if (PL_parser->in_pod) {
5424                 /* Incest with pod. */
5425                 if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
5426                     && !isALPHA(s[4]))
5427                 {
5428                     SvPVCLEAR(PL_linestr);
5429                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5430                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5431                     PL_last_lop = PL_last_uni = NULL;
5432                     PL_parser->in_pod = 0;
5433                 }
5434             }
5435             if (PL_rsfp || PL_parser->filtered)
5436                 incline(s, PL_bufend);
5437         } while (PL_parser->in_pod);
5438         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5439         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5440         PL_last_lop = PL_last_uni = NULL;
5441         if (CopLINE(PL_curcop) == 1) {
5442             while (s < PL_bufend && isSPACE(*s))
5443                 s++;
5444             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5445                 s++;
5446             d = NULL;
5447             if (!PL_in_eval) {
5448                 if (*s == '#' && *(s+1) == '!')
5449                     d = s + 2;
5450 #ifdef ALTERNATE_SHEBANG
5451                 else {
5452                     static char const as[] = ALTERNATE_SHEBANG;
5453                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5454                         d = s + (sizeof(as) - 1);
5455                 }
5456 #endif /* ALTERNATE_SHEBANG */
5457             }
5458             if (d) {
5459                 char *ipath;
5460                 char *ipathend;
5461
5462                 while (isSPACE(*d))
5463                     d++;
5464                 ipath = d;
5465                 while (*d && !isSPACE(*d))
5466                     d++;
5467                 ipathend = d;
5468
5469 #ifdef ARG_ZERO_IS_SCRIPT
5470                 if (ipathend > ipath) {
5471                     /*
5472                      * HP-UX (at least) sets argv[0] to the script name,
5473                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5474                      * at least, set argv[0] to the basename of the Perl
5475                      * interpreter. So, having found "#!", we'll set it right.
5476                      */
5477                     SV* copfilesv = CopFILESV(PL_curcop);
5478                     if (copfilesv) {
5479                         SV * const x =
5480                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5481                                              SVt_PV)); /* $^X */
5482                         assert(SvPOK(x) || SvGMAGICAL(x));
5483                         if (sv_eq(x, copfilesv)) {
5484                             sv_setpvn(x, ipath, ipathend - ipath);
5485                             SvSETMAGIC(x);
5486                         }
5487                         else {
5488                             STRLEN blen;
5489                             STRLEN llen;
5490                             const char *bstart = SvPV_const(copfilesv, blen);
5491                             const char * const lstart = SvPV_const(x, llen);
5492                             if (llen < blen) {
5493                                 bstart += blen - llen;
5494                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5495                                     sv_setpvn(x, ipath, ipathend - ipath);
5496                                     SvSETMAGIC(x);
5497                                 }
5498                             }
5499                         }
5500                     }
5501                     else {
5502                         /* Anything to do if no copfilesv? */
5503                     }
5504                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5505                 }
5506 #endif /* ARG_ZERO_IS_SCRIPT */
5507
5508                 /*
5509                  * Look for options.
5510                  */
5511                 d = instr(s,"perl -");
5512                 if (!d) {
5513                     d = instr(s,"perl");
5514 #if defined(DOSISH)
5515                     /* avoid getting into infinite loops when shebang
5516                      * line contains "Perl" rather than "perl" */
5517                     if (!d) {
5518                         for (d = ipathend-4; d >= ipath; --d) {
5519                             if (isALPHA_FOLD_EQ(*d, 'p')
5520                                 && !ibcmp(d, "perl", 4))
5521                             {
5522                                 break;
5523                             }
5524                         }
5525                         if (d < ipath)
5526                             d = NULL;
5527                     }
5528 #endif
5529                 }
5530 #ifdef ALTERNATE_SHEBANG
5531                 /*
5532                  * If the ALTERNATE_SHEBANG on this system starts with a
5533                  * character that can be part of a Perl expression, then if
5534                  * we see it but not "perl", we're probably looking at the
5535                  * start of Perl code, not a request to hand off to some
5536                  * other interpreter.  Similarly, if "perl" is there, but
5537                  * not in the first 'word' of the line, we assume the line
5538                  * contains the start of the Perl program.
5539                  */
5540                 if (d && *s != '#') {
5541                     const char *c = ipath;
5542                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5543                         c++;
5544                     if (c < d)
5545                         d = NULL;       /* "perl" not in first word; ignore */
5546                     else
5547                         *s = '#';       /* Don't try to parse shebang line */
5548                 }
5549 #endif /* ALTERNATE_SHEBANG */
5550                 if (!d
5551                     && *s == '#'
5552                     && ipathend > ipath
5553                     && !PL_minus_c
5554                     && !instr(s,"indir")
5555                     && instr(PL_origargv[0],"perl"))
5556                 {
5557                     dVAR;
5558                     char **newargv;
5559
5560                     *ipathend = '\0';
5561                     s = ipathend + 1;
5562                     while (s < PL_bufend && isSPACE(*s))
5563                         s++;
5564                     if (s < PL_bufend) {
5565                         Newx(newargv,PL_origargc+3,char*);
5566                         newargv[1] = s;
5567                         while (s < PL_bufend && !isSPACE(*s))
5568                             s++;
5569                         *s = '\0';
5570                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5571                     }
5572                     else
5573                         newargv = PL_origargv;
5574                     newargv[0] = ipath;
5575                     PERL_FPU_PRE_EXEC
5576                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5577                     PERL_FPU_POST_EXEC
5578                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5579                 }
5580                 if (d) {
5581                     while (*d && !isSPACE(*d))
5582                         d++;
5583                     while (SPACE_OR_TAB(*d))
5584                         d++;
5585
5586                     if (*d++ == '-') {
5587                         const bool switches_done = PL_doswitches;
5588                         const U32 oldpdb = PL_perldb;
5589                         const bool oldn = PL_minus_n;
5590                         const bool oldp = PL_minus_p;
5591                         const char *d1 = d;
5592
5593                         do {
5594                             bool baduni = FALSE;
5595                             if (*d1 == 'C') {
5596                                 const char *d2 = d1 + 1;
5597                                 if (parse_unicode_opts((const char **)&d2)
5598                                     != PL_unicode)
5599                                     baduni = TRUE;
5600                             }
5601                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5602                                 const char * const m = d1;
5603                                 while (*d1 && !isSPACE(*d1))
5604                                     d1++;
5605                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5606                                       (int)(d1 - m), m);
5607                             }
5608                             d1 = moreswitches(d1);
5609                         } while (d1);
5610                         if (PL_doswitches && !switches_done) {
5611                             int argc = PL_origargc;
5612                             char **argv = PL_origargv;
5613                             do {
5614                                 argc--,argv++;
5615                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5616                             init_argv_symbols(argc,argv);
5617                         }
5618                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5619                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5620                               /* if we have already added "LINE: while (<>) {",
5621                                  we must not do it again */
5622                         {
5623                             SvPVCLEAR(PL_linestr);
5624                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5625                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5626                             PL_last_lop = PL_last_uni = NULL;
5627                             PL_preambled = FALSE;
5628                             if (PERLDB_LINE_OR_SAVESRC)
5629                                 (void)gv_fetchfile(PL_origfilename);
5630                             goto retry;
5631                         }
5632                     }
5633                 }
5634             }
5635         }
5636         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5637             PL_lex_state = LEX_FORMLINE;
5638             force_next(FORMRBRACK);
5639             TOKEN(';');
5640         }
5641         goto retry;
5642     case '\r':
5643 #ifdef PERL_STRICT_CR
5644         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5645         Perl_croak(aTHX_
5646       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5647 #endif
5648     case ' ': case '\t': case '\f': case '\v':
5649         s++;
5650         goto retry;
5651     case '#':
5652     case '\n':
5653         if (PL_lex_state != LEX_NORMAL
5654             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5655         {
5656             const bool in_comment = *s == '#';
5657             if (*s == '#' && s == PL_linestart && PL_in_eval
5658              && !PL_rsfp && !PL_parser->filtered) {
5659                 /* handle eval qq[#line 1 "foo"\n ...] */
5660                 CopLINE_dec(PL_curcop);
5661                 incline(s, PL_bufend);
5662             }
5663             d = s;
5664             while (d < PL_bufend && *d != '\n')
5665                 d++;
5666             if (d < PL_bufend)
5667                 d++;
5668             s = d;
5669             if (in_comment && d == PL_bufend
5670                 && PL_lex_state == LEX_INTERPNORMAL
5671                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5672                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5673             else
5674                 incline(s, PL_bufend);
5675             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5676                 PL_lex_state = LEX_FORMLINE;
5677                 force_next(FORMRBRACK);
5678                 TOKEN(';');
5679             }
5680         }
5681         else {
5682             while (s < PL_bufend && *s != '\n')
5683                 s++;
5684             if (s < PL_bufend)
5685                 {
5686                     s++;
5687                     if (s < PL_bufend)
5688                         incline(s, PL_bufend);
5689                 }
5690         }
5691         goto retry;
5692     case '-':
5693         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5694             I32 ftst = 0;
5695             char tmp;
5696
5697             s++;
5698             PL_bufptr = s;
5699             tmp = *s++;
5700
5701             while (s < PL_bufend && SPACE_OR_TAB(*s))
5702                 s++;
5703
5704             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5705                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5706                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5707                 OPERATOR('-');          /* unary minus */
5708             }
5709             switch (tmp) {
5710             case 'r': ftst = OP_FTEREAD;        break;
5711             case 'w': ftst = OP_FTEWRITE;       break;
5712             case 'x': ftst = OP_FTEEXEC;        break;
5713             case 'o': ftst = OP_FTEOWNED;       break;
5714             case 'R': ftst = OP_FTRREAD;        break;
5715             case 'W': ftst = OP_FTRWRITE;       break;
5716             case 'X': ftst = OP_FTREXEC;        break;
5717             case 'O': ftst = OP_FTROWNED;       break;
5718             case 'e': ftst = OP_FTIS;           break;
5719             case 'z': ftst = OP_FTZERO;         break;
5720             case 's': ftst = OP_FTSIZE;         break;
5721             case 'f': ftst = OP_FTFILE;         break;
5722             case 'd': ftst = OP_FTDIR;          break;
5723             case 'l': ftst = OP_FTLINK;         break;
5724             case 'p': ftst = OP_FTPIPE;         break;
5725             case 'S': ftst = OP_FTSOCK;         break;
5726             case 'u': ftst = OP_FTSUID;         break;
5727             case 'g': ftst = OP_FTSGID;         break;
5728             case 'k': ftst = OP_FTSVTX;         break;
5729             case 'b': ftst = OP_FTBLK;          break;
5730             case 'c': ftst = OP_FTCHR;          break;
5731             case 't': ftst = OP_FTTTY;          break;
5732             case 'T': ftst = OP_FTTEXT;         break;
5733             case 'B': ftst = OP_FTBINARY;       break;
5734             case 'M': case 'A': case 'C':
5735                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5736                 switch (tmp) {
5737                 case 'M': ftst = OP_FTMTIME;    break;
5738                 case 'A': ftst = OP_FTATIME;    break;
5739                 case 'C': ftst = OP_FTCTIME;    break;
5740                 default:                        break;
5741                 }
5742                 break;
5743             default:
5744                 break;
5745             }
5746             if (ftst) {
5747                 PL_last_uni = PL_oldbufptr;
5748                 PL_last_lop_op = (OPCODE)ftst;
5749                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5750                         "### Saw file test %c\n", (int)tmp);
5751                 } );
5752                 FTST(ftst);
5753             }
5754             else {
5755                 /* Assume it was a minus followed by a one-letter named
5756                  * subroutine call (or a -bareword), then. */
5757                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5758                         "### '-%c' looked like a file test but was not\n",
5759                         (int) tmp);
5760                 } );
5761                 s = --PL_bufptr;
5762             }
5763         }
5764         {
5765             const char tmp = *s++;
5766             if (*s == tmp) {
5767                 s++;
5768                 if (PL_expect == XOPERATOR)
5769                     TERM(POSTDEC);
5770                 else
5771                     OPERATOR(PREDEC);
5772             }
5773             else if (*s == '>') {
5774                 s++;
5775                 s = skipspace(s);
5776                 if (((*s == '$' || *s == '&') && s[1] == '*')
5777                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5778                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5779                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5780                  )
5781                 {
5782                     PL_expect = XPOSTDEREF;
5783                     TOKEN(ARROW);
5784                 }
5785                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5786                     s = force_word(s,METHOD,FALSE,TRUE);
5787                     TOKEN(ARROW);
5788                 }
5789                 else if (*s == '$')
5790                     OPERATOR(ARROW);
5791                 else
5792                     TERM(ARROW);
5793             }
5794             if (PL_expect == XOPERATOR) {
5795                 if (*s == '='
5796                     && !PL_lex_allbrackets
5797                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5798                 {
5799                     s--;
5800                     TOKEN(0);
5801                 }
5802                 Aop(OP_SUBTRACT);
5803             }
5804             else {
5805                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5806                     check_uni();
5807                 OPERATOR('-');          /* unary minus */
5808             }
5809         }
5810
5811     case '+':
5812         {
5813             const char tmp = *s++;
5814             if (*s == tmp) {
5815                 s++;
5816                 if (PL_expect == XOPERATOR)
5817                     TERM(POSTINC);
5818                 else
5819                     OPERATOR(PREINC);
5820             }
5821             if (PL_expect == XOPERATOR) {
5822                 if (*s == '='
5823                     && !PL_lex_allbrackets
5824                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5825                 {
5826                     s--;
5827                     TOKEN(0);
5828                 }
5829                 Aop(OP_ADD);
5830             }
5831             else {
5832                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5833                     check_uni();
5834                 OPERATOR('+');
5835             }
5836         }
5837
5838     case '*':
5839         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5840         if (PL_expect != XOPERATOR) {
5841             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5842             PL_expect = XOPERATOR;
5843             force_ident(PL_tokenbuf, '*');
5844             if (!*PL_tokenbuf)
5845                 PREREF('*');
5846             TERM('*');
5847         }
5848         s++;
5849         if (*s == '*') {
5850             s++;
5851             if (*s == '=' && !PL_lex_allbrackets
5852                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5853             {
5854                 s -= 2;
5855                 TOKEN(0);
5856             }
5857             PWop(OP_POW);
5858         }
5859         if (*s == '='
5860             && !PL_lex_allbrackets
5861             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5862         {
5863             s--;
5864             TOKEN(0);
5865         }
5866         PL_parser->saw_infix_sigil = 1;
5867         Mop(OP_MULTIPLY);
5868
5869     case '%':
5870     {
5871         if (PL_expect == XOPERATOR) {
5872             if (s[1] == '='
5873                 && !PL_lex_allbrackets
5874                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5875             {
5876                 TOKEN(0);
5877             }
5878             ++s;
5879             PL_parser->saw_infix_sigil = 1;
5880             Mop(OP_MODULO);
5881         }
5882         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5883         PL_tokenbuf[0] = '%';
5884         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5885         pl_yylval.ival = 0;
5886         if (!PL_tokenbuf[1]) {
5887             PREREF('%');
5888         }
5889         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5890             && intuit_more(s, PL_bufend)) {
5891             if (*s == '[')
5892                 PL_tokenbuf[0] = '@';
5893         }
5894         PL_expect = XOPERATOR;
5895         force_ident_maybe_lex('%');
5896         TERM('%');
5897     }
5898     case '^':
5899         d = s;
5900         bof = FEATURE_BITWISE_IS_ENABLED;
5901         if (bof && s[1] == '.')
5902             s++;
5903         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5904                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5905         {
5906             s = d;
5907             TOKEN(0);
5908         }
5909         s++;
5910         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5911     case '[':
5912         if (PL_lex_brackets > 100)
5913             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5914         PL_lex_brackstack[PL_lex_brackets++] = 0;
5915         PL_lex_allbrackets++;
5916         {
5917             const char tmp = *s++;
5918             OPERATOR(tmp);
5919         }
5920     case '~':
5921         if (s[1] == '~'
5922             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5923         {
5924             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5925                 TOKEN(0);
5926             s += 2;
5927             Perl_ck_warner_d(aTHX_
5928                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5929                 "Smartmatch is experimental");
5930             Eop(OP_SMARTMATCH);
5931         }
5932         s++;
5933         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5934             s++;
5935             BCop(OP_SCOMPLEMENT);
5936         }
5937         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5938     case ',':
5939         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5940             TOKEN(0);
5941         s++;
5942         OPERATOR(',');
5943     case ':':
5944         if (s[1] == ':') {
5945             len = 0;
5946             goto just_a_word_zero_gv;
5947         }
5948         s++;
5949         {
5950         OP *attrs;
5951
5952         switch (PL_expect) {
5953         case XOPERATOR:
5954             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5955                 break;
5956             PL_bufptr = s;      /* update in case we back off */
5957             if (*s == '=') {
5958                 Perl_croak(aTHX_
5959                            "Use of := for an empty attribute list is not allowed");
5960             }
5961             goto grabattrs;
5962         case XATTRBLOCK:
5963             PL_expect = XBLOCK;
5964             goto grabattrs;
5965         case XATTRTERM:
5966             PL_expect = XTERMBLOCK;
5967          grabattrs:
5968             /* NB: as well as parsing normal attributes, we also end up
5969              * here if there is something looking like attributes
5970              * following a signature (which is illegal, but used to be
5971              * legal in 5.20..5.26). If the latter, we still parse the
5972              * attributes so that error messages(s) are less confusing,
5973              * but ignore them (parser->sig_seen).
5974              */
5975             s = skipspace(s);
5976             attrs = NULL;
5977             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5978                 bool sig = PL_parser->sig_seen;
5979                 I32 tmp;
5980                 SV *sv;
5981                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5982                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5983                     if (tmp < 0) tmp = -tmp;
5984                     switch (tmp) {
5985                     case KEY_or:
5986                     case KEY_and:
5987                     case KEY_for:
5988                     case KEY_foreach:
5989                     case KEY_unless:
5990                     case KEY_if:
5991                     case KEY_while:
5992                     case KEY_until:
5993                         goto got_attrs;
5994                     default:
5995                         break;
5996                     }
5997                 }
5998                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5999                 if (*d == '(') {
6000                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6001                     if (!d) {
6002                         if (attrs)
6003                             op_free(attrs);
6004                         sv_free(sv);
6005                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6006                     }
6007                     COPLINE_SET_FROM_MULTI_END;
6008                 }
6009                 if (PL_lex_stuff) {
6010                     sv_catsv(sv, PL_lex_stuff);
6011                     attrs = op_append_elem(OP_LIST, attrs,
6012                                         newSVOP(OP_CONST, 0, sv));
6013                     SvREFCNT_dec_NN(PL_lex_stuff);
6014                     PL_lex_stuff = NULL;
6015                 }
6016                 else {
6017                     /* NOTE: any CV attrs applied here need to be part of
6018                        the CVf_BUILTIN_ATTRS define in cv.h! */
6019                     if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
6020                         sv_free(sv);
6021                         if (!sig)
6022                             CvLVALUE_on(PL_compcv);
6023                     }
6024                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
6025                         sv_free(sv);
6026                         if (!sig)
6027                             CvMETHOD_on(PL_compcv);
6028                     }
6029                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
6030                     {
6031                         sv_free(sv);
6032                         if (!sig) {
6033                             Perl_ck_warner_d(aTHX_
6034                                 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
6035                                ":const is experimental"
6036                             );
6037                             CvANONCONST_on(PL_compcv);
6038                             if (!CvANON(PL_compcv))
6039                                 yyerror(":const is not permitted on named "
6040                                         "subroutines");
6041                         }
6042                     }
6043                     /* After we've set the flags, it could be argued that
6044                        we don't need to do the attributes.pm-based setting
6045                        process, and shouldn't bother appending recognized
6046                        flags.  To experiment with that, uncomment the
6047                        following "else".  (Note that's already been
6048                        uncommented.  That keeps the above-applied built-in
6049                        attributes from being intercepted (and possibly
6050                        rejected) by a package's attribute routines, but is
6051                        justified by the performance win for the common case
6052                        of applying only built-in attributes.) */
6053                     else
6054                         attrs = op_append_elem(OP_LIST, attrs,
6055                                             newSVOP(OP_CONST, 0,
6056                                                     sv));
6057                 }
6058                 s = skipspace(d);
6059                 if (*s == ':' && s[1] != ':')
6060                     s = skipspace(s+1);
6061                 else if (s == d)
6062                     break;      /* require real whitespace or :'s */
6063                 /* XXX losing whitespace on sequential attributes here */
6064             }
6065             {
6066                 if (*s != ';'
6067                     && *s != '}'
6068                     && !(PL_expect == XOPERATOR
6069                          ? (*s == '=' ||  *s == ')')
6070                          : (*s == '{' ||  *s == '(')))
6071                 {
6072                     const char q = ((*s == '\'') ? '"' : '\'');
6073                     /* If here for an expression, and parsed no attrs, back
6074                        off. */
6075                     if (PL_expect == XOPERATOR && !attrs) {
6076                         s = PL_bufptr;
6077                         break;
6078                     }
6079                     /* MUST advance bufptr here to avoid bogus "at end of line"
6080                        context messages from yyerror().
6081                     */
6082                     PL_bufptr = s;
6083                     yyerror( (const char *)
6084                              (*s
6085                               ? Perl_form(aTHX_ "Invalid separator character "
6086                                           "%c%c%c in attribute list", q, *s, q)
6087                               : "Unterminated attribute list" ) );
6088                     if (attrs)
6089                         op_free(attrs);
6090                     OPERATOR(':');
6091                 }
6092             }
6093         got_attrs:
6094             if (PL_parser->sig_seen) {
6095                 /* see comment about about sig_seen and parser error
6096                  * handling */
6097                 if (attrs)
6098                     op_free(attrs);
6099                 Perl_croak(aTHX_ "Subroutine attributes must come "
6100                                  "before the signature");
6101                 }
6102             if (attrs) {
6103                 NEXTVAL_NEXTTOKE.opval = attrs;
6104                 force_next(THING);
6105             }
6106             TOKEN(COLONATTR);
6107         }
6108         }
6109         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6110             s--;
6111             TOKEN(0);
6112         }
6113         PL_lex_allbrackets--;
6114         OPERATOR(':');
6115     case '(':
6116         s++;
6117         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6118             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6119         else
6120             PL_expect = XTERM;
6121         s = skipspace(s);
6122         PL_lex_allbrackets++;
6123         TOKEN('(');
6124     case ';':
6125         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6126             TOKEN(0);
6127         CLINE;
6128         s++;
6129         PL_expect = XSTATE;
6130         TOKEN(';');
6131     case ')':
6132         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6133             TOKEN(0);
6134         s++;
6135         PL_lex_allbrackets--;
6136         s = skipspace(s);
6137         if (*s == '{')
6138             PREBLOCK(')');
6139         TERM(')');
6140     case ']':
6141         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6142             TOKEN(0);
6143         s++;
6144         if (PL_lex_brackets <= 0)
6145             /* diag_listed_as: Unmatched right %s bracket */
6146             yyerror("Unmatched right square bracket");
6147         else
6148             --PL_lex_brackets;
6149         PL_lex_allbrackets--;
6150         if (PL_lex_state == LEX_INTERPNORMAL) {
6151             if (PL_lex_brackets == 0) {
6152                 if (*s == '-' && s[1] == '>')
6153                     PL_lex_state = LEX_INTERPENDMAYBE;
6154                 else if (*s != '[' && *s != '{')
6155                     PL_lex_state = LEX_INTERPEND;
6156             }
6157         }
6158         TERM(']');
6159     case '{':
6160         s++;
6161       leftbracket:
6162         if (PL_lex_brackets > 100) {
6163             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6164         }
6165         switch (PL_expect) {
6166         case XTERM:
6167         case XTERMORDORDOR:
6168             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6169             PL_lex_allbrackets++;
6170             OPERATOR(HASHBRACK);
6171         case XOPERATOR:
6172             while (s < PL_bufend && SPACE_OR_TAB(*s))
6173                 s++;
6174             d = s;
6175             PL_tokenbuf[0] = '\0';
6176             if (d < PL_bufend && *d == '-') {
6177                 PL_tokenbuf[0] = '-';
6178                 d++;
6179                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6180                     d++;
6181             }
6182             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6183                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6184                               FALSE, &len);
6185                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6186                     d++;
6187                 if (*d == '}') {
6188                     const char minus = (PL_tokenbuf[0] == '-');
6189                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6190                     if (minus)
6191                         force_next('-');
6192                 }
6193             }
6194             /* FALLTHROUGH */
6195         case XATTRTERM:
6196         case XTERMBLOCK:
6197             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6198             PL_lex_allbrackets++;
6199             PL_expect = XSTATE;
6200             break;
6201         case XATTRBLOCK:
6202         case XBLOCK:
6203             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6204             PL_lex_allbrackets++;
6205             PL_expect = XSTATE;
6206             break;
6207         case XBLOCKTERM:
6208             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6209             PL_lex_allbrackets++;
6210             PL_expect = XSTATE;
6211             break;
6212         default: {
6213                 const char *t;
6214                 if (PL_oldoldbufptr == PL_last_lop)
6215                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6216                 else
6217                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6218                 PL_lex_allbrackets++;
6219                 s = skipspace(s);
6220                 if (*s == '}') {
6221                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6222                         PL_expect = XTERM;
6223                         /* This hack is to get the ${} in the message. */
6224                         PL_bufptr = s+1;
6225                         yyerror("syntax error");
6226                         break;
6227                     }
6228                     OPERATOR(HASHBRACK);
6229                 }
6230                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6231                     /* ${...} or @{...} etc., but not print {...}
6232                      * Skip the disambiguation and treat this as a block.
6233                      */
6234                     goto block_expectation;
6235                 }
6236                 /* This hack serves to disambiguate a pair of curlies
6237                  * as being a block or an anon hash.  Normally, expectation
6238                  * determines that, but in cases where we're not in a
6239                  * position to expect anything in particular (like inside
6240                  * eval"") we have to resolve the ambiguity.  This code
6241                  * covers the case where the first term in the curlies is a
6242                  * quoted string.  Most other cases need to be explicitly
6243                  * disambiguated by prepending a "+" before the opening
6244                  * curly in order to force resolution as an anon hash.
6245                  *
6246                  * XXX should probably propagate the outer expectation
6247                  * into eval"" to rely less on this hack, but that could
6248                  * potentially break current behavior of eval"".
6249                  * GSAR 97-07-21
6250                  */
6251                 t = s;
6252                 if (*s == '\'' || *s == '"' || *s == '`') {
6253                     /* common case: get past first string, handling escapes */
6254                     for (t++; t < PL_bufend && *t != *s;)
6255                         if (*t++ == '\\')
6256                             t++;
6257                     t++;
6258                 }
6259                 else if (*s == 'q') {
6260                     if (++t < PL_bufend
6261                         && (!isWORDCHAR(*t)
6262                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6263                                 && !isWORDCHAR(*t))))
6264                     {
6265                         /* skip q//-like construct */
6266                         const char *tmps;
6267                         char open, close, term;
6268                         I32 brackets = 1;
6269
6270                         while (t < PL_bufend && isSPACE(*t))
6271                             t++;
6272                         /* check for q => */
6273                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6274                             OPERATOR(HASHBRACK);
6275                         }
6276                         term = *t;
6277                         open = term;
6278                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6279                             term = tmps[5];
6280                         close = term;
6281                         if (open == close)
6282                             for (t++; t < PL_bufend; t++) {
6283                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6284                                     t++;
6285                                 else if (*t == open)
6286                                     break;
6287                             }
6288                         else {
6289                             for (t++; t < PL_bufend; t++) {
6290                                 if (*t == '\\' && t+1 < PL_bufend)
6291                                     t++;
6292                                 else if (*t == close && --brackets <= 0)
6293                                     break;
6294                                 else if (*t == open)
6295                                     brackets++;
6296                             }
6297                         }
6298                         t++;
6299                     }
6300                     else
6301                         /* skip plain q word */
6302                         while (   t < PL_bufend
6303                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6304                         {
6305                             t += UTF ? UTF8SKIP(t) : 1;
6306                         }
6307                 }
6308                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6309                     t += UTF ? UTF8SKIP(t) : 1;
6310                     while (   t < PL_bufend
6311                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6312                     {
6313                         t += UTF ? UTF8SKIP(t) : 1;
6314                     }
6315                 }
6316                 while (t < PL_bufend && isSPACE(*t))
6317                     t++;
6318                 /* if comma follows first term, call it an anon hash */
6319                 /* XXX it could be a comma expression with loop modifiers */
6320                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6321                                    || (*t == '=' && t[1] == '>')))
6322                     OPERATOR(HASHBRACK);
6323                 if (PL_expect == XREF)
6324                 {
6325                   block_expectation:
6326                     /* If there is an opening brace or 'sub:', treat it
6327                        as a term to make ${{...}}{k} and &{sub:attr...}
6328                        dwim.  Otherwise, treat it as a statement, so
6329                        map {no strict; ...} works.
6330                      */
6331                     s = skipspace(s);
6332                     if (*s == '{') {
6333                         PL_expect = XTERM;
6334                         break;
6335                     }
6336                     if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6337                         PL_bufptr = s;
6338                         d = s + 3;
6339                         d = skipspace(d);
6340                         s = PL_bufptr;
6341                         if (*d == ':') {
6342                             PL_expect = XTERM;
6343                             break;
6344                         }
6345                     }
6346                     PL_expect = XSTATE;
6347                 }
6348                 else {
6349                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6350                     PL_expect = XSTATE;
6351                 }
6352             }
6353             break;
6354         }
6355         pl_yylval.ival = CopLINE(PL_curcop);
6356         PL_copline = NOLINE;   /* invalidate current command line number */
6357         TOKEN(formbrack ? '=' : '{');
6358     case '}':
6359         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6360             TOKEN(0);
6361       rightbracket:
6362         assert(s != PL_bufend);
6363         s++;
6364         if (PL_lex_brackets <= 0)
6365             /* diag_listed_as: Unmatched right %s bracket */
6366             yyerror("Unmatched right curly bracket");
6367         else
6368             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6369         PL_lex_allbrackets--;
6370         if (PL_lex_state == LEX_INTERPNORMAL) {
6371             if (PL_lex_brackets == 0) {
6372                 if (PL_expect & XFAKEBRACK) {
6373                     PL_expect &= XENUMMASK;
6374                     PL_lex_state = LEX_INTERPEND;
6375                     PL_bufptr = s;
6376                     return yylex();     /* ignore fake brackets */
6377                 }
6378                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6379                  && SvEVALED(PL_lex_repl))
6380                     PL_lex_state = LEX_INTERPEND;
6381                 else if (*s == '-' && s[1] == '>')
6382                     PL_lex_state = LEX_INTERPENDMAYBE;
6383                 else if (*s != '[' && *s != '{')
6384                     PL_lex_state = LEX_INTERPEND;
6385             }
6386         }
6387         if (PL_expect & XFAKEBRACK) {
6388             PL_expect &= XENUMMASK;
6389             PL_bufptr = s;
6390             return yylex();             /* ignore fake brackets */
6391         }
6392         force_next(formbrack ? '.' : '}');
6393         if (formbrack) LEAVE_with_name("lex_format");
6394         if (formbrack == 2) { /* means . where arguments were expected */
6395             force_next(';');
6396             TOKEN(FORMRBRACK);
6397         }
6398         TOKEN(';');
6399     case '&':
6400         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6401         s++;
6402         if (*s++ == '&') {
6403             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6404                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6405                 s -= 2;
6406                 TOKEN(0);
6407             }
6408             AOPERATOR(ANDAND);
6409         }
6410         s--;
6411         if (PL_expect == XOPERATOR) {
6412             if (   PL_bufptr == PL_linestart
6413                 && ckWARN(WARN_SEMICOLON)
6414                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6415             {
6416                 CopLINE_dec(PL_curcop);
6417                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6418                 CopLINE_inc(PL_curcop);
6419             }
6420             d = s;
6421             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6422                 s++;
6423             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6424                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6425                 s = d;
6426                 s--;
6427                 TOKEN(0);
6428             }
6429             if (d == s) {
6430                 PL_parser->saw_infix_sigil = 1;
6431                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6432             }
6433             else
6434                 BAop(OP_SBIT_AND);
6435         }
6436
6437         PL_tokenbuf[0] = '&';
6438         s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6439         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6440         if (PL_tokenbuf[1]) {
6441             force_ident_maybe_lex('&');
6442         }
6443         else
6444             PREREF('&');
6445         TERM('&');
6446
6447     case '|':
6448         s++;
6449         if (*s++ == '|') {
6450             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6451                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6452                 s -= 2;
6453                 TOKEN(0);
6454             }
6455             AOPERATOR(OROR);
6456         }
6457         s--;
6458         d = s;
6459         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6460             s++;
6461         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6462                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6463             s = d - 1;
6464             TOKEN(0);
6465         }
6466         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6467     case '=':
6468         s++;
6469         {
6470             const char tmp = *s++;
6471             if (tmp == '=') {
6472                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6473                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
6474                 {
6475                     s = vcs_conflict_marker(s + 5);
6476                     goto retry;
6477                 }
6478                 if (!PL_lex_allbrackets
6479                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6480                 {
6481                     s -= 2;
6482                     TOKEN(0);
6483                 }
6484                 Eop(OP_EQ);
6485             }
6486             if (tmp == '>') {
6487                 if (!PL_lex_allbrackets
6488                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6489                 {
6490                     s -= 2;
6491                     TOKEN(0);
6492                 }
6493                 OPERATOR(',');
6494             }
6495             if (tmp == '~')
6496                 PMop(OP_MATCH);
6497             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6498                 && strchr("+-*/%.^&|<",tmp))
6499                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6500                             "Reversed %c= operator",(int)tmp);
6501             s--;
6502             if (PL_expect == XSTATE
6503                 && isALPHA(tmp)
6504                 && (s == PL_linestart+1 || s[-2] == '\n') )
6505             {
6506                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6507                     || PL_lex_state != LEX_NORMAL)
6508                 {
6509                     d = PL_bufend;
6510                     while (s < d) {
6511                         if (*s++ == '\n') {
6512                             incline(s, PL_bufend);
6513                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
6514                             {
6515                                 s = (char *) memchr(s,'\n', d - s);
6516                                 if (s)
6517                                     s++;
6518                                 else
6519                                     s = d;
6520                                 incline(s, PL_bufend);
6521                                 goto retry;
6522                             }
6523                         }
6524                     }
6525                     goto retry;
6526                 }
6527                 s = PL_bufend;
6528                 PL_parser->in_pod = 1;
6529                 goto retry;
6530             }
6531         }
6532         if (PL_expect == XBLOCK) {
6533             const char *t = s;
6534 #ifdef PERL_STRICT_CR
6535             while (SPACE_OR_TAB(*t))
6536 #else
6537             while (SPACE_OR_TAB(*t) || *t == '\r')
6538 #endif
6539                 t++;
6540             if (*t == '\n' || *t == '#') {
6541                 formbrack = 1;
6542                 ENTER_with_name("lex_format");
6543                 SAVEI8(PL_parser->form_lex_state);
6544                 SAVEI32(PL_lex_formbrack);
6545                 PL_parser->form_lex_state = PL_lex_state;
6546                 PL_lex_formbrack = PL_lex_brackets + 1;
6547                 PL_parser->sub_error_count = PL_error_count;
6548                 goto leftbracket;
6549             }
6550         }
6551         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6552             s--;
6553             TOKEN(0);
6554         }
6555         pl_yylval.ival = 0;
6556         OPERATOR(ASSIGNOP);
6557     case '!':
6558         s++;
6559         {
6560             const char tmp = *s++;
6561             if (tmp == '=') {
6562                 /* was this !=~ where !~ was meant?
6563                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6564
6565                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6566                     const char *t = s+1;
6567
6568                     while (t < PL_bufend && isSPACE(*t))
6569                         ++t;
6570
6571                     if (*t == '/' || *t == '?'
6572                         || ((*t == 'm' || *t == 's' || *t == 'y')
6573                             && !isWORDCHAR(t[1]))
6574                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6575                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6576                                     "!=~ should be !~");
6577                 }
6578                 if (!PL_lex_allbrackets
6579                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6580                 {
6581                     s -= 2;
6582                     TOKEN(0);
6583                 }
6584                 Eop(OP_NE);
6585             }
6586             if (tmp == '~')
6587                 PMop(OP_NOT);
6588         }
6589         s--;
6590         OPERATOR('!');
6591     case '<':
6592         if (PL_expect != XOPERATOR) {
6593             if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6594                 check_uni();
6595             if (s[1] == '<' && s[2] != '>') {
6596                 if (   (s == PL_linestart || s[-1] == '\n')
6597                     && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
6598                 {
6599                     s = vcs_conflict_marker(s + 7);
6600                     goto retry;
6601                 }
6602                 s = scan_heredoc(s);
6603             }
6604             else
6605                 s = scan_inputsymbol(s);
6606             PL_expect = XOPERATOR;
6607             TOKEN(sublex_start());
6608         }
6609         s++;
6610         {
6611             char tmp = *s++;
6612             if (tmp == '<') {
6613                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6614                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
6615                 {
6616                     s = vcs_conflict_marker(s + 5);
6617                     goto retry;
6618                 }
6619                 if (*s == '=' && !PL_lex_allbrackets
6620                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6621                 {
6622                     s -= 2;
6623                     TOKEN(0);
6624                 }
6625                 SHop(OP_LEFT_SHIFT);
6626             }
6627             if (tmp == '=') {
6628                 tmp = *s++;
6629                 if (tmp == '>') {
6630                     if (!PL_lex_allbrackets
6631                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6632                     {
6633                         s -= 3;
6634                         TOKEN(0);
6635                     }
6636                     Eop(OP_NCMP);
6637                 }
6638                 s--;
6639                 if (!PL_lex_allbrackets
6640                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6641                 {
6642                     s -= 2;
6643                     TOKEN(0);
6644                 }
6645                 Rop(OP_LE);
6646             }
6647         }
6648         s--;
6649         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6650             s--;
6651             TOKEN(0);
6652         }
6653         Rop(OP_LT);
6654     case '>':
6655         s++;
6656         {
6657             const char tmp = *s++;
6658             if (tmp == '>') {
6659                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6660                     && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
6661                 {
6662                     s = vcs_conflict_marker(s + 5);
6663                     goto retry;
6664                 }
6665                 if (*s == '=' && !PL_lex_allbrackets
6666                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6667                 {
6668                     s -= 2;
6669                     TOKEN(0);
6670                 }
6671                 SHop(OP_RIGHT_SHIFT);
6672             }
6673             else if (tmp == '=') {
6674                 if (!PL_lex_allbrackets
6675                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6676                 {
6677                     s -= 2;
6678                     TOKEN(0);
6679                 }
6680                 Rop(OP_GE);
6681             }
6682         }
6683         s--;
6684         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6685             s--;
6686             TOKEN(0);
6687         }
6688         Rop(OP_GT);
6689
6690     case '$':
6691         CLINE;
6692
6693         if (PL_expect == XPOSTDEREF) {
6694             if (s[1] == '#') {
6695                 s++;
6696                 POSTDEREF(DOLSHARP);
6697             }
6698             POSTDEREF('$');
6699         }
6700
6701         if (   s[1] == '#'
6702             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6703                 || strchr("{$:+-@", s[2])))
6704         {
6705             PL_tokenbuf[0] = '@';
6706             s = scan_ident(s + 1, PL_tokenbuf + 1,
6707                            sizeof PL_tokenbuf - 1, FALSE);
6708             if (PL_expect == XOPERATOR) {
6709                 d = s;
6710                 if (PL_bufptr > s) {
6711                     d = PL_bufptr-1;
6712                     PL_bufptr = PL_oldbufptr;
6713                 }
6714                 no_op("Array length", d);
6715             }
6716             if (!PL_tokenbuf[1])
6717                 PREREF(DOLSHARP);
6718             PL_expect = XOPERATOR;
6719             force_ident_maybe_lex('#');
6720             TOKEN(DOLSHARP);
6721         }
6722
6723         PL_tokenbuf[0] = '$';
6724         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6725         if (PL_expect == XOPERATOR) {
6726             d = s;
6727             if (PL_bufptr > s) {
6728                 d = PL_bufptr-1;
6729                 PL_bufptr = PL_oldbufptr;
6730             }
6731             no_op("Scalar", d);
6732         }
6733         if (!PL_tokenbuf[1]) {
6734             if (s == PL_bufend)
6735                 yyerror("Final $ should be \\$ or $name");
6736             PREREF('$');
6737         }
6738
6739         d = s;
6740         {
6741             const char tmp = *s;
6742             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6743                 s = skipspace(s);
6744
6745             if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6746                 && intuit_more(s, PL_bufend)) {
6747                 if (*s == '[') {
6748                     PL_tokenbuf[0] = '@';
6749                     if (ckWARN(WARN_SYNTAX)) {
6750                         char *t = s+1;
6751
6752                         while (   isSPACE(*t)
6753                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6754                                || *t == '$')
6755                         {
6756                             t += UTF ? UTF8SKIP(t) : 1;
6757                         }
6758                         if (*t++ == ',') {
6759                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6760                             while (t < PL_bufend && *t != ']')
6761                                 t++;
6762                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6763                                         "Multidimensional syntax %" UTF8f " not supported",
6764                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6765                         }
6766                     }
6767                 }
6768                 else if (*s == '{') {
6769                     char *t;
6770                     PL_tokenbuf[0] = '%';
6771                     if (    strEQ(PL_tokenbuf+1, "SIG")
6772                         && ckWARN(WARN_SYNTAX)
6773                         && (t = (char *) memchr(s, '}', PL_bufend - s))
6774                         && (t = (char *) memchr(t, '=', PL_bufend - t)))
6775                     {
6776                         char tmpbuf[sizeof PL_tokenbuf];
6777                         do {
6778                             t++;
6779                         } while (isSPACE(*t));
6780                         if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6781                             STRLEN len;
6782                             t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6783                                             &len);
6784                             while (isSPACE(*t))
6785                                 t++;
6786                             if (  *t == ';'
6787                                 && get_cvn_flags(tmpbuf, len, UTF
6788                                                                 ? SVf_UTF8
6789                                                                 : 0))
6790                             {
6791                                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6792                                     "You need to quote \"%" UTF8f "\"",
6793                                         UTF8fARG(UTF, len, tmpbuf));
6794                             }
6795                         }
6796                     }
6797                 }
6798             }
6799
6800             PL_expect = XOPERATOR;
6801             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6802                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6803                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6804                     PL_expect = XOPERATOR;
6805                 else if (strchr("$@\"'`q", *s))
6806                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6807                 else if (   strchr("&*<%", *s)
6808                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6809                 {
6810                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6811                 }
6812                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6813                     char tmpbuf[sizeof PL_tokenbuf];
6814                     int t2;
6815                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6816                     if ((t2 = keyword(tmpbuf, len, 0))) {
6817                         /* binary operators exclude handle interpretations */
6818                         switch (t2) {
6819                         case -KEY_x:
6820                         case -KEY_eq:
6821                         case -KEY_ne:
6822                         case -KEY_gt:
6823                         case -KEY_lt:
6824                         case -KEY_ge:
6825                         case -KEY_le:
6826                         case -KEY_cmp:
6827                             break;
6828                         default:
6829                             PL_expect = XTERM;  /* e.g. print $fh length() */
6830                             break;
6831                         }
6832                     }
6833                     else {
6834                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6835                     }
6836                 }
6837                 else if (isDIGIT(*s))
6838                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6839                 else if (*s == '.' && isDIGIT(s[1]))
6840                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6841                 else if ((*s == '?' || *s == '-' || *s == '+')
6842                          && !isSPACE(s[1]) && s[1] != '=')
6843                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6844                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6845                          && s[1] != '/')
6846                     PL_expect = XTERM;          /* e.g. print $fh /.../
6847                                                    XXX except DORDOR operator
6848                                                 */
6849                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6850                          && s[2] != '=')
6851                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6852             }
6853         }
6854         force_ident_maybe_lex('$');
6855         TOKEN('$');
6856
6857     case '@':
6858         if (PL_expect == XPOSTDEREF)
6859             POSTDEREF('@');
6860         PL_tokenbuf[0] = '@';
6861         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6862         if (PL_expect == XOPERATOR) {
6863             d = s;
6864             if (PL_bufptr > s) {
6865                 d = PL_bufptr-1;
6866                 PL_bufptr = PL_oldbufptr;
6867             }
6868             no_op("Array", d);
6869         }
6870         pl_yylval.ival = 0;
6871         if (!PL_tokenbuf[1]) {
6872             PREREF('@');
6873         }
6874         if (PL_lex_state == LEX_NORMAL)
6875             s = skipspace(s);
6876         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6877             && intuit_more(s, PL_bufend))
6878         {
6879             if (*s == '{')
6880                 PL_tokenbuf[0] = '%';
6881
6882             /* Warn about @ where they meant $. */
6883             if (*s == '[' || *s == '{') {
6884                 if (ckWARN(WARN_SYNTAX)) {
6885                     S_check_scalar_slice(aTHX_ s);
6886                 }
6887             }
6888         }
6889         PL_expect = XOPERATOR;
6890         force_ident_maybe_lex('@');
6891         TERM('@');
6892
6893      case '/':                  /* may be division, defined-or, or pattern */
6894         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6895             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6896                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6897                 TOKEN(0);
6898             s += 2;
6899             AOPERATOR(DORDOR);
6900         }
6901         else if (PL_expect == XOPERATOR) {
6902             s++;
6903             if (*s == '=' && !PL_lex_allbrackets
6904                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6905             {
6906                 s--;
6907                 TOKEN(0);
6908             }
6909             Mop(OP_DIVIDE);
6910         }
6911         else {
6912             /* Disable warning on "study /blah/" */
6913             if (    PL_oldoldbufptr == PL_last_uni
6914                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6915                     || memNE(PL_last_uni, "study", 5)
6916                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6917              ))
6918                 check_uni();
6919             s = scan_pat(s,OP_MATCH);
6920             TERM(sublex_start());
6921         }
6922
6923      case '?':                  /* conditional */
6924         s++;
6925         if (!PL_lex_allbrackets
6926             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6927         {
6928             s--;
6929             TOKEN(0);
6930         }
6931         PL_lex_allbrackets++;
6932         OPERATOR('?');
6933
6934     case '.':
6935         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6936 #ifdef PERL_STRICT_CR
6937             && s[1] == '\n'
6938 #else
6939             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6940 #endif
6941             && (s == PL_linestart || s[-1] == '\n') )
6942         {
6943             PL_expect = XSTATE;
6944             formbrack = 2; /* dot seen where arguments expected */
6945             goto rightbracket;
6946         }
6947         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6948             s += 3;
6949             OPERATOR(YADAYADA);
6950         }
6951         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6952             char tmp = *s++;
6953             if (*s == tmp) {
6954                 if (!PL_lex_allbrackets
6955                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6956                 {
6957                     s--;
6958                     TOKEN(0);
6959                 }
6960                 s++;
6961                 if (*s == tmp) {
6962                     s++;
6963                     pl_yylval.ival = OPf_SPECIAL;
6964                 }
6965                 else
6966                     pl_yylval.ival = 0;
6967                 OPERATOR(DOTDOT);
6968             }
6969             if (*s == '=' && !PL_lex_allbrackets
6970                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6971             {
6972                 s--;
6973                 TOKEN(0);
6974             }
6975             Aop(OP_CONCAT);
6976         }
6977         /* FALLTHROUGH */
6978     case '0': case '1': case '2': case '3': case '4':
6979     case '5': case '6': case '7': case '8': case '9':
6980         s = scan_num(s, &pl_yylval);
6981         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6982         if (PL_expect == XOPERATOR)
6983             no_op("Number",s);
6984         TERM(THING);
6985
6986     case '\'':
6987         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6988         if (!s)
6989             missingterm(NULL, 0);
6990         COPLINE_SET_FROM_MULTI_END;
6991         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6992         if (PL_expect == XOPERATOR) {
6993             no_op("String",s);
6994         }
6995         pl_yylval.ival = OP_CONST;
6996         TERM(sublex_start());
6997
6998     case '"':
6999         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7000         DEBUG_T( {
7001             if (s)
7002                 printbuf("### Saw string before %s\n", s);
7003             else
7004                 PerlIO_printf(Perl_debug_log,
7005                              "### Saw unterminated string\n");
7006         } );
7007         if (PL_expect == XOPERATOR) {
7008                 no_op("String",s);
7009         }
7010         if (!s)
7011             missingterm(NULL, 0);
7012         pl_yylval.ival = OP_CONST;
7013         /* FIXME. I think that this can be const if char *d is replaced by
7014            more localised variables.  */
7015         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
7016             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
7017                 pl_yylval.ival = OP_STRINGIFY;
7018                 break;
7019             }
7020         }
7021         if (pl_yylval.ival == OP_CONST)
7022             COPLINE_SET_FROM_MULTI_END;
7023         TERM(sublex_start());
7024
7025     case '`':
7026         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7027         DEBUG_T( {
7028             if (s)
7029                 printbuf("### Saw backtick string before %s\n", s);
7030             else
7031                 PerlIO_printf(Perl_debug_log,
7032                              "### Saw unterminated backtick string\n");
7033         } );
7034         if (PL_expect == XOPERATOR)
7035             no_op("Backticks",s);
7036         if (!s)
7037             missingterm(NULL, 0);
7038         pl_yylval.ival = OP_BACKTICK;
7039         TERM(sublex_start());
7040
7041     case '\\':
7042         s++;
7043         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7044          && isDIGIT(*s))
7045             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
7046                            *s, *s);
7047         if (PL_expect == XOPERATOR)
7048             no_op("Backslash",s);
7049         OPERATOR(REFGEN);
7050
7051     case 'v':
7052         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
7053             char *start = s + 2;
7054             while (isDIGIT(*start) || *start == '_')
7055                 start++;
7056             if (*start == '.' && isDIGIT(start[1])) {
7057                 s = scan_num(s, &pl_yylval);
7058                 TERM(THING);
7059             }
7060             else if ((*start == ':' && start[1] == ':')
7061                   || (PL_expect == XSTATE && *start == ':'))
7062                 goto keylookup;
7063             else if (PL_expect == XSTATE) {
7064                 d = start;
7065                 while (d < PL_bufend && isSPACE(*d)) d++;
7066                 if (*d == ':') goto keylookup;
7067             }
7068             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
7069             if (!isALPHA(*start) && (PL_expect == XTERM
7070                         || PL_expect == XREF || PL_expect == XSTATE
7071                         || PL_expect == XTERMORDORDOR)) {
7072                 GV *const gv = gv_fetchpvn_flags(s, start - s,
7073                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
7074                 if (!gv) {
7075                     s = scan_num(s, &pl_yylval);
7076                     TERM(THING);
7077                 }
7078             }
7079         }
7080         goto keylookup;
7081     case 'x':
7082         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7083             s++;
7084             Mop(OP_REPEAT);
7085         }
7086         goto keylookup;
7087
7088     case '_':
7089     case 'a': case 'A':
7090     case 'b': case 'B':
7091     case 'c': case 'C':
7092     case 'd': case 'D':
7093     case 'e': case 'E':
7094     case 'f': case 'F':
7095     case 'g': case 'G':
7096     case 'h': case 'H':
7097     case 'i': case 'I':
7098     case 'j': case 'J':
7099     case 'k': case 'K':
7100     case 'l': case 'L':
7101     case 'm': case 'M':
7102     case 'n': case 'N':
7103     case 'o': case 'O':
7104     case 'p': case 'P':
7105     case 'q': case 'Q':
7106     case 'r': case 'R':
7107     case 's': case 'S':
7108     case 't': case 'T':
7109     case 'u': case 'U':
7110               case 'V':
7111     case 'w': case 'W':
7112               case 'X':
7113     case 'y': case 'Y':
7114     case 'z': case 'Z':
7115
7116       keylookup: {
7117         bool anydelim;
7118         bool lex;
7119         I32 tmp;
7120         SV *sv;
7121         CV *cv;
7122         PADOFFSET off;
7123         OP *rv2cv_op;
7124
7125         lex = FALSE;
7126         orig_keyword = 0;
7127         off = 0;
7128         sv = NULL;
7129         cv = NULL;
7130         gv = NULL;
7131         gvp = NULL;
7132         rv2cv_op = NULL;
7133
7134         PL_bufptr = s;
7135         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7136
7137         /* Some keywords can be followed by any delimiter, including ':' */
7138         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
7139
7140         /* x::* is just a word, unless x is "CORE" */
7141         if (!anydelim && *s == ':' && s[1] == ':') {
7142             if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
7143             goto just_a_word;
7144         }
7145
7146         d = s;
7147         while (d < PL_bufend && isSPACE(*d))
7148                 d++;    /* no comments skipped here, or s### is misparsed */
7149
7150         /* Is this a word before a => operator? */
7151         if (*d == '=' && d[1] == '>') {
7152           fat_arrow:
7153             CLINE;
7154             pl_yylval.opval
7155                 = newSVOP(OP_CONST, 0,
7156                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7157             pl_yylval.opval->op_private = OPpCONST_BARE;
7158             TERM(BAREWORD);
7159         }
7160
7161         /* Check for plugged-in keyword */
7162         {
7163             OP *o;
7164             int result;
7165             char *saved_bufptr = PL_bufptr;
7166             PL_bufptr = s;
7167             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7168             s = PL_bufptr;
7169             if (result == KEYWORD_PLUGIN_DECLINE) {
7170                 /* not a plugged-in keyword */
7171                 PL_bufptr = saved_bufptr;
7172             } else if (result == KEYWORD_PLUGIN_STMT) {
7173                 pl_yylval.opval = o;
7174                 CLINE;
7175                 if (!PL_nexttoke) PL_expect = XSTATE;
7176                 return REPORT(PLUGSTMT);
7177             } else if (result == KEYWORD_PLUGIN_EXPR) {
7178                 pl_yylval.opval = o;
7179                 CLINE;
7180                 if (!PL_nexttoke) PL_expect = XOPERATOR;
7181                 return REPORT(PLUGEXPR);
7182             } else {
7183                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7184                                         PL_tokenbuf);
7185             }
7186         }
7187
7188         /* Check for built-in keyword */
7189         tmp = keyword(PL_tokenbuf, len, 0);
7190
7191         /* Is this a label? */
7192         if (!anydelim && PL_expect == XSTATE
7193               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7194             s = d + 1;
7195             pl_yylval.opval =
7196                 newSVOP(OP_CONST, 0,
7197                     newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
7198             CLINE;
7199             TOKEN(LABEL);
7200         }
7201
7202         /* Check for lexical sub */
7203         if (PL_expect != XOPERATOR) {
7204             char tmpbuf[sizeof PL_tokenbuf + 1];
7205             *tmpbuf = '&';
7206             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7207             off = pad_findmy_pvn(tmpbuf, len+1, 0);
7208             if (off != NOT_IN_PAD) {
7209                 assert(off); /* we assume this is boolean-true below */
7210                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7211                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7212                     HEK * const stashname = HvNAME_HEK(stash);
7213                     sv = newSVhek(stashname);
7214                     sv_catpvs(sv, "::");
7215                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7216                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7217                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7218                                     SVt_PVCV);
7219                     off = 0;
7220                     if (!gv) {
7221                         sv_free(sv);
7222                         sv = NULL;
7223                         goto just_a_word;
7224                     }
7225                 }
7226                 else {
7227                     rv2cv_op = newOP(OP_PADANY, 0);
7228                     rv2cv_op->op_targ = off;
7229                     cv = find_lexical_cv(off);
7230                 }
7231                 lex = TRUE;
7232                 goto just_a_word;
7233             }
7234             off = 0;
7235         }
7236
7237         if (tmp < 0) {                  /* second-class keyword? */
7238             GV *ogv = NULL;     /* override (winner) */
7239             GV *hgv = NULL;     /* hidden (loser) */
7240             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7241                 CV *cv;
7242                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7243                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7244                                             SVt_PVCV))
7245                     && (cv = GvCVu(gv)))
7246                 {
7247                     if (GvIMPORTED_CV(gv))
7248                         ogv = gv;
7249                     else if (! CvMETHOD(cv))
7250                         hgv = gv;
7251                 }
7252                 if (!ogv
7253                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7254                                                               len, FALSE))
7255                     && (gv = *gvp)
7256                     && (isGV_with_GP(gv)
7257                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7258                         :   SvPCS_IMPORTED(gv)
7259                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7260                                                                  len, 0), 1)))
7261                 {
7262                     ogv = gv;
7263                 }
7264             }
7265             if (ogv) {
7266                 orig_keyword = tmp;
7267                 tmp = 0;                /* overridden by import or by GLOBAL */
7268             }
7269             else if (gv && !gvp
7270                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7271                      && GvCVu(gv))
7272             {
7273                 tmp = 0;                /* any sub overrides "weak" keyword */
7274             }
7275             else {                      /* no override */
7276                 tmp = -tmp;
7277                 if (tmp == KEY_dump) {
7278                     Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
7279                 }
7280                 gv = NULL;
7281                 gvp = 0;
7282                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7283                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7284                                    "Ambiguous call resolved as CORE::%s(), "
7285                                    "qualify as such or use &",
7286                                    GvENAME(hgv));
7287             }
7288         }
7289
7290         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7291          && (!anydelim || *s != '#')) {
7292             /* no override, and not s### either; skipspace is safe here
7293              * check for => on following line */
7294             bool arrow;
7295             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7296             STRLEN   soff = s         - SvPVX(PL_linestr);
7297             s = peekspace(s);
7298             arrow = *s == '=' && s[1] == '>';
7299             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7300             s         = SvPVX(PL_linestr) +   soff;
7301             if (arrow)
7302                 goto fat_arrow;
7303         }
7304
7305       reserved_word:
7306         switch (tmp) {
7307
7308             /* Trade off - by using this evil construction we can pull the
7309                variable gv into the block labelled keylookup. If not, then
7310                we have to give it function scope so that the goto from the
7311                earlier ':' case doesn't bypass the initialisation.  */
7312             just_a_word_zero_gv:
7313                 sv = NULL;
7314                 cv = NULL;
7315                 gv = NULL;
7316                 gvp = NULL;
7317                 rv2cv_op = NULL;
7318                 orig_keyword = 0;
7319                 lex = 0;
7320                 off = 0;
7321             /* FALLTHROUGH */
7322         default:                        /* not a keyword */
7323           just_a_word: {
7324                 int pkgname = 0;
7325                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7326                 bool safebw;
7327                 bool no_op_error = FALSE;
7328
7329                 if (PL_expect == XOPERATOR) {
7330                     if (PL_bufptr == PL_linestart) {
7331                         CopLINE_dec(PL_curcop);
7332                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7333                         CopLINE_inc(PL_curcop);
7334                     }
7335                     else
7336                         /* We want to call no_op with s pointing after the
7337                            bareword, so defer it.  But we want it to come
7338                            before the Bad name croak.  */
7339                         no_op_error = TRUE;
7340                 }
7341
7342                 /* Get the rest if it looks like a package qualifier */
7343
7344                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7345                     STRLEN morelen;
7346                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7347                                   TRUE, &morelen);
7348                     if (no_op_error) {
7349                         no_op("Bareword",s);
7350                         no_op_error = FALSE;
7351                     }
7352                     if (!morelen)
7353                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7354                                 UTF8fARG(UTF, len, PL_tokenbuf),
7355                                 *s == '\'' ? "'" : "::");
7356                     len += morelen;
7357                     pkgname = 1;
7358                 }
7359
7360                 if (no_op_error)
7361                         no_op("Bareword",s);
7362
7363                 /* See if the name is "Foo::",
7364                    in which case Foo is a bareword
7365                    (and a package name). */
7366
7367                 if (len > 2
7368                     && PL_tokenbuf[len - 2] == ':'
7369                     && PL_tokenbuf[len - 1] == ':')
7370                 {
7371                     if (ckWARN(WARN_BAREWORD)
7372                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7373                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7374                                     "Bareword \"%" UTF8f
7375                                     "\" refers to nonexistent package",
7376                                     UTF8fARG(UTF, len, PL_tokenbuf));
7377                     len -= 2;
7378                     PL_tokenbuf[len] = '\0';
7379                     gv = NULL;
7380                     gvp = 0;
7381                     safebw = TRUE;
7382                 }
7383                 else {
7384                     safebw = FALSE;
7385                 }
7386
7387                 /* if we saw a global override before, get the right name */
7388
7389                 if (!sv)
7390                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7391                                                 len);
7392                 if (gvp) {
7393                     SV * const tmp_sv = sv;
7394                     sv = newSVpvs("CORE::GLOBAL::");
7395                     sv_catsv(sv, tmp_sv);
7396                     SvREFCNT_dec(tmp_sv);
7397                 }
7398
7399
7400                 /* Presume this is going to be a bareword of some sort. */
7401                 CLINE;
7402                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7403                 pl_yylval.opval->op_private = OPpCONST_BARE;
7404
7405                 /* And if "Foo::", then that's what it certainly is. */
7406                 if (safebw)
7407                     goto safe_bareword;
7408
7409                 if (!off)
7410                 {
7411                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7412                     const_op->op_private = OPpCONST_BARE;
7413                     rv2cv_op =
7414                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7415                     cv = lex
7416                         ? isGV(gv)
7417                             ? GvCV(gv)
7418                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7419                                 ? (CV *)SvRV(gv)
7420                                 : ((CV *)gv)
7421                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7422                 }
7423
7424                 /* Use this var to track whether intuit_method has been
7425                    called.  intuit_method returns 0 or > 255.  */
7426                 tmp = 1;
7427
7428                 /* See if it's the indirect object for a list operator. */
7429
7430                 if (PL_oldoldbufptr
7431                     && PL_oldoldbufptr < PL_bufptr
7432                     && (PL_oldoldbufptr == PL_last_lop
7433                         || PL_oldoldbufptr == PL_last_uni)
7434                     && /* NO SKIPSPACE BEFORE HERE! */
7435                        (PL_expect == XREF
7436                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7437                                                                == OA_FILEREF))
7438                 {
7439                     bool immediate_paren = *s == '(';
7440                     SSize_t s_off;
7441
7442                     /* (Now we can afford to cross potential line boundary.) */
7443                     s = skipspace(s);
7444
7445                     /* intuit_method() can indirectly call lex_next_chunk(),
7446                      * invalidating s
7447                      */
7448                     s_off = s - SvPVX(PL_linestr);
7449                     /* Two barewords in a row may indicate method call. */
7450                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7451                             || *s == '$')
7452                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7453                     {
7454                         /* the code at method: doesn't use s */
7455                         goto method;
7456                     }
7457                     s = SvPVX(PL_linestr) + s_off;
7458
7459                     /* If not a declared subroutine, it's an indirect object. */
7460                     /* (But it's an indir obj regardless for sort.) */
7461                     /* Also, if "_" follows a filetest operator, it's a bareword */
7462
7463                     if (
7464                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7465                          || (!cv
7466                              && (PL_last_lop_op != OP_MAPSTART
7467                                  && PL_last_lop_op != OP_GREPSTART))))
7468                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7469                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7470                                                             == OA_FILESTATOP))
7471                        )
7472                     {
7473                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7474                         goto bareword;
7475                     }
7476                 }
7477
7478                 PL_expect = XOPERATOR;
7479                 s = skipspace(s);
7480
7481                 /* Is this a word before a => operator? */
7482                 if (*s == '=' && s[1] == '>' && !pkgname) {
7483                     op_free(rv2cv_op);
7484                     CLINE;
7485                     if (gvp || (lex && !off)) {
7486                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7487                         /* This is our own scalar, created a few lines
7488                            above, so this is safe. */
7489                         SvREADONLY_off(sv);
7490                         sv_setpv(sv, PL_tokenbuf);
7491                         if (UTF && !IN_BYTES
7492                          && is_utf8_string((U8*)PL_tokenbuf, len))
7493                               SvUTF8_on(sv);
7494                         SvREADONLY_on(sv);
7495                     }
7496                     TERM(BAREWORD);
7497                 }
7498
7499                 /* If followed by a paren, it's certainly a subroutine. */
7500                 if (*s == '(') {
7501                     CLINE;
7502                     if (cv) {
7503                         d = s + 1;
7504                         while (SPACE_OR_TAB(*d))
7505                             d++;
7506                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7507                             s = d + 1;
7508                             goto its_constant;
7509                         }
7510                     }
7511                     NEXTVAL_NEXTTOKE.opval =
7512                         off ? rv2cv_op : pl_yylval.opval;
7513                     if (off)
7514                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7515                     else op_free(rv2cv_op),        force_next(BAREWORD);
7516                     pl_yylval.ival = 0;
7517                     TOKEN('&');
7518                 }
7519
7520                 /* If followed by var or block, call it a method (unless sub) */
7521
7522                 if ((*s == '$' || *s == '{') && !cv) {
7523                     op_free(rv2cv_op);
7524                     PL_last_lop = PL_oldbufptr;
7525                     PL_last_lop_op = OP_METHOD;
7526                     if (!PL_lex_allbrackets
7527                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7528                     {
7529                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7530                     }
7531                     PL_expect = XBLOCKTERM;
7532                     PL_bufptr = s;
7533                     return REPORT(METHOD);
7534                 }
7535
7536                 /* If followed by a bareword, see if it looks like indir obj. */
7537
7538                 if (   tmp == 1
7539                     && !orig_keyword
7540                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7541                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7542                 {
7543                   method:
7544                     if (lex && !off) {
7545                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7546                         SvREADONLY_off(sv);
7547                         sv_setpvn(sv, PL_tokenbuf, len);
7548                         if (UTF && !IN_BYTES
7549                          && is_utf8_string((U8*)PL_tokenbuf, len))
7550                             SvUTF8_on (sv);
7551                         else SvUTF8_off(sv);
7552                     }
7553                     op_free(rv2cv_op);
7554                     if (tmp == METHOD && !PL_lex_allbrackets
7555                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7556                     {
7557                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7558                     }
7559                     return REPORT(tmp);
7560                 }
7561
7562                 /* Not a method, so call it a subroutine (if defined) */
7563
7564                 if (cv) {
7565                     /* Check for a constant sub */
7566                     if ((sv = cv_const_sv_or_av(cv))) {
7567                   its_constant:
7568                         op_free(rv2cv_op);
7569                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7570                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7571                         if (SvTYPE(sv) == SVt_PVAV)
7572                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7573                                                       pl_yylval.opval);
7574                         else {
7575                             pl_yylval.opval->op_private = 0;
7576                             pl_yylval.opval->op_folded = 1;
7577                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7578                         }
7579                         TOKEN(BAREWORD);
7580                     }
7581
7582                     op_free(pl_yylval.opval);
7583                     pl_yylval.opval =
7584                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7585                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7586                     PL_last_lop = PL_oldbufptr;
7587                     PL_last_lop_op = OP_ENTERSUB;
7588                     /* Is there a prototype? */
7589                     if (
7590                         SvPOK(cv))
7591                     {
7592                         STRLEN protolen = CvPROTOLEN(cv);
7593                         const char *proto = CvPROTO(cv);
7594                         bool optional;
7595                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7596                         if (!protolen)
7597                             TERM(FUNC0SUB);
7598                         if ((optional = *proto == ';'))
7599                           do
7600                             proto++;
7601                           while (*proto == ';');
7602                         if (
7603                             (
7604                                 (
7605                                     *proto == '$' || *proto == '_'
7606                                  || *proto == '*' || *proto == '+'
7607                                 )
7608                              && proto[1] == '\0'
7609                             )
7610                          || (
7611                              *proto == '\\' && proto[1] && proto[2] == '\0'
7612                             )
7613                         )
7614                             UNIPROTO(UNIOPSUB,optional);
7615                         if (*proto == '\\' && proto[1] == '[') {
7616                             const char *p = proto + 2;
7617                             while(*p && *p != ']')
7618                                 ++p;
7619                             if(*p == ']' && !p[1])
7620                                 UNIPROTO(UNIOPSUB,optional);
7621                         }
7622                         if (*proto == '&' && *s == '{') {
7623                             if (PL_curstash)
7624                                 sv_setpvs(PL_subname, "__ANON__");
7625                             else
7626                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7627                             if (!PL_lex_allbrackets
7628                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7629                             {
7630                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7631                             }
7632                             PREBLOCK(LSTOPSUB);
7633                         }
7634                     }
7635                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7636                     PL_expect = XTERM;
7637                     force_next(off ? PRIVATEREF : BAREWORD);
7638                     if (!PL_lex_allbrackets
7639                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7640                     {
7641                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7642                     }
7643                     TOKEN(NOAMP);
7644                 }
7645
7646                 /* Call it a bare word */
7647
7648                 if (PL_hints & HINT_STRICT_SUBS)
7649                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7650                 else {
7651                 bareword:
7652                     /* after "print" and similar functions (corresponding to
7653                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7654                      * a filehandle should be subject to "strict subs".
7655                      * Likewise for the optional indirect-object argument to system
7656                      * or exec, which can't be a bareword */
7657                     if ((PL_last_lop_op == OP_PRINT
7658                             || PL_last_lop_op == OP_PRTF
7659                             || PL_last_lop_op == OP_SAY
7660                             || PL_last_lop_op == OP_SYSTEM
7661                             || PL_last_lop_op == OP_EXEC)
7662                             && (PL_hints & HINT_STRICT_SUBS))
7663                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7664                     if (lastchar != '-') {
7665                         if (ckWARN(WARN_RESERVED)) {
7666                             d = PL_tokenbuf;
7667                             while (isLOWER(*d))
7668                                 d++;
7669                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7670                             {
7671                                 /* PL_warn_reserved is constant */
7672                                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7673                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7674                                        PL_tokenbuf);
7675                                 GCC_DIAG_RESTORE_STMT;
7676                             }
7677                         }
7678                     }
7679                 }
7680                 op_free(rv2cv_op);
7681
7682             safe_bareword:
7683                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7684                  && saw_infix_sigil) {
7685                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7686                                      "Operator or semicolon missing before %c%" UTF8f,
7687                                      lastchar,
7688                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7689                                               PL_tokenbuf));
7690                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7691                                      "Ambiguous use of %c resolved as operator %c",
7692                                      lastchar, lastchar);
7693                 }
7694                 TOKEN(BAREWORD);
7695             }
7696
7697         case KEY___FILE__:
7698             FUN0OP(
7699                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7700             );
7701
7702         case KEY___LINE__:
7703             FUN0OP(
7704                 newSVOP(OP_CONST, 0,
7705                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7706             );
7707
7708         case KEY___PACKAGE__:
7709             FUN0OP(
7710                 newSVOP(OP_CONST, 0,
7711                                         (PL_curstash
7712                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7713                                          : &PL_sv_undef))
7714             );
7715
7716         case KEY___DATA__:
7717         case KEY___END__: {
7718             GV *gv;
7719             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7720                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7721                                         ? PL_curstash
7722                                         : PL_defstash;
7723                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7724                 if (!isGV(gv))
7725                     gv_init(gv,stash,"DATA",4,0);
7726                 GvMULTI_on(gv);
7727                 if (!GvIO(gv))
7728                     GvIOp(gv) = newIO();
7729                 IoIFP(GvIOp(gv)) = PL_rsfp;
7730                 /* Mark this internal pseudo-handle as clean */
7731                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7732                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7733                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7734                 else
7735                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7736 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7737                 /* if the script was opened in binmode, we need to revert
7738                  * it to text mode for compatibility; but only iff it has CRs
7739                  * XXX this is a questionable hack at best. */
7740                 if (PL_bufend-PL_bufptr > 2
7741                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7742                 {
7743                     Off_t loc = 0;
7744                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7745                         loc = PerlIO_tell(PL_rsfp);
7746                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7747                     }
7748 #ifdef NETWARE
7749                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7750 #else
7751                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7752 #endif  /* NETWARE */
7753                         if (loc > 0)
7754                             PerlIO_seek(PL_rsfp, loc, 0);
7755                     }
7756                 }
7757 #endif
7758 #ifdef PERLIO_LAYERS
7759                 if (!IN_BYTES) {
7760                     if (UTF)
7761                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7762                 }
7763 #endif
7764                 PL_rsfp = NULL;
7765             }
7766             goto fake_eof;
7767         }
7768
7769         case KEY___SUB__:
7770             FUN0OP(CvCLONE(PL_compcv)
7771                         ? newOP(OP_RUNCV, 0)
7772                         : newPVOP(OP_RUNCV,0,NULL));
7773
7774         case KEY_AUTOLOAD:
7775         case KEY_DESTROY:
7776         case KEY_BEGIN:
7777         case KEY_UNITCHECK:
7778         case KEY_CHECK:
7779         case KEY_INIT:
7780         case KEY_END:
7781             if (PL_expect == XSTATE) {
7782                 s = PL_bufptr;
7783                 goto really_sub;
7784             }
7785             goto just_a_word;
7786
7787         case_KEY_CORE:
7788             {
7789                 STRLEN olen = len;
7790                 d = s;
7791                 s += 2;
7792                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7793                 if ((*s == ':' && s[1] == ':')
7794                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7795                 {
7796                     s = d;
7797                     len = olen;
7798                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7799                     goto just_a_word;
7800                 }
7801                 if (!tmp)
7802                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7803                                       UTF8fARG(UTF, len, PL_tokenbuf));
7804                 if (tmp < 0)
7805                     tmp = -tmp;
7806                 else if (tmp == KEY_require || tmp == KEY_do
7807                       || tmp == KEY_glob)
7808                     /* that's a way to remember we saw "CORE::" */
7809                     orig_keyword = tmp;
7810                 goto reserved_word;
7811             }
7812
7813         case KEY_abs:
7814             UNI(OP_ABS);
7815
7816         case KEY_alarm:
7817             UNI(OP_ALARM);
7818
7819         case KEY_accept:
7820             LOP(OP_ACCEPT,XTERM);
7821
7822         case KEY_and:
7823             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7824                 return REPORT(0);
7825             OPERATOR(ANDOP);
7826
7827         case KEY_atan2:
7828             LOP(OP_ATAN2,XTERM);
7829
7830         case KEY_bind:
7831             LOP(OP_BIND,XTERM);
7832
7833         case KEY_binmode:
7834             LOP(OP_BINMODE,XTERM);
7835
7836         case KEY_bless:
7837             LOP(OP_BLESS,XTERM);
7838
7839         case KEY_break:
7840             FUN0(OP_BREAK);
7841
7842         case KEY_chop:
7843             UNI(OP_CHOP);
7844
7845         case KEY_continue:
7846                     /* We have to disambiguate the two senses of
7847                       "continue". If the next token is a '{' then
7848                       treat it as the start of a continue block;
7849                       otherwise treat it as a control operator.
7850                      */
7851                     s = skipspace(s);
7852                     if (*s == '{')
7853             PREBLOCK(CONTINUE);
7854                     else
7855                         FUN0(OP_CONTINUE);
7856
7857         case KEY_chdir:
7858             /* may use HOME */
7859             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7860             UNI(OP_CHDIR);
7861
7862         case KEY_close:
7863             UNI(OP_CLOSE);
7864
7865         case KEY_closedir:
7866             UNI(OP_CLOSEDIR);
7867
7868         case KEY_cmp:
7869             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7870                 return REPORT(0);
7871             Eop(OP_SCMP);
7872
7873         case KEY_caller:
7874             UNI(OP_CALLER);
7875
7876         case KEY_crypt:
7877 #ifdef FCRYPT
7878             if (!PL_cryptseen) {
7879                 PL_cryptseen = TRUE;
7880                 init_des();
7881             }
7882 #endif
7883             LOP(OP_CRYPT,XTERM);
7884
7885         case KEY_chmod:
7886             LOP(OP_CHMOD,XTERM);
7887
7888         case KEY_chown:
7889             LOP(OP_CHOWN,XTERM);
7890
7891         case KEY_connect:
7892             LOP(OP_CONNECT,XTERM);
7893
7894         case KEY_chr:
7895             UNI(OP_CHR);
7896
7897         case KEY_cos:
7898             UNI(OP_COS);
7899
7900         case KEY_chroot:
7901             UNI(OP_CHROOT);
7902
7903         case KEY_default:
7904             PREBLOCK(DEFAULT);
7905
7906         case KEY_do:
7907             s = skipspace(s);
7908             if (*s == '{')
7909                 PRETERMBLOCK(DO);
7910             if (*s != '\'') {
7911                 *PL_tokenbuf = '&';
7912                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7913                               1, &len);
7914                 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7915                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7916                     SSize_t off = s-SvPVX(PL_linestr);
7917                     d = skipspace(d);
7918                     s = SvPVX(PL_linestr)+off;
7919                     if (*d == '(') {
7920                         force_ident_maybe_lex('&');
7921                         s = d;
7922                     }
7923                 }
7924             }
7925             if (orig_keyword == KEY_do) {
7926                 orig_keyword = 0;
7927                 pl_yylval.ival = 1;
7928             }
7929             else
7930                 pl_yylval.ival = 0;
7931             OPERATOR(DO);
7932
7933         case KEY_die:
7934             PL_hints |= HINT_BLOCK_SCOPE;
7935             LOP(OP_DIE,XTERM);
7936
7937         case KEY_defined:
7938             UNI(OP_DEFINED);
7939
7940         case KEY_delete:
7941             UNI(OP_DELETE);
7942
7943         case KEY_dbmopen:
7944             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7945                               STR_WITH_LEN("NDBM_File::"),
7946                               STR_WITH_LEN("DB_File::"),
7947                               STR_WITH_LEN("GDBM_File::"),
7948                               STR_WITH_LEN("SDBM_File::"),
7949                               STR_WITH_LEN("ODBM_File::"),
7950                               NULL);
7951             LOP(OP_DBMOPEN,XTERM);
7952
7953         case KEY_dbmclose:
7954             UNI(OP_DBMCLOSE);
7955
7956         case KEY_dump:
7957             LOOPX(OP_DUMP);
7958
7959         case KEY_else:
7960             PREBLOCK(ELSE);
7961
7962         case KEY_elsif:
7963             pl_yylval.ival = CopLINE(PL_curcop);
7964             OPERATOR(ELSIF);
7965
7966         case KEY_eq:
7967             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7968                 return REPORT(0);
7969             Eop(OP_SEQ);
7970
7971         case KEY_exists:
7972             UNI(OP_EXISTS);
7973
7974         case KEY_exit:
7975             UNI(OP_EXIT);
7976
7977         case KEY_eval:
7978             s = skipspace(s);
7979             if (*s == '{') { /* block eval */
7980                 PL_expect = XTERMBLOCK;
7981                 UNIBRACK(OP_ENTERTRY);
7982             }
7983             else { /* string eval */
7984                 PL_expect = XTERM;
7985                 UNIBRACK(OP_ENTEREVAL);
7986             }
7987
7988         case KEY_evalbytes:
7989             PL_expect = XTERM;
7990             UNIBRACK(-OP_ENTEREVAL);
7991
7992         case KEY_eof:
7993             UNI(OP_EOF);
7994
7995         case KEY_exp:
7996             UNI(OP_EXP);
7997
7998         case KEY_each:
7999             UNI(OP_EACH);
8000
8001         case KEY_exec:
8002             LOP(OP_EXEC,XREF);
8003
8004         case KEY_endhostent:
8005             FUN0(OP_EHOSTENT);
8006
8007         case KEY_endnetent:
8008             FUN0(OP_ENETENT);
8009
8010         case KEY_endservent:
8011             FUN0(OP_ESERVENT);
8012
8013         case KEY_endprotoent:
8014             FUN0(OP_EPROTOENT);
8015
8016         case KEY_endpwent:
8017             FUN0(OP_EPWENT);
8018
8019         case KEY_endgrent:
8020             FUN0(OP_EGRENT);
8021
8022         case KEY_for:
8023         case KEY_foreach:
8024             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8025                 return REPORT(0);
8026             pl_yylval.ival = CopLINE(PL_curcop);
8027             s = skipspace(s);
8028             if (   PL_expect == XSTATE
8029                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
8030             {
8031                 char *p = s;
8032                 SSize_t s_off = s - SvPVX(PL_linestr);
8033
8034                 if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
8035                     && isSPACE(*(p + 2)))
8036                 {
8037                     p += 2;
8038                 }
8039                 else if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
8040                          && isSPACE(*(p + 3)))
8041                 {
8042                     p += 3;
8043                 }
8044
8045                 p = skipspace(p);
8046                 /* skip optional package name, as in "for my abc $x (..)" */
8047                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
8048                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8049                     p = skipspace(p);
8050                 }
8051                 if (*p != '$' && *p != '\\')
8052                     Perl_croak(aTHX_ "Missing $ on loop variable");
8053
8054                 /* The buffer may have been reallocated, update s */
8055                 s = SvPVX(PL_linestr) + s_off;
8056             }
8057             OPERATOR(FOR);
8058
8059         case KEY_formline:
8060             LOP(OP_FORMLINE,XTERM);
8061
8062         case KEY_fork:
8063             FUN0(OP_FORK);
8064
8065         case KEY_fc:
8066             UNI(OP_FC);
8067
8068         case KEY_fcntl:
8069             LOP(OP_FCNTL,XTERM);
8070
8071         case KEY_fileno:
8072             UNI(OP_FILENO);
8073
8074         case KEY_flock:
8075             LOP(OP_FLOCK,XTERM);
8076
8077         case KEY_gt:
8078             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8079                 return REPORT(0);
8080             Rop(OP_SGT);
8081
8082         case KEY_ge:
8083             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8084                 return REPORT(0);
8085             Rop(OP_SGE);
8086
8087         case KEY_grep:
8088             LOP(OP_GREPSTART, XREF);
8089
8090         case KEY_goto:
8091             LOOPX(OP_GOTO);
8092
8093         case KEY_gmtime:
8094             UNI(OP_GMTIME);
8095
8096         case KEY_getc:
8097             UNIDOR(OP_GETC);
8098
8099         case KEY_getppid:
8100             FUN0(OP_GETPPID);
8101
8102         case KEY_getpgrp:
8103             UNI(OP_GETPGRP);
8104
8105         case KEY_getpriority:
8106             LOP(OP_GETPRIORITY,XTERM);
8107
8108         case KEY_getprotobyname:
8109             UNI(OP_GPBYNAME);
8110
8111         case KEY_getprotobynumber:
8112             LOP(OP_GPBYNUMBER,XTERM);
8113
8114         case KEY_getprotoent:
8115             FUN0(OP_GPROTOENT);
8116
8117         case KEY_getpwent:
8118             FUN0(OP_GPWENT);
8119
8120         case KEY_getpwnam:
8121             UNI(OP_GPWNAM);
8122
8123         case KEY_getpwuid:
8124             UNI(OP_GPWUID);
8125
8126         case KEY_getpeername:
8127             UNI(OP_GETPEERNAME);
8128
8129         case KEY_gethostbyname:
8130             UNI(OP_GHBYNAME);
8131
8132         case KEY_gethostbyaddr:
8133             LOP(OP_GHBYADDR,XTERM);
8134
8135         case KEY_gethostent:
8136             FUN0(OP_GHOSTENT);
8137
8138         case KEY_getnetbyname:
8139             UNI(OP_GNBYNAME);
8140
8141         case KEY_getnetbyaddr:
8142             LOP(OP_GNBYADDR,XTERM);
8143
8144         case KEY_getnetent:
8145             FUN0(OP_GNETENT);
8146
8147         case KEY_getservbyname:
8148             LOP(OP_GSBYNAME,XTERM);
8149
8150         case KEY_getservbyport:
8151             LOP(OP_GSBYPORT,XTERM);
8152
8153         case KEY_getservent:
8154             FUN0(OP_GSERVENT);
8155
8156         case KEY_getsockname:
8157             UNI(OP_GETSOCKNAME);
8158
8159         case KEY_getsockopt:
8160             LOP(OP_GSOCKOPT,XTERM);
8161
8162         case KEY_getgrent:
8163             FUN0(OP_GGRENT);
8164
8165         case KEY_getgrnam:
8166             UNI(OP_GGRNAM);
8167
8168         case KEY_getgrgid:
8169             UNI(OP_GGRGID);
8170
8171         case KEY_getlogin:
8172             FUN0(OP_GETLOGIN);
8173
8174         case KEY_given:
8175             pl_yylval.ival = CopLINE(PL_curcop);
8176             Perl_ck_warner_d(aTHX_
8177                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8178                 "given is experimental");
8179             OPERATOR(GIVEN);
8180
8181         case KEY_glob:
8182             LOP(
8183              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8184              XTERM
8185             );
8186
8187         case KEY_hex:
8188             UNI(OP_HEX);
8189
8190         case KEY_if:
8191             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8192                 return REPORT(0);
8193             pl_yylval.ival = CopLINE(PL_curcop);
8194             OPERATOR(IF);
8195
8196         case KEY_index:
8197             LOP(OP_INDEX,XTERM);
8198
8199         case KEY_int:
8200             UNI(OP_INT);
8201
8202         case KEY_ioctl:
8203             LOP(OP_IOCTL,XTERM);
8204
8205         case KEY_join:
8206             LOP(OP_JOIN,XTERM);
8207
8208         case KEY_keys:
8209             UNI(OP_KEYS);
8210
8211         case KEY_kill:
8212             LOP(OP_KILL,XTERM);
8213
8214         case KEY_last:
8215             LOOPX(OP_LAST);
8216
8217         case KEY_lc:
8218             UNI(OP_LC);
8219
8220         case KEY_lcfirst:
8221             UNI(OP_LCFIRST);
8222
8223         case KEY_local:
8224             OPERATOR(LOCAL);
8225
8226         case KEY_length:
8227             UNI(OP_LENGTH);
8228
8229         case KEY_lt:
8230             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8231                 return REPORT(0);
8232             Rop(OP_SLT);
8233
8234         case KEY_le:
8235             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8236                 return REPORT(0);
8237             Rop(OP_SLE);
8238
8239         case KEY_localtime:
8240             UNI(OP_LOCALTIME);
8241
8242         case KEY_log:
8243             UNI(OP_LOG);
8244
8245         case KEY_link:
8246             LOP(OP_LINK,XTERM);
8247
8248         case KEY_listen:
8249             LOP(OP_LISTEN,XTERM);
8250
8251         case KEY_lock:
8252             UNI(OP_LOCK);
8253
8254         case KEY_lstat:
8255             UNI(OP_LSTAT);
8256
8257         case KEY_m:
8258             s = scan_pat(s,OP_MATCH);
8259             TERM(sublex_start());
8260
8261         case KEY_map:
8262             LOP(OP_MAPSTART, XREF);
8263
8264         case KEY_mkdir:
8265             LOP(OP_MKDIR,XTERM);
8266
8267         case KEY_msgctl:
8268             LOP(OP_MSGCTL,XTERM);
8269
8270         case KEY_msgget:
8271             LOP(OP_MSGGET,XTERM);
8272
8273         case KEY_msgrcv:
8274             LOP(OP_MSGRCV,XTERM);
8275
8276         case KEY_msgsnd:
8277             LOP(OP_MSGSND,XTERM);
8278
8279         case KEY_our:
8280         case KEY_my:
8281         case KEY_state:
8282             if (PL_in_my) {
8283                 PL_bufptr = s;
8284                 yyerror(Perl_form(aTHX_
8285                                   "Can't redeclare \"%s\" in \"%s\"",
8286                                    tmp      == KEY_my    ? "my" :
8287                                    tmp      == KEY_state ? "state" : "our",
8288                                    PL_in_my == KEY_my    ? "my" :
8289                                    PL_in_my == KEY_state ? "state" : "our"));
8290             }
8291             PL_in_my = (U16)tmp;
8292             s = skipspace(s);
8293             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8294                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8295                 if (memEQs(PL_tokenbuf, len, "sub"))
8296                     goto really_sub;
8297                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8298                 if (!PL_in_my_stash) {
8299                     char tmpbuf[1024];
8300                     int len;
8301                     PL_bufptr = s;
8302                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8303                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8304                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8305                 }
8306             }
8307             else if (*s == '\\') {
8308                 if (!FEATURE_MYREF_IS_ENABLED)
8309                     Perl_croak(aTHX_ "The experimental declared_refs "
8310                                      "feature is not enabled");
8311                 Perl_ck_warner_d(aTHX_
8312                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8313                     "Declaring references is experimental");
8314             }
8315             OPERATOR(MY);
8316
8317         case KEY_next:
8318             LOOPX(OP_NEXT);
8319
8320         case KEY_ne:
8321             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8322                 return REPORT(0);
8323             Eop(OP_SNE);
8324
8325         case KEY_no:
8326             s = tokenize_use(0, s);
8327             TOKEN(USE);
8328
8329         case KEY_not:
8330             if (*s == '(' || (s = skipspace(s), *s == '('))
8331                 FUN1(OP_NOT);
8332             else {
8333                 if (!PL_lex_allbrackets
8334                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8335                 {
8336                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8337                 }
8338                 OPERATOR(NOTOP);
8339             }
8340
8341         case KEY_open:
8342             s = skipspace(s);
8343             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8344                 const char *t;
8345                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8346                               &len);
8347                 for (t=d; isSPACE(*t);)
8348                     t++;
8349                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8350                     /* [perl #16184] */
8351                     && !(t[0] == '=' && t[1] == '>')
8352                     && !(t[0] == ':' && t[1] == ':')
8353                     && !keyword(s, d-s, 0)
8354                 ) {
8355                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8356                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8357                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8358                 }
8359             }
8360             LOP(OP_OPEN,XTERM);
8361
8362         case KEY_or:
8363             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8364                 return REPORT(0);
8365             pl_yylval.ival = OP_OR;
8366             OPERATOR(OROP);
8367
8368         case KEY_ord:
8369             UNI(OP_ORD);
8370
8371         case KEY_oct:
8372             UNI(OP_OCT);
8373
8374         case KEY_opendir:
8375             LOP(OP_OPEN_DIR,XTERM);
8376
8377         case KEY_print:
8378             checkcomma(s,PL_tokenbuf,"filehandle");
8379             LOP(OP_PRINT,XREF);
8380
8381         case KEY_printf:
8382             checkcomma(s,PL_tokenbuf,"filehandle");
8383             LOP(OP_PRTF,XREF);
8384
8385         case KEY_prototype:
8386             UNI(OP_PROTOTYPE);
8387
8388         case KEY_push:
8389             LOP(OP_PUSH,XTERM);
8390
8391         case KEY_pop:
8392             UNIDOR(OP_POP);
8393
8394         case KEY_pos:
8395             UNIDOR(OP_POS);
8396
8397         case KEY_pack:
8398             LOP(OP_PACK,XTERM);
8399
8400         case KEY_package:
8401             s = force_word(s,BAREWORD,FALSE,TRUE);
8402             s = skipspace(s);
8403             s = force_strict_version(s);
8404             PREBLOCK(PACKAGE);
8405
8406         case KEY_pipe:
8407             LOP(OP_PIPE_OP,XTERM);
8408
8409         case KEY_q:
8410             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8411             if (!s)
8412                 missingterm(NULL, 0);
8413             COPLINE_SET_FROM_MULTI_END;
8414             pl_yylval.ival = OP_CONST;
8415             TERM(sublex_start());
8416
8417         case KEY_quotemeta:
8418             UNI(OP_QUOTEMETA);
8419
8420         case KEY_qw: {
8421             OP *words = NULL;
8422             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8423             if (!s)
8424                 missingterm(NULL, 0);
8425             COPLINE_SET_FROM_MULTI_END;
8426             PL_expect = XOPERATOR;
8427             if (SvCUR(PL_lex_stuff)) {
8428                 int warned_comma = !ckWARN(WARN_QW);
8429                 int warned_comment = warned_comma;
8430                 d = SvPV_force(PL_lex_stuff, len);
8431                 while (len) {
8432                     for (; isSPACE(*d) && len; --len, ++d)
8433                         /**/;
8434                     if (len) {
8435                         SV *sv;
8436                         const char *b = d;
8437                         if (!warned_comma || !warned_comment) {
8438                             for (; !isSPACE(*d) && len; --len, ++d) {
8439                                 if (!warned_comma && *d == ',') {
8440                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8441                                         "Possible attempt to separate words with commas");
8442                                     ++warned_comma;
8443                                 }
8444                                 else if (!warned_comment && *d == '#') {
8445                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8446                                         "Possible attempt to put comments in qw() list");
8447                                     ++warned_comment;
8448                                 }
8449                             }
8450                         }
8451                         else {
8452                             for (; !isSPACE(*d) && len; --len, ++d)
8453                                 /**/;
8454                         }
8455                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8456                         words = op_append_elem(OP_LIST, words,
8457                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8458                     }
8459                 }
8460             }
8461             if (!words)
8462                 words = newNULLLIST();
8463             SvREFCNT_dec_NN(PL_lex_stuff);
8464             PL_lex_stuff = NULL;
8465             PL_expect = XOPERATOR;
8466             pl_yylval.opval = sawparens(words);
8467             TOKEN(QWLIST);
8468         }
8469
8470         case KEY_qq:
8471             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8472             if (!s)
8473                 missingterm(NULL, 0);
8474             pl_yylval.ival = OP_STRINGIFY;
8475             if (SvIVX(PL_lex_stuff) == '\'')
8476                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8477             TERM(sublex_start());
8478
8479         case KEY_qr:
8480             s = scan_pat(s,OP_QR);
8481             TERM(sublex_start());
8482
8483         case KEY_qx:
8484             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8485             if (!s)
8486                 missingterm(NULL, 0);
8487             pl_yylval.ival = OP_BACKTICK;
8488             TERM(sublex_start());
8489
8490         case KEY_return:
8491             OLDLOP(OP_RETURN);
8492
8493         case KEY_require:
8494             s = skipspace(s);
8495             if (isDIGIT(*s)) {
8496                 s = force_version(s, FALSE);
8497             }
8498             else if (*s != 'v' || !isDIGIT(s[1])
8499                     || (s = force_version(s, TRUE), *s == 'v'))
8500             {
8501                 *PL_tokenbuf = '\0';
8502                 s = force_word(s,BAREWORD,TRUE,TRUE);
8503                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8504                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8505                                            UTF))
8506                 {
8507                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8508                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8509                 }
8510                 else if (*s == '<')
8511                     yyerror("<> at require-statement should be quotes");
8512             }
8513             if (orig_keyword == KEY_require) {
8514                 orig_keyword = 0;
8515                 pl_yylval.ival = 1;
8516             }
8517             else
8518                 pl_yylval.ival = 0;
8519             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8520             PL_bufptr = s;
8521             PL_last_uni = PL_oldbufptr;
8522             PL_last_lop_op = OP_REQUIRE;
8523             s = skipspace(s);
8524             return REPORT( (int)REQUIRE );
8525
8526         case KEY_reset:
8527             UNI(OP_RESET);
8528
8529         case KEY_redo:
8530             LOOPX(OP_REDO);
8531
8532         case KEY_rename:
8533             LOP(OP_RENAME,XTERM);
8534
8535         case KEY_rand:
8536             UNI(OP_RAND);
8537
8538         case KEY_rmdir:
8539             UNI(OP_RMDIR);
8540
8541         case KEY_rindex:
8542             LOP(OP_RINDEX,XTERM);
8543
8544         case KEY_read:
8545             LOP(OP_READ,XTERM);
8546
8547         case KEY_readdir:
8548             UNI(OP_READDIR);
8549
8550         case KEY_readline:
8551             UNIDOR(OP_READLINE);
8552
8553         case KEY_readpipe:
8554             UNIDOR(OP_BACKTICK);
8555
8556         case KEY_rewinddir:
8557             UNI(OP_REWINDDIR);
8558
8559         case KEY_recv:
8560             LOP(OP_RECV,XTERM);
8561
8562         case KEY_reverse:
8563             LOP(OP_REVERSE,XTERM);
8564
8565         case KEY_readlink:
8566             UNIDOR(OP_READLINK);
8567
8568         case KEY_ref:
8569             UNI(OP_REF);
8570
8571         case KEY_s:
8572             s = scan_subst(s);
8573             if (pl_yylval.opval)
8574                 TERM(sublex_start());
8575             else
8576                 TOKEN(1);       /* force error */
8577
8578         case KEY_say:
8579             checkcomma(s,PL_tokenbuf,"filehandle");
8580             LOP(OP_SAY,XREF);
8581
8582         case KEY_chomp:
8583             UNI(OP_CHOMP);
8584
8585         case KEY_scalar:
8586             UNI(OP_SCALAR);
8587
8588         case KEY_select:
8589             LOP(OP_SELECT,XTERM);
8590
8591         case KEY_seek:
8592             LOP(OP_SEEK,XTERM);
8593
8594         case KEY_semctl:
8595             LOP(OP_SEMCTL,XTERM);
8596
8597         case KEY_semget:
8598             LOP(OP_SEMGET,XTERM);
8599
8600         case KEY_semop:
8601             LOP(OP_SEMOP,XTERM);
8602
8603         case KEY_send:
8604             LOP(OP_SEND,XTERM);
8605
8606         case KEY_setpgrp:
8607             LOP(OP_SETPGRP,XTERM);
8608
8609         case KEY_setpriority:
8610             LOP(OP_SETPRIORITY,XTERM);
8611
8612         case KEY_sethostent:
8613             UNI(OP_SHOSTENT);
8614
8615         case KEY_setnetent:
8616             UNI(OP_SNETENT);
8617
8618         case KEY_setservent:
8619             UNI(OP_SSERVENT);
8620
8621         case KEY_setprotoent:
8622             UNI(OP_SPROTOENT);
8623
8624         case KEY_setpwent:
8625             FUN0(OP_SPWENT);
8626
8627         case KEY_setgrent:
8628             FUN0(OP_SGRENT);
8629
8630         case KEY_seekdir:
8631             LOP(OP_SEEKDIR,XTERM);
8632
8633         case KEY_setsockopt:
8634             LOP(OP_SSOCKOPT,XTERM);
8635
8636         case KEY_shift:
8637             UNIDOR(OP_SHIFT);
8638
8639         case KEY_shmctl:
8640             LOP(OP_SHMCTL,XTERM);
8641
8642         case KEY_shmget:
8643             LOP(OP_SHMGET,XTERM);
8644
8645         case KEY_shmread:
8646             LOP(OP_SHMREAD,XTERM);
8647
8648         case KEY_shmwrite:
8649             LOP(OP_SHMWRITE,XTERM);
8650
8651         case KEY_shutdown:
8652             LOP(OP_SHUTDOWN,XTERM);
8653
8654         case KEY_sin:
8655             UNI(OP_SIN);
8656
8657         case KEY_sleep:
8658             UNI(OP_SLEEP);
8659
8660         case KEY_socket:
8661             LOP(OP_SOCKET,XTERM);
8662
8663         case KEY_socketpair:
8664             LOP(OP_SOCKPAIR,XTERM);
8665
8666         case KEY_sort:
8667             checkcomma(s,PL_tokenbuf,"subroutine name");
8668             s = skipspace(s);
8669             PL_expect = XTERM;
8670             s = force_word(s,BAREWORD,TRUE,TRUE);
8671             LOP(OP_SORT,XREF);
8672
8673         case KEY_split:
8674             LOP(OP_SPLIT,XTERM);
8675
8676         case KEY_sprintf:
8677             LOP(OP_SPRINTF,XTERM);
8678
8679         case KEY_splice:
8680             LOP(OP_SPLICE,XTERM);
8681
8682         case KEY_sqrt:
8683             UNI(OP_SQRT);
8684
8685         case KEY_srand:
8686             UNI(OP_SRAND);
8687
8688         case KEY_stat:
8689             UNI(OP_STAT);
8690
8691         case KEY_study:
8692             UNI(OP_STUDY);
8693
8694         case KEY_substr:
8695             LOP(OP_SUBSTR,XTERM);
8696
8697         case KEY_format:
8698         case KEY_sub:
8699           really_sub:
8700             {
8701                 char * const tmpbuf = PL_tokenbuf + 1;
8702                 bool have_name, have_proto;
8703                 const int key = tmp;
8704                 SV *format_name = NULL;
8705                 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
8706
8707                 SSize_t off = s-SvPVX(PL_linestr);
8708                 s = skipspace(s);
8709                 d = SvPVX(PL_linestr)+off;
8710
8711                 SAVEBOOL(PL_parser->sig_seen);
8712                 PL_parser->sig_seen = FALSE;
8713
8714                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8715                     || *s == '\''
8716                     || (*s == ':' && s[1] == ':'))
8717                 {
8718
8719                     PL_expect = XATTRBLOCK;
8720                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8721                                   &len);
8722                     if (key == KEY_format)
8723                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8724                     *PL_tokenbuf = '&';
8725                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8726                      || pad_findmy_pvn(
8727                             PL_tokenbuf, len + 1, 0
8728                         ) != NOT_IN_PAD)
8729                         sv_setpvn(PL_subname, tmpbuf, len);
8730                     else {
8731                         sv_setsv(PL_subname,PL_curstname);
8732                         sv_catpvs(PL_subname,"::");
8733                         sv_catpvn(PL_subname,tmpbuf,len);
8734                     }
8735                     if (SvUTF8(PL_linestr))
8736                         SvUTF8_on(PL_subname);
8737                     have_name = TRUE;
8738
8739
8740                     s = skipspace(d);
8741                 }
8742                 else {
8743                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8744                     {
8745                         *d = '\0';
8746                         /* diag_listed_as: Missing name in "%s sub" */
8747                         Perl_croak(aTHX_
8748                                   "Missing name in \"%s\"", PL_bufptr);
8749                     }
8750                     PL_expect = XATTRTERM;
8751                     sv_setpvs(PL_subname,"?");
8752                     have_name = FALSE;
8753                 }
8754
8755                 if (key == KEY_format) {
8756                     if (format_name) {
8757                         NEXTVAL_NEXTTOKE.opval
8758                             = newSVOP(OP_CONST,0, format_name);
8759                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8760                         force_next(BAREWORD);
8761                     }
8762                     PREBLOCK(FORMAT);
8763                 }
8764
8765                 /* Look for a prototype */
8766                 if (*s == '(' && !is_sigsub) {
8767                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8768                     if (!s)
8769                         Perl_croak(aTHX_ "Prototype not terminated");
8770                     COPLINE_SET_FROM_MULTI_END;
8771                     (void)validate_proto(PL_subname, PL_lex_stuff,
8772                                          ckWARN(WARN_ILLEGALPROTO), 0);
8773                     have_proto = TRUE;
8774
8775                     s = skipspace(s);
8776                 }
8777                 else
8778                     have_proto = FALSE;
8779
8780                 if (  !(*s == ':' && s[1] != ':')
8781                     && (*s != '{' && *s != '(') && key != KEY_format)
8782                 {
8783                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8784                            key == KEY_DESTROY || key == KEY_BEGIN ||
8785                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8786                            key == KEY_INIT || key == KEY_END ||
8787                            key == KEY_my || key == KEY_state ||
8788                            key == KEY_our);
8789                     if (!have_name)
8790                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8791                     else if (*s != ';' && *s != '}')
8792                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8793                 }
8794
8795                 if (have_proto) {
8796                     NEXTVAL_NEXTTOKE.opval =
8797                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8798                     PL_lex_stuff = NULL;
8799                     force_next(THING);
8800                 }
8801                 if (!have_name) {
8802                     if (PL_curstash)
8803                         sv_setpvs(PL_subname, "__ANON__");
8804                     else
8805                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8806                     if (is_sigsub)
8807                         TOKEN(ANON_SIGSUB);
8808                     else
8809                         TOKEN(ANONSUB);
8810                 }
8811                 force_ident_maybe_lex('&');
8812                 if (is_sigsub)
8813                     TOKEN(SIGSUB);
8814                 else
8815                     TOKEN(SUB);
8816             }
8817
8818         case KEY_system:
8819             LOP(OP_SYSTEM,XREF);
8820
8821         case KEY_symlink:
8822             LOP(OP_SYMLINK,XTERM);
8823
8824         case KEY_syscall:
8825             LOP(OP_SYSCALL,XTERM);
8826
8827         case KEY_sysopen:
8828             LOP(OP_SYSOPEN,XTERM);
8829
8830         case KEY_sysseek:
8831             LOP(OP_SYSSEEK,XTERM);
8832
8833         case KEY_sysread:
8834             LOP(OP_SYSREAD,XTERM);
8835
8836         case KEY_syswrite:
8837             LOP(OP_SYSWRITE,XTERM);
8838
8839         case KEY_tr:
8840         case KEY_y:
8841             s = scan_trans(s);
8842             TERM(sublex_start());
8843
8844         case KEY_tell:
8845             UNI(OP_TELL);
8846
8847         case KEY_telldir:
8848             UNI(OP_TELLDIR);
8849
8850         case KEY_tie:
8851             LOP(OP_TIE,XTERM);
8852
8853         case KEY_tied:
8854             UNI(OP_TIED);
8855
8856         case KEY_time:
8857             FUN0(OP_TIME);
8858
8859         case KEY_times:
8860             FUN0(OP_TMS);
8861
8862         case KEY_truncate:
8863             LOP(OP_TRUNCATE,XTERM);
8864
8865         case KEY_uc:
8866             UNI(OP_UC);
8867
8868         case KEY_ucfirst:
8869             UNI(OP_UCFIRST);
8870
8871         case KEY_untie:
8872             UNI(OP_UNTIE);
8873
8874         case KEY_until:
8875             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8876                 return REPORT(0);
8877             pl_yylval.ival = CopLINE(PL_curcop);
8878             OPERATOR(UNTIL);
8879
8880         case KEY_unless:
8881             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8882                 return REPORT(0);
8883             pl_yylval.ival = CopLINE(PL_curcop);
8884             OPERATOR(UNLESS);
8885
8886         case KEY_unlink:
8887             LOP(OP_UNLINK,XTERM);
8888
8889         case KEY_undef:
8890             UNIDOR(OP_UNDEF);
8891
8892         case KEY_unpack:
8893             LOP(OP_UNPACK,XTERM);
8894
8895         case KEY_utime:
8896             LOP(OP_UTIME,XTERM);
8897
8898         case KEY_umask:
8899             UNIDOR(OP_UMASK);
8900
8901         case KEY_unshift:
8902             LOP(OP_UNSHIFT,XTERM);
8903
8904         case KEY_use:
8905             s = tokenize_use(1, s);
8906             TOKEN(USE);
8907
8908         case KEY_values:
8909             UNI(OP_VALUES);
8910
8911         case KEY_vec:
8912             LOP(OP_VEC,XTERM);
8913
8914         case KEY_when:
8915             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8916                 return REPORT(0);
8917             pl_yylval.ival = CopLINE(PL_curcop);
8918             Perl_ck_warner_d(aTHX_
8919                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8920                 "when is experimental");
8921             OPERATOR(WHEN);
8922
8923         case KEY_while:
8924             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8925                 return REPORT(0);
8926             pl_yylval.ival = CopLINE(PL_curcop);
8927             OPERATOR(WHILE);
8928
8929         case KEY_warn:
8930             PL_hints |= HINT_BLOCK_SCOPE;
8931             LOP(OP_WARN,XTERM);
8932
8933         case KEY_wait:
8934             FUN0(OP_WAIT);
8935
8936         case KEY_waitpid:
8937             LOP(OP_WAITPID,XTERM);
8938
8939         case KEY_wantarray:
8940             FUN0(OP_WANTARRAY);
8941
8942         case KEY_write:
8943             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8944              * we use the same number on EBCDIC */
8945             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8946             UNI(OP_ENTERWRITE);
8947
8948         case KEY_x:
8949             if (PL_expect == XOPERATOR) {
8950                 if (*s == '=' && !PL_lex_allbrackets
8951                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8952                 {
8953                     return REPORT(0);
8954                 }
8955                 Mop(OP_REPEAT);
8956             }
8957             check_uni();
8958             goto just_a_word;
8959
8960         case KEY_xor:
8961             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8962                 return REPORT(0);
8963             pl_yylval.ival = OP_XOR;
8964             OPERATOR(OROP);
8965         }
8966     }}
8967 }
8968
8969 /*
8970   S_pending_ident
8971
8972   Looks up an identifier in the pad or in a package
8973
8974   is_sig indicates that this is a subroutine signature variable
8975   rather than a plain pad var.
8976
8977   Returns:
8978     PRIVATEREF if this is a lexical name.
8979     BAREWORD   if this belongs to a package.
8980
8981   Structure:
8982       if we're in a my declaration
8983           croak if they tried to say my($foo::bar)
8984           build the ops for a my() declaration
8985       if it's an access to a my() variable
8986           build ops for access to a my() variable
8987       if in a dq string, and they've said @foo and we can't find @foo
8988           warn
8989       build ops for a bareword
8990 */
8991
8992 static int
8993 S_pending_ident(pTHX)
8994 {
8995     PADOFFSET tmp = 0;
8996     const char pit = (char)pl_yylval.ival;
8997     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8998     /* All routes through this function want to know if there is a colon.  */
8999     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9000
9001     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9002           "### Pending identifier '%s'\n", PL_tokenbuf); });
9003     assert(tokenbuf_len >= 2);
9004
9005     /* if we're in a my(), we can't allow dynamics here.
9006        $foo'bar has already been turned into $foo::bar, so
9007        just check for colons.
9008
9009        if it's a legal name, the OP is a PADANY.
9010     */
9011     if (PL_in_my) {
9012         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9013             if (has_colon)
9014                 /* diag_listed_as: No package name allowed for variable %s
9015                                    in "our" */
9016                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9017                                   "%se %s in \"our\"",
9018                                   *PL_tokenbuf=='&' ?"subroutin":"variabl",
9019                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9020             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9021         }
9022         else {
9023             OP *o;
9024             if (has_colon) {
9025                 /* "my" variable %s can't be in a package */
9026                 /* PL_no_myglob is constant */
9027                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9028                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9029                             PL_in_my == KEY_my ? "my" : "state",
9030                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
9031                             PL_tokenbuf),
9032                             UTF ? SVf_UTF8 : 0);
9033                 GCC_DIAG_RESTORE_STMT;
9034             }
9035
9036             if (PL_in_my == KEY_sigvar) {
9037                 /* A signature 'padop' needs in addition, an op_first to
9038                  * point to a child sigdefelem, and an extra field to hold
9039                  * the signature index. We can achieve both by using an
9040                  * UNOP_AUX and (ab)using the op_aux field to hold the
9041                  * index. If we ever need more fields, use a real malloced
9042                  * aux strut instead.
9043                  */
9044                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9045                                     INT2PTR(UNOP_AUX_item *,
9046                                         (PL_parser->sig_elems)));
9047                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9048                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9049                                   :                         OPpARGELEM_HV);
9050             }
9051             else
9052                 o = newOP(OP_PADANY, 0);
9053             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9054                                                         UTF ? SVf_UTF8 : 0);
9055             if (PL_in_my == KEY_sigvar)
9056                 PL_in_my = 0;
9057
9058             pl_yylval.opval = o;
9059             return PRIVATEREF;
9060         }
9061     }
9062
9063     /*
9064        build the ops for accesses to a my() variable.
9065     */
9066
9067     if (!has_colon) {
9068         if (!PL_in_my)
9069             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9070                                  0);
9071         if (tmp != NOT_IN_PAD) {
9072             /* might be an "our" variable" */
9073             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9074                 /* build ops for a bareword */
9075                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9076                 HEK * const stashname = HvNAME_HEK(stash);
9077                 SV *  const sym = newSVhek(stashname);
9078                 sv_catpvs(sym, "::");
9079                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9080                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9081                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9082                 if (pit != '&')
9083                   gv_fetchsv(sym,
9084                     GV_ADDMULTI,
9085                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9086                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9087                      : SVt_PVHV));
9088                 return BAREWORD;
9089             }
9090
9091             pl_yylval.opval = newOP(OP_PADANY, 0);
9092             pl_yylval.opval->op_targ = tmp;
9093             return PRIVATEREF;
9094         }
9095     }
9096
9097     /*
9098        Whine if they've said @foo or @foo{key} in a doublequoted string,
9099        and @foo (or %foo) isn't a variable we can find in the symbol
9100        table.
9101     */
9102     if (ckWARN(WARN_AMBIGUOUS)
9103         && pit == '@'
9104         && PL_lex_state != LEX_NORMAL
9105         && !PL_lex_brackets)
9106     {
9107         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9108                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9109                                          SVt_PVAV);
9110         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9111            )
9112         {
9113             /* Downgraded from fatal to warning 20000522 mjd */
9114             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9115                         "Possible unintended interpolation of %" UTF8f
9116                         " in string",
9117                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9118         }
9119     }
9120
9121     /* build ops for a bareword */
9122     pl_yylval.opval = newSVOP(OP_CONST, 0,
9123                                    newSVpvn_flags(PL_tokenbuf + 1,
9124                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9125                                                       UTF ? SVf_UTF8 : 0 ));
9126     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9127     if (pit != '&')
9128         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9129                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9130                      | ( UTF ? SVf_UTF8 : 0 ),
9131                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9132                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9133                       : SVt_PVHV));
9134     return BAREWORD;
9135 }
9136
9137 STATIC void
9138 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9139 {
9140     PERL_ARGS_ASSERT_CHECKCOMMA;
9141
9142     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9143         if (ckWARN(WARN_SYNTAX)) {
9144             int level = 1;
9145             const char *w;
9146             for (w = s+2; *w && level; w++) {
9147                 if (*w == '(')
9148                     ++level;
9149                 else if (*w == ')')
9150                     --level;
9151             }
9152             while (isSPACE(*w))
9153                 ++w;
9154             /* the list of chars below is for end of statements or
9155              * block / parens, boolean operators (&&, ||, //) and branch
9156              * constructs (or, and, if, until, unless, while, err, for).
9157              * Not a very solid hack... */
9158             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9159                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9160                             "%s (...) interpreted as function",name);
9161         }
9162     }
9163     while (s < PL_bufend && isSPACE(*s))
9164         s++;
9165     if (*s == '(')
9166         s++;
9167     while (s < PL_bufend && isSPACE(*s))
9168         s++;
9169     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9170         const char * const w = s;
9171         s += UTF ? UTF8SKIP(s) : 1;
9172         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9173             s += UTF ? UTF8SKIP(s) : 1;
9174         while (s < PL_bufend && isSPACE(*s))
9175             s++;
9176         if (*s == ',') {
9177             GV* gv;
9178             if (keyword(w, s - w, 0))
9179                 return;
9180
9181             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9182             if (gv && GvCVu(gv))
9183                 return;
9184             if (s - w <= 254) {
9185                 PADOFFSET off;
9186                 char tmpbuf[256];
9187                 Copy(w, tmpbuf+1, s - w, char);
9188                 *tmpbuf = '&';
9189                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9190                 if (off != NOT_IN_PAD) return;
9191             }
9192             Perl_croak(aTHX_ "No comma allowed after %s", what);
9193         }
9194     }
9195 }
9196
9197 /* S_new_constant(): do any overload::constant lookup.
9198
9199    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9200    Best used as sv=new_constant(..., sv, ...).
9201    If s, pv are NULL, calls subroutine with one argument,
9202    and <type> is used with error messages only.
9203    <type> is assumed to be well formed UTF-8.
9204
9205    If error_msg is not NULL, *error_msg will be set to any error encountered.
9206    Otherwise yyerror() will be used to output it */
9207
9208 STATIC SV *
9209 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9210                SV *sv, SV *pv, const char *type, STRLEN typelen,
9211                const char ** error_msg)
9212 {
9213     dSP;
9214     HV * table = GvHV(PL_hintgv);                /* ^H */
9215     SV *res;
9216     SV *errsv = NULL;
9217     SV **cvp;
9218     SV *cv, *typesv;
9219     const char *why1 = "", *why2 = "", *why3 = "";
9220
9221     PERL_ARGS_ASSERT_NEW_CONSTANT;
9222     /* We assume that this is true: */
9223     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9224     assert(type || s);
9225
9226     sv_2mortal(sv);                     /* Parent created it permanently */
9227     if (!table
9228         || ! (PL_hints & HINT_LOCALIZE_HH)
9229         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9230         || ! SvOK(*cvp))
9231     {
9232         char *msg;
9233
9234         /* Here haven't found what we're looking for.  If it is charnames,
9235          * perhaps it needs to be loaded.  Try doing that before giving up */
9236         if (*key == 'c') {
9237             Perl_load_module(aTHX_
9238                             0,
9239                             newSVpvs("_charnames"),
9240                              /* version parameter; no need to specify it, as if
9241                               * we get too early a version, will fail anyway,
9242                               * not being able to find '_charnames' */
9243                             NULL,
9244                             newSVpvs(":full"),
9245                             newSVpvs(":short"),
9246                             NULL);
9247             assert(sp == PL_stack_sp);
9248             table = GvHV(PL_hintgv);
9249             if (table
9250                 && (PL_hints & HINT_LOCALIZE_HH)
9251                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9252                 && SvOK(*cvp))
9253             {
9254                 goto now_ok;
9255             }
9256         }
9257         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9258             msg = Perl_form(aTHX_
9259                                "Constant(%.*s) unknown",
9260                                 (int)(type ? typelen : len),
9261                                 (type ? type: s));
9262         }
9263         else {
9264             why1 = "$^H{";
9265             why2 = key;
9266             why3 = "} is not defined";
9267         report:
9268             if (*key == 'c') {
9269                 msg = Perl_form(aTHX_
9270                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9271                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9272                       );
9273             }
9274             else {
9275                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9276                                     (int)(type ? typelen : len),
9277                                     (type ? type: s), why1, why2, why3);
9278             }
9279         }
9280         if (error_msg) {
9281             *error_msg = msg;
9282         }
9283         else {
9284             yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9285         }
9286         return SvREFCNT_inc_simple_NN(sv);
9287     }
9288   now_ok:
9289     cv = *cvp;
9290     if (!pv && s)
9291         pv = newSVpvn_flags(s, len, SVs_TEMP);
9292     if (type && pv)
9293         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9294     else
9295         typesv = &PL_sv_undef;
9296
9297     PUSHSTACKi(PERLSI_OVERLOAD);
9298     ENTER ;
9299     SAVETMPS;
9300
9301     PUSHMARK(SP) ;
9302     EXTEND(sp, 3);
9303     if (pv)
9304         PUSHs(pv);
9305     PUSHs(sv);
9306     if (pv)
9307         PUSHs(typesv);
9308     PUTBACK;
9309     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9310
9311     SPAGAIN ;
9312
9313     /* Check the eval first */
9314     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9315         STRLEN errlen;
9316         const char * errstr;
9317         sv_catpvs(errsv, "Propagated");
9318         errstr = SvPV_const(errsv, errlen);
9319         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9320         (void)POPs;
9321         res = SvREFCNT_inc_simple_NN(sv);
9322     }
9323     else {
9324         res = POPs;
9325         SvREFCNT_inc_simple_void_NN(res);
9326     }
9327
9328     PUTBACK ;
9329     FREETMPS ;
9330     LEAVE ;
9331     POPSTACK;
9332
9333     if (!SvOK(res)) {
9334         why1 = "Call to &{$^H{";
9335         why2 = key;
9336         why3 = "}} did not return a defined value";
9337         sv = res;
9338         (void)sv_2mortal(sv);
9339         goto report;
9340     }
9341
9342     return res;
9343 }
9344
9345 PERL_STATIC_INLINE void
9346 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9347                     bool is_utf8, bool check_dollar, bool tick_warn)
9348 {
9349     int saw_tick = 0;
9350     const char *olds = *s;
9351     PERL_ARGS_ASSERT_PARSE_IDENT;
9352
9353     while (*s < PL_bufend) {
9354         if (*d >= e)
9355             Perl_croak(aTHX_ "%s", ident_too_long);
9356         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9357              /* The UTF-8 case must come first, otherwise things
9358              * like c\N{COMBINING TILDE} would start failing, as the
9359              * isWORDCHAR_A case below would gobble the 'c' up.
9360              */
9361
9362             char *t = *s + UTF8SKIP(*s);
9363             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9364                 t += UTF8SKIP(t);
9365             }
9366             if (*d + (t - *s) > e)
9367                 Perl_croak(aTHX_ "%s", ident_too_long);
9368             Copy(*s, *d, t - *s, char);
9369             *d += t - *s;
9370             *s = t;
9371         }
9372         else if ( isWORDCHAR_A(**s) ) {
9373             do {
9374                 *(*d)++ = *(*s)++;
9375             } while (isWORDCHAR_A(**s) && *d < e);
9376         }
9377         else if (   allow_package
9378                  && **s == '\''
9379                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9380         {
9381             *(*d)++ = ':';
9382             *(*d)++ = ':';
9383             (*s)++;
9384             saw_tick++;
9385         }
9386         else if (allow_package && **s == ':' && (*s)[1] == ':'
9387            /* Disallow things like Foo::$bar. For the curious, this is
9388             * the code path that triggers the "Bad name after" warning
9389             * when looking for barewords.
9390             */
9391            && !(check_dollar && (*s)[2] == '$')) {
9392             *(*d)++ = *(*s)++;
9393             *(*d)++ = *(*s)++;
9394         }
9395         else
9396             break;
9397     }
9398     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9399               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9400         char *d;
9401         char *d2;
9402         Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9403         d2 = d;
9404         SAVEFREEPV(d);
9405         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9406                          "Old package separator used in string");
9407         if (olds[-1] == '#')
9408             *d2++ = olds[-2];
9409         *d2++ = olds[-1];
9410         while (olds < *s) {
9411             if (*olds == '\'') {
9412                 *d2++ = '\\';
9413                 *d2++ = *olds++;
9414             }
9415             else
9416                 *d2++ = *olds++;
9417         }
9418         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9419                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9420                           UTF8fARG(is_utf8, d2-d, d));
9421     }
9422     return;
9423 }
9424
9425 /* Returns a NUL terminated string, with the length of the string written to
9426    *slp
9427    */
9428 STATIC char *
9429 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9430 {
9431     char *d = dest;
9432     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9433     bool is_utf8 = cBOOL(UTF);
9434
9435     PERL_ARGS_ASSERT_SCAN_WORD;
9436
9437     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9438     *d = '\0';
9439     *slp = d - dest;
9440     return s;
9441 }
9442
9443 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9444  * iff Unicode semantics are to be used.  The legal ones are any of:
9445  *  a) all ASCII characters except:
9446  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9447  *          2) '{'
9448  *     The final case currently doesn't get this far in the program, so we
9449  *     don't test for it.  If that were to change, it would be ok to allow it.
9450  *  b) When not under Unicode rules, any upper Latin1 character
9451  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9452  *
9453  *      Because all ASCII characters have the same representation whether
9454  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9455  *      '{' without knowing if is UTF-8 or not. */
9456 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9457     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9458                          ? isIDFIRST_utf8_safe(s, e)                        \
9459                          : (isGRAPH_L1(*s)                                  \
9460                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9461
9462 STATIC char *
9463 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9464 {
9465     I32 herelines = PL_parser->herelines;
9466     SSize_t bracket = -1;
9467     char funny = *s++;
9468     char *d = dest;
9469     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9470     bool is_utf8 = cBOOL(UTF);
9471     I32 orig_copline = 0, tmp_copline = 0;
9472
9473     PERL_ARGS_ASSERT_SCAN_IDENT;
9474
9475     if (isSPACE(*s) || !*s)
9476         s = skipspace(s);
9477     if (isDIGIT(*s)) {
9478         while (isDIGIT(*s)) {
9479             if (d >= e)
9480                 Perl_croak(aTHX_ "%s", ident_too_long);
9481             *d++ = *s++;
9482         }
9483     }
9484     else {  /* See if it is a "normal" identifier */
9485         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9486     }
9487     *d = '\0';
9488     d = dest;
9489     if (*d) {
9490         /* Either a digit variable, or parse_ident() found an identifier
9491            (anything valid as a bareword), so job done and return.  */
9492         if (PL_lex_state != LEX_NORMAL)
9493             PL_lex_state = LEX_INTERPENDMAYBE;
9494         return s;
9495     }
9496
9497     /* Here, it is not a run-of-the-mill identifier name */
9498
9499     if (*s == '$' && s[1]
9500         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9501             || isDIGIT_A((U8)s[1])
9502             || s[1] == '$'
9503             || s[1] == '{'
9504             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9505     {
9506         /* Dereferencing a value in a scalar variable.
9507            The alternatives are different syntaxes for a scalar variable.
9508            Using ' as a leading package separator isn't allowed. :: is.   */
9509         return s;
9510     }
9511     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9512     if (*s == '{') {
9513         bracket = s - SvPVX(PL_linestr);
9514         s++;
9515         orig_copline = CopLINE(PL_curcop);
9516         if (s < PL_bufend && isSPACE(*s)) {
9517             s = skipspace(s);
9518         }
9519     }
9520     if ((s <= PL_bufend - (is_utf8)
9521                           ? UTF8SKIP(s)
9522                           : 1)
9523         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9524     {
9525         if (is_utf8) {
9526             const STRLEN skip = UTF8SKIP(s);
9527             STRLEN i;
9528             d[skip] = '\0';
9529             for ( i = 0; i < skip; i++ )
9530                 d[i] = *s++;
9531         }
9532         else {
9533             *d = *s++;
9534             d[1] = '\0';
9535         }
9536     }
9537     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9538     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9539         *d = toCTRL(*s);
9540         s++;
9541     }
9542     /* Warn about ambiguous code after unary operators if {...} notation isn't
9543        used.  There's no difference in ambiguity; it's merely a heuristic
9544        about when not to warn.  */
9545     else if (ck_uni && bracket == -1)
9546         check_uni();
9547     if (bracket != -1) {
9548         bool skip;
9549         char *s2;
9550         /* If we were processing {...} notation then...  */
9551         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9552             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9553                  && isWORDCHAR(*s))
9554         ) {
9555             /* note we have to check for a normal identifier first,
9556              * as it handles utf8 symbols, and only after that has
9557              * been ruled out can we look at the caret words */
9558             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9559                 /* if it starts as a valid identifier, assume that it is one.
9560                    (the later check for } being at the expected point will trap
9561                    cases where this doesn't pan out.)  */
9562                 d += is_utf8 ? UTF8SKIP(d) : 1;
9563                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9564                 *d = '\0';
9565             }
9566             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9567                 d++;
9568                 while (isWORDCHAR(*s) && d < e) {
9569                     *d++ = *s++;
9570                 }
9571                 if (d >= e)
9572                     Perl_croak(aTHX_ "%s", ident_too_long);
9573                 *d = '\0';
9574             }
9575             tmp_copline = CopLINE(PL_curcop);
9576             if (s < PL_bufend && isSPACE(*s)) {
9577                 s = skipspace(s);
9578             }
9579             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9580                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
9581                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9582                     const char * const brack =
9583                         (const char *)
9584                         ((*s == '[') ? "[...]" : "{...}");
9585                     orig_copline = CopLINE(PL_curcop);
9586                     CopLINE_set(PL_curcop, tmp_copline);
9587    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9588                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9589                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9590                         funny, dest, brack, funny, dest, brack);
9591                     CopLINE_set(PL_curcop, orig_copline);
9592                 }
9593                 bracket++;
9594                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9595                 PL_lex_allbrackets++;
9596                 return s;
9597             }
9598         }
9599
9600         if ( !tmp_copline )
9601             tmp_copline = CopLINE(PL_curcop);
9602         if ((skip = s < PL_bufend && isSPACE(*s))) {
9603             /* Avoid incrementing line numbers or resetting PL_linestart,
9604                in case we have to back up.  */
9605             STRLEN s_off = s - SvPVX(PL_linestr);
9606             s2 = peekspace(s);
9607             s = SvPVX(PL_linestr) + s_off;
9608         }
9609         else
9610             s2 = s;
9611
9612         /* Expect to find a closing } after consuming any trailing whitespace.
9613          */
9614         if (*s2 == '}') {
9615             /* Now increment line numbers if applicable.  */
9616             if (skip)
9617                 s = skipspace(s);
9618             s++;
9619             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9620                 PL_lex_state = LEX_INTERPEND;
9621                 PL_expect = XREF;
9622             }
9623             if (PL_lex_state == LEX_NORMAL) {
9624                 if (ckWARN(WARN_AMBIGUOUS)
9625                     && (keyword(dest, d - dest, 0)
9626                         || get_cvn_flags(dest, d - dest, is_utf8
9627                            ? SVf_UTF8
9628                            : 0)))
9629                 {
9630                     SV *tmp = newSVpvn_flags( dest, d - dest,
9631                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9632                     if (funny == '#')
9633                         funny = '@';
9634                     orig_copline = CopLINE(PL_curcop);
9635                     CopLINE_set(PL_curcop, tmp_copline);
9636                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9637                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9638                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9639                     CopLINE_set(PL_curcop, orig_copline);
9640                 }
9641             }
9642         }
9643         else {
9644             /* Didn't find the closing } at the point we expected, so restore
9645                state such that the next thing to process is the opening { and */
9646             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9647             CopLINE_set(PL_curcop, orig_copline);
9648             PL_parser->herelines = herelines;
9649             *dest = '\0';
9650             PL_parser->sub_no_recover = TRUE;
9651         }
9652     }
9653     else if (   PL_lex_state == LEX_INTERPNORMAL
9654              && !PL_lex_brackets
9655              && !intuit_more(s, PL_bufend))
9656         PL_lex_state = LEX_INTERPEND;
9657     return s;
9658 }
9659
9660 static bool
9661 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9662
9663     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9664      * found in the parse starting at 's', based on the subset that are valid
9665      * in this context input to this routine in 'valid_flags'. Advances s.
9666      * Returns TRUE if the input should be treated as a valid flag, so the next
9667      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9668      * upon first call on the current regex.  This routine will set it to any
9669      * charset modifier found.  The caller shouldn't change it.  This way,
9670      * another charset modifier encountered in the parse can be detected as an
9671      * error, as we have decided to allow only one */
9672
9673     const char c = **s;
9674     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9675
9676     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9677         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9678             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9679                        UTF ? SVf_UTF8 : 0);
9680             (*s) += charlen;
9681             /* Pretend that it worked, so will continue processing before
9682              * dieing */
9683             return TRUE;
9684         }
9685         return FALSE;
9686     }
9687
9688     switch (c) {
9689
9690         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9691         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9692         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9693         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9694         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9695         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9696         case LOCALE_PAT_MOD:
9697             if (*charset) {
9698                 goto multiple_charsets;
9699             }
9700             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9701             *charset = c;
9702             break;
9703         case UNICODE_PAT_MOD:
9704             if (*charset) {
9705                 goto multiple_charsets;
9706             }
9707             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9708             *charset = c;
9709             break;
9710         case ASCII_RESTRICT_PAT_MOD:
9711             if (! *charset) {
9712                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9713             }
9714             else {
9715
9716                 /* Error if previous modifier wasn't an 'a', but if it was, see
9717                  * if, and accept, a second occurrence (only) */
9718                 if (*charset != 'a'
9719                     || get_regex_charset(*pmfl)
9720                         != REGEX_ASCII_RESTRICTED_CHARSET)
9721                 {
9722                         goto multiple_charsets;
9723                 }
9724                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9725             }
9726             *charset = c;
9727             break;
9728         case DEPENDS_PAT_MOD:
9729             if (*charset) {
9730                 goto multiple_charsets;
9731             }
9732             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9733             *charset = c;
9734             break;
9735     }
9736
9737     (*s)++;
9738     return TRUE;
9739
9740     multiple_charsets:
9741         if (*charset != c) {
9742             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9743         }
9744         else if (c == 'a') {
9745   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9746             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9747         }
9748         else {
9749             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9750         }
9751
9752         /* Pretend that it worked, so will continue processing before dieing */
9753         (*s)++;
9754         return TRUE;
9755 }
9756
9757 STATIC char *
9758 S_scan_pat(pTHX_ char *start, I32 type)
9759 {
9760     PMOP *pm;
9761     char *s;
9762     const char * const valid_flags =
9763         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9764     char charset = '\0';    /* character set modifier */
9765     unsigned int x_mod_count = 0;
9766
9767     PERL_ARGS_ASSERT_SCAN_PAT;
9768
9769     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9770     if (!s)
9771         Perl_croak(aTHX_ "Search pattern not terminated");
9772
9773     pm = (PMOP*)newPMOP(type, 0);
9774     if (PL_multi_open == '?') {
9775         /* This is the only point in the code that sets PMf_ONCE:  */
9776         pm->op_pmflags |= PMf_ONCE;
9777
9778         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9779            allows us to restrict the list needed by reset to just the ??
9780            matches.  */
9781         assert(type != OP_TRANS);
9782         if (PL_curstash) {
9783             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9784             U32 elements;
9785             if (!mg) {
9786                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9787                                  0);
9788             }
9789             elements = mg->mg_len / sizeof(PMOP**);
9790             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9791             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9792             mg->mg_len = elements * sizeof(PMOP**);
9793             PmopSTASH_set(pm,PL_curstash);
9794         }
9795     }
9796
9797     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9798      * anon CV. False positives like qr/[(?{]/ are harmless */
9799
9800     if (type == OP_QR) {
9801         STRLEN len;
9802         char *e, *p = SvPV(PL_lex_stuff, len);
9803         e = p + len;
9804         for (; p < e; p++) {
9805             if (p[0] == '(' && p[1] == '?'
9806                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9807             {
9808                 pm->op_pmflags |= PMf_HAS_CV;
9809                 break;
9810             }
9811         }
9812         pm->op_pmflags |= PMf_IS_QR;
9813     }
9814
9815     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9816                                 &s, &charset, &x_mod_count))
9817     {};
9818     /* issue a warning if /c is specified,but /g is not */
9819     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9820     {
9821         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9822                        "Use of /c modifier is meaningless without /g" );
9823     }
9824
9825     PL_lex_op = (OP*)pm;
9826     pl_yylval.ival = OP_MATCH;
9827     return s;
9828 }
9829
9830 STATIC char *
9831 S_scan_subst(pTHX_ char *start)
9832 {
9833     char *s;
9834     PMOP *pm;
9835     I32 first_start;
9836     line_t first_line;
9837     line_t linediff = 0;
9838     I32 es = 0;
9839     char charset = '\0';    /* character set modifier */
9840     unsigned int x_mod_count = 0;
9841     char *t;
9842
9843     PERL_ARGS_ASSERT_SCAN_SUBST;
9844
9845     pl_yylval.ival = OP_NULL;
9846
9847     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9848
9849     if (!s)
9850         Perl_croak(aTHX_ "Substitution pattern not terminated");
9851
9852     s = t;
9853
9854     first_start = PL_multi_start;
9855     first_line = CopLINE(PL_curcop);
9856     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9857     if (!s) {
9858         SvREFCNT_dec_NN(PL_lex_stuff);
9859         PL_lex_stuff = NULL;
9860         Perl_croak(aTHX_ "Substitution replacement not terminated");
9861     }
9862     PL_multi_start = first_start;       /* so whole substitution is taken together */
9863
9864     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9865
9866
9867     while (*s) {
9868         if (*s == EXEC_PAT_MOD) {
9869             s++;
9870             es++;
9871         }
9872         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9873                                   &s, &charset, &x_mod_count))
9874         {
9875             break;
9876         }
9877     }
9878
9879     if ((pm->op_pmflags & PMf_CONTINUE)) {
9880         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9881     }
9882
9883     if (es) {
9884         SV * const repl = newSVpvs("");
9885
9886         PL_multi_end = 0;
9887         pm->op_pmflags |= PMf_EVAL;
9888         for (; es > 1; es--) {
9889             sv_catpvs(repl, "eval ");
9890         }
9891         sv_catpvs(repl, "do {");
9892         sv_catsv(repl, PL_parser->lex_sub_repl);
9893         sv_catpvs(repl, "}");
9894         SvREFCNT_dec(PL_parser->lex_sub_repl);
9895         PL_parser->lex_sub_repl = repl;
9896     }
9897
9898
9899     linediff = CopLINE(PL_curcop) - first_line;
9900     if (linediff)
9901         CopLINE_set(PL_curcop, first_line);
9902
9903     if (linediff || es) {
9904         /* the IVX field indicates that the replacement string is a s///e;
9905          * the NVX field indicates how many src code lines the replacement
9906          * spreads over */
9907         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9908         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
9909         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9910                                                                     cBOOL(es);
9911     }
9912
9913     PL_lex_op = (OP*)pm;
9914     pl_yylval.ival = OP_SUBST;
9915     return s;
9916 }
9917
9918 STATIC char *
9919 S_scan_trans(pTHX_ char *start)
9920 {
9921     char* s;
9922     OP *o;
9923     U8 squash;
9924     U8 del;
9925     U8 complement;
9926     bool nondestruct = 0;
9927     char *t;
9928
9929     PERL_ARGS_ASSERT_SCAN_TRANS;
9930
9931     pl_yylval.ival = OP_NULL;
9932
9933     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9934     if (!s)
9935         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9936
9937     s = t;
9938
9939     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9940     if (!s) {
9941         SvREFCNT_dec_NN(PL_lex_stuff);
9942         PL_lex_stuff = NULL;
9943         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9944     }
9945
9946     complement = del = squash = 0;
9947     while (1) {
9948         switch (*s) {
9949         case 'c':
9950             complement = OPpTRANS_COMPLEMENT;
9951             break;
9952         case 'd':
9953             del = OPpTRANS_DELETE;
9954             break;
9955         case 's':
9956             squash = OPpTRANS_SQUASH;
9957             break;
9958         case 'r':
9959             nondestruct = 1;
9960             break;
9961         default:
9962             goto no_more;
9963         }
9964         s++;
9965     }
9966   no_more:
9967
9968     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9969     o->op_private &= ~OPpTRANS_ALL;
9970     o->op_private |= del|squash|complement|
9971       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9972       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9973
9974     PL_lex_op = o;
9975     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9976
9977
9978     return s;
9979 }
9980
9981 /* scan_heredoc
9982    Takes a pointer to the first < in <<FOO.
9983    Returns a pointer to the byte following <<FOO.
9984
9985    This function scans a heredoc, which involves different methods
9986    depending on whether we are in a string eval, quoted construct, etc.
9987    This is because PL_linestr could containing a single line of input, or
9988    a whole string being evalled, or the contents of the current quote-
9989    like operator.
9990
9991    The two basic methods are:
9992     - Steal lines from the input stream
9993     - Scan the heredoc in PL_linestr and remove it therefrom
9994
9995    In a file scope or filtered eval, the first method is used; in a
9996    string eval, the second.
9997
9998    In a quote-like operator, we have to choose between the two,
9999    depending on where we can find a newline.  We peek into outer lex-
10000    ing scopes until we find one with a newline in it.  If we reach the
10001    outermost lexing scope and it is a file, we use the stream method.
10002    Otherwise it is treated as an eval.
10003 */
10004
10005 STATIC char *
10006 S_scan_heredoc(pTHX_ char *s)
10007 {
10008     I32 op_type = OP_SCALAR;
10009     I32 len;
10010     SV *tmpstr;
10011     char term;
10012     char *d;
10013     char *e;
10014     char *peek;
10015     char *indent = 0;
10016     I32 indent_len = 0;
10017     bool indented = FALSE;
10018     const bool infile = PL_rsfp || PL_parser->filtered;
10019     const line_t origline = CopLINE(PL_curcop);
10020     LEXSHARED *shared = PL_parser->lex_shared;
10021
10022     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10023
10024     s += 2;
10025     d = PL_tokenbuf + 1;
10026     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10027     *PL_tokenbuf = '\n';
10028     peek = s;
10029
10030     if (*peek == '~') {
10031         indented = TRUE;
10032         peek++; s++;
10033     }
10034
10035     while (SPACE_OR_TAB(*peek))
10036         peek++;
10037
10038     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10039         s = peek;
10040         term = *s++;
10041         s = delimcpy(d, e, s, PL_bufend, term, &len);
10042         if (s == PL_bufend)
10043             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10044         d += len;
10045         s++;
10046     }
10047     else {
10048         if (*s == '\\')
10049             /* <<\FOO is equivalent to <<'FOO' */
10050             s++, term = '\'';
10051         else
10052             term = '"';
10053
10054         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10055             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10056
10057         peek = s;
10058
10059         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10060             peek += UTF ? UTF8SKIP(peek) : 1;
10061         }
10062
10063         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10064         Copy(s, d, len, char);
10065         s += len;
10066         d += len;
10067     }
10068
10069     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10070         Perl_croak(aTHX_ "Delimiter for here document is too long");
10071
10072     *d++ = '\n';
10073     *d = '\0';
10074     len = d - PL_tokenbuf;
10075
10076 #ifndef PERL_STRICT_CR
10077     d = (char *) memchr(s, '\r', PL_bufend - s);
10078     if (d) {
10079         char * const olds = s;
10080         s = d;
10081         while (s < PL_bufend) {
10082             if (*s == '\r') {
10083                 *d++ = '\n';
10084                 if (*++s == '\n')
10085                     s++;
10086             }
10087             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10088                 *d++ = *s++;
10089                 s++;
10090             }
10091             else
10092                 *d++ = *s++;
10093         }
10094         *d = '\0';
10095         PL_bufend = d;
10096         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10097         s = olds;
10098     }
10099 #endif
10100
10101     tmpstr = newSV_type(SVt_PVIV);
10102     SvGROW(tmpstr, 80);
10103     if (term == '\'') {
10104         op_type = OP_CONST;
10105         SvIV_set(tmpstr, -1);
10106     }
10107     else if (term == '`') {
10108         op_type = OP_BACKTICK;
10109         SvIV_set(tmpstr, '\\');
10110     }
10111
10112     PL_multi_start = origline + 1 + PL_parser->herelines;
10113     PL_multi_open = PL_multi_close = '<';
10114
10115     /* inside a string eval or quote-like operator */
10116     if (!infile || PL_lex_inwhat) {
10117         SV *linestr;
10118         char *bufend;
10119         char * const olds = s;
10120         PERL_CONTEXT * const cx = CX_CUR();
10121         /* These two fields are not set until an inner lexing scope is
10122            entered.  But we need them set here. */
10123         shared->ls_bufptr  = s;
10124         shared->ls_linestr = PL_linestr;
10125
10126         if (PL_lex_inwhat) {
10127             /* Look for a newline.  If the current buffer does not have one,
10128              peek into the line buffer of the parent lexing scope, going
10129              up as many levels as necessary to find one with a newline
10130              after bufptr.
10131             */
10132             while (!(s = (char *)memchr(
10133                                 (void *)shared->ls_bufptr, '\n',
10134                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10135                 )))
10136             {
10137                 shared = shared->ls_prev;
10138                 /* shared is only null if we have gone beyond the outermost
10139                    lexing scope.  In a file, we will have broken out of the
10140                    loop in the previous iteration.  In an eval, the string buf-
10141                    fer ends with "\n;", so the while condition above will have
10142                    evaluated to false.  So shared can never be null.  Or so you
10143                    might think.  Odd syntax errors like s;@{<<; can gobble up
10144                    the implicit semicolon at the end of a flie, causing the
10145                    file handle to be closed even when we are not in a string
10146                    eval.  So shared may be null in that case.
10147                    (Closing '>>}' here to balance the earlier open brace for
10148                    editors that look for matched pairs.) */
10149                 if (UNLIKELY(!shared))
10150                     goto interminable;
10151                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10152                    most lexing scope.  In a file, shared->ls_linestr at that
10153                    level is just one line, so there is no body to steal. */
10154                 if (infile && !shared->ls_prev) {
10155                     s = olds;
10156                     goto streaming;
10157                 }
10158             }
10159         }
10160         else {  /* eval or we've already hit EOF */
10161             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10162             if (!s)
10163                 goto interminable;
10164         }
10165
10166         linestr = shared->ls_linestr;
10167         bufend = SvEND(linestr);
10168         d = s;
10169         if (indented) {
10170             char *myolds = s;
10171
10172             while (s < bufend - len + 1) {
10173                 if (*s++ == '\n')
10174                     ++PL_parser->herelines;
10175
10176                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10177                     char *backup = s;
10178                     indent_len = 0;
10179
10180                     /* Only valid if it's preceded by whitespace only */
10181                     while (backup != myolds && --backup >= myolds) {
10182                         if (! SPACE_OR_TAB(*backup)) {
10183                             break;
10184                         }
10185                         indent_len++;
10186                     }
10187
10188                     /* No whitespace or all! */
10189                     if (backup == s || *backup == '\n') {
10190                         Newx(indent, indent_len + 1, char);
10191                         memcpy(indent, backup + 1, indent_len);
10192                         indent[indent_len] = 0;
10193                         s--; /* before our delimiter */
10194                         PL_parser->herelines--; /* this line doesn't count */
10195                         break;
10196                     }
10197                 }
10198             }
10199         }
10200         else {
10201             while (s < bufend - len + 1
10202                    && memNE(s,PL_tokenbuf,len) )
10203             {
10204                 if (*s++ == '\n')
10205                     ++PL_parser->herelines;
10206             }
10207         }
10208
10209         if (s >= bufend - len + 1) {
10210             goto interminable;
10211         }
10212
10213         sv_setpvn(tmpstr,d+1,s-d);
10214         s += len - 1;
10215         /* the preceding stmt passes a newline */
10216         PL_parser->herelines++;
10217
10218         /* s now points to the newline after the heredoc terminator.
10219            d points to the newline before the body of the heredoc.
10220          */
10221
10222         /* We are going to modify linestr in place here, so set
10223            aside copies of the string if necessary for re-evals or
10224            (caller $n)[6]. */
10225         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10226            check shared->re_eval_str. */
10227         if (shared->re_eval_start || shared->re_eval_str) {
10228             /* Set aside the rest of the regexp */
10229             if (!shared->re_eval_str)
10230                 shared->re_eval_str =
10231                        newSVpvn(shared->re_eval_start,
10232                                 bufend - shared->re_eval_start);
10233             shared->re_eval_start -= s-d;
10234         }
10235
10236         if (cxstack_ix >= 0
10237             && CxTYPE(cx) == CXt_EVAL
10238             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10239             && cx->blk_eval.cur_text == linestr)
10240         {
10241             cx->blk_eval.cur_text = newSVsv(linestr);
10242             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10243         }
10244
10245         /* Copy everything from s onwards back to d. */
10246         Move(s,d,bufend-s + 1,char);
10247         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10248         /* Setting PL_bufend only applies when we have not dug deeper
10249            into other scopes, because sublex_done sets PL_bufend to
10250            SvEND(PL_linestr). */
10251         if (shared == PL_parser->lex_shared)
10252             PL_bufend = SvEND(linestr);
10253         s = olds;
10254     }
10255     else {
10256         SV *linestr_save;
10257         char *oldbufptr_save;
10258         char *oldoldbufptr_save;
10259       streaming:
10260         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10261         term = PL_tokenbuf[1];
10262         len--;
10263         linestr_save = PL_linestr; /* must restore this afterwards */
10264         d = s;                   /* and this */
10265         oldbufptr_save = PL_oldbufptr;
10266         oldoldbufptr_save = PL_oldoldbufptr;
10267         PL_linestr = newSVpvs("");
10268         PL_bufend = SvPVX(PL_linestr);
10269
10270         while (1) {
10271             PL_bufptr = PL_bufend;
10272             CopLINE_set(PL_curcop,
10273                         origline + 1 + PL_parser->herelines);
10274
10275             if (   !lex_next_chunk(LEX_NO_TERM)
10276                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10277             {
10278                 /* Simply freeing linestr_save might seem simpler here, as it
10279                    does not matter what PL_linestr points to, since we are
10280                    about to croak; but in a quote-like op, linestr_save
10281                    will have been prospectively freed already, via
10282                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10283                    restore PL_linestr. */
10284                 SvREFCNT_dec_NN(PL_linestr);
10285                 PL_linestr = linestr_save;
10286                 PL_oldbufptr = oldbufptr_save;
10287                 PL_oldoldbufptr = oldoldbufptr_save;
10288                 goto interminable;
10289             }
10290
10291             CopLINE_set(PL_curcop, origline);
10292
10293             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10294                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10295                 /* ^That should be enough to avoid this needing to grow:  */
10296                 sv_catpvs(PL_linestr, "\n\0");
10297                 assert(s == SvPVX(PL_linestr));
10298                 PL_bufend = SvEND(PL_linestr);
10299             }
10300
10301             s = PL_bufptr;
10302             PL_parser->herelines++;
10303             PL_last_lop = PL_last_uni = NULL;
10304
10305 #ifndef PERL_STRICT_CR
10306             if (PL_bufend - PL_linestart >= 2) {
10307                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10308                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10309                 {
10310                     PL_bufend[-2] = '\n';
10311                     PL_bufend--;
10312                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10313                 }
10314                 else if (PL_bufend[-1] == '\r')
10315                     PL_bufend[-1] = '\n';
10316             }
10317             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10318                 PL_bufend[-1] = '\n';
10319 #endif
10320
10321             if (indented && (PL_bufend-s) >= len) {
10322                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10323
10324                 if (found) {
10325                     char *backup = found;
10326                     indent_len = 0;
10327
10328                     /* Only valid if it's preceded by whitespace only */
10329                     while (backup != s && --backup >= s) {
10330                         if (! SPACE_OR_TAB(*backup)) {
10331                             break;
10332                         }
10333                         indent_len++;
10334                     }
10335
10336                     /* All whitespace or none! */
10337                     if (backup == found || SPACE_OR_TAB(*backup)) {
10338                         Newx(indent, indent_len + 1, char);
10339                         memcpy(indent, backup, indent_len);
10340                         indent[indent_len] = 0;
10341                         SvREFCNT_dec(PL_linestr);
10342                         PL_linestr = linestr_save;
10343                         PL_linestart = SvPVX(linestr_save);
10344                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10345                         PL_oldbufptr = oldbufptr_save;
10346                         PL_oldoldbufptr = oldoldbufptr_save;
10347                         s = d;
10348                         break;
10349                     }
10350                 }
10351
10352                 /* Didn't find it */
10353                 sv_catsv(tmpstr,PL_linestr);
10354             }
10355             else {
10356                 if (*s == term && PL_bufend-s >= len
10357                     && memEQ(s,PL_tokenbuf + 1,len))
10358                 {
10359                     SvREFCNT_dec(PL_linestr);
10360                     PL_linestr = linestr_save;
10361                     PL_linestart = SvPVX(linestr_save);
10362                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10363                     PL_oldbufptr = oldbufptr_save;
10364                     PL_oldoldbufptr = oldoldbufptr_save;
10365                     s = d;
10366                     break;
10367                 }
10368                 else {
10369                     sv_catsv(tmpstr,PL_linestr);
10370                 }
10371             }
10372         } /* while (1) */
10373     }
10374
10375     PL_multi_end = origline + PL_parser->herelines;
10376
10377     if (indented && indent) {
10378         STRLEN linecount = 1;
10379         STRLEN herelen = SvCUR(tmpstr);
10380         char *ss = SvPVX(tmpstr);
10381         char *se = ss + herelen;
10382         SV *newstr = newSV(herelen+1);
10383         SvPOK_on(newstr);
10384
10385         /* Trim leading whitespace */
10386         while (ss < se) {
10387             /* newline only? Copy and move on */
10388             if (*ss == '\n') {
10389                 sv_catpvs(newstr,"\n");
10390                 ss++;
10391                 linecount++;
10392
10393             /* Found our indentation? Strip it */
10394             }
10395             else if (se - ss >= indent_len
10396                        && memEQ(ss, indent, indent_len))
10397             {
10398                 STRLEN le = 0;
10399                 ss += indent_len;
10400
10401                 while ((ss + le) < se && *(ss + le) != '\n')
10402                     le++;
10403
10404                 sv_catpvn(newstr, ss, le);
10405                 ss += le;
10406
10407             /* Line doesn't begin with our indentation? Croak */
10408             }
10409             else {
10410                 Safefree(indent);
10411                 Perl_croak(aTHX_
10412                     "Indentation on line %d of here-doc doesn't match delimiter",
10413                     (int)linecount
10414                 );
10415             }
10416         } /* while */
10417
10418         /* avoid sv_setsv() as we dont wan't to COW here */
10419         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10420         Safefree(indent);
10421         SvREFCNT_dec_NN(newstr);
10422     }
10423
10424     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10425         SvPV_shrink_to_cur(tmpstr);
10426     }
10427
10428     if (!IN_BYTES) {
10429         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10430             SvUTF8_on(tmpstr);
10431     }
10432
10433     PL_lex_stuff = tmpstr;
10434     pl_yylval.ival = op_type;
10435     return s;
10436
10437   interminable:
10438     if (indent)
10439         Safefree(indent);
10440     SvREFCNT_dec(tmpstr);
10441     CopLINE_set(PL_curcop, origline);
10442     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10443 }
10444
10445
10446 /* scan_inputsymbol
10447    takes: position of first '<' in input buffer
10448    returns: position of first char following the matching '>' in
10449             input buffer
10450    side-effects: pl_yylval and lex_op are set.
10451
10452    This code handles:
10453
10454    <>           read from ARGV
10455    <<>>         read from ARGV without magic open
10456    <FH>         read from filehandle
10457    <pkg::FH>    read from package qualified filehandle
10458    <pkg'FH>     read from package qualified filehandle
10459    <$fh>        read from filehandle in $fh
10460    <*.h>        filename glob
10461
10462 */
10463
10464 STATIC char *
10465 S_scan_inputsymbol(pTHX_ char *start)
10466 {
10467     char *s = start;            /* current position in buffer */
10468     char *end;
10469     I32 len;
10470     bool nomagicopen = FALSE;
10471     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10472     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10473
10474     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10475
10476     end = (char *) memchr(s, '\n', PL_bufend - s);
10477     if (!end)
10478         end = PL_bufend;
10479     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10480         nomagicopen = TRUE;
10481         *d = '\0';
10482         len = 0;
10483         s += 3;
10484     }
10485     else
10486         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10487
10488     /* die if we didn't have space for the contents of the <>,
10489        or if it didn't end, or if we see a newline
10490     */
10491
10492     if (len >= (I32)sizeof PL_tokenbuf)
10493         Perl_croak(aTHX_ "Excessively long <> operator");
10494     if (s >= end)
10495         Perl_croak(aTHX_ "Unterminated <> operator");
10496
10497     s++;
10498
10499     /* check for <$fh>
10500        Remember, only scalar variables are interpreted as filehandles by
10501        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10502        treated as a glob() call.
10503        This code makes use of the fact that except for the $ at the front,
10504        a scalar variable and a filehandle look the same.
10505     */
10506     if (*d == '$' && d[1]) d++;
10507
10508     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10509     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10510         d += UTF ? UTF8SKIP(d) : 1;
10511     }
10512
10513     /* If we've tried to read what we allow filehandles to look like, and
10514        there's still text left, then it must be a glob() and not a getline.
10515        Use scan_str to pull out the stuff between the <> and treat it
10516        as nothing more than a string.
10517     */
10518
10519     if (d - PL_tokenbuf != len) {
10520         pl_yylval.ival = OP_GLOB;
10521         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10522         if (!s)
10523            Perl_croak(aTHX_ "Glob not terminated");
10524         return s;
10525     }
10526     else {
10527         bool readline_overriden = FALSE;
10528         GV *gv_readline;
10529         /* we're in a filehandle read situation */
10530         d = PL_tokenbuf;
10531
10532         /* turn <> into <ARGV> */
10533         if (!len)
10534             Copy("ARGV",d,5,char);
10535
10536         /* Check whether readline() is overriden */
10537         if ((gv_readline = gv_override("readline",8)))
10538             readline_overriden = TRUE;
10539
10540         /* if <$fh>, create the ops to turn the variable into a
10541            filehandle
10542         */
10543         if (*d == '$') {
10544             /* try to find it in the pad for this block, otherwise find
10545                add symbol table ops
10546             */
10547             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10548             if (tmp != NOT_IN_PAD) {
10549                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10550                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10551                     HEK * const stashname = HvNAME_HEK(stash);
10552                     SV * const sym = sv_2mortal(newSVhek(stashname));
10553                     sv_catpvs(sym, "::");
10554                     sv_catpv(sym, d+1);
10555                     d = SvPVX(sym);
10556                     goto intro_sym;
10557                 }
10558                 else {
10559                     OP * const o = newOP(OP_PADSV, 0);
10560                     o->op_targ = tmp;
10561                     PL_lex_op = readline_overriden
10562                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10563                                 op_append_elem(OP_LIST, o,
10564                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10565                         : newUNOP(OP_READLINE, 0, o);
10566                 }
10567             }
10568             else {
10569                 GV *gv;
10570                 ++d;
10571               intro_sym:
10572                 gv = gv_fetchpv(d,
10573                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10574                                 SVt_PV);
10575                 PL_lex_op = readline_overriden
10576                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10577                             op_append_elem(OP_LIST,
10578                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10579                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10580                     : newUNOP(OP_READLINE, 0,
10581                             newUNOP(OP_RV2SV, 0,
10582                                 newGVOP(OP_GV, 0, gv)));
10583             }
10584             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10585             pl_yylval.ival = OP_NULL;
10586         }
10587
10588         /* If it's none of the above, it must be a literal filehandle
10589            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10590         else {
10591             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10592             PL_lex_op = readline_overriden
10593                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10594                         op_append_elem(OP_LIST,
10595                             newGVOP(OP_GV, 0, gv),
10596                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10597                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10598             pl_yylval.ival = OP_NULL;
10599         }
10600     }
10601
10602     return s;
10603 }
10604
10605
10606 /* scan_str
10607    takes:
10608         start                   position in buffer
10609         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10610                                 only if they are of the open/close form
10611         keep_delims             preserve the delimiters around the string
10612         re_reparse              compiling a run-time /(?{})/:
10613                                    collapse // to /,  and skip encoding src
10614         delimp                  if non-null, this is set to the position of
10615                                 the closing delimiter, or just after it if
10616                                 the closing and opening delimiters differ
10617                                 (i.e., the opening delimiter of a substitu-
10618                                 tion replacement)
10619    returns: position to continue reading from buffer
10620    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10621         updates the read buffer.
10622
10623    This subroutine pulls a string out of the input.  It is called for:
10624         q               single quotes           q(literal text)
10625         '               single quotes           'literal text'
10626         qq              double quotes           qq(interpolate $here please)
10627         "               double quotes           "interpolate $here please"
10628         qx              backticks               qx(/bin/ls -l)
10629         `               backticks               `/bin/ls -l`
10630         qw              quote words             @EXPORT_OK = qw( func() $spam )
10631         m//             regexp match            m/this/
10632         s///            regexp substitute       s/this/that/
10633         tr///           string transliterate    tr/this/that/
10634         y///            string transliterate    y/this/that/
10635         ($*@)           sub prototypes          sub foo ($)
10636         (stuff)         sub attr parameters     sub foo : attr(stuff)
10637         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10638
10639    In most of these cases (all but <>, patterns and transliterate)
10640    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10641    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10642    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10643    calls scan_str().
10644
10645    It skips whitespace before the string starts, and treats the first
10646    character as the delimiter.  If the delimiter is one of ([{< then
10647    the corresponding "close" character )]}> is used as the closing
10648    delimiter.  It allows quoting of delimiters, and if the string has
10649    balanced delimiters ([{<>}]) it allows nesting.
10650
10651    On success, the SV with the resulting string is put into lex_stuff or,
10652    if that is already non-NULL, into lex_repl. The second case occurs only
10653    when parsing the RHS of the special constructs s/// and tr/// (y///).
10654    For convenience, the terminating delimiter character is stuffed into
10655    SvIVX of the SV.
10656 */
10657
10658 STATIC char *
10659 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10660                  char **delimp
10661     )
10662 {
10663     SV *sv;                     /* scalar value: string */
10664     const char *tmps;           /* temp string, used for delimiter matching */
10665     char *s = start;            /* current position in the buffer */
10666     char term;                  /* terminating character */
10667     char *to;                   /* current position in the sv's data */
10668     I32 brackets = 1;           /* bracket nesting level */
10669     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10670     IV termcode;                /* terminating char. code */
10671     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
10672     STRLEN termlen;             /* length of terminating string */
10673     line_t herelines;
10674
10675     /* The delimiters that have a mirror-image closing one */
10676     const char * opening_delims = "([{<";
10677     const char * closing_delims = ")]}>";
10678
10679     /* The only non-UTF character that isn't a stand alone grapheme is
10680      * white-space, hence can't be a delimiter. */
10681     const char * non_grapheme_msg = "Use of unassigned code point or"
10682                                     " non-standalone grapheme for a delimiter"
10683                                     " is not allowed";
10684     PERL_ARGS_ASSERT_SCAN_STR;
10685
10686     /* skip space before the delimiter */
10687     if (isSPACE(*s)) {
10688         s = skipspace(s);
10689     }
10690
10691     /* mark where we are, in case we need to report errors */
10692     CLINE;
10693
10694     /* after skipping whitespace, the next character is the terminator */
10695     term = *s;
10696     if (!UTF || UTF8_IS_INVARIANT(term)) {
10697         termcode = termstr[0] = term;
10698         termlen = 1;
10699     }
10700     else {
10701         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10702         if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
10703                                            (U8 *) s,
10704                                            (U8 *) PL_bufend,
10705                                                   termcode)))
10706         {
10707             yyerror(non_grapheme_msg);
10708         }
10709
10710         Copy(s, termstr, termlen, U8);
10711     }
10712
10713     /* mark where we are */
10714     PL_multi_start = CopLINE(PL_curcop);
10715     PL_multi_open = termcode;
10716     herelines = PL_parser->herelines;
10717
10718     /* If the delimiter has a mirror-image closing one, get it */
10719     if (term && (tmps = strchr(opening_delims, term))) {
10720         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10721     }
10722
10723     PL_multi_close = termcode;
10724
10725     if (PL_multi_open == PL_multi_close) {
10726         keep_bracketed_quoted = FALSE;
10727     }
10728
10729     /* create a new SV to hold the contents.  79 is the SV's initial length.
10730        What a random number. */
10731     sv = newSV_type(SVt_PVIV);
10732     SvGROW(sv, 80);
10733     SvIV_set(sv, termcode);
10734     (void)SvPOK_only(sv);               /* validate pointer */
10735
10736     /* move past delimiter and try to read a complete string */
10737     if (keep_delims)
10738         sv_catpvn(sv, s, termlen);
10739     s += termlen;
10740     for (;;) {
10741         /* extend sv if need be */
10742         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10743         /* set 'to' to the next character in the sv's string */
10744         to = SvPVX(sv)+SvCUR(sv);
10745
10746         /* if open delimiter is the close delimiter read unbridle */
10747         if (PL_multi_open == PL_multi_close) {
10748             for (; s < PL_bufend; s++,to++) {
10749                 /* embedded newlines increment the current line number */
10750                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10751                     COPLINE_INC_WITH_HERELINES;
10752                 /* handle quoted delimiters */
10753                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10754                     if (!keep_bracketed_quoted
10755                         && (s[1] == term
10756                             || (re_reparse && s[1] == '\\'))
10757                     )
10758                         s++;
10759                     else /* any other quotes are simply copied straight through */
10760                         *to++ = *s++;
10761                 }
10762                 /* terminate when run out of buffer (the for() condition), or
10763                    have found the terminator */
10764                 else if (*s == term) {  /* First byte of terminator matches */
10765                     if (termlen == 1)   /* If is the only byte, are done */
10766                         break;
10767
10768                     /* If the remainder of the terminator matches, also are
10769                      * done, after checking that is a separate grapheme */
10770                     if (   s + termlen <= PL_bufend
10771                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10772                     {
10773                         if (   UTF
10774                             && UNLIKELY(! _is_grapheme((U8 *) start,
10775                                                        (U8 *) s,
10776                                                        (U8 *) PL_bufend,
10777                                                               termcode)))
10778                         {
10779                             yyerror(non_grapheme_msg);
10780                         }
10781                         break;
10782                     }
10783                 }
10784                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10785                     has_utf8 = TRUE;
10786                 }
10787
10788                 *to = *s;
10789             }
10790         }
10791
10792         /* if the terminator isn't the same as the start character (e.g.,
10793            matched brackets), we have to allow more in the quoting, and
10794            be prepared for nested brackets.
10795         */
10796         else {
10797             /* read until we run out of string, or we find the terminator */
10798             for (; s < PL_bufend; s++,to++) {
10799                 /* embedded newlines increment the line count */
10800                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10801                     COPLINE_INC_WITH_HERELINES;
10802                 /* backslashes can escape the open or closing characters */
10803                 if (*s == '\\' && s+1 < PL_bufend) {
10804                     if (!keep_bracketed_quoted
10805                        && ( ((UV)s[1] == PL_multi_open)
10806                          || ((UV)s[1] == PL_multi_close) ))
10807                     {
10808                         s++;
10809                     }
10810                     else
10811                         *to++ = *s++;
10812                 }
10813                 /* allow nested opens and closes */
10814                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10815                     break;
10816                 else if ((UV)*s == PL_multi_open)
10817                     brackets++;
10818                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10819                     has_utf8 = TRUE;
10820                 *to = *s;
10821             }
10822         }
10823         /* terminate the copied string and update the sv's end-of-string */
10824         *to = '\0';
10825         SvCUR_set(sv, to - SvPVX_const(sv));
10826
10827         /*
10828          * this next chunk reads more into the buffer if we're not done yet
10829          */
10830
10831         if (s < PL_bufend)
10832             break;              /* handle case where we are done yet :-) */
10833
10834 #ifndef PERL_STRICT_CR
10835         if (to - SvPVX_const(sv) >= 2) {
10836             if (   (to[-2] == '\r' && to[-1] == '\n')
10837                 || (to[-2] == '\n' && to[-1] == '\r'))
10838             {
10839                 to[-2] = '\n';
10840                 to--;
10841                 SvCUR_set(sv, to - SvPVX_const(sv));
10842             }
10843             else if (to[-1] == '\r')
10844                 to[-1] = '\n';
10845         }
10846         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10847             to[-1] = '\n';
10848 #endif
10849
10850         /* if we're out of file, or a read fails, bail and reset the current
10851            line marker so we can report where the unterminated string began
10852         */
10853         COPLINE_INC_WITH_HERELINES;
10854         PL_bufptr = PL_bufend;
10855         if (!lex_next_chunk(0)) {
10856             sv_free(sv);
10857             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10858             return NULL;
10859         }
10860         s = start = PL_bufptr;
10861     }
10862
10863     /* at this point, we have successfully read the delimited string */
10864
10865     if (keep_delims)
10866             sv_catpvn(sv, s, termlen);
10867     s += termlen;
10868
10869     if (has_utf8)
10870         SvUTF8_on(sv);
10871
10872     PL_multi_end = CopLINE(PL_curcop);
10873     CopLINE_set(PL_curcop, PL_multi_start);
10874     PL_parser->herelines = herelines;
10875
10876     /* if we allocated too much space, give some back */
10877     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10878         SvLEN_set(sv, SvCUR(sv) + 1);
10879         SvPV_renew(sv, SvLEN(sv));
10880     }
10881
10882     /* decide whether this is the first or second quoted string we've read
10883        for this op
10884     */
10885
10886     if (PL_lex_stuff)
10887         PL_parser->lex_sub_repl = sv;
10888     else
10889         PL_lex_stuff = sv;
10890     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10891     return s;
10892 }
10893
10894 /*
10895   scan_num
10896   takes: pointer to position in buffer
10897   returns: pointer to new position in buffer
10898   side-effects: builds ops for the constant in pl_yylval.op
10899
10900   Read a number in any of the formats that Perl accepts:
10901
10902   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10903   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10904   0b[01](_?[01])*                                       binary integers
10905   0[0-7](_?[0-7])*                                      octal integers
10906   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10907   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10908
10909   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10910   thing it reads.
10911
10912   If it reads a number without a decimal point or an exponent, it will
10913   try converting the number to an integer and see if it can do so
10914   without loss of precision.
10915 */
10916
10917 char *
10918 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10919 {
10920     const char *s = start;      /* current position in buffer */
10921     char *d;                    /* destination in temp buffer */
10922     char *e;                    /* end of temp buffer */
10923     NV nv;                              /* number read, as a double */
10924     SV *sv = NULL;                      /* place to put the converted number */
10925     bool floatit;                       /* boolean: int or float? */
10926     const char *lastub = NULL;          /* position of last underbar */
10927     static const char* const number_too_long = "Number too long";
10928     bool warned_about_underscore = 0;
10929 #define WARN_ABOUT_UNDERSCORE() \
10930         do { \
10931             if (!warned_about_underscore) { \
10932                 warned_about_underscore = 1; \
10933                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10934                                "Misplaced _ in number"); \
10935             } \
10936         } while(0)
10937     /* Hexadecimal floating point.
10938      *
10939      * In many places (where we have quads and NV is IEEE 754 double)
10940      * we can fit the mantissa bits of a NV into an unsigned quad.
10941      * (Note that UVs might not be quads even when we have quads.)
10942      * This will not work everywhere, though (either no quads, or
10943      * using long doubles), in which case we have to resort to NV,
10944      * which will probably mean horrible loss of precision due to
10945      * multiple fp operations. */
10946     bool hexfp = FALSE;
10947     int total_bits = 0;
10948     int significant_bits = 0;
10949 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10950 #  define HEXFP_UQUAD
10951     Uquad_t hexfp_uquad = 0;
10952     int hexfp_frac_bits = 0;
10953 #else
10954 #  define HEXFP_NV
10955     NV hexfp_nv = 0.0;
10956 #endif
10957     NV hexfp_mult = 1.0;
10958     UV high_non_zero = 0; /* highest digit */
10959     int non_zero_integer_digits = 0;
10960
10961     PERL_ARGS_ASSERT_SCAN_NUM;
10962
10963     /* We use the first character to decide what type of number this is */
10964
10965     switch (*s) {
10966     default:
10967         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10968
10969     /* if it starts with a 0, it could be an octal number, a decimal in
10970        0.13 disguise, or a hexadecimal number, or a binary number. */
10971     case '0':
10972         {
10973           /* variables:
10974              u          holds the "number so far"
10975              shift      the power of 2 of the base
10976                         (hex == 4, octal == 3, binary == 1)
10977              overflowed was the number more than we can hold?
10978
10979              Shift is used when we add a digit.  It also serves as an "are
10980              we in octal/hex/binary?" indicator to disallow hex characters
10981              when in octal mode.
10982            */
10983             NV n = 0.0;
10984             UV u = 0;
10985             I32 shift;
10986             bool overflowed = FALSE;
10987             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10988             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10989             static const char* const bases[5] =
10990               { "", "binary", "", "octal", "hexadecimal" };
10991             static const char* const Bases[5] =
10992               { "", "Binary", "", "Octal", "Hexadecimal" };
10993             static const char* const maxima[5] =
10994               { "",
10995                 "0b11111111111111111111111111111111",
10996                 "",
10997                 "037777777777",
10998                 "0xffffffff" };
10999             const char *base, *Base, *max;
11000
11001             /* check for hex */
11002             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11003                 shift = 4;
11004                 s += 2;
11005                 just_zero = FALSE;
11006             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11007                 shift = 1;
11008                 s += 2;
11009                 just_zero = FALSE;
11010             }
11011             /* check for a decimal in disguise */
11012             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11013                 goto decimal;
11014             /* so it must be octal */
11015             else {
11016                 shift = 3;
11017                 s++;
11018             }
11019
11020             if (*s == '_') {
11021                 WARN_ABOUT_UNDERSCORE();
11022                lastub = s++;
11023             }
11024
11025             base = bases[shift];
11026             Base = Bases[shift];
11027             max  = maxima[shift];
11028
11029             /* read the rest of the number */
11030             for (;;) {
11031                 /* x is used in the overflow test,
11032                    b is the digit we're adding on. */
11033                 UV x, b;
11034
11035                 switch (*s) {
11036
11037                 /* if we don't mention it, we're done */
11038                 default:
11039                     goto out;
11040
11041                 /* _ are ignored -- but warned about if consecutive */
11042                 case '_':
11043                     if (lastub && s == lastub + 1)
11044                         WARN_ABOUT_UNDERSCORE();
11045                     lastub = s++;
11046                     break;
11047
11048                 /* 8 and 9 are not octal */
11049                 case '8': case '9':
11050                     if (shift == 3)
11051                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11052                     /* FALLTHROUGH */
11053
11054                 /* octal digits */
11055                 case '2': case '3': case '4':
11056                 case '5': case '6': case '7':
11057                     if (shift == 1)
11058                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11059                     /* FALLTHROUGH */
11060
11061                 case '0': case '1':
11062                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11063                     goto digit;
11064
11065                 /* hex digits */
11066                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11067                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11068                     /* make sure they said 0x */
11069                     if (shift != 4)
11070                         goto out;
11071                     b = (*s++ & 7) + 9;
11072
11073                     /* Prepare to put the digit we have onto the end
11074                        of the number so far.  We check for overflows.
11075                     */
11076
11077                   digit:
11078                     just_zero = FALSE;
11079                     if (!overflowed) {
11080                         assert(shift >= 0);
11081                         x = u << shift; /* make room for the digit */
11082
11083                         total_bits += shift;
11084
11085                         if ((x >> shift) != u
11086                             && !(PL_hints & HINT_NEW_BINARY)) {
11087                             overflowed = TRUE;
11088                             n = (NV) u;
11089                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11090                                              "Integer overflow in %s number",
11091                                              base);
11092                         } else
11093                             u = x | b;          /* add the digit to the end */
11094                     }
11095                     if (overflowed) {
11096                         n *= nvshift[shift];
11097                         /* If an NV has not enough bits in its
11098                          * mantissa to represent an UV this summing of
11099                          * small low-order numbers is a waste of time
11100                          * (because the NV cannot preserve the
11101                          * low-order bits anyway): we could just
11102                          * remember when did we overflow and in the
11103                          * end just multiply n by the right
11104                          * amount. */
11105                         n += (NV) b;
11106                     }
11107
11108                     if (high_non_zero == 0 && b > 0)
11109                         high_non_zero = b;
11110
11111                     if (high_non_zero)
11112                         non_zero_integer_digits++;
11113
11114                     /* this could be hexfp, but peek ahead
11115                      * to avoid matching ".." */
11116                     if (UNLIKELY(HEXFP_PEEK(s))) {
11117                         goto out;
11118                     }
11119
11120                     break;
11121                 }
11122             }
11123
11124           /* if we get here, we had success: make a scalar value from
11125              the number.
11126           */
11127           out:
11128
11129             /* final misplaced underbar check */
11130             if (s[-1] == '_')
11131                 WARN_ABOUT_UNDERSCORE();
11132
11133             if (UNLIKELY(HEXFP_PEEK(s))) {
11134                 /* Do sloppy (on the underbars) but quick detection
11135                  * (and value construction) for hexfp, the decimal
11136                  * detection will shortly be more thorough with the
11137                  * underbar checks. */
11138                 const char* h = s;
11139                 significant_bits = non_zero_integer_digits * shift;
11140 #ifdef HEXFP_UQUAD
11141                 hexfp_uquad = u;
11142 #else /* HEXFP_NV */
11143                 hexfp_nv = u;
11144 #endif
11145                 /* Ignore the leading zero bits of
11146                  * the high (first) non-zero digit. */
11147                 if (high_non_zero) {
11148                     if (high_non_zero < 0x8)
11149                         significant_bits--;
11150                     if (high_non_zero < 0x4)
11151                         significant_bits--;
11152                     if (high_non_zero < 0x2)
11153                         significant_bits--;
11154                 }
11155
11156                 if (*h == '.') {
11157 #ifdef HEXFP_NV
11158                     NV nv_mult = 1.0;
11159 #endif
11160                     bool accumulate = TRUE;
11161                     U8 b;
11162                     int lim = 1 << shift;
11163                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11164                                *h == '_'); h++) {
11165                         if (isXDIGIT(*h)) {
11166                             significant_bits += shift;
11167 #ifdef HEXFP_UQUAD
11168                             if (accumulate) {
11169                                 if (significant_bits < NV_MANT_DIG) {
11170                                     /* We are in the long "run" of xdigits,
11171                                      * accumulate the full four bits. */
11172                                     assert(shift >= 0);
11173                                     hexfp_uquad <<= shift;
11174                                     hexfp_uquad |= b;
11175                                     hexfp_frac_bits += shift;
11176                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11177                                     /* We are at a hexdigit either at,
11178                                      * or straddling, the edge of mantissa.
11179                                      * We will try grabbing as many as
11180                                      * possible bits. */
11181                                     int tail =
11182                                       significant_bits - NV_MANT_DIG;
11183                                     if (tail <= 0)
11184                                        tail += shift;
11185                                     assert(tail >= 0);
11186                                     hexfp_uquad <<= tail;
11187                                     assert((shift - tail) >= 0);
11188                                     hexfp_uquad |= b >> (shift - tail);
11189                                     hexfp_frac_bits += tail;
11190
11191                                     /* Ignore the trailing zero bits
11192                                      * of the last non-zero xdigit.
11193                                      *
11194                                      * The assumption here is that if
11195                                      * one has input of e.g. the xdigit
11196                                      * eight (0x8), there is only one
11197                                      * bit being input, not the full
11198                                      * four bits.  Conversely, if one
11199                                      * specifies a zero xdigit, the
11200                                      * assumption is that one really
11201                                      * wants all those bits to be zero. */
11202                                     if (b) {
11203                                         if ((b & 0x1) == 0x0) {
11204                                             significant_bits--;
11205                                             if ((b & 0x2) == 0x0) {
11206                                                 significant_bits--;
11207                                                 if ((b & 0x4) == 0x0) {
11208                                                     significant_bits--;
11209                                                 }
11210                                             }
11211                                         }
11212                                     }
11213
11214                                     accumulate = FALSE;
11215                                 }
11216                             } else {
11217                                 /* Keep skipping the xdigits, and
11218                                  * accumulating the significant bits,
11219                                  * but do not shift the uquad
11220                                  * (which would catastrophically drop
11221                                  * high-order bits) or accumulate the
11222                                  * xdigits anymore. */
11223                             }
11224 #else /* HEXFP_NV */
11225                             if (accumulate) {
11226                                 nv_mult /= nvshift[shift];
11227                                 if (nv_mult > 0.0)
11228                                     hexfp_nv += b * nv_mult;
11229                                 else
11230                                     accumulate = FALSE;
11231                             }
11232 #endif
11233                         }
11234                         if (significant_bits >= NV_MANT_DIG)
11235                             accumulate = FALSE;
11236                     }
11237                 }
11238
11239                 if ((total_bits > 0 || significant_bits > 0) &&
11240                     isALPHA_FOLD_EQ(*h, 'p')) {
11241                     bool negexp = FALSE;
11242                     h++;
11243                     if (*h == '+')
11244                         h++;
11245                     else if (*h == '-') {
11246                         negexp = TRUE;
11247                         h++;
11248                     }
11249                     if (isDIGIT(*h)) {
11250                         I32 hexfp_exp = 0;
11251                         while (isDIGIT(*h) || *h == '_') {
11252                             if (isDIGIT(*h)) {
11253                                 hexfp_exp *= 10;
11254                                 hexfp_exp += *h - '0';
11255 #ifdef NV_MIN_EXP
11256                                 if (negexp
11257                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11258                                     /* NOTE: this means that the exponent
11259                                      * underflow warning happens for
11260                                      * the IEEE 754 subnormals (denormals),
11261                                      * because DBL_MIN_EXP etc are the lowest
11262                                      * possible binary (or, rather, DBL_RADIX-base)
11263                                      * exponent for normals, not subnormals.
11264                                      *
11265                                      * This may or may not be a good thing. */
11266                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11267                                                    "Hexadecimal float: exponent underflow");
11268                                     break;
11269                                 }
11270 #endif
11271 #ifdef NV_MAX_EXP
11272                                 if (!negexp
11273                                     && hexfp_exp > NV_MAX_EXP - 1) {
11274                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11275                                                    "Hexadecimal float: exponent overflow");
11276                                     break;
11277                                 }
11278 #endif
11279                             }
11280                             h++;
11281                         }
11282                         if (negexp)
11283                             hexfp_exp = -hexfp_exp;
11284 #ifdef HEXFP_UQUAD
11285                         hexfp_exp -= hexfp_frac_bits;
11286 #endif
11287                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11288                         hexfp = TRUE;
11289                         goto decimal;
11290                     }
11291                 }
11292             }
11293
11294             if (overflowed) {
11295                 if (n > 4294967295.0)
11296                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11297                                    "%s number > %s non-portable",
11298                                    Base, max);
11299                 sv = newSVnv(n);
11300             }
11301             else {
11302 #if UVSIZE > 4
11303                 if (u > 0xffffffff)
11304                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11305                                    "%s number > %s non-portable",
11306                                    Base, max);
11307 #endif
11308                 sv = newSVuv(u);
11309             }
11310             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11311                 sv = new_constant(start, s - start, "integer",
11312                                   sv, NULL, NULL, 0, NULL);
11313             else if (PL_hints & HINT_NEW_BINARY)
11314                 sv = new_constant(start, s - start, "binary",
11315                                   sv, NULL, NULL, 0, NULL);
11316         }
11317         break;
11318
11319     /*
11320       handle decimal numbers.
11321       we're also sent here when we read a 0 as the first digit
11322     */
11323     case '1': case '2': case '3': case '4': case '5':
11324     case '6': case '7': case '8': case '9': case '.':
11325       decimal:
11326         d = PL_tokenbuf;
11327         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11328         floatit = FALSE;
11329         if (hexfp) {
11330             floatit = TRUE;
11331             *d++ = '0';
11332             *d++ = 'x';
11333             s = start + 2;
11334         }
11335
11336         /* read next group of digits and _ and copy into d */
11337         while (isDIGIT(*s)
11338                || *s == '_'
11339                || UNLIKELY(hexfp && isXDIGIT(*s)))
11340         {
11341             /* skip underscores, checking for misplaced ones
11342                if -w is on
11343             */
11344             if (*s == '_') {
11345                 if (lastub && s == lastub + 1)
11346                     WARN_ABOUT_UNDERSCORE();
11347                 lastub = s++;
11348             }
11349             else {
11350                 /* check for end of fixed-length buffer */
11351                 if (d >= e)
11352                     Perl_croak(aTHX_ "%s", number_too_long);
11353                 /* if we're ok, copy the character */
11354                 *d++ = *s++;
11355             }
11356         }
11357
11358         /* final misplaced underbar check */
11359         if (lastub && s == lastub + 1)
11360             WARN_ABOUT_UNDERSCORE();
11361
11362         /* read a decimal portion if there is one.  avoid
11363            3..5 being interpreted as the number 3. followed
11364            by .5
11365         */
11366         if (*s == '.' && s[1] != '.') {
11367             floatit = TRUE;
11368             *d++ = *s++;
11369
11370             if (*s == '_') {
11371                 WARN_ABOUT_UNDERSCORE();
11372                 lastub = s;
11373             }
11374
11375             /* copy, ignoring underbars, until we run out of digits.
11376             */
11377             for (; isDIGIT(*s)
11378                    || *s == '_'
11379                    || UNLIKELY(hexfp && isXDIGIT(*s));
11380                  s++)
11381             {
11382                 /* fixed length buffer check */
11383                 if (d >= e)
11384                     Perl_croak(aTHX_ "%s", number_too_long);
11385                 if (*s == '_') {
11386                    if (lastub && s == lastub + 1)
11387                         WARN_ABOUT_UNDERSCORE();
11388                    lastub = s;
11389                 }
11390                 else
11391                     *d++ = *s;
11392             }
11393             /* fractional part ending in underbar? */
11394             if (s[-1] == '_')
11395                 WARN_ABOUT_UNDERSCORE();
11396             if (*s == '.' && isDIGIT(s[1])) {
11397                 /* oops, it's really a v-string, but without the "v" */
11398                 s = start;
11399                 goto vstring;
11400             }
11401         }
11402
11403         /* read exponent part, if present */
11404         if ((isALPHA_FOLD_EQ(*s, 'e')
11405               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11406             && strchr("+-0123456789_", s[1]))
11407         {
11408             int exp_digits = 0;
11409             const char *save_s = s;
11410             char * save_d = d;
11411
11412             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11413                ditto for p (hexfloats) */
11414             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11415                 /* At least some Mach atof()s don't grok 'E' */
11416                 *d++ = 'e';
11417             }
11418             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11419                 *d++ = 'p';
11420             }
11421
11422             s++;
11423
11424
11425             /* stray preinitial _ */
11426             if (*s == '_') {
11427                 WARN_ABOUT_UNDERSCORE();
11428                 lastub = s++;
11429             }
11430
11431             /* allow positive or negative exponent */
11432             if (*s == '+' || *s == '-')
11433                 *d++ = *s++;
11434
11435             /* stray initial _ */
11436             if (*s == '_') {
11437                 WARN_ABOUT_UNDERSCORE();
11438                 lastub = s++;
11439             }
11440
11441             /* read digits of exponent */
11442             while (isDIGIT(*s) || *s == '_') {
11443                 if (isDIGIT(*s)) {
11444                     ++exp_digits;
11445                     if (d >= e)
11446                         Perl_croak(aTHX_ "%s", number_too_long);
11447                     *d++ = *s++;
11448                 }
11449                 else {
11450                    if (((lastub && s == lastub + 1)
11451                         || (!isDIGIT(s[1]) && s[1] != '_')))
11452                         WARN_ABOUT_UNDERSCORE();
11453                    lastub = s++;
11454                 }
11455             }
11456
11457             if (!exp_digits) {
11458                 /* no exponent digits, the [eEpP] could be for something else,
11459                  * though in practice we don't get here for p since that's preparsed
11460                  * earlier, and results in only the 0xX being consumed, so behave similarly
11461                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11462                  * next token.
11463                  */
11464                 s = save_s;
11465                 d = save_d;
11466             }
11467             else {
11468                 floatit = TRUE;
11469             }
11470         }
11471
11472
11473         /*
11474            We try to do an integer conversion first if no characters
11475            indicating "float" have been found.
11476          */
11477
11478         if (!floatit) {
11479             UV uv;
11480             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11481
11482             if (flags == IS_NUMBER_IN_UV) {
11483               if (uv <= IV_MAX)
11484                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11485               else
11486                 sv = newSVuv(uv);
11487             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11488               if (uv <= (UV) IV_MIN)
11489                 sv = newSViv(-(IV)uv);
11490               else
11491                 floatit = TRUE;
11492             } else
11493               floatit = TRUE;
11494         }
11495         if (floatit) {
11496             /* terminate the string */
11497             *d = '\0';
11498             if (UNLIKELY(hexfp)) {
11499 #  ifdef NV_MANT_DIG
11500                 if (significant_bits > NV_MANT_DIG)
11501                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11502                                    "Hexadecimal float: mantissa overflow");
11503 #  endif
11504 #ifdef HEXFP_UQUAD
11505                 nv = hexfp_uquad * hexfp_mult;
11506 #else /* HEXFP_NV */
11507                 nv = hexfp_nv * hexfp_mult;
11508 #endif
11509             } else {
11510                 nv = Atof(PL_tokenbuf);
11511             }
11512             sv = newSVnv(nv);
11513         }
11514
11515         if ( floatit
11516              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11517             const char *const key = floatit ? "float" : "integer";
11518             const STRLEN keylen = floatit ? 5 : 7;
11519             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11520                                 key, keylen, sv, NULL, NULL, 0, NULL);
11521         }
11522         break;
11523
11524     /* if it starts with a v, it could be a v-string */
11525     case 'v':
11526     vstring:
11527                 sv = newSV(5); /* preallocate storage space */
11528                 ENTER_with_name("scan_vstring");
11529                 SAVEFREESV(sv);
11530                 s = scan_vstring(s, PL_bufend, sv);
11531                 SvREFCNT_inc_simple_void_NN(sv);
11532                 LEAVE_with_name("scan_vstring");
11533         break;
11534     }
11535
11536     /* make the op for the constant and return */
11537
11538     if (sv)
11539         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11540     else
11541         lvalp->opval = NULL;
11542
11543     return (char *)s;
11544 }
11545
11546 STATIC char *
11547 S_scan_formline(pTHX_ char *s)
11548 {
11549     SV * const stuff = newSVpvs("");
11550     bool needargs = FALSE;
11551     bool eofmt = FALSE;
11552
11553     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11554
11555     while (!needargs) {
11556         char *eol;
11557         if (*s == '.') {
11558             char *t = s+1;
11559 #ifdef PERL_STRICT_CR
11560             while (SPACE_OR_TAB(*t))
11561                 t++;
11562 #else
11563             while (SPACE_OR_TAB(*t) || *t == '\r')
11564                 t++;
11565 #endif
11566             if (*t == '\n' || t == PL_bufend) {
11567                 eofmt = TRUE;
11568                 break;
11569             }
11570         }
11571         eol = (char *) memchr(s,'\n',PL_bufend-s);
11572         if (!eol++)
11573                 eol = PL_bufend;
11574         if (*s != '#') {
11575             char *t;
11576             for (t = s; t < eol; t++) {
11577                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11578                     needargs = FALSE;
11579                     goto enough;        /* ~~ must be first line in formline */
11580                 }
11581                 if (*t == '@' || *t == '^')
11582                     needargs = TRUE;
11583             }
11584             if (eol > s) {
11585                 sv_catpvn(stuff, s, eol-s);
11586 #ifndef PERL_STRICT_CR
11587                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11588                     char *end = SvPVX(stuff) + SvCUR(stuff);
11589                     end[-2] = '\n';
11590                     end[-1] = '\0';
11591                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11592                 }
11593 #endif
11594             }
11595             else
11596               break;
11597         }
11598         s = (char*)eol;
11599         if ((PL_rsfp || PL_parser->filtered)
11600          && PL_parser->form_lex_state == LEX_NORMAL) {
11601             bool got_some;
11602             PL_bufptr = PL_bufend;
11603             COPLINE_INC_WITH_HERELINES;
11604             got_some = lex_next_chunk(0);
11605             CopLINE_dec(PL_curcop);
11606             s = PL_bufptr;
11607             if (!got_some)
11608                 break;
11609         }
11610         incline(s, PL_bufend);
11611     }
11612   enough:
11613     if (!SvCUR(stuff) || needargs)
11614         PL_lex_state = PL_parser->form_lex_state;
11615     if (SvCUR(stuff)) {
11616         PL_expect = XSTATE;
11617         if (needargs) {
11618             const char *s2 = s;
11619             while (isSPACE(*s2) && *s2 != '\n')
11620                 s2++;
11621             if (*s2 == '{') {
11622                 PL_expect = XTERMBLOCK;
11623                 NEXTVAL_NEXTTOKE.ival = 0;
11624                 force_next(DO);
11625             }
11626             NEXTVAL_NEXTTOKE.ival = 0;
11627             force_next(FORMLBRACK);
11628         }
11629         if (!IN_BYTES) {
11630             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11631                 SvUTF8_on(stuff);
11632         }
11633         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11634         force_next(THING);
11635     }
11636     else {
11637         SvREFCNT_dec(stuff);
11638         if (eofmt)
11639             PL_lex_formbrack = 0;
11640     }
11641     return s;
11642 }
11643
11644 I32
11645 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11646 {
11647     const I32 oldsavestack_ix = PL_savestack_ix;
11648     CV* const outsidecv = PL_compcv;
11649
11650     SAVEI32(PL_subline);
11651     save_item(PL_subname);
11652     SAVESPTR(PL_compcv);
11653
11654     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11655     CvFLAGS(PL_compcv) |= flags;
11656
11657     PL_subline = CopLINE(PL_curcop);
11658     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11659     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11660     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11661     if (outsidecv && CvPADLIST(outsidecv))
11662         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11663
11664     return oldsavestack_ix;
11665 }
11666
11667
11668 /* Do extra initialisation of a CV (typically one just created by
11669  * start_subparse()) if that CV is for a named sub
11670  */
11671
11672 void
11673 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
11674 {
11675     PERL_ARGS_ASSERT_INIT_NAMED_CV;
11676
11677     if (nameop->op_type == OP_CONST) {
11678         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
11679         if (   strEQ(name, "BEGIN")
11680             || strEQ(name, "END")
11681             || strEQ(name, "INIT")
11682             || strEQ(name, "CHECK")
11683             || strEQ(name, "UNITCHECK")
11684         )
11685           CvSPECIAL_on(cv);
11686     }
11687     else
11688     /* State subs inside anonymous subs need to be
11689      clonable themselves. */
11690     if (   CvANON(CvOUTSIDE(cv))
11691         || CvCLONE(CvOUTSIDE(cv))
11692         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
11693                         CvOUTSIDE(cv)
11694                      ))[nameop->op_targ])
11695     )
11696       CvCLONE_on(cv);
11697 }
11698
11699
11700 static int
11701 S_yywarn(pTHX_ const char *const s, U32 flags)
11702 {
11703     PERL_ARGS_ASSERT_YYWARN;
11704
11705     PL_in_eval |= EVAL_WARNONLY;
11706     yyerror_pv(s, flags);
11707     return 0;
11708 }
11709
11710 void
11711 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11712 {
11713     PERL_ARGS_ASSERT_ABORT_EXECUTION;
11714
11715     if (PL_minus_c)
11716         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11717     else {
11718         Perl_croak(aTHX_
11719                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11720     }
11721     NOT_REACHED; /* NOTREACHED */
11722 }
11723
11724 void
11725 Perl_yyquit(pTHX)
11726 {
11727     /* Called, after at least one error has been found, to abort the parse now,
11728      * instead of trying to forge ahead */
11729
11730     yyerror_pvn(NULL, 0, 0);
11731 }
11732
11733 int
11734 Perl_yyerror(pTHX_ const char *const s)
11735 {
11736     PERL_ARGS_ASSERT_YYERROR;
11737     return yyerror_pvn(s, strlen(s), 0);
11738 }
11739
11740 int
11741 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11742 {
11743     PERL_ARGS_ASSERT_YYERROR_PV;
11744     return yyerror_pvn(s, strlen(s), flags);
11745 }
11746
11747 int
11748 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11749 {
11750     const char *context = NULL;
11751     int contlen = -1;
11752     SV *msg;
11753     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11754     int yychar  = PL_parser->yychar;
11755
11756     /* Output error message 's' with length 'len'.  'flags' are SV flags that
11757      * apply.  If the number of errors found is large enough, it abandons
11758      * parsing.  If 's' is NULL, there is no message, and it abandons
11759      * processing unconditionally */
11760
11761     if (s != NULL) {
11762         if (!yychar || (yychar == ';' && !PL_rsfp))
11763             sv_catpvs(where_sv, "at EOF");
11764         else if (   PL_oldoldbufptr
11765                  && PL_bufptr > PL_oldoldbufptr
11766                  && PL_bufptr - PL_oldoldbufptr < 200
11767                  && PL_oldoldbufptr != PL_oldbufptr
11768                  && PL_oldbufptr != PL_bufptr)
11769         {
11770             /*
11771                     Only for NetWare:
11772                     The code below is removed for NetWare because it
11773                     abends/crashes on NetWare when the script has error such as
11774                     not having the closing quotes like:
11775                         if ($var eq "value)
11776                     Checking of white spaces is anyway done in NetWare code.
11777             */
11778 #ifndef NETWARE
11779             while (isSPACE(*PL_oldoldbufptr))
11780                 PL_oldoldbufptr++;
11781 #endif
11782             context = PL_oldoldbufptr;
11783             contlen = PL_bufptr - PL_oldoldbufptr;
11784         }
11785         else if (  PL_oldbufptr
11786                 && PL_bufptr > PL_oldbufptr
11787                 && PL_bufptr - PL_oldbufptr < 200
11788                 && PL_oldbufptr != PL_bufptr) {
11789             /*
11790                     Only for NetWare:
11791                     The code below is removed for NetWare because it
11792                     abends/crashes on NetWare when the script has error such as
11793                     not having the closing quotes like:
11794                         if ($var eq "value)
11795                     Checking of white spaces is anyway done in NetWare code.
11796             */
11797 #ifndef NETWARE
11798             while (isSPACE(*PL_oldbufptr))
11799                 PL_oldbufptr++;
11800 #endif
11801             context = PL_oldbufptr;
11802             contlen = PL_bufptr - PL_oldbufptr;
11803         }
11804         else if (yychar > 255)
11805             sv_catpvs(where_sv, "next token ???");
11806         else if (yychar == YYEMPTY) {
11807             if (PL_lex_state == LEX_NORMAL)
11808                 sv_catpvs(where_sv, "at end of line");
11809             else if (PL_lex_inpat)
11810                 sv_catpvs(where_sv, "within pattern");
11811             else
11812                 sv_catpvs(where_sv, "within string");
11813         }
11814         else {
11815             sv_catpvs(where_sv, "next char ");
11816             if (yychar < 32)
11817                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11818             else if (isPRINT_LC(yychar)) {
11819                 const char string = yychar;
11820                 sv_catpvn(where_sv, &string, 1);
11821             }
11822             else
11823                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11824         }
11825         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11826         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11827             OutCopFILE(PL_curcop),
11828             (IV)(PL_parser->preambling == NOLINE
11829                    ? CopLINE(PL_curcop)
11830                    : PL_parser->preambling));
11831         if (context)
11832             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11833                                  UTF8fARG(UTF, contlen, context));
11834         else
11835             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11836         if (   PL_multi_start < PL_multi_end
11837             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11838         {
11839             Perl_sv_catpvf(aTHX_ msg,
11840             "  (Might be a runaway multi-line %c%c string starting on"
11841             " line %" IVdf ")\n",
11842                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11843             PL_multi_end = 0;
11844         }
11845         if (PL_in_eval & EVAL_WARNONLY) {
11846             PL_in_eval &= ~EVAL_WARNONLY;
11847             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11848         }
11849         else {
11850             qerror(msg);
11851         }
11852     }
11853     if (s == NULL || PL_error_count >= 10) {
11854         const char * msg = "";
11855         const char * const name = OutCopFILE(PL_curcop);
11856
11857         if (PL_in_eval) {
11858             SV * errsv = ERRSV;
11859             if (SvCUR(errsv)) {
11860                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11861             }
11862         }
11863
11864         if (s == NULL) {
11865             abort_execution(msg, name);
11866         }
11867         else {
11868             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11869         }
11870     }
11871     PL_in_my = 0;
11872     PL_in_my_stash = NULL;
11873     return 0;
11874 }
11875
11876 STATIC char*
11877 S_swallow_bom(pTHX_ U8 *s)
11878 {
11879     const STRLEN slen = SvCUR(PL_linestr);
11880
11881     PERL_ARGS_ASSERT_SWALLOW_BOM;
11882
11883     switch (s[0]) {
11884     case 0xFF:
11885         if (s[1] == 0xFE) {
11886             /* UTF-16 little-endian? (or UTF-32LE?) */
11887             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11888                 /* diag_listed_as: Unsupported script encoding %s */
11889                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11890 #ifndef PERL_NO_UTF16_FILTER
11891 #ifdef DEBUGGING
11892             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11893 #endif
11894             s += 2;
11895             if (PL_bufend > (char*)s) {
11896                 s = add_utf16_textfilter(s, TRUE);
11897             }
11898 #else
11899             /* diag_listed_as: Unsupported script encoding %s */
11900             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11901 #endif
11902         }
11903         break;
11904     case 0xFE:
11905         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11906 #ifndef PERL_NO_UTF16_FILTER
11907 #ifdef DEBUGGING
11908             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11909 #endif
11910             s += 2;
11911             if (PL_bufend > (char *)s) {
11912                 s = add_utf16_textfilter(s, FALSE);
11913             }
11914 #else
11915             /* diag_listed_as: Unsupported script encoding %s */
11916             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11917 #endif
11918         }
11919         break;
11920     case BOM_UTF8_FIRST_BYTE: {
11921         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
11922 #ifdef DEBUGGING
11923             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11924 #endif
11925             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
11926         }
11927         break;
11928     }
11929     case 0:
11930         if (slen > 3) {
11931              if (s[1] == 0) {
11932                   if (s[2] == 0xFE && s[3] == 0xFF) {
11933                        /* UTF-32 big-endian */
11934                        /* diag_listed_as: Unsupported script encoding %s */
11935                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11936                   }
11937              }
11938              else if (s[2] == 0 && s[3] != 0) {
11939                   /* Leading bytes
11940                    * 00 xx 00 xx
11941                    * are a good indicator of UTF-16BE. */
11942 #ifndef PERL_NO_UTF16_FILTER
11943 #ifdef DEBUGGING
11944                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11945 #endif
11946                   s = add_utf16_textfilter(s, FALSE);
11947 #else
11948                   /* diag_listed_as: Unsupported script encoding %s */
11949                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11950 #endif
11951              }
11952         }
11953         break;
11954
11955     default:
11956          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11957                   /* Leading bytes
11958                    * xx 00 xx 00
11959                    * are a good indicator of UTF-16LE. */
11960 #ifndef PERL_NO_UTF16_FILTER
11961 #ifdef DEBUGGING
11962               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11963 #endif
11964               s = add_utf16_textfilter(s, TRUE);
11965 #else
11966               /* diag_listed_as: Unsupported script encoding %s */
11967               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11968 #endif
11969          }
11970     }
11971     return (char*)s;
11972 }
11973
11974
11975 #ifndef PERL_NO_UTF16_FILTER
11976 static I32
11977 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11978 {
11979     SV *const filter = FILTER_DATA(idx);
11980     /* We re-use this each time round, throwing the contents away before we
11981        return.  */
11982     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11983     SV *const utf8_buffer = filter;
11984     IV status = IoPAGE(filter);
11985     const bool reverse = cBOOL(IoLINES(filter));
11986     I32 retval;
11987
11988     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11989
11990     /* As we're automatically added, at the lowest level, and hence only called
11991        from this file, we can be sure that we're not called in block mode. Hence
11992        don't bother writing code to deal with block mode.  */
11993     if (maxlen) {
11994         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11995     }
11996     if (status < 0) {
11997         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11998     }
11999     DEBUG_P(PerlIO_printf(Perl_debug_log,
12000                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12001                           FPTR2DPTR(void *, S_utf16_textfilter),
12002                           reverse ? 'l' : 'b', idx, maxlen, status,
12003                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12004
12005     while (1) {
12006         STRLEN chars;
12007         STRLEN have;
12008         I32 newlen;
12009         U8 *end;
12010         /* First, look in our buffer of existing UTF-8 data:  */
12011         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12012
12013         if (nl) {
12014             ++nl;
12015         } else if (status == 0) {
12016             /* EOF */
12017             IoPAGE(filter) = 0;
12018             nl = SvEND(utf8_buffer);
12019         }
12020         if (nl) {
12021             STRLEN got = nl - SvPVX(utf8_buffer);
12022             /* Did we have anything to append?  */
12023             retval = got != 0;
12024             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12025             /* Everything else in this code works just fine if SVp_POK isn't
12026                set.  This, however, needs it, and we need it to work, else
12027                we loop infinitely because the buffer is never consumed.  */
12028             sv_chop(utf8_buffer, nl);
12029             break;
12030         }
12031
12032         /* OK, not a complete line there, so need to read some more UTF-16.
12033            Read an extra octect if the buffer currently has an odd number. */
12034         while (1) {
12035             if (status <= 0)
12036                 break;
12037             if (SvCUR(utf16_buffer) >= 2) {
12038                 /* Location of the high octet of the last complete code point.
12039                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12040                    *coupled* with all the benefits of partial reads and
12041                    endianness.  */
12042                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12043                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12044
12045                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12046                     break;
12047                 }
12048
12049                 /* We have the first half of a surrogate. Read more.  */
12050                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12051             }
12052
12053             status = FILTER_READ(idx + 1, utf16_buffer,
12054                                  160 + (SvCUR(utf16_buffer) & 1));
12055             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12056             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12057             if (status < 0) {
12058                 /* Error */
12059                 IoPAGE(filter) = status;
12060                 return status;
12061             }
12062         }
12063
12064         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12065          * require 4 bytes per char */
12066         chars = SvCUR(utf16_buffer) >> 1;
12067         have = SvCUR(utf8_buffer);
12068
12069         /* Assume the worst case size as noted by the functions: twice the
12070          * number of input bytes */
12071         SvGROW(utf8_buffer, have + chars * 4 + 1);
12072
12073         if (reverse) {
12074             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12075                                          (U8*)SvPVX_const(utf8_buffer) + have,
12076                                          chars * 2, &newlen);
12077         } else {
12078             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12079                                 (U8*)SvPVX_const(utf8_buffer) + have,
12080                                 chars * 2, &newlen);
12081         }
12082         SvCUR_set(utf8_buffer, have + newlen);
12083         *end = '\0';
12084
12085         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12086            it's private to us, and utf16_to_utf8{,reversed} take a
12087            (pointer,length) pair, rather than a NUL-terminated string.  */
12088         if(SvCUR(utf16_buffer) & 1) {
12089             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12090             SvCUR_set(utf16_buffer, 1);
12091         } else {
12092             SvCUR_set(utf16_buffer, 0);
12093         }
12094     }
12095     DEBUG_P(PerlIO_printf(Perl_debug_log,
12096                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12097                           status,
12098                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12099     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12100     return retval;
12101 }
12102
12103 static U8 *
12104 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12105 {
12106     SV *filter = filter_add(S_utf16_textfilter, NULL);
12107
12108     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12109
12110     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12111     SvPVCLEAR(filter);
12112     IoLINES(filter) = reversed;
12113     IoPAGE(filter) = 1; /* Not EOF */
12114
12115     /* Sadly, we have to return a valid pointer, come what may, so we have to
12116        ignore any error return from this.  */
12117     SvCUR_set(PL_linestr, 0);
12118     if (FILTER_READ(0, PL_linestr, 0)) {
12119         SvUTF8_on(PL_linestr);
12120     } else {
12121         SvUTF8_on(PL_linestr);
12122     }
12123     PL_bufend = SvEND(PL_linestr);
12124     return (U8*)SvPVX(PL_linestr);
12125 }
12126 #endif
12127
12128 /*
12129 Returns a pointer to the next character after the parsed
12130 vstring, as well as updating the passed in sv.
12131
12132 Function must be called like
12133
12134         sv = sv_2mortal(newSV(5));
12135         s = scan_vstring(s,e,sv);
12136
12137 where s and e are the start and end of the string.
12138 The sv should already be large enough to store the vstring
12139 passed in, for performance reasons.
12140
12141 This function may croak if fatal warnings are enabled in the
12142 calling scope, hence the sv_2mortal in the example (to prevent
12143 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12144 sv_2mortal.
12145
12146 */
12147
12148 char *
12149 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12150 {
12151     const char *pos = s;
12152     const char *start = s;
12153
12154     PERL_ARGS_ASSERT_SCAN_VSTRING;
12155
12156     if (*pos == 'v') pos++;  /* get past 'v' */
12157     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12158         pos++;
12159     if ( *pos != '.') {
12160         /* this may not be a v-string if followed by => */
12161         const char *next = pos;
12162         while (next < e && isSPACE(*next))
12163             ++next;
12164         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12165             /* return string not v-string */
12166             sv_setpvn(sv,(char *)s,pos-s);
12167             return (char *)pos;
12168         }
12169     }
12170
12171     if (!isALPHA(*pos)) {
12172         U8 tmpbuf[UTF8_MAXBYTES+1];
12173
12174         if (*s == 'v')
12175             s++;  /* get past 'v' */
12176
12177         SvPVCLEAR(sv);
12178
12179         for (;;) {
12180             /* this is atoi() that tolerates underscores */
12181             U8 *tmpend;
12182             UV rev = 0;
12183             const char *end = pos;
12184             UV mult = 1;
12185             while (--end >= s) {
12186                 if (*end != '_') {
12187                     const UV orev = rev;
12188                     rev += (*end - '0') * mult;
12189                     mult *= 10;
12190                     if (orev > rev)
12191                         /* diag_listed_as: Integer overflow in %s number */
12192                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12193                                          "Integer overflow in decimal number");
12194                 }
12195             }
12196
12197             /* Append native character for the rev point */
12198             tmpend = uvchr_to_utf8(tmpbuf, rev);
12199             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12200             if (!UVCHR_IS_INVARIANT(rev))
12201                  SvUTF8_on(sv);
12202             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12203                  s = ++pos;
12204             else {
12205                  s = pos;
12206                  break;
12207             }
12208             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12209                  pos++;
12210         }
12211         SvPOK_on(sv);
12212         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12213         SvRMAGICAL_on(sv);
12214     }
12215     return (char *)s;
12216 }
12217
12218 int
12219 Perl_keyword_plugin_standard(pTHX_
12220         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12221 {
12222     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12223     PERL_UNUSED_CONTEXT;
12224     PERL_UNUSED_ARG(keyword_ptr);
12225     PERL_UNUSED_ARG(keyword_len);
12226     PERL_UNUSED_ARG(op_ptr);
12227     return KEYWORD_PLUGIN_DECLINE;
12228 }
12229
12230 /*
12231 =for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
12232
12233 Puts a C function into the chain of keyword plugins.  This is the
12234 preferred way to manipulate the L</PL_keyword_plugin> variable.
12235 C<new_plugin> is a pointer to the C function that is to be added to the
12236 keyword plugin chain, and C<old_plugin_p> points to the storage location
12237 where a pointer to the next function in the chain will be stored.  The
12238 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12239 while the value previously stored there is written to C<*old_plugin_p>.
12240
12241 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12242 to hook keyword parsing may find itself invoked more than once per
12243 process, typically in different threads.  To handle that situation, this
12244 function is idempotent.  The location C<*old_plugin_p> must initially
12245 (once per process) contain a null pointer.  A C variable of static
12246 duration (declared at file scope, typically also marked C<static> to give
12247 it internal linkage) will be implicitly initialised appropriately, if it
12248 does not have an explicit initialiser.  This function will only actually
12249 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12250 function is also thread safe on the small scale.  It uses appropriate
12251 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12252
12253 When this function is called, the function referenced by C<new_plugin>
12254 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12255 In a threading situation, C<new_plugin> may be called immediately, even
12256 before this function has returned.  C<*old_plugin_p> will always be
12257 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12258 decides not to do anything special with the identifier that it is given
12259 (which is the usual case for most calls to a keyword plugin), it must
12260 chain the plugin function referenced by C<*old_plugin_p>.
12261
12262 Taken all together, XS code to install a keyword plugin should typically
12263 look something like this:
12264
12265     static Perl_keyword_plugin_t next_keyword_plugin;
12266     static OP *my_keyword_plugin(pTHX_
12267         char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12268     {
12269         if (memEQs(keyword_ptr, keyword_len,
12270                    "my_new_keyword")) {
12271             ...
12272         } else {
12273             return next_keyword_plugin(aTHX_
12274                 keyword_ptr, keyword_len, op_ptr);
12275         }
12276     }
12277     BOOT:
12278         wrap_keyword_plugin(my_keyword_plugin,
12279                             &next_keyword_plugin);
12280
12281 Direct access to L</PL_keyword_plugin> should be avoided.
12282
12283 =cut
12284 */
12285
12286 void
12287 Perl_wrap_keyword_plugin(pTHX_
12288     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12289 {
12290     dVAR;
12291
12292     PERL_UNUSED_CONTEXT;
12293     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12294     if (*old_plugin_p) return;
12295     KEYWORD_PLUGIN_MUTEX_LOCK;
12296     if (!*old_plugin_p) {
12297         *old_plugin_p = PL_keyword_plugin;
12298         PL_keyword_plugin = new_plugin;
12299     }
12300     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12301 }
12302
12303 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12304 static void
12305 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12306 {
12307     SAVEI32(PL_lex_brackets);
12308     if (PL_lex_brackets > 100)
12309         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12310     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12311     SAVEI32(PL_lex_allbrackets);
12312     PL_lex_allbrackets = 0;
12313     SAVEI8(PL_lex_fakeeof);
12314     PL_lex_fakeeof = (U8)fakeeof;
12315     if(yyparse(gramtype) && !PL_parser->error_count)
12316         qerror(Perl_mess(aTHX_ "Parse error"));
12317 }
12318
12319 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12320 static OP *
12321 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12322 {
12323     OP *o;
12324     ENTER;
12325     SAVEVPTR(PL_eval_root);
12326     PL_eval_root = NULL;
12327     parse_recdescent(gramtype, fakeeof);
12328     o = PL_eval_root;
12329     LEAVE;
12330     return o;
12331 }
12332
12333 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12334 static OP *
12335 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12336 {
12337     OP *exprop;
12338     if (flags & ~PARSE_OPTIONAL)
12339         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12340     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12341     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12342         if (!PL_parser->error_count)
12343             qerror(Perl_mess(aTHX_ "Parse error"));
12344         exprop = newOP(OP_NULL, 0);
12345     }
12346     return exprop;
12347 }
12348
12349 /*
12350 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12351
12352 Parse a Perl arithmetic expression.  This may contain operators of precedence
12353 down to the bit shift operators.  The expression must be followed (and thus
12354 terminated) either by a comparison or lower-precedence operator or by
12355 something that would normally terminate an expression such as semicolon.
12356 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12357 otherwise it is mandatory.  It is up to the caller to ensure that the
12358 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12359 the source of the code to be parsed and the lexical context for the
12360 expression.
12361
12362 The op tree representing the expression is returned.  If an optional
12363 expression is absent, a null pointer is returned, otherwise the pointer
12364 will be non-null.
12365
12366 If an error occurs in parsing or compilation, in most cases a valid op
12367 tree is returned anyway.  The error is reflected in the parser state,
12368 normally resulting in a single exception at the top level of parsing
12369 which covers all the compilation errors that occurred.  Some compilation
12370 errors, however, will throw an exception immediately.
12371
12372 =cut
12373 */
12374
12375 OP *
12376 Perl_parse_arithexpr(pTHX_ U32 flags)
12377 {
12378     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12379 }
12380
12381 /*
12382 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12383
12384 Parse a Perl term expression.  This may contain operators of precedence
12385 down to the assignment operators.  The expression must be followed (and thus
12386 terminated) either by a comma or lower-precedence operator or by
12387 something that would normally terminate an expression such as semicolon.
12388 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12389 otherwise it is mandatory.  It is up to the caller to ensure that the
12390 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12391 the source of the code to be parsed and the lexical context for the
12392 expression.
12393
12394 The op tree representing the expression is returned.  If an optional
12395 expression is absent, a null pointer is returned, otherwise the pointer
12396 will be non-null.
12397
12398 If an error occurs in parsing or compilation, in most cases a valid op
12399 tree is returned anyway.  The error is reflected in the parser state,
12400 normally resulting in a single exception at the top level of parsing
12401 which covers all the compilation errors that occurred.  Some compilation
12402 errors, however, will throw an exception immediately.
12403
12404 =cut
12405 */
12406
12407 OP *
12408 Perl_parse_termexpr(pTHX_ U32 flags)
12409 {
12410     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12411 }
12412
12413 /*
12414 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12415
12416 Parse a Perl list expression.  This may contain operators of precedence
12417 down to the comma operator.  The expression must be followed (and thus
12418 terminated) either by a low-precedence logic operator such as C<or> or by
12419 something that would normally terminate an expression such as semicolon.
12420 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12421 otherwise it is mandatory.  It is up to the caller to ensure that the
12422 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12423 the source of the code to be parsed and the lexical context for the
12424 expression.
12425
12426 The op tree representing the expression is returned.  If an optional
12427 expression is absent, a null pointer is returned, otherwise the pointer
12428 will be non-null.
12429
12430 If an error occurs in parsing or compilation, in most cases a valid op
12431 tree is returned anyway.  The error is reflected in the parser state,
12432 normally resulting in a single exception at the top level of parsing
12433 which covers all the compilation errors that occurred.  Some compilation
12434 errors, however, will throw an exception immediately.
12435
12436 =cut
12437 */
12438
12439 OP *
12440 Perl_parse_listexpr(pTHX_ U32 flags)
12441 {
12442     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12443 }
12444
12445 /*
12446 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12447
12448 Parse a single complete Perl expression.  This allows the full
12449 expression grammar, including the lowest-precedence operators such
12450 as C<or>.  The expression must be followed (and thus terminated) by a
12451 token that an expression would normally be terminated by: end-of-file,
12452 closing bracketing punctuation, semicolon, or one of the keywords that
12453 signals a postfix expression-statement modifier.  If C<flags> has the
12454 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12455 mandatory.  It is up to the caller to ensure that the dynamic parser
12456 state (L</PL_parser> et al) is correctly set to reflect the source of
12457 the code to be parsed and the lexical context for the expression.
12458
12459 The op tree representing the expression is returned.  If an optional
12460 expression is absent, a null pointer is returned, otherwise the pointer
12461 will be non-null.
12462
12463 If an error occurs in parsing or compilation, in most cases a valid op
12464 tree is returned anyway.  The error is reflected in the parser state,
12465 normally resulting in a single exception at the top level of parsing
12466 which covers all the compilation errors that occurred.  Some compilation
12467 errors, however, will throw an exception immediately.
12468
12469 =cut
12470 */
12471
12472 OP *
12473 Perl_parse_fullexpr(pTHX_ U32 flags)
12474 {
12475     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12476 }
12477
12478 /*
12479 =for apidoc Amx|OP *|parse_block|U32 flags
12480
12481 Parse a single complete Perl code block.  This consists of an opening
12482 brace, a sequence of statements, and a closing brace.  The block
12483 constitutes a lexical scope, so C<my> variables and various compile-time
12484 effects can be contained within it.  It is up to the caller to ensure
12485 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12486 reflect the source of the code to be parsed and the lexical context for
12487 the statement.
12488
12489 The op tree representing the code block is returned.  This is always a
12490 real op, never a null pointer.  It will normally be a C<lineseq> list,
12491 including C<nextstate> or equivalent ops.  No ops to construct any kind
12492 of runtime scope are included by virtue of it being a block.
12493
12494 If an error occurs in parsing or compilation, in most cases a valid op
12495 tree (most likely null) is returned anyway.  The error is reflected in
12496 the parser state, normally resulting in a single exception at the top
12497 level of parsing which covers all the compilation errors that occurred.
12498 Some compilation errors, however, will throw an exception immediately.
12499
12500 The C<flags> parameter is reserved for future use, and must always
12501 be zero.
12502
12503 =cut
12504 */
12505
12506 OP *
12507 Perl_parse_block(pTHX_ U32 flags)
12508 {
12509     if (flags)
12510         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12511     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12512 }
12513
12514 /*
12515 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12516
12517 Parse a single unadorned Perl statement.  This may be a normal imperative
12518 statement or a declaration that has compile-time effect.  It does not
12519 include any label or other affixture.  It is up to the caller to ensure
12520 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12521 reflect the source of the code to be parsed and the lexical context for
12522 the statement.
12523
12524 The op tree representing the statement is returned.  This may be a
12525 null pointer if the statement is null, for example if it was actually
12526 a subroutine definition (which has compile-time side effects).  If not
12527 null, it will be ops directly implementing the statement, suitable to
12528 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12529 equivalent op (except for those embedded in a scope contained entirely
12530 within the statement).
12531
12532 If an error occurs in parsing or compilation, in most cases a valid op
12533 tree (most likely null) is returned anyway.  The error is reflected in
12534 the parser state, normally resulting in a single exception at the top
12535 level of parsing which covers all the compilation errors that occurred.
12536 Some compilation errors, however, will throw an exception immediately.
12537
12538 The C<flags> parameter is reserved for future use, and must always
12539 be zero.
12540
12541 =cut
12542 */
12543
12544 OP *
12545 Perl_parse_barestmt(pTHX_ U32 flags)
12546 {
12547     if (flags)
12548         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12549     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12550 }
12551
12552 /*
12553 =for apidoc Amx|SV *|parse_label|U32 flags
12554
12555 Parse a single label, possibly optional, of the type that may prefix a
12556 Perl statement.  It is up to the caller to ensure that the dynamic parser
12557 state (L</PL_parser> et al) is correctly set to reflect the source of
12558 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12559 label is optional, otherwise it is mandatory.
12560
12561 The name of the label is returned in the form of a fresh scalar.  If an
12562 optional label is absent, a null pointer is returned.
12563
12564 If an error occurs in parsing, which can only occur if the label is
12565 mandatory, a valid label is returned anyway.  The error is reflected in
12566 the parser state, normally resulting in a single exception at the top
12567 level of parsing which covers all the compilation errors that occurred.
12568
12569 =cut
12570 */
12571
12572 SV *
12573 Perl_parse_label(pTHX_ U32 flags)
12574 {
12575     if (flags & ~PARSE_OPTIONAL)
12576         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12577     if (PL_nexttoke) {
12578         PL_parser->yychar = yylex();
12579         if (PL_parser->yychar == LABEL) {
12580             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
12581             PL_parser->yychar = YYEMPTY;
12582             cSVOPx(pl_yylval.opval)->op_sv = NULL;
12583             op_free(pl_yylval.opval);
12584             return labelsv;
12585         } else {
12586             yyunlex();
12587             goto no_label;
12588         }
12589     } else {
12590         char *s, *t;
12591         STRLEN wlen, bufptr_pos;
12592         lex_read_space(0);
12593         t = s = PL_bufptr;
12594         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12595             goto no_label;
12596         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12597         if (word_takes_any_delimiter(s, wlen))
12598             goto no_label;
12599         bufptr_pos = s - SvPVX(PL_linestr);
12600         PL_bufptr = t;
12601         lex_read_space(LEX_KEEP_PREVIOUS);
12602         t = PL_bufptr;
12603         s = SvPVX(PL_linestr) + bufptr_pos;
12604         if (t[0] == ':' && t[1] != ':') {
12605             PL_oldoldbufptr = PL_oldbufptr;
12606             PL_oldbufptr = s;
12607             PL_bufptr = t+1;
12608             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12609         } else {
12610             PL_bufptr = s;
12611             no_label:
12612             if (flags & PARSE_OPTIONAL) {
12613                 return NULL;
12614             } else {
12615                 qerror(Perl_mess(aTHX_ "Parse error"));
12616                 return newSVpvs("x");
12617             }
12618         }
12619     }
12620 }
12621
12622 /*
12623 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12624
12625 Parse a single complete Perl statement.  This may be a normal imperative
12626 statement or a declaration that has compile-time effect, and may include
12627 optional labels.  It is up to the caller to ensure that the dynamic
12628 parser state (L</PL_parser> et al) is correctly set to reflect the source
12629 of the code to be parsed and the lexical context for the statement.
12630
12631 The op tree representing the statement is returned.  This may be a
12632 null pointer if the statement is null, for example if it was actually
12633 a subroutine definition (which has compile-time side effects).  If not
12634 null, it will be the result of a L</newSTATEOP> call, normally including
12635 a C<nextstate> or equivalent op.
12636
12637 If an error occurs in parsing or compilation, in most cases a valid op
12638 tree (most likely null) is returned anyway.  The error is reflected in
12639 the parser state, normally resulting in a single exception at the top
12640 level of parsing which covers all the compilation errors that occurred.
12641 Some compilation errors, however, will throw an exception immediately.
12642
12643 The C<flags> parameter is reserved for future use, and must always
12644 be zero.
12645
12646 =cut
12647 */
12648
12649 OP *
12650 Perl_parse_fullstmt(pTHX_ U32 flags)
12651 {
12652     if (flags)
12653         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12654     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12655 }
12656
12657 /*
12658 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12659
12660 Parse a sequence of zero or more Perl statements.  These may be normal
12661 imperative statements, including optional labels, or declarations
12662 that have compile-time effect, or any mixture thereof.  The statement
12663 sequence ends when a closing brace or end-of-file is encountered in a
12664 place where a new statement could have validly started.  It is up to
12665 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12666 is correctly set to reflect the source of the code to be parsed and the
12667 lexical context for the statements.
12668
12669 The op tree representing the statement sequence is returned.  This may
12670 be a null pointer if the statements were all null, for example if there
12671 were no statements or if there were only subroutine definitions (which
12672 have compile-time side effects).  If not null, it will be a C<lineseq>
12673 list, normally including C<nextstate> or equivalent ops.
12674
12675 If an error occurs in parsing or compilation, in most cases a valid op
12676 tree is returned anyway.  The error is reflected in the parser state,
12677 normally resulting in a single exception at the top level of parsing
12678 which covers all the compilation errors that occurred.  Some compilation
12679 errors, however, will throw an exception immediately.
12680
12681 The C<flags> parameter is reserved for future use, and must always
12682 be zero.
12683
12684 =cut
12685 */
12686
12687 OP *
12688 Perl_parse_stmtseq(pTHX_ U32 flags)
12689 {
12690     OP *stmtseqop;
12691     I32 c;
12692     if (flags)
12693         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12694     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12695     c = lex_peek_unichar(0);
12696     if (c != -1 && c != /*{*/'}')
12697         qerror(Perl_mess(aTHX_ "Parse error"));
12698     return stmtseqop;
12699 }
12700
12701 /*
12702  * ex: set ts=8 sts=4 sw=4 et:
12703  */