toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_static.c"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_defer            (PL_parser->lex_defer)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_expect           (PL_parser->lex_expect)
  58 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  59 #define PL_lex_inpat            (PL_parser->lex_inpat)
  60 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  61 #define PL_lex_op               (PL_parser->lex_op)
  62 #define PL_lex_repl             (PL_parser->lex_repl)
  63 #define PL_lex_starts           (PL_parser->lex_starts)
  64 #define PL_lex_stuff            (PL_parser->lex_stuff)
  65 #define PL_multi_start          (PL_parser->multi_start)
  66 #define PL_multi_open           (PL_parser->multi_open)
  67 #define PL_multi_close          (PL_parser->multi_close)
  68 #define PL_preambled            (PL_parser->preambled)
  69 #define PL_sublex_info          (PL_parser->sublex_info)
  70 #define PL_linestr              (PL_parser->linestr)
  71 #define PL_expect               (PL_parser->expect)
  72 #define PL_copline              (PL_parser->copline)
  73 #define PL_bufptr               (PL_parser->bufptr)
  74 #define PL_oldbufptr            (PL_parser->oldbufptr)
  75 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  76 #define PL_linestart            (PL_parser->linestart)
  77 #define PL_bufend               (PL_parser->bufend)
  78 #define PL_last_uni             (PL_parser->last_uni)
  79 #define PL_last_lop             (PL_parser->last_lop)
  80 #define PL_last_lop_op          (PL_parser->last_lop_op)
  81 #define PL_lex_state            (PL_parser->lex_state)
  82 #define PL_rsfp                 (PL_parser->rsfp)
  83 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  84 #define PL_in_my                (PL_parser->in_my)
  85 #define PL_in_my_stash          (PL_parser->in_my_stash)
  86 #define PL_tokenbuf             (PL_parser->tokenbuf)
  87 #define PL_multi_end            (PL_parser->multi_end)
  88 #define PL_error_count          (PL_parser->error_count)
  89
  90 #  define PL_nexttoke           (PL_parser->nexttoke)
  91 #  define PL_nexttype           (PL_parser->nexttype)
  92 #  define PL_nextval            (PL_parser->nextval)
  93
  94 static const char* const ident_too_long = "Identifier too long";
  95
  96 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  97
  98 #define XENUMMASK  0x3f
  99 #define XFAKEEOF   0x40
 100 #define XFAKEBRACK 0x80
 101
 102 #ifdef USE_UTF8_SCRIPTS
 103 #   define UTF (!IN_BYTES)
 104 #else
 105 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 106 #endif
 107
 108 /* The maximum number of characters preceding the unrecognized one to display */
 109 #define UNRECOGNIZED_PRECEDE_COUNT 10
 110
 111 /* In variables named $^X, these are the legal values for X.
 112  * 1999-02-27 mjd-perl-patch@plover.com */
 113 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 114
 115 #define SPACE_OR_TAB(c) isBLANK_A(c)
 116
 117 /* LEX_* are values for PL_lex_state, the state of the lexer.
 118  * They are arranged oddly so that the guard on the switch statement
 119  * can get by with a single comparison (if the compiler is smart enough).
 120  *
 121  * These values refer to the various states within a sublex parse,
 122  * i.e. within a double quotish string
 123  */
 124
 125 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 126
 127 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 128 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 129 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 130 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 131 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 132
 133                                    /* at end of code, eg "$x" followed by:  */
 134 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 135 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 136
 137 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 138                                         string or after \E, $foo, etc       */
 139 #define LEX_INTERPCONST          2 /* NOT USED */
 140 #define LEX_FORMLINE             1 /* expecting a format line               */
 141 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 142
 143
 144 #ifdef DEBUGGING
 145 static const char* const lex_state_names[] = {
 146     "KNOWNEXT",
 147     "FORMLINE",
 148     "INTERPCONST",
 149     "INTERPCONCAT",
 150     "INTERPENDMAYBE",
 151     "INTERPEND",
 152     "INTERPSTART",
 153     "INTERPPUSH",
 154     "INTERPCASEMOD",
 155     "INTERPNORMAL",
 156     "NORMAL"
 157 };
 158 #endif
 159
 160 #include "keywords.h"
 161
 162 /* CLINE is a macro that ensures PL_copline has a sane value */
 163
 164 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 165
 166 #  define SKIPSPACE0(s) skipspace(s)
 167 #  define SKIPSPACE1(s) skipspace(s)
 168 #  define SKIPSPACE2(s,tsv) skipspace(s)
 169 #  define PEEKSPACE(s) skipspace(s)
 170
 171 /*
 172  * Convenience functions to return different tokens and prime the
 173  * lexer for the next token.  They all take an argument.
 174  *
 175  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 176  * OPERATOR     : generic operator
 177  * AOPERATOR    : assignment operator
 178  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 179  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 180  * PREREF       : *EXPR where EXPR is not a simple identifier
 181  * TERM         : expression term
 182  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 183  * LOOPX        : loop exiting command (goto, last, dump, etc)
 184  * FTST         : file test operator
 185  * FUN0         : zero-argument function
 186  * FUN0OP       : zero-argument function, with its op created in this file
 187  * FUN1         : not used, except for not, which isn't a UNIOP
 188  * BOop         : bitwise or or xor
 189  * BAop         : bitwise and
 190  * SHop         : shift operator
 191  * PWop         : power operator
 192  * PMop         : pattern-matching operator
 193  * Aop          : addition-level operator
 194  * Mop          : multiplication-level operator
 195  * Eop          : equality-testing operator
 196  * Rop          : relational operator <= != gt
 197  *
 198  * Also see LOP and lop() below.
 199  */
 200
 201 #ifdef DEBUGGING /* Serve -DT. */
 202 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 203 #else
 204 #   define REPORT(retval) (retval)
 205 #endif
 206
 207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 214 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 215 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 216 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 217 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 218 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 219 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 220 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 221 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 222 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 223 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 224 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 225 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 226 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 227 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 228 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 229
 230 /* This bit of chicanery makes a unary function followed by
 231  * a parenthesis into a function with one argument, highest precedence.
 232  * The UNIDOR macro is for unary functions that can be followed by the //
 233  * operator (such as C<shift // 0>).
 234  */
 235 #define UNI3(f,x,have_x) { \
 236         pl_yylval.ival = f; \
 237         if (have_x) PL_expect = x; \
 238         PL_bufptr = s; \
 239         PL_last_uni = PL_oldbufptr; \
 240         PL_last_lop_op = f; \
 241         if (*s == '(') \
 242             return REPORT( (int)FUNC1 ); \
 243         s = PEEKSPACE(s); \
 244         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 245         }
 246 #define UNI(f)    UNI3(f,XTERM,1)
 247 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 248 #define UNIPROTO(f,optional) { \
 249         if (optional) PL_last_uni = PL_oldbufptr; \
 250         OPERATOR(f); \
 251         }
 252
 253 #define UNIBRACK(f) UNI3(f,0,0)
 254
 255 /* grandfather return to old style */
 256 #define OLDLOP(f) \
 257         do { \
 258             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 259                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 260             pl_yylval.ival = (f); \
 261             PL_expect = XTERM; \
 262             PL_bufptr = s; \
 263             return (int)LSTOP; \
 264         } while(0)
 265
 266 #define COPLINE_INC_WITH_HERELINES                  \
 267     STMT_START {                                     \
 268         CopLINE_inc(PL_curcop);                       \
 269         if (PL_parser->herelines)                      \
 270             CopLINE(PL_curcop) += PL_parser->herelines, \
 271             PL_parser->herelines = 0;                    \
 272     } STMT_END
 273 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 274  * is no sublex_push to follow. */
 275 #define COPLINE_SET_FROM_MULTI_END            \
 276     STMT_START {                               \
 277         CopLINE_set(PL_curcop, PL_multi_end);   \
 278         if (PL_multi_end != PL_multi_start)      \
 279             PL_parser->herelines = 0;             \
 280     } STMT_END
 281
 282
 283 #ifdef DEBUGGING
 284
 285 /* how to interpret the pl_yylval associated with the token */
 286 enum token_type {
 287     TOKENTYPE_NONE,
 288     TOKENTYPE_IVAL,
 289     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 290     TOKENTYPE_PVAL,
 291     TOKENTYPE_OPVAL
 292 };
 293
 294 static struct debug_tokens {
 295     const int token;
 296     enum token_type type;
 297     const char *name;
 298 } const debug_tokens[] =
 299 {
 300     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 301     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 302     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 303     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 304     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 305     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 306     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 307     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 308     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 309     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 310     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 311     { DO,               TOKENTYPE_NONE,         "DO" },
 312     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 313     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 314     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 315     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 316     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 317     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 318     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 319     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 320     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 321     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 322     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 323     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 324     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 325     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 326     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 327     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 328     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 329     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 330     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 331     { IF,               TOKENTYPE_IVAL,         "IF" },
 332     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 333     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 334     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 335     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 336     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 337     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 338     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 339     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 340     { MY,               TOKENTYPE_IVAL,         "MY" },
 341     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 342     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 343     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 344     { OROR,             TOKENTYPE_NONE,         "OROR" },
 345     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 346     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 347     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 348     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 349     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 350     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 351     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 352     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 353     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 354     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 355     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 356     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 357     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 358     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 359     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 360     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 361     { SUB,              TOKENTYPE_NONE,         "SUB" },
 362     { THING,            TOKENTYPE_OPVAL,        "THING" },
 363     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 364     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 365     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 366     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 367     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 368     { USE,              TOKENTYPE_IVAL,         "USE" },
 369     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 370     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 371     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 372     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 373     { 0,                TOKENTYPE_NONE,         NULL }
 374 };
 375
 376 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 377
 378 STATIC int
 379 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 380 {
 381     PERL_ARGS_ASSERT_TOKEREPORT;
 382
 383     if (DEBUG_T_TEST) {
 384         const char *name = NULL;
 385         enum token_type type = TOKENTYPE_NONE;
 386         const struct debug_tokens *p;
 387         SV* const report = newSVpvs("<== ");
 388
 389         for (p = debug_tokens; p->token; p++) {
 390             if (p->token == (int)rv) {
 391                 name = p->name;
 392                 type = p->type;
 393                 break;
 394             }
 395         }
 396         if (name)
 397             Perl_sv_catpv(aTHX_ report, name);
 398         else if ((char)rv > ' ' && (char)rv <= '~')
 399         {
 400             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 401             if ((char)rv == 'p')
 402                 sv_catpvs(report, " (pending identifier)");
 403         }
 404         else if (!rv)
 405             sv_catpvs(report, "EOF");
 406         else
 407             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 408         switch (type) {
 409         case TOKENTYPE_NONE:
 410             break;
 411         case TOKENTYPE_IVAL:
 412             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 413             break;
 414         case TOKENTYPE_OPNUM:
 415             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 416                                     PL_op_name[lvalp->ival]);
 417             break;
 418         case TOKENTYPE_PVAL:
 419             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 420             break;
 421         case TOKENTYPE_OPVAL:
 422             if (lvalp->opval) {
 423                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 424                                     PL_op_name[lvalp->opval->op_type]);
 425                 if (lvalp->opval->op_type == OP_CONST) {
 426                     Perl_sv_catpvf(aTHX_ report, " %s",
 427                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 428                 }
 429
 430             }
 431             else
 432                 sv_catpvs(report, "(opval=null)");
 433             break;
 434         }
 435         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 436     };
 437     return (int)rv;
 438 }
 439
 440
 441 /* print the buffer with suitable escapes */
 442
 443 STATIC void
 444 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 445 {
 446     SV* const tmp = newSVpvs("");
 447
 448     PERL_ARGS_ASSERT_PRINTBUF;
 449
 450     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 451     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 452     GCC_DIAG_RESTORE;
 453     SvREFCNT_dec(tmp);
 454 }
 455
 456 #endif
 457
 458 static int
 459 S_deprecate_commaless_var_list(pTHX) {
 460     PL_expect = XTERM;
 461     deprecate("comma-less variable list");
 462     return REPORT(','); /* grandfather non-comma-format format */
 463 }
 464
 465 /*
 466  * S_ao
 467  *
 468  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 469  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 470  */
 471
 472 STATIC int
 473 S_ao(pTHX_ int toketype)
 474 {
 475     if (*PL_bufptr == '=') {
 476         PL_bufptr++;
 477         if (toketype == ANDAND)
 478             pl_yylval.ival = OP_ANDASSIGN;
 479         else if (toketype == OROR)
 480             pl_yylval.ival = OP_ORASSIGN;
 481         else if (toketype == DORDOR)
 482             pl_yylval.ival = OP_DORASSIGN;
 483         toketype = ASSIGNOP;
 484     }
 485     return toketype;
 486 }
 487
 488 /*
 489  * S_no_op
 490  * When Perl expects an operator and finds something else, no_op
 491  * prints the warning.  It always prints "<something> found where
 492  * operator expected.  It prints "Missing semicolon on previous line?"
 493  * if the surprise occurs at the start of the line.  "do you need to
 494  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 495  * where the compiler doesn't know if foo is a method call or a function.
 496  * It prints "Missing operator before end of line" if there's nothing
 497  * after the missing operator, or "... before <...>" if there is something
 498  * after the missing operator.
 499  */
 500
 501 STATIC void
 502 S_no_op(pTHX_ const char *const what, char *s)
 503 {
 504     char * const oldbp = PL_bufptr;
 505     const bool is_first = (PL_oldbufptr == PL_linestart);
 506
 507     PERL_ARGS_ASSERT_NO_OP;
 508
 509     if (!s)
 510         s = oldbp;
 511     else
 512         PL_bufptr = s;
 513     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 514     if (ckWARN_d(WARN_SYNTAX)) {
 515         if (is_first)
 516             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 517                     "\t(Missing semicolon on previous line?)\n");
 518         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 519             const char *t;
 520             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 521                                                             t += UTF ? UTF8SKIP(t) : 1)
 522                 NOOP;
 523             if (t < PL_bufptr && isSPACE(*t))
 524                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 525                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 526                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 527         }
 528         else {
 529             assert(s >= oldbp);
 530             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 531                     "\t(Missing operator before %"UTF8f"?)\n",
 532                      UTF8fARG(UTF, s - oldbp, oldbp));
 533         }
 534     }
 535     PL_bufptr = oldbp;
 536 }
 537
 538 /*
 539  * S_missingterm
 540  * Complain about missing quote/regexp/heredoc terminator.
 541  * If it's called with NULL then it cauterizes the line buffer.
 542  * If we're in a delimited string and the delimiter is a control
 543  * character, it's reformatted into a two-char sequence like ^C.
 544  * This is fatal.
 545  */
 546
 547 STATIC void
 548 S_missingterm(pTHX_ char *s)
 549 {
 550     char tmpbuf[3];
 551     char q;
 552     if (s) {
 553         char * const nl = strrchr(s,'\n');
 554         if (nl)
 555             *nl = '\0';
 556     }
 557     else if ((U8) PL_multi_close < 32) {
 558         *tmpbuf = '^';
 559         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 560         tmpbuf[2] = '\0';
 561         s = tmpbuf;
 562     }
 563     else {
 564         *tmpbuf = (char)PL_multi_close;
 565         tmpbuf[1] = '\0';
 566         s = tmpbuf;
 567     }
 568     q = strchr(s,'"') ? '\'' : '"';
 569     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 570 }
 571
 572 #include "feature.h"
 573
 574 /*
 575  * Check whether the named feature is enabled.
 576  */
 577 bool
 578 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 579 {
 580     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 581
 582     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 583
 584     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 585
 586     if (namelen > MAX_FEATURE_LEN)
 587         return FALSE;
 588     memcpy(&he_name[8], name, namelen);
 589
 590     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 591                                      REFCOUNTED_HE_EXISTS));
 592 }
 593
 594 /*
 595  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 596  * utf16-to-utf8-reversed.
 597  */
 598
 599 #ifdef PERL_CR_FILTER
 600 static void
 601 strip_return(SV *sv)
 602 {
 603     const char *s = SvPVX_const(sv);
 604     const char * const e = s + SvCUR(sv);
 605
 606     PERL_ARGS_ASSERT_STRIP_RETURN;
 607
 608     /* outer loop optimized to do nothing if there are no CR-LFs */
 609     while (s < e) {
 610         if (*s++ == '\r' && *s == '\n') {
 611             /* hit a CR-LF, need to copy the rest */
 612             char *d = s - 1;
 613             *d++ = *s++;
 614             while (s < e) {
 615                 if (*s == '\r' && s[1] == '\n')
 616                     s++;
 617                 *d++ = *s++;
 618             }
 619             SvCUR(sv) -= s - d;
 620             return;
 621         }
 622     }
 623 }
 624
 625 STATIC I32
 626 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 627 {
 628     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 629     if (count > 0 && !maxlen)
 630         strip_return(sv);
 631     return count;
 632 }
 633 #endif
 634
 635 /*
 636 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 637
 638 Creates and initialises a new lexer/parser state object, supplying
 639 a context in which to lex and parse from a new source of Perl code.
 640 A pointer to the new state object is placed in L</PL_parser>.  An entry
 641 is made on the save stack so that upon unwinding the new state object
 642 will be destroyed and the former value of L</PL_parser> will be restored.
 643 Nothing else need be done to clean up the parsing context.
 644
 645 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 646 non-null, provides a string (in SV form) containing code to be parsed.
 647 A copy of the string is made, so subsequent modification of I<line>
 648 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 649 from which code will be read to be parsed.  If both are non-null, the
 650 code in I<line> comes first and must consist of complete lines of input,
 651 and I<rsfp> supplies the remainder of the source.
 652
 653 The I<flags> parameter is reserved for future use.  Currently it is only
 654 used by perl internally, so extensions should always pass zero.
 655
 656 =cut
 657 */
 658
 659 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 660    can share filters with the current parser.
 661    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 662    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 663    destruction. This is used to handle the case of defaulting to reading the
 664    script from the standard input because no filename was given on the command
 665    line (without getting confused by situation where STDIN has been closed, so
 666    the script handle is opened on fd 0)  */
 667
 668 void
 669 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 670 {
 671     const char *s = NULL;
 672     yy_parser *parser, *oparser;
 673     if (flags && flags & ~LEX_START_FLAGS)
 674         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 675
 676     /* create and initialise a parser */
 677
 678     Newxz(parser, 1, yy_parser);
 679     parser->old_parser = oparser = PL_parser;
 680     PL_parser = parser;
 681
 682     parser->stack = NULL;
 683     parser->ps = NULL;
 684     parser->stack_size = 0;
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692     parser->nexttoke = 0;
 693     parser->error_count = oparser ? oparser->error_count : 0;
 694     parser->copline = parser->preambling = NOLINE;
 695     parser->lex_state = LEX_NORMAL;
 696     parser->expect = XSTATE;
 697     parser->rsfp = rsfp;
 698     parser->rsfp_filters =
 699       !(flags & LEX_START_SAME_FILTER) || !oparser
 700         ? NULL
 701         : MUTABLE_AV(SvREFCNT_inc(
 702             oparser->rsfp_filters
 703              ? oparser->rsfp_filters
 704              : (oparser->rsfp_filters = newAV())
 705           ));
 706
 707     Newx(parser->lex_brackstack, 120, char);
 708     Newx(parser->lex_casestack, 12, char);
 709     *parser->lex_casestack = '\0';
 710     Newxz(parser->lex_shared, 1, LEXSHARED);
 711
 712     if (line) {
 713         STRLEN len;
 714         s = SvPV_const(line, len);
 715         parser->linestr = flags & LEX_START_COPIED
 716                             ? SvREFCNT_inc_simple_NN(line)
 717                             : newSVpvn_flags(s, len, SvUTF8(line));
 718         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 719     } else {
 720         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 721     }
 722     parser->oldoldbufptr =
 723         parser->oldbufptr =
 724         parser->bufptr =
 725         parser->linestart = SvPVX(parser->linestr);
 726     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 727     parser->last_lop = parser->last_uni = NULL;
 728
 729     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 730                                                         |LEX_DONT_CLOSE_RSFP));
 731     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 732                                                         |LEX_DONT_CLOSE_RSFP));
 733
 734     parser->in_pod = parser->filtered = 0;
 735 }
 736
 737
 738 /* delete a parser object */
 739
 740 void
 741 Perl_parser_free(pTHX_  const yy_parser *parser)
 742 {
 743     PERL_ARGS_ASSERT_PARSER_FREE;
 744
 745     PL_curcop = parser->saved_curcop;
 746     SvREFCNT_dec(parser->linestr);
 747
 748     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 749         PerlIO_clearerr(parser->rsfp);
 750     else if (parser->rsfp && (!parser->old_parser ||
 751                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 752         PerlIO_close(parser->rsfp);
 753     SvREFCNT_dec(parser->rsfp_filters);
 754     SvREFCNT_dec(parser->lex_stuff);
 755     SvREFCNT_dec(parser->sublex_info.repl);
 756
 757     Safefree(parser->lex_brackstack);
 758     Safefree(parser->lex_casestack);
 759     Safefree(parser->lex_shared);
 760     PL_parser = parser->old_parser;
 761     Safefree(parser);
 762 }
 763
 764 void
 765 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 766 {
 767     I32 nexttoke = parser->nexttoke;
 768     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 769     while (nexttoke--) {
 770         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 771          && parser->nextval[nexttoke].opval
 772          && parser->nextval[nexttoke].opval->op_slabbed
 773          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 774             op_free(parser->nextval[nexttoke].opval);
 775             parser->nextval[nexttoke].opval = NULL;
 776         }
 777     }
 778 }
 779
 780
 781 /*
 782 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 783
 784 Buffer scalar containing the chunk currently under consideration of the
 785 text currently being lexed.  This is always a plain string scalar (for
 786 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 787 normal scalar means; instead refer to the buffer directly by the pointer
 788 variables described below.
 789
 790 The lexer maintains various C<char*> pointers to things in the
 791 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 792 reallocated, all of these pointers must be updated.  Don't attempt to
 793 do this manually, but rather use L</lex_grow_linestr> if you need to
 794 reallocate the buffer.
 795
 796 The content of the text chunk in the buffer is commonly exactly one
 797 complete line of input, up to and including a newline terminator,
 798 but there are situations where it is otherwise.  The octets of the
 799 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 800 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 801 flag on this scalar, which may disagree with it.
 802
 803 For direct examination of the buffer, the variable
 804 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 805 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 806 of these pointers is usually preferable to examination of the scalar
 807 through normal scalar means.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 810
 811 Direct pointer to the end of the chunk of text currently being lexed, the
 812 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 813 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 814 always located at the end of the buffer, and does not count as part of
 815 the buffer's contents.
 816
 817 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 818
 819 Points to the current position of lexing inside the lexer buffer.
 820 Characters around this point may be freely examined, within
 821 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 822 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 823 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 824
 825 Lexing code (whether in the Perl core or not) moves this pointer past
 826 the characters that it consumes.  It is also expected to perform some
 827 bookkeeping whenever a newline character is consumed.  This movement
 828 can be more conveniently performed by the function L</lex_read_to>,
 829 which handles newlines appropriately.
 830
 831 Interpretation of the buffer's octets can be abstracted out by
 832 using the slightly higher-level functions L</lex_peek_unichar> and
 833 L</lex_read_unichar>.
 834
 835 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 836
 837 Points to the start of the current line inside the lexer buffer.
 838 This is useful for indicating at which column an error occurred, and
 839 not much else.  This must be updated by any lexing code that consumes
 840 a newline; the function L</lex_read_to> handles this detail.
 841
 842 =cut
 843 */
 844
 845 /*
 846 =for apidoc Amx|bool|lex_bufutf8
 847
 848 Indicates whether the octets in the lexer buffer
 849 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 850 of Unicode characters.  If not, they should be interpreted as Latin-1
 851 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 852
 853 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 854 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 855 encoding.
 856
 857 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 858 is significant, but not the whole story regarding the input character
 859 encoding.  Normally, when a file is being read, the scalar contains octets
 860 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 861 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 862 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 863 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 864 is in effect.  This logic may change in the future; use this function
 865 instead of implementing the logic yourself.
 866
 867 =cut
 868 */
 869
 870 bool
 871 Perl_lex_bufutf8(pTHX)
 872 {
 873     return UTF;
 874 }
 875
 876 /*
 877 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 878
 879 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 880 at least I<len> octets (including terminating C<NUL>).  Returns a
 881 pointer to the reallocated buffer.  This is necessary before making
 882 any direct modification of the buffer that would increase its length.
 883 L</lex_stuff_pvn> provides a more convenient way to insert text into
 884 the buffer.
 885
 886 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 887 this function updates all of the lexer's variables that point directly
 888 into the buffer.
 889
 890 =cut
 891 */
 892
 893 char *
 894 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 895 {
 896     SV *linestr;
 897     char *buf;
 898     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 899     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 900     linestr = PL_parser->linestr;
 901     buf = SvPVX(linestr);
 902     if (len <= SvLEN(linestr))
 903         return buf;
 904     bufend_pos = PL_parser->bufend - buf;
 905     bufptr_pos = PL_parser->bufptr - buf;
 906     oldbufptr_pos = PL_parser->oldbufptr - buf;
 907     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 908     linestart_pos = PL_parser->linestart - buf;
 909     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 910     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 911     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 912                             PL_parser->lex_shared->re_eval_start - buf : 0;
 913
 914     buf = sv_grow(linestr, len);
 915
 916     PL_parser->bufend = buf + bufend_pos;
 917     PL_parser->bufptr = buf + bufptr_pos;
 918     PL_parser->oldbufptr = buf + oldbufptr_pos;
 919     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 920     PL_parser->linestart = buf + linestart_pos;
 921     if (PL_parser->last_uni)
 922         PL_parser->last_uni = buf + last_uni_pos;
 923     if (PL_parser->last_lop)
 924         PL_parser->last_lop = buf + last_lop_pos;
 925     if (PL_parser->lex_shared->re_eval_start)
 926         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 927     return buf;
 928 }
 929
 930 /*
 931 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 932
 933 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 934 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 935 reallocating the buffer if necessary.  This means that lexing code that
 936 runs later will see the characters as if they had appeared in the input.
 937 It is not recommended to do this as part of normal parsing, and most
 938 uses of this facility run the risk of the inserted characters being
 939 interpreted in an unintended manner.
 940
 941 The string to be inserted is represented by I<len> octets starting
 942 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 943 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 944 The characters are recoded for the lexer buffer, according to how the
 945 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 946 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 947 function is more convenient.
 948
 949 =cut
 950 */
 951
 952 void
 953 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 954 {
 955     dVAR;
 956     char *bufptr;
 957     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 958     if (flags & ~(LEX_STUFF_UTF8))
 959         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 960     if (UTF) {
 961         if (flags & LEX_STUFF_UTF8) {
 962             goto plain_copy;
 963         } else {
 964             STRLEN highhalf = 0;    /* Count of variants */
 965             const char *p, *e = pv+len;
 966             for (p = pv; p != e; p++) {
 967                 if (! UTF8_IS_INVARIANT(*p)) {
 968                     highhalf++;
 969                 }
 970             }
 971             if (!highhalf)
 972                 goto plain_copy;
 973             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 974             bufptr = PL_parser->bufptr;
 975             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 976             SvCUR_set(PL_parser->linestr,
 977                 SvCUR(PL_parser->linestr) + len+highhalf);
 978             PL_parser->bufend += len+highhalf;
 979             for (p = pv; p != e; p++) {
 980                 U8 c = (U8)*p;
 981                 if (! UTF8_IS_INVARIANT(c)) {
 982                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
 983                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
 984                 } else {
 985                     *bufptr++ = (char)c;
 986                 }
 987             }
 988         }
 989     } else {
 990         if (flags & LEX_STUFF_UTF8) {
 991             STRLEN highhalf = 0;
 992             const char *p, *e = pv+len;
 993             for (p = pv; p != e; p++) {
 994                 U8 c = (U8)*p;
 995                 if (UTF8_IS_ABOVE_LATIN1(c)) {
 996                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 997                                 "non-Latin-1 character into Latin-1 input");
 998                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
 999                     p++;
1000                     highhalf++;
1001                 } else if (! UTF8_IS_INVARIANT(c)) {
1002                     /* malformed UTF-8 */
1003                     ENTER;
1004                     SAVESPTR(PL_warnhook);
1005                     PL_warnhook = PERL_WARNHOOK_FATAL;
1006                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1007                     LEAVE;
1008                 }
1009             }
1010             if (!highhalf)
1011                 goto plain_copy;
1012             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1013             bufptr = PL_parser->bufptr;
1014             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1015             SvCUR_set(PL_parser->linestr,
1016                 SvCUR(PL_parser->linestr) + len-highhalf);
1017             PL_parser->bufend += len-highhalf;
1018             p = pv;
1019             while (p < e) {
1020                 if (UTF8_IS_INVARIANT(*p)) {
1021                     *bufptr++ = *p;
1022                     p++;
1023                 }
1024                 else {
1025                     assert(p < e -1 );
1026                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1027                     p += 2;
1028                 }
1029             }
1030         } else {
1031           plain_copy:
1032             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1033             bufptr = PL_parser->bufptr;
1034             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1035             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1036             PL_parser->bufend += len;
1037             Copy(pv, bufptr, len, char);
1038         }
1039     }
1040 }
1041
1042 /*
1043 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1044
1045 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1046 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1047 reallocating the buffer if necessary.  This means that lexing code that
1048 runs later will see the characters as if they had appeared in the input.
1049 It is not recommended to do this as part of normal parsing, and most
1050 uses of this facility run the risk of the inserted characters being
1051 interpreted in an unintended manner.
1052
1053 The string to be inserted is represented by octets starting at I<pv>
1054 and continuing to the first nul.  These octets are interpreted as either
1055 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1056 in I<flags>.  The characters are recoded for the lexer buffer, according
1057 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1058 If it is not convenient to nul-terminate a string to be inserted, the
1059 L</lex_stuff_pvn> function is more appropriate.
1060
1061 =cut
1062 */
1063
1064 void
1065 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1066 {
1067     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1068     lex_stuff_pvn(pv, strlen(pv), flags);
1069 }
1070
1071 /*
1072 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1073
1074 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1075 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1076 reallocating the buffer if necessary.  This means that lexing code that
1077 runs later will see the characters as if they had appeared in the input.
1078 It is not recommended to do this as part of normal parsing, and most
1079 uses of this facility run the risk of the inserted characters being
1080 interpreted in an unintended manner.
1081
1082 The string to be inserted is the string value of I<sv>.  The characters
1083 are recoded for the lexer buffer, according to how the buffer is currently
1084 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1085 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1086 need to construct a scalar.
1087
1088 =cut
1089 */
1090
1091 void
1092 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1093 {
1094     char *pv;
1095     STRLEN len;
1096     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1097     if (flags)
1098         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1099     pv = SvPV(sv, len);
1100     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1101 }
1102
1103 /*
1104 =for apidoc Amx|void|lex_unstuff|char *ptr
1105
1106 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1107 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1108 This hides the discarded text from any lexing code that runs later,
1109 as if the text had never appeared.
1110
1111 This is not the normal way to consume lexed text.  For that, use
1112 L</lex_read_to>.
1113
1114 =cut
1115 */
1116
1117 void
1118 Perl_lex_unstuff(pTHX_ char *ptr)
1119 {
1120     char *buf, *bufend;
1121     STRLEN unstuff_len;
1122     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1123     buf = PL_parser->bufptr;
1124     if (ptr < buf)
1125         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1126     if (ptr == buf)
1127         return;
1128     bufend = PL_parser->bufend;
1129     if (ptr > bufend)
1130         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1131     unstuff_len = ptr - buf;
1132     Move(ptr, buf, bufend+1-ptr, char);
1133     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1134     PL_parser->bufend = bufend - unstuff_len;
1135 }
1136
1137 /*
1138 =for apidoc Amx|void|lex_read_to|char *ptr
1139
1140 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1141 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1142 performing the correct bookkeeping whenever a newline character is passed.
1143 This is the normal way to consume lexed text.
1144
1145 Interpretation of the buffer's octets can be abstracted out by
1146 using the slightly higher-level functions L</lex_peek_unichar> and
1147 L</lex_read_unichar>.
1148
1149 =cut
1150 */
1151
1152 void
1153 Perl_lex_read_to(pTHX_ char *ptr)
1154 {
1155     char *s;
1156     PERL_ARGS_ASSERT_LEX_READ_TO;
1157     s = PL_parser->bufptr;
1158     if (ptr < s || ptr > PL_parser->bufend)
1159         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1160     for (; s != ptr; s++)
1161         if (*s == '\n') {
1162             COPLINE_INC_WITH_HERELINES;
1163             PL_parser->linestart = s+1;
1164         }
1165     PL_parser->bufptr = ptr;
1166 }
1167
1168 /*
1169 =for apidoc Amx|void|lex_discard_to|char *ptr
1170
1171 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1172 up to I<ptr>.  The remaining content of the buffer will be moved, and
1173 all pointers into the buffer updated appropriately.  I<ptr> must not
1174 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1175 it is not permitted to discard text that has yet to be lexed.
1176
1177 Normally it is not necessarily to do this directly, because it suffices to
1178 use the implicit discarding behaviour of L</lex_next_chunk> and things
1179 based on it.  However, if a token stretches across multiple lines,
1180 and the lexing code has kept multiple lines of text in the buffer for
1181 that purpose, then after completion of the token it would be wise to
1182 explicitly discard the now-unneeded earlier lines, to avoid future
1183 multi-line tokens growing the buffer without bound.
1184
1185 =cut
1186 */
1187
1188 void
1189 Perl_lex_discard_to(pTHX_ char *ptr)
1190 {
1191     char *buf;
1192     STRLEN discard_len;
1193     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1194     buf = SvPVX(PL_parser->linestr);
1195     if (ptr < buf)
1196         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1197     if (ptr == buf)
1198         return;
1199     if (ptr > PL_parser->bufptr)
1200         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1201     discard_len = ptr - buf;
1202     if (PL_parser->oldbufptr < ptr)
1203         PL_parser->oldbufptr = ptr;
1204     if (PL_parser->oldoldbufptr < ptr)
1205         PL_parser->oldoldbufptr = ptr;
1206     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1207         PL_parser->last_uni = NULL;
1208     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1209         PL_parser->last_lop = NULL;
1210     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1211     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1212     PL_parser->bufend -= discard_len;
1213     PL_parser->bufptr -= discard_len;
1214     PL_parser->oldbufptr -= discard_len;
1215     PL_parser->oldoldbufptr -= discard_len;
1216     if (PL_parser->last_uni)
1217         PL_parser->last_uni -= discard_len;
1218     if (PL_parser->last_lop)
1219         PL_parser->last_lop -= discard_len;
1220 }
1221
1222 /*
1223 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1224
1225 Reads in the next chunk of text to be lexed, appending it to
1226 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1227 looked to the end of the current chunk and wants to know more.  It is
1228 usual, but not necessary, for lexing to have consumed the entirety of
1229 the current chunk at this time.
1230
1231 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1232 chunk (i.e., the current chunk has been entirely consumed), normally the
1233 current chunk will be discarded at the same time that the new chunk is
1234 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1235 will not be discarded.  If the current chunk has not been entirely
1236 consumed, then it will not be discarded regardless of the flag.
1237
1238 Returns true if some new text was added to the buffer, or false if the
1239 buffer has reached the end of the input text.
1240
1241 =cut
1242 */
1243
1244 #define LEX_FAKE_EOF 0x80000000
1245 #define LEX_NO_TERM  0x40000000
1246
1247 bool
1248 Perl_lex_next_chunk(pTHX_ U32 flags)
1249 {
1250     SV *linestr;
1251     char *buf;
1252     STRLEN old_bufend_pos, new_bufend_pos;
1253     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1254     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1255     bool got_some_for_debugger = 0;
1256     bool got_some;
1257     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1258         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1259     linestr = PL_parser->linestr;
1260     buf = SvPVX(linestr);
1261     if (!(flags & LEX_KEEP_PREVIOUS) &&
1262             PL_parser->bufptr == PL_parser->bufend) {
1263         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1264         linestart_pos = 0;
1265         if (PL_parser->last_uni != PL_parser->bufend)
1266             PL_parser->last_uni = NULL;
1267         if (PL_parser->last_lop != PL_parser->bufend)
1268             PL_parser->last_lop = NULL;
1269         last_uni_pos = last_lop_pos = 0;
1270         *buf = 0;
1271         SvCUR(linestr) = 0;
1272     } else {
1273         old_bufend_pos = PL_parser->bufend - buf;
1274         bufptr_pos = PL_parser->bufptr - buf;
1275         oldbufptr_pos = PL_parser->oldbufptr - buf;
1276         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1277         linestart_pos = PL_parser->linestart - buf;
1278         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1279         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1280     }
1281     if (flags & LEX_FAKE_EOF) {
1282         goto eof;
1283     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1284         got_some = 0;
1285     } else if (filter_gets(linestr, old_bufend_pos)) {
1286         got_some = 1;
1287         got_some_for_debugger = 1;
1288     } else if (flags & LEX_NO_TERM) {
1289         got_some = 0;
1290     } else {
1291         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1292             sv_setpvs(linestr, "");
1293         eof:
1294         /* End of real input.  Close filehandle (unless it was STDIN),
1295          * then add implicit termination.
1296          */
1297         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1298             PerlIO_clearerr(PL_parser->rsfp);
1299         else if (PL_parser->rsfp)
1300             (void)PerlIO_close(PL_parser->rsfp);
1301         PL_parser->rsfp = NULL;
1302         PL_parser->in_pod = PL_parser->filtered = 0;
1303         if (!PL_in_eval && PL_minus_p) {
1304             sv_catpvs(linestr,
1305                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1306             PL_minus_n = PL_minus_p = 0;
1307         } else if (!PL_in_eval && PL_minus_n) {
1308             sv_catpvs(linestr, /*{*/";}");
1309             PL_minus_n = 0;
1310         } else
1311             sv_catpvs(linestr, ";");
1312         got_some = 1;
1313     }
1314     buf = SvPVX(linestr);
1315     new_bufend_pos = SvCUR(linestr);
1316     PL_parser->bufend = buf + new_bufend_pos;
1317     PL_parser->bufptr = buf + bufptr_pos;
1318     PL_parser->oldbufptr = buf + oldbufptr_pos;
1319     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1320     PL_parser->linestart = buf + linestart_pos;
1321     if (PL_parser->last_uni)
1322         PL_parser->last_uni = buf + last_uni_pos;
1323     if (PL_parser->last_lop)
1324         PL_parser->last_lop = buf + last_lop_pos;
1325     if (PL_parser->preambling != NOLINE) {
1326         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1327         PL_parser->preambling = NOLINE;
1328     }
1329     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1330             PL_curstash != PL_debstash) {
1331         /* debugger active and we're not compiling the debugger code,
1332          * so store the line into the debugger's array of lines
1333          */
1334         update_debugger_info(NULL, buf+old_bufend_pos,
1335             new_bufend_pos-old_bufend_pos);
1336     }
1337     return got_some;
1338 }
1339
1340 /*
1341 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1342
1343 Looks ahead one (Unicode) character in the text currently being lexed.
1344 Returns the codepoint (unsigned integer value) of the next character,
1345 or -1 if lexing has reached the end of the input text.  To consume the
1346 peeked character, use L</lex_read_unichar>.
1347
1348 If the next character is in (or extends into) the next chunk of input
1349 text, the next chunk will be read in.  Normally the current chunk will be
1350 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1351 then the current chunk will not be discarded.
1352
1353 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1354 is encountered, an exception is generated.
1355
1356 =cut
1357 */
1358
1359 I32
1360 Perl_lex_peek_unichar(pTHX_ U32 flags)
1361 {
1362     dVAR;
1363     char *s, *bufend;
1364     if (flags & ~(LEX_KEEP_PREVIOUS))
1365         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1366     s = PL_parser->bufptr;
1367     bufend = PL_parser->bufend;
1368     if (UTF) {
1369         U8 head;
1370         I32 unichar;
1371         STRLEN len, retlen;
1372         if (s == bufend) {
1373             if (!lex_next_chunk(flags))
1374                 return -1;
1375             s = PL_parser->bufptr;
1376             bufend = PL_parser->bufend;
1377         }
1378         head = (U8)*s;
1379         if (UTF8_IS_INVARIANT(head))
1380             return head;
1381         if (UTF8_IS_START(head)) {
1382             len = UTF8SKIP(&head);
1383             while ((STRLEN)(bufend-s) < len) {
1384                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1385                     break;
1386                 s = PL_parser->bufptr;
1387                 bufend = PL_parser->bufend;
1388             }
1389         }
1390         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1391         if (retlen == (STRLEN)-1) {
1392             /* malformed UTF-8 */
1393             ENTER;
1394             SAVESPTR(PL_warnhook);
1395             PL_warnhook = PERL_WARNHOOK_FATAL;
1396             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1397             LEAVE;
1398         }
1399         return unichar;
1400     } else {
1401         if (s == bufend) {
1402             if (!lex_next_chunk(flags))
1403                 return -1;
1404             s = PL_parser->bufptr;
1405         }
1406         return (U8)*s;
1407     }
1408 }
1409
1410 /*
1411 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1412
1413 Reads the next (Unicode) character in the text currently being lexed.
1414 Returns the codepoint (unsigned integer value) of the character read,
1415 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1416 if lexing has reached the end of the input text.  To non-destructively
1417 examine the next character, use L</lex_peek_unichar> instead.
1418
1419 If the next character is in (or extends into) the next chunk of input
1420 text, the next chunk will be read in.  Normally the current chunk will be
1421 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1422 then the current chunk will not be discarded.
1423
1424 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1425 is encountered, an exception is generated.
1426
1427 =cut
1428 */
1429
1430 I32
1431 Perl_lex_read_unichar(pTHX_ U32 flags)
1432 {
1433     I32 c;
1434     if (flags & ~(LEX_KEEP_PREVIOUS))
1435         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1436     c = lex_peek_unichar(flags);
1437     if (c != -1) {
1438         if (c == '\n')
1439             COPLINE_INC_WITH_HERELINES;
1440         if (UTF)
1441             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1442         else
1443             ++(PL_parser->bufptr);
1444     }
1445     return c;
1446 }
1447
1448 /*
1449 =for apidoc Amx|void|lex_read_space|U32 flags
1450
1451 Reads optional spaces, in Perl style, in the text currently being
1452 lexed.  The spaces may include ordinary whitespace characters and
1453 Perl-style comments.  C<#line> directives are processed if encountered.
1454 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1455 at a non-space character (or the end of the input text).
1456
1457 If spaces extend into the next chunk of input text, the next chunk will
1458 be read in.  Normally the current chunk will be discarded at the same
1459 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1460 chunk will not be discarded.
1461
1462 =cut
1463 */
1464
1465 #define LEX_NO_INCLINE    0x40000000
1466 #define LEX_NO_NEXT_CHUNK 0x80000000
1467
1468 void
1469 Perl_lex_read_space(pTHX_ U32 flags)
1470 {
1471     char *s, *bufend;
1472     const bool can_incline = !(flags & LEX_NO_INCLINE);
1473     bool need_incline = 0;
1474     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1475         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1476     s = PL_parser->bufptr;
1477     bufend = PL_parser->bufend;
1478     while (1) {
1479         char c = *s;
1480         if (c == '#') {
1481             do {
1482                 c = *++s;
1483             } while (!(c == '\n' || (c == 0 && s == bufend)));
1484         } else if (c == '\n') {
1485             s++;
1486             if (can_incline) {
1487                 PL_parser->linestart = s;
1488                 if (s == bufend)
1489                     need_incline = 1;
1490                 else
1491                     incline(s);
1492             }
1493         } else if (isSPACE(c)) {
1494             s++;
1495         } else if (c == 0 && s == bufend) {
1496             bool got_more;
1497             line_t l;
1498             if (flags & LEX_NO_NEXT_CHUNK)
1499                 break;
1500             PL_parser->bufptr = s;
1501             l = CopLINE(PL_curcop);
1502             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1503             got_more = lex_next_chunk(flags);
1504             CopLINE_set(PL_curcop, l);
1505             s = PL_parser->bufptr;
1506             bufend = PL_parser->bufend;
1507             if (!got_more)
1508                 break;
1509             if (can_incline && need_incline && PL_parser->rsfp) {
1510                 incline(s);
1511                 need_incline = 0;
1512             }
1513         } else {
1514             break;
1515         }
1516     }
1517     PL_parser->bufptr = s;
1518 }
1519
1520 /*
1521
1522 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1523
1524 This function performs syntax checking on a prototype, C<proto>.
1525 If C<warn> is true, any illegal characters or mismatched brackets
1526 will trigger illegalproto warnings, declaring that they were
1527 detected in the prototype for C<name>.
1528
1529 The return value is C<true> if this is a valid prototype, and
1530 C<false> if it is not, regardless of whether C<warn> was C<true> or
1531 C<false>.
1532
1533 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1534
1535 =cut
1536
1537  */
1538
1539 bool
1540 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1541 {
1542     STRLEN len, origlen;
1543     char *p = proto ? SvPV(proto, len) : NULL;
1544     bool bad_proto = FALSE;
1545     bool in_brackets = FALSE;
1546     bool after_slash = FALSE;
1547     char greedy_proto = ' ';
1548     bool proto_after_greedy_proto = FALSE;
1549     bool must_be_last = FALSE;
1550     bool underscore = FALSE;
1551     bool bad_proto_after_underscore = FALSE;
1552
1553     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1554
1555     if (!proto)
1556         return TRUE;
1557
1558     origlen = len;
1559     for (; len--; p++) {
1560         if (!isSPACE(*p)) {
1561             if (must_be_last)
1562                 proto_after_greedy_proto = TRUE;
1563             if (underscore) {
1564                 if (!strchr(";@%", *p))
1565                     bad_proto_after_underscore = TRUE;
1566                 underscore = FALSE;
1567             }
1568             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1569                 bad_proto = TRUE;
1570             }
1571             else {
1572                 if (*p == '[')
1573                     in_brackets = TRUE;
1574                 else if (*p == ']')
1575                     in_brackets = FALSE;
1576                 else if ((*p == '@' || *p == '%') &&
1577                     !after_slash &&
1578                     !in_brackets ) {
1579                     must_be_last = TRUE;
1580                     greedy_proto = *p;
1581                 }
1582                 else if (*p == '_')
1583                     underscore = TRUE;
1584             }
1585             if (*p == '\\')
1586                 after_slash = TRUE;
1587             else
1588                 after_slash = FALSE;
1589         }
1590     }
1591
1592     if (warn) {
1593         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1594         p -= origlen;
1595         p = SvUTF8(proto)
1596             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1597                              origlen, UNI_DISPLAY_ISPRINT)
1598             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1599
1600         if (proto_after_greedy_proto)
1601             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1602                         "Prototype after '%c' for %"SVf" : %s",
1603                         greedy_proto, SVfARG(name), p);
1604         if (in_brackets)
1605             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1606                         "Missing ']' in prototype for %"SVf" : %s",
1607                         SVfARG(name), p);
1608         if (bad_proto)
1609             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1610                         "Illegal character in prototype for %"SVf" : %s",
1611                         SVfARG(name), p);
1612         if (bad_proto_after_underscore)
1613             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1614                         "Illegal character after '_' in prototype for %"SVf" : %s",
1615                         SVfARG(name), p);
1616     }
1617
1618     return (! (proto_after_greedy_proto || bad_proto) );
1619 }
1620
1621 /*
1622  * S_incline
1623  * This subroutine has nothing to do with tilting, whether at windmills
1624  * or pinball tables.  Its name is short for "increment line".  It
1625  * increments the current line number in CopLINE(PL_curcop) and checks
1626  * to see whether the line starts with a comment of the form
1627  *    # line 500 "foo.pm"
1628  * If so, it sets the current line number and file to the values in the comment.
1629  */
1630
1631 STATIC void
1632 S_incline(pTHX_ const char *s)
1633 {
1634     const char *t;
1635     const char *n;
1636     const char *e;
1637     line_t line_num;
1638
1639     PERL_ARGS_ASSERT_INCLINE;
1640
1641     COPLINE_INC_WITH_HERELINES;
1642     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1643      && s+1 == PL_bufend && *s == ';') {
1644         /* fake newline in string eval */
1645         CopLINE_dec(PL_curcop);
1646         return;
1647     }
1648     if (*s++ != '#')
1649         return;
1650     while (SPACE_OR_TAB(*s))
1651         s++;
1652     if (strnEQ(s, "line", 4))
1653         s += 4;
1654     else
1655         return;
1656     if (SPACE_OR_TAB(*s))
1657         s++;
1658     else
1659         return;
1660     while (SPACE_OR_TAB(*s))
1661         s++;
1662     if (!isDIGIT(*s))
1663         return;
1664
1665     n = s;
1666     while (isDIGIT(*s))
1667         s++;
1668     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1669         return;
1670     while (SPACE_OR_TAB(*s))
1671         s++;
1672     if (*s == '"' && (t = strchr(s+1, '"'))) {
1673         s++;
1674         e = t + 1;
1675     }
1676     else {
1677         t = s;
1678         while (!isSPACE(*t))
1679             t++;
1680         e = t;
1681     }
1682     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1683         e++;
1684     if (*e != '\n' && *e != '\0')
1685         return;         /* false alarm */
1686
1687     line_num = grok_atou(n, &e) - 1;
1688
1689     if (t - s > 0) {
1690         const STRLEN len = t - s;
1691
1692         if (!PL_rsfp && !PL_parser->filtered) {
1693             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1694              * to *{"::_<newfilename"} */
1695             /* However, the long form of evals is only turned on by the
1696                debugger - usually they're "(eval %lu)" */
1697             GV * const cfgv = CopFILEGV(PL_curcop);
1698             if (cfgv) {
1699                 char smallbuf[128];
1700                 STRLEN tmplen2 = len;
1701                 char *tmpbuf2;
1702                 GV *gv2;
1703
1704                 if (tmplen2 + 2 <= sizeof smallbuf)
1705                     tmpbuf2 = smallbuf;
1706                 else
1707                     Newx(tmpbuf2, tmplen2 + 2, char);
1708
1709                 tmpbuf2[0] = '_';
1710                 tmpbuf2[1] = '<';
1711
1712                 memcpy(tmpbuf2 + 2, s, tmplen2);
1713                 tmplen2 += 2;
1714
1715                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1716                 if (!isGV(gv2)) {
1717                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1718                     /* adjust ${"::_<newfilename"} to store the new file name */
1719                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1720                     /* The line number may differ. If that is the case,
1721                        alias the saved lines that are in the array.
1722                        Otherwise alias the whole array. */
1723                     if (CopLINE(PL_curcop) == line_num) {
1724                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1725                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1726                     }
1727                     else if (GvAV(cfgv)) {
1728                         AV * const av = GvAV(cfgv);
1729                         const I32 start = CopLINE(PL_curcop)+1;
1730                         I32 items = AvFILLp(av) - start;
1731                         if (items > 0) {
1732                             AV * const av2 = GvAVn(gv2);
1733                             SV **svp = AvARRAY(av) + start;
1734                             I32 l = (I32)line_num+1;
1735                             while (items--)
1736                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1737                         }
1738                     }
1739                 }
1740
1741                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1742             }
1743         }
1744         CopFILE_free(PL_curcop);
1745         CopFILE_setn(PL_curcop, s, len);
1746     }
1747     CopLINE_set(PL_curcop, line_num);
1748 }
1749
1750 #define skipspace(s) skipspace_flags(s, 0)
1751
1752
1753 STATIC void
1754 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1755 {
1756     AV *av = CopFILEAVx(PL_curcop);
1757     if (av) {
1758         SV * sv;
1759         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1760         else {
1761             sv = *av_fetch(av, 0, 1);
1762             SvUPGRADE(sv, SVt_PVMG);
1763         }
1764         if (!SvPOK(sv)) sv_setpvs(sv,"");
1765         if (orig_sv)
1766             sv_catsv(sv, orig_sv);
1767         else
1768             sv_catpvn(sv, buf, len);
1769         if (!SvIOK(sv)) {
1770             (void)SvIOK_on(sv);
1771             SvIV_set(sv, 0);
1772         }
1773         if (PL_parser->preambling == NOLINE)
1774             av_store(av, CopLINE(PL_curcop), sv);
1775     }
1776 }
1777
1778 /*
1779  * S_skipspace
1780  * Called to gobble the appropriate amount and type of whitespace.
1781  * Skips comments as well.
1782  */
1783
1784 STATIC char *
1785 S_skipspace_flags(pTHX_ char *s, U32 flags)
1786 {
1787     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1788     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1789         while (s < PL_bufend && SPACE_OR_TAB(*s))
1790             s++;
1791     } else {
1792         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1793         PL_bufptr = s;
1794         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1795                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1796                     LEX_NO_NEXT_CHUNK : 0));
1797         s = PL_bufptr;
1798         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1799         if (PL_linestart > PL_bufptr)
1800             PL_bufptr = PL_linestart;
1801         return s;
1802     }
1803     return s;
1804 }
1805
1806 /*
1807  * S_check_uni
1808  * Check the unary operators to ensure there's no ambiguity in how they're
1809  * used.  An ambiguous piece of code would be:
1810  *     rand + 5
1811  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1812  * the +5 is its argument.
1813  */
1814
1815 STATIC void
1816 S_check_uni(pTHX)
1817 {
1818     const char *s;
1819     const char *t;
1820
1821     if (PL_oldoldbufptr != PL_last_uni)
1822         return;
1823     while (isSPACE(*PL_last_uni))
1824         PL_last_uni++;
1825     s = PL_last_uni;
1826     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1827         s++;
1828     if ((t = strchr(s, '(')) && t < PL_bufptr)
1829         return;
1830
1831     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1832                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1833                      (int)(s - PL_last_uni), PL_last_uni);
1834 }
1835
1836 /*
1837  * LOP : macro to build a list operator.  Its behaviour has been replaced
1838  * with a subroutine, S_lop() for which LOP is just another name.
1839  */
1840
1841 #define LOP(f,x) return lop(f,x,s)
1842
1843 /*
1844  * S_lop
1845  * Build a list operator (or something that might be one).  The rules:
1846  *  - if we have a next token, then it's a list operator [why?]
1847  *  - if the next thing is an opening paren, then it's a function
1848  *  - else it's a list operator
1849  */
1850
1851 STATIC I32
1852 S_lop(pTHX_ I32 f, int x, char *s)
1853 {
1854     PERL_ARGS_ASSERT_LOP;
1855
1856     pl_yylval.ival = f;
1857     CLINE;
1858     PL_expect = x;
1859     PL_bufptr = s;
1860     PL_last_lop = PL_oldbufptr;
1861     PL_last_lop_op = (OPCODE)f;
1862     if (PL_nexttoke)
1863         goto lstop;
1864     if (*s == '(')
1865         return REPORT(FUNC);
1866     s = PEEKSPACE(s);
1867     if (*s == '(')
1868         return REPORT(FUNC);
1869     else {
1870         lstop:
1871         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1872             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1873         return REPORT(LSTOP);
1874     }
1875 }
1876
1877 /*
1878  * S_force_next
1879  * When the lexer realizes it knows the next token (for instance,
1880  * it is reordering tokens for the parser) then it can call S_force_next
1881  * to know what token to return the next time the lexer is called.  Caller
1882  * will need to set PL_nextval[] and possibly PL_expect to ensure
1883  * the lexer handles the token correctly.
1884  */
1885
1886 STATIC void
1887 S_force_next(pTHX_ I32 type)
1888 {
1889 #ifdef DEBUGGING
1890     if (DEBUG_T_TEST) {
1891         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1892         tokereport(type, &NEXTVAL_NEXTTOKE);
1893     }
1894 #endif
1895     PL_nexttype[PL_nexttoke] = type;
1896     PL_nexttoke++;
1897     if (PL_lex_state != LEX_KNOWNEXT) {
1898         PL_lex_defer = PL_lex_state;
1899         PL_lex_expect = PL_expect;
1900         PL_lex_state = LEX_KNOWNEXT;
1901     }
1902 }
1903
1904 /*
1905  * S_postderef
1906  *
1907  * This subroutine handles postfix deref syntax after the arrow has already
1908  * been emitted.  @* $* etc. are emitted as two separate token right here.
1909  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1910  * only the first, leaving yylex to find the next.
1911  */
1912
1913 static int
1914 S_postderef(pTHX_ int const funny, char const next)
1915 {
1916     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1917     assert(strchr("*[{", next));
1918     if (next == '*') {
1919         PL_expect = XOPERATOR;
1920         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1921             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1922             PL_lex_state = LEX_INTERPEND;
1923             force_next(POSTJOIN);
1924         }
1925         force_next(next);
1926         PL_bufptr+=2;
1927     }
1928     else {
1929         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1930          && !PL_lex_brackets)
1931             PL_lex_dojoin = 2;
1932         PL_expect = XOPERATOR;
1933         PL_bufptr++;
1934     }
1935     return funny;
1936 }
1937
1938 void
1939 Perl_yyunlex(pTHX)
1940 {
1941     int yyc = PL_parser->yychar;
1942     if (yyc != YYEMPTY) {
1943         if (yyc) {
1944             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1945             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1946                 PL_lex_allbrackets--;
1947                 PL_lex_brackets--;
1948                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1949             } else if (yyc == '('/*)*/) {
1950                 PL_lex_allbrackets--;
1951                 yyc |= (2<<24);
1952             }
1953             force_next(yyc);
1954         }
1955         PL_parser->yychar = YYEMPTY;
1956     }
1957 }
1958
1959 STATIC SV *
1960 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1961 {
1962     SV * const sv = newSVpvn_utf8(start, len,
1963                                   !IN_BYTES
1964                                   && UTF
1965                                   && !is_ascii_string((const U8*)start, len)
1966                                   && is_utf8_string((const U8*)start, len));
1967     return sv;
1968 }
1969
1970 /*
1971  * S_force_word
1972  * When the lexer knows the next thing is a word (for instance, it has
1973  * just seen -> and it knows that the next char is a word char, then
1974  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1975  * lookahead.
1976  *
1977  * Arguments:
1978  *   char *start : buffer position (must be within PL_linestr)
1979  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1980  *   int check_keyword : if true, Perl checks to make sure the word isn't
1981  *       a keyword (do this if the word is a label, e.g. goto FOO)
1982  *   int allow_pack : if true, : characters will also be allowed (require,
1983  *       use, etc. do this)
1984  *   int allow_initial_tick : used by the "sub" lexer only.
1985  */
1986
1987 STATIC char *
1988 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
1989 {
1990     char *s;
1991     STRLEN len;
1992
1993     PERL_ARGS_ASSERT_FORCE_WORD;
1994
1995     start = SKIPSPACE1(start);
1996     s = start;
1997     if (isIDFIRST_lazy_if(s,UTF) ||
1998         (allow_pack && *s == ':') )
1999     {
2000         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2001         if (check_keyword) {
2002           char *s2 = PL_tokenbuf;
2003           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2004             s2 += 6, len -= 6;
2005           if (keyword(s2, len, 0))
2006             return start;
2007         }
2008         if (token == METHOD) {
2009             s = SKIPSPACE1(s);
2010             if (*s == '(')
2011                 PL_expect = XTERM;
2012             else {
2013                 PL_expect = XOPERATOR;
2014             }
2015         }
2016         NEXTVAL_NEXTTOKE.opval
2017             = (OP*)newSVOP(OP_CONST,0,
2018                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2019         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2020         force_next(token);
2021     }
2022     return s;
2023 }
2024
2025 /*
2026  * S_force_ident
2027  * Called when the lexer wants $foo *foo &foo etc, but the program
2028  * text only contains the "foo" portion.  The first argument is a pointer
2029  * to the "foo", and the second argument is the type symbol to prefix.
2030  * Forces the next token to be a "WORD".
2031  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2032  */
2033
2034 STATIC void
2035 S_force_ident(pTHX_ const char *s, int kind)
2036 {
2037     PERL_ARGS_ASSERT_FORCE_IDENT;
2038
2039     if (s[0]) {
2040         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2041         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2042                                                                 UTF ? SVf_UTF8 : 0));
2043         NEXTVAL_NEXTTOKE.opval = o;
2044         force_next(WORD);
2045         if (kind) {
2046             o->op_private = OPpCONST_ENTERED;
2047             /* XXX see note in pp_entereval() for why we forgo typo
2048                warnings if the symbol must be introduced in an eval.
2049                GSAR 96-10-12 */
2050             gv_fetchpvn_flags(s, len,
2051                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2052                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2053                               kind == '$' ? SVt_PV :
2054                               kind == '@' ? SVt_PVAV :
2055                               kind == '%' ? SVt_PVHV :
2056                               SVt_PVGV
2057                               );
2058         }
2059     }
2060 }
2061
2062 static void
2063 S_force_ident_maybe_lex(pTHX_ char pit)
2064 {
2065     NEXTVAL_NEXTTOKE.ival = pit;
2066     force_next('p');
2067 }
2068
2069 NV
2070 Perl_str_to_version(pTHX_ SV *sv)
2071 {
2072     NV retval = 0.0;
2073     NV nshift = 1.0;
2074     STRLEN len;
2075     const char *start = SvPV_const(sv,len);
2076     const char * const end = start + len;
2077     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2078
2079     PERL_ARGS_ASSERT_STR_TO_VERSION;
2080
2081     while (start < end) {
2082         STRLEN skip;
2083         UV n;
2084         if (utf)
2085             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2086         else {
2087             n = *(U8*)start;
2088             skip = 1;
2089         }
2090         retval += ((NV)n)/nshift;
2091         start += skip;
2092         nshift *= 1000;
2093     }
2094     return retval;
2095 }
2096
2097 /*
2098  * S_force_version
2099  * Forces the next token to be a version number.
2100  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2101  * and if "guessing" is TRUE, then no new token is created (and the caller
2102  * must use an alternative parsing method).
2103  */
2104
2105 STATIC char *
2106 S_force_version(pTHX_ char *s, int guessing)
2107 {
2108     OP *version = NULL;
2109     char *d;
2110
2111     PERL_ARGS_ASSERT_FORCE_VERSION;
2112
2113     s = SKIPSPACE1(s);
2114
2115     d = s;
2116     if (*d == 'v')
2117         d++;
2118     if (isDIGIT(*d)) {
2119         while (isDIGIT(*d) || *d == '_' || *d == '.')
2120             d++;
2121         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2122             SV *ver;
2123             s = scan_num(s, &pl_yylval);
2124             version = pl_yylval.opval;
2125             ver = cSVOPx(version)->op_sv;
2126             if (SvPOK(ver) && !SvNIOK(ver)) {
2127                 SvUPGRADE(ver, SVt_PVNV);
2128                 SvNV_set(ver, str_to_version(ver));
2129                 SvNOK_on(ver);          /* hint that it is a version */
2130             }
2131         }
2132         else if (guessing) {
2133             return s;
2134         }
2135     }
2136
2137     /* NOTE: The parser sees the package name and the VERSION swapped */
2138     NEXTVAL_NEXTTOKE.opval = version;
2139     force_next(WORD);
2140
2141     return s;
2142 }
2143
2144 /*
2145  * S_force_strict_version
2146  * Forces the next token to be a version number using strict syntax rules.
2147  */
2148
2149 STATIC char *
2150 S_force_strict_version(pTHX_ char *s)
2151 {
2152     OP *version = NULL;
2153     const char *errstr = NULL;
2154
2155     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2156
2157     while (isSPACE(*s)) /* leading whitespace */
2158         s++;
2159
2160     if (is_STRICT_VERSION(s,&errstr)) {
2161         SV *ver = newSV(0);
2162         s = (char *)scan_version(s, ver, 0);
2163         version = newSVOP(OP_CONST, 0, ver);
2164     }
2165     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2166             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2167     {
2168         PL_bufptr = s;
2169         if (errstr)
2170             yyerror(errstr); /* version required */
2171         return s;
2172     }
2173
2174     /* NOTE: The parser sees the package name and the VERSION swapped */
2175     NEXTVAL_NEXTTOKE.opval = version;
2176     force_next(WORD);
2177
2178     return s;
2179 }
2180
2181 /*
2182  * S_tokeq
2183  * Tokenize a quoted string passed in as an SV.  It finds the next
2184  * chunk, up to end of string or a backslash.  It may make a new
2185  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2186  * turns \\ into \.
2187  */
2188
2189 STATIC SV *
2190 S_tokeq(pTHX_ SV *sv)
2191 {
2192     char *s;
2193     char *send;
2194     char *d;
2195     SV *pv = sv;
2196
2197     PERL_ARGS_ASSERT_TOKEQ;
2198
2199     assert (SvPOK(sv));
2200     assert (SvLEN(sv));
2201     assert (!SvIsCOW(sv));
2202     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2203         goto finish;
2204     s = SvPVX(sv);
2205     send = SvEND(sv);
2206     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2207     while (s < send && !(*s == '\\' && s[1] == '\\'))
2208         s++;
2209     if (s == send)
2210         goto finish;
2211     d = s;
2212     if ( PL_hints & HINT_NEW_STRING ) {
2213         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2214                             SVs_TEMP | SvUTF8(sv));
2215     }
2216     while (s < send) {
2217         if (*s == '\\') {
2218             if (s + 1 < send && (s[1] == '\\'))
2219                 s++;            /* all that, just for this */
2220         }
2221         *d++ = *s++;
2222     }
2223     *d = '\0';
2224     SvCUR_set(sv, d - SvPVX_const(sv));
2225   finish:
2226     if ( PL_hints & HINT_NEW_STRING )
2227        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2228     return sv;
2229 }
2230
2231 /*
2232  * Now come three functions related to double-quote context,
2233  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2234  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2235  * interact with PL_lex_state, and create fake ( ... ) argument lists
2236  * to handle functions and concatenation.
2237  * For example,
2238  *   "foo\lbar"
2239  * is tokenised as
2240  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2241  */
2242
2243 /*
2244  * S_sublex_start
2245  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2246  *
2247  * Pattern matching will set PL_lex_op to the pattern-matching op to
2248  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2249  *
2250  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2251  *
2252  * Everything else becomes a FUNC.
2253  *
2254  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2255  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2256  * call to S_sublex_push().
2257  */
2258
2259 STATIC I32
2260 S_sublex_start(pTHX)
2261 {
2262     const I32 op_type = pl_yylval.ival;
2263
2264     if (op_type == OP_NULL) {
2265         pl_yylval.opval = PL_lex_op;
2266         PL_lex_op = NULL;
2267         return THING;
2268     }
2269     if (op_type == OP_CONST) {
2270         SV *sv = tokeq(PL_lex_stuff);
2271
2272         if (SvTYPE(sv) == SVt_PVIV) {
2273             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2274             STRLEN len;
2275             const char * const p = SvPV_const(sv, len);
2276             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2277             SvREFCNT_dec(sv);
2278             sv = nsv;
2279         }
2280         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2281         PL_lex_stuff = NULL;
2282         return THING;
2283     }
2284
2285     PL_sublex_info.super_state = PL_lex_state;
2286     PL_sublex_info.sub_inwhat = (U16)op_type;
2287     PL_sublex_info.sub_op = PL_lex_op;
2288     PL_lex_state = LEX_INTERPPUSH;
2289
2290     PL_expect = XTERM;
2291     if (PL_lex_op) {
2292         pl_yylval.opval = PL_lex_op;
2293         PL_lex_op = NULL;
2294         return PMFUNC;
2295     }
2296     else
2297         return FUNC;
2298 }
2299
2300 /*
2301  * S_sublex_push
2302  * Create a new scope to save the lexing state.  The scope will be
2303  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2304  * to the uc, lc, etc. found before.
2305  * Sets PL_lex_state to LEX_INTERPCONCAT.
2306  */
2307
2308 STATIC I32
2309 S_sublex_push(pTHX)
2310 {
2311     LEXSHARED *shared;
2312     const bool is_heredoc = PL_multi_close == '<';
2313     ENTER;
2314
2315     PL_lex_state = PL_sublex_info.super_state;
2316     SAVEI8(PL_lex_dojoin);
2317     SAVEI32(PL_lex_brackets);
2318     SAVEI32(PL_lex_allbrackets);
2319     SAVEI32(PL_lex_formbrack);
2320     SAVEI8(PL_lex_fakeeof);
2321     SAVEI32(PL_lex_casemods);
2322     SAVEI32(PL_lex_starts);
2323     SAVEI8(PL_lex_state);
2324     SAVESPTR(PL_lex_repl);
2325     SAVEVPTR(PL_lex_inpat);
2326     SAVEI16(PL_lex_inwhat);
2327     if (is_heredoc)
2328     {
2329         SAVECOPLINE(PL_curcop);
2330         SAVEI32(PL_multi_end);
2331         SAVEI32(PL_parser->herelines);
2332         PL_parser->herelines = 0;
2333     }
2334     SAVEI8(PL_multi_close);
2335     SAVEPPTR(PL_bufptr);
2336     SAVEPPTR(PL_bufend);
2337     SAVEPPTR(PL_oldbufptr);
2338     SAVEPPTR(PL_oldoldbufptr);
2339     SAVEPPTR(PL_last_lop);
2340     SAVEPPTR(PL_last_uni);
2341     SAVEPPTR(PL_linestart);
2342     SAVESPTR(PL_linestr);
2343     SAVEGENERICPV(PL_lex_brackstack);
2344     SAVEGENERICPV(PL_lex_casestack);
2345     SAVEGENERICPV(PL_parser->lex_shared);
2346     SAVEBOOL(PL_parser->lex_re_reparsing);
2347     SAVEI32(PL_copline);
2348
2349     /* The here-doc parser needs to be able to peek into outer lexing
2350        scopes to find the body of the here-doc.  So we put PL_linestr and
2351        PL_bufptr into lex_shared, to ‘share’ those values.
2352      */
2353     PL_parser->lex_shared->ls_linestr = PL_linestr;
2354     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2355
2356     PL_linestr = PL_lex_stuff;
2357     PL_lex_repl = PL_sublex_info.repl;
2358     PL_lex_stuff = NULL;
2359     PL_sublex_info.repl = NULL;
2360
2361     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2362         = SvPVX(PL_linestr);
2363     PL_bufend += SvCUR(PL_linestr);
2364     PL_last_lop = PL_last_uni = NULL;
2365     SAVEFREESV(PL_linestr);
2366     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2367
2368     PL_lex_dojoin = FALSE;
2369     PL_lex_brackets = PL_lex_formbrack = 0;
2370     PL_lex_allbrackets = 0;
2371     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2372     Newx(PL_lex_brackstack, 120, char);
2373     Newx(PL_lex_casestack, 12, char);
2374     PL_lex_casemods = 0;
2375     *PL_lex_casestack = '\0';
2376     PL_lex_starts = 0;
2377     PL_lex_state = LEX_INTERPCONCAT;
2378     if (is_heredoc)
2379         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2380     PL_copline = NOLINE;
2381
2382     Newxz(shared, 1, LEXSHARED);
2383     shared->ls_prev = PL_parser->lex_shared;
2384     PL_parser->lex_shared = shared;
2385
2386     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2387     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2388     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2389         PL_lex_inpat = PL_sublex_info.sub_op;
2390     else
2391         PL_lex_inpat = NULL;
2392
2393     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2394     PL_in_eval &= ~EVAL_RE_REPARSING;
2395
2396     return '(';
2397 }
2398
2399 /*
2400  * S_sublex_done
2401  * Restores lexer state after a S_sublex_push.
2402  */
2403
2404 STATIC I32
2405 S_sublex_done(pTHX)
2406 {
2407     if (!PL_lex_starts++) {
2408         SV * const sv = newSVpvs("");
2409         if (SvUTF8(PL_linestr))
2410             SvUTF8_on(sv);
2411         PL_expect = XOPERATOR;
2412         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2413         return THING;
2414     }
2415
2416     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2417         PL_lex_state = LEX_INTERPCASEMOD;
2418         return yylex();
2419     }
2420
2421     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2422     assert(PL_lex_inwhat != OP_TRANSR);
2423     if (PL_lex_repl) {
2424         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2425         PL_linestr = PL_lex_repl;
2426         PL_lex_inpat = 0;
2427         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2428         PL_bufend += SvCUR(PL_linestr);
2429         PL_last_lop = PL_last_uni = NULL;
2430         PL_lex_dojoin = FALSE;
2431         PL_lex_brackets = 0;
2432         PL_lex_allbrackets = 0;
2433         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2434         PL_lex_casemods = 0;
2435         *PL_lex_casestack = '\0';
2436         PL_lex_starts = 0;
2437         if (SvEVALED(PL_lex_repl)) {
2438             PL_lex_state = LEX_INTERPNORMAL;
2439             PL_lex_starts++;
2440             /*  we don't clear PL_lex_repl here, so that we can check later
2441                 whether this is an evalled subst; that means we rely on the
2442                 logic to ensure sublex_done() is called again only via the
2443                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2444         }
2445         else {
2446             PL_lex_state = LEX_INTERPCONCAT;
2447             PL_lex_repl = NULL;
2448         }
2449         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2450             CopLINE(PL_curcop) +=
2451                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2452                  + PL_parser->herelines;
2453             PL_parser->herelines = 0;
2454         }
2455         return ',';
2456     }
2457     else {
2458         const line_t l = CopLINE(PL_curcop);
2459         LEAVE;
2460         if (PL_multi_close == '<')
2461             PL_parser->herelines += l - PL_multi_end;
2462         PL_bufend = SvPVX(PL_linestr);
2463         PL_bufend += SvCUR(PL_linestr);
2464         PL_expect = XOPERATOR;
2465         PL_sublex_info.sub_inwhat = 0;
2466         return ')';
2467     }
2468 }
2469
2470 PERL_STATIC_INLINE SV*
2471 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2472 {
2473     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2474      * interior, hence to the "}".  Finds what the name resolves to, returning
2475      * an SV* containing it; NULL if no valid one found */
2476
2477     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2478
2479     HV * table;
2480     SV **cvp;
2481     SV *cv;
2482     SV *rv;
2483     HV *stash;
2484     const U8* first_bad_char_loc;
2485     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2486
2487     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2488
2489     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2490                                      e - backslash_ptr,
2491                                      &first_bad_char_loc))
2492     {
2493         /* If warnings are on, this will print a more detailed analysis of what
2494          * is wrong than the error message below */
2495         utf8n_to_uvchr(first_bad_char_loc,
2496                        e - ((char *) first_bad_char_loc),
2497                        NULL, 0);
2498
2499         /* We deliberately don't try to print the malformed character, which
2500          * might not print very well; it also may be just the first of many
2501          * malformations, so don't print what comes after it */
2502         yyerror(Perl_form(aTHX_
2503             "Malformed UTF-8 character immediately after '%.*s'",
2504             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2505         return NULL;
2506     }
2507
2508     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2509                         /* include the <}> */
2510                         e - backslash_ptr + 1);
2511     if (! SvPOK(res)) {
2512         SvREFCNT_dec_NN(res);
2513         return NULL;
2514     }
2515
2516     /* See if the charnames handler is the Perl core's, and if so, we can skip
2517      * the validation needed for a user-supplied one, as Perl's does its own
2518      * validation. */
2519     table = GvHV(PL_hintgv);             /* ^H */
2520     cvp = hv_fetchs(table, "charnames", FALSE);
2521     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2522         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2523     {
2524         const char * const name = HvNAME(stash);
2525         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2526          && strEQ(name, "_charnames")) {
2527            return res;
2528        }
2529     }
2530
2531     /* Here, it isn't Perl's charname handler.  We can't rely on a
2532      * user-supplied handler to validate the input name.  For non-ut8 input,
2533      * look to see that the first character is legal.  Then loop through the
2534      * rest checking that each is a continuation */
2535
2536     /* This code makes the reasonable assumption that the only Latin1-range
2537      * characters that begin a character name alias are alphabetic, otherwise
2538      * would have to create a isCHARNAME_BEGIN macro */
2539
2540     if (! UTF) {
2541         if (! isALPHAU(*s)) {
2542             goto bad_charname;
2543         }
2544         s++;
2545         while (s < e) {
2546             if (! isCHARNAME_CONT(*s)) {
2547                 goto bad_charname;
2548             }
2549             if (*s == ' ' && *(s-1) == ' ') {
2550                 goto multi_spaces;
2551             }
2552             if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2553                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2554                            "NO-BREAK SPACE in a charnames "
2555                            "alias definition is deprecated");
2556             }
2557             s++;
2558         }
2559     }
2560     else {
2561         /* Similarly for utf8.  For invariants can check directly; for other
2562          * Latin1, can calculate their code point and check; otherwise  use a
2563          * swash */
2564         if (UTF8_IS_INVARIANT(*s)) {
2565             if (! isALPHAU(*s)) {
2566                 goto bad_charname;
2567             }
2568             s++;
2569         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2570             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2571                 goto bad_charname;
2572             }
2573             s += 2;
2574         }
2575         else {
2576             if (! PL_utf8_charname_begin) {
2577                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2578                 PL_utf8_charname_begin = _core_swash_init("utf8",
2579                                                         "_Perl_Charname_Begin",
2580                                                         &PL_sv_undef,
2581                                                         1, 0, NULL, &flags);
2582             }
2583             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2584                 goto bad_charname;
2585             }
2586             s += UTF8SKIP(s);
2587         }
2588
2589         while (s < e) {
2590             if (UTF8_IS_INVARIANT(*s)) {
2591                 if (! isCHARNAME_CONT(*s)) {
2592                     goto bad_charname;
2593                 }
2594                 if (*s == ' ' && *(s-1) == ' ') {
2595                     goto multi_spaces;
2596                 }
2597                 s++;
2598             }
2599             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2600                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2601                 {
2602                     goto bad_charname;
2603                 }
2604                 if (*s == *NBSP_UTF8
2605                     && *(s+1) == *(NBSP_UTF8+1)
2606                     && ckWARN_d(WARN_DEPRECATED))
2607                 {
2608                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2609                                 "NO-BREAK SPACE in a charnames "
2610                                 "alias definition is deprecated");
2611                 }
2612                 s += 2;
2613             }
2614             else {
2615                 if (! PL_utf8_charname_continue) {
2616                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2617                     PL_utf8_charname_continue = _core_swash_init("utf8",
2618                                                 "_Perl_Charname_Continue",
2619                                                 &PL_sv_undef,
2620                                                 1, 0, NULL, &flags);
2621                 }
2622                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2623                     goto bad_charname;
2624                 }
2625                 s += UTF8SKIP(s);
2626             }
2627         }
2628     }
2629     if (*(s-1) == ' ') {
2630         yyerror_pv(
2631             Perl_form(aTHX_
2632             "charnames alias definitions may not contain trailing "
2633             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2634             (int)(s - backslash_ptr + 1), backslash_ptr,
2635             (int)(e - s + 1), s + 1
2636             ),
2637         UTF ? SVf_UTF8 : 0);
2638         return NULL;
2639     }
2640
2641     if (SvUTF8(res)) { /* Don't accept malformed input */
2642         const U8* first_bad_char_loc;
2643         STRLEN len;
2644         const char* const str = SvPV_const(res, len);
2645         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2646             /* If warnings are on, this will print a more detailed analysis of
2647              * what is wrong than the error message below */
2648             utf8n_to_uvchr(first_bad_char_loc,
2649                            (char *) first_bad_char_loc - str,
2650                            NULL, 0);
2651
2652             /* We deliberately don't try to print the malformed character,
2653              * which might not print very well; it also may be just the first
2654              * of many malformations, so don't print what comes after it */
2655             yyerror_pv(
2656               Perl_form(aTHX_
2657                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2658                  (int) (e - backslash_ptr + 1), backslash_ptr,
2659                  (int) ((char *) first_bad_char_loc - str), str
2660               ),
2661               SVf_UTF8);
2662             return NULL;
2663         }
2664     }
2665
2666     return res;
2667
2668   bad_charname: {
2669
2670         /* The final %.*s makes sure that should the trailing NUL be missing
2671          * that this print won't run off the end of the string */
2672         yyerror_pv(
2673           Perl_form(aTHX_
2674             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2675             (int)(s - backslash_ptr + 1), backslash_ptr,
2676             (int)(e - s + 1), s + 1
2677           ),
2678           UTF ? SVf_UTF8 : 0);
2679         return NULL;
2680     }
2681
2682   multi_spaces:
2683         yyerror_pv(
2684           Perl_form(aTHX_
2685             "charnames alias definitions may not contain a sequence of "
2686             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2687             (int)(s - backslash_ptr + 1), backslash_ptr,
2688             (int)(e - s + 1), s + 1
2689           ),
2690           UTF ? SVf_UTF8 : 0);
2691         return NULL;
2692 }
2693
2694 /*
2695   scan_const
2696
2697   Extracts the next constant part of a pattern, double-quoted string,
2698   or transliteration.  This is terrifying code.
2699
2700   For example, in parsing the double-quoted string "ab\x63$d", it would
2701   stop at the '$' and return an OP_CONST containing 'abc'.
2702
2703   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2704   processing a pattern (PL_lex_inpat is true), a transliteration
2705   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2706
2707   Returns a pointer to the character scanned up to. If this is
2708   advanced from the start pointer supplied (i.e. if anything was
2709   successfully parsed), will leave an OP_CONST for the substring scanned
2710   in pl_yylval. Caller must intuit reason for not parsing further
2711   by looking at the next characters herself.
2712
2713   In patterns:
2714     expand:
2715       \N{FOO}  => \N{U+hex_for_character_FOO}
2716       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2717
2718     pass through:
2719         all other \-char, including \N and \N{ apart from \N{ABC}
2720
2721     stops on:
2722         @ and $ where it appears to be a var, but not for $ as tail anchor
2723         \l \L \u \U \Q \E
2724         (?{  or  (??{
2725
2726
2727   In transliterations:
2728     characters are VERY literal, except for - not at the start or end
2729     of the string, which indicates a range. If the range is in bytes,
2730     scan_const expands the range to the full set of intermediate
2731     characters. If the range is in utf8, the hyphen is replaced with
2732     a certain range mark which will be handled by pmtrans() in op.c.
2733
2734   In double-quoted strings:
2735     backslashes:
2736       double-quoted style: \r and \n
2737       constants: \x31, etc.
2738       deprecated backrefs: \1 (in substitution replacements)
2739       case and quoting: \U \Q \E
2740     stops on @ and $
2741
2742   scan_const does *not* construct ops to handle interpolated strings.
2743   It stops processing as soon as it finds an embedded $ or @ variable
2744   and leaves it to the caller to work out what's going on.
2745
2746   embedded arrays (whether in pattern or not) could be:
2747       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2748
2749   $ in double-quoted strings must be the symbol of an embedded scalar.
2750
2751   $ in pattern could be $foo or could be tail anchor.  Assumption:
2752   it's a tail anchor if $ is the last thing in the string, or if it's
2753   followed by one of "()| \r\n\t"
2754
2755   \1 (backreferences) are turned into $1 in substitutions
2756
2757   The structure of the code is
2758       while (there's a character to process) {
2759           handle transliteration ranges
2760           skip regexp comments /(?#comment)/ and codes /(?{code})/
2761           skip #-initiated comments in //x patterns
2762           check for embedded arrays
2763           check for embedded scalars
2764           if (backslash) {
2765               deprecate \1 in substitution replacements
2766               handle string-changing backslashes \l \U \Q \E, etc.
2767               switch (what was escaped) {
2768                   handle \- in a transliteration (becomes a literal -)
2769                   if a pattern and not \N{, go treat as regular character
2770                   handle \132 (octal characters)
2771                   handle \x15 and \x{1234} (hex characters)
2772                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2773                   handle \cV (control characters)
2774                   handle printf-style backslashes (\f, \r, \n, etc)
2775               } (end switch)
2776               continue
2777           } (end if backslash)
2778           handle regular character
2779     } (end while character to read)
2780
2781 */
2782
2783 STATIC char *
2784 S_scan_const(pTHX_ char *start)
2785 {
2786     char *send = PL_bufend;             /* end of the constant */
2787     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2788                                            on sizing. */
2789     char *s = start;                    /* start of the constant */
2790     char *d = SvPVX(sv);                /* destination for copies */
2791     bool dorange = FALSE;               /* are we in a translit range? */
2792     bool didrange = FALSE;              /* did we just finish a range? */
2793     bool in_charclass = FALSE;          /* within /[...]/ */
2794     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2795     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2796                                            UTF8?  But, this can show as true
2797                                            when the source isn't utf8, as for
2798                                            example when it is entirely composed
2799                                            of hex constants */
2800     SV *res;                            /* result from charnames */
2801
2802     /* Note on sizing:  The scanned constant is placed into sv, which is
2803      * initialized by newSV() assuming one byte of output for every byte of
2804      * input.  This routine expects newSV() to allocate an extra byte for a
2805      * trailing NUL, which this routine will append if it gets to the end of
2806      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2807      * CAPITAL LETTER A}), or more output than input if the constant ends up
2808      * recoded to utf8, but each time a construct is found that might increase
2809      * the needed size, SvGROW() is called.  Its size parameter each time is
2810      * based on the best guess estimate at the time, namely the length used so
2811      * far, plus the length the current construct will occupy, plus room for
2812      * the trailing NUL, plus one byte for every input byte still unscanned */
2813
2814     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2815                        before set */
2816 #ifdef EBCDIC
2817     UV literal_endpoint = 0;
2818     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2819 #endif
2820
2821     PERL_ARGS_ASSERT_SCAN_CONST;
2822
2823     assert(PL_lex_inwhat != OP_TRANSR);
2824     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2825         /* If we are doing a trans and we know we want UTF8 set expectation */
2826         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2827         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2828     }
2829
2830     /* Protect sv from errors and fatal warnings. */
2831     ENTER_with_name("scan_const");
2832     SAVEFREESV(sv);
2833
2834     while (s < send || dorange) {
2835
2836         /* get transliterations out of the way (they're most literal) */
2837         if (PL_lex_inwhat == OP_TRANS) {
2838             /* expand a range A-Z to the full set of characters.  AIE! */
2839             if (dorange) {
2840                 I32 i;                          /* current expanded character */
2841                 I32 min;                        /* first character in range */
2842                 I32 max;                        /* last character in range */
2843
2844 #ifdef EBCDIC
2845                 UV uvmax = 0;
2846 #endif
2847
2848                 if (has_utf8
2849 #ifdef EBCDIC
2850                     && !native_range
2851 #endif
2852                 ) {
2853                     char * const c = (char*)utf8_hop((U8*)d, -1);
2854                     char *e = d++;
2855                     while (e-- > c)
2856                         *(e + 1) = *e;
2857                     *c = (char) ILLEGAL_UTF8_BYTE;
2858                     /* mark the range as done, and continue */
2859                     dorange = FALSE;
2860                     didrange = TRUE;
2861                     continue;
2862                 }
2863
2864                 i = d - SvPVX_const(sv);                /* remember current offset */
2865 #ifdef EBCDIC
2866                 SvGROW(sv,
2867                        SvLEN(sv) + ((has_utf8)
2868                                     ?  (512 - UTF_CONTINUATION_MARK
2869                                         + UNISKIP(0x100))
2870                                     : 256));
2871                 /* How many two-byte within 0..255: 128 in UTF-8,
2872                  * 96 in UTF-8-mod. */
2873 #else
2874                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2875 #endif
2876                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2877 #ifdef EBCDIC
2878                 if (has_utf8) {
2879                     int j;
2880                     for (j = 0; j <= 1; j++) {
2881                         char * const c = (char*)utf8_hop((U8*)d, -1);
2882                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2883                         if (j)
2884                             min = (U8)uv;
2885                         else if (uv < 256)
2886                             max = (U8)uv;
2887                         else {
2888                             max = (U8)0xff; /* only to \xff */
2889                             uvmax = uv; /* \x{100} to uvmax */
2890                         }
2891                         d = c; /* eat endpoint chars */
2892                      }
2893                 }
2894                else {
2895 #endif
2896                    d -= 2;              /* eat the first char and the - */
2897                    min = (U8)*d;        /* first char in range */
2898                    max = (U8)d[1];      /* last char in range  */
2899 #ifdef EBCDIC
2900                }
2901 #endif
2902
2903                 if (min > max) {
2904                     Perl_croak(aTHX_
2905                                "Invalid range \"%c-%c\" in transliteration operator",
2906                                (char)min, (char)max);
2907                 }
2908
2909 #ifdef EBCDIC
2910                 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2911                  * any subsets of these ranges into individual characters */
2912                 if (literal_endpoint == 2 &&
2913                     ((isLOWER_A(min) && isLOWER_A(max)) ||
2914                      (isUPPER_A(min) && isUPPER_A(max))))
2915                 {
2916                     for (i = min; i <= max; i++) {
2917                         if (isALPHA_A(i))
2918                             *d++ = i;
2919                     }
2920                 }
2921                 else
2922 #endif
2923                     for (i = min; i <= max; i++)
2924 #ifdef EBCDIC
2925                         if (has_utf8) {
2926                             append_utf8_from_native_byte(i, &d);
2927                         }
2928                         else
2929 #endif
2930                             *d++ = (char)i;
2931
2932 #ifdef EBCDIC
2933                 if (uvmax) {
2934                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2935                     if (uvmax > 0x101)
2936                         *d++ = (char) ILLEGAL_UTF8_BYTE;
2937                     if (uvmax > 0x100)
2938                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2939                 }
2940 #endif
2941
2942                 /* mark the range as done, and continue */
2943                 dorange = FALSE;
2944                 didrange = TRUE;
2945 #ifdef EBCDIC
2946                 literal_endpoint = 0;
2947 #endif
2948                 continue;
2949             }
2950
2951             /* range begins (ignore - as first or last char) */
2952             else if (*s == '-' && s+1 < send  && s != start) {
2953                 if (didrange) {
2954                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2955                 }
2956                 if (has_utf8
2957 #ifdef EBCDIC
2958                     && !native_range
2959 #endif
2960                     ) {
2961                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
2962                     s++;
2963                     continue;
2964                 }
2965                 dorange = TRUE;
2966                 s++;
2967             }
2968             else {
2969                 didrange = FALSE;
2970 #ifdef EBCDIC
2971                 literal_endpoint = 0;
2972                 native_range = TRUE;
2973 #endif
2974             }
2975         }
2976
2977         /* if we get here, we're not doing a transliteration */
2978
2979         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2980             char *s1 = s-1;
2981             int esc = 0;
2982             while (s1 >= start && *s1-- == '\\')
2983                 esc = !esc;
2984             if (!esc)
2985                 in_charclass = TRUE;
2986         }
2987
2988         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2989             char *s1 = s-1;
2990             int esc = 0;
2991             while (s1 >= start && *s1-- == '\\')
2992                 esc = !esc;
2993             if (!esc)
2994                 in_charclass = FALSE;
2995         }
2996
2997         /* skip for regexp comments /(?#comment)/, except for the last
2998          * char, which will be done separately.
2999          * Stop on (?{..}) and friends */
3000
3001         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3002             if (s[2] == '#') {
3003                 while (s+1 < send && *s != ')')
3004                     *d++ = *s++;
3005             }
3006             else if (!PL_lex_casemods &&
3007                      (    s[2] == '{' /* This should match regcomp.c */
3008                       || (s[2] == '?' && s[3] == '{')))
3009             {
3010                 break;
3011             }
3012         }
3013
3014         /* likewise skip #-initiated comments in //x patterns */
3015         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3016           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3017             while (s+1 < send && *s != '\n')
3018                 *d++ = *s++;
3019         }
3020
3021         /* no further processing of single-quoted regex */
3022         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3023             goto default_action;
3024
3025         /* check for embedded arrays
3026            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3027            */
3028         else if (*s == '@' && s[1]) {
3029             if (isWORDCHAR_lazy_if(s+1,UTF))
3030                 break;
3031             if (strchr(":'{$", s[1]))
3032                 break;
3033             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3034                 break; /* in regexp, neither @+ nor @- are interpolated */
3035         }
3036
3037         /* check for embedded scalars.  only stop if we're sure it's a
3038            variable.
3039         */
3040         else if (*s == '$') {
3041             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3042                 break;
3043             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3044                 if (s[1] == '\\') {
3045                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3046                                    "Possible unintended interpolation of $\\ in regex");
3047                 }
3048                 break;          /* in regexp, $ might be tail anchor */
3049             }
3050         }
3051
3052         /* End of else if chain - OP_TRANS rejoin rest */
3053
3054         /* backslashes */
3055         if (*s == '\\' && s+1 < send) {
3056             char* e;    /* Can be used for ending '}', etc. */
3057
3058             s++;
3059
3060             /* warn on \1 - \9 in substitution replacements, but note that \11
3061              * is an octal; and \19 is \1 followed by '9' */
3062             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3063                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3064             {
3065                 /* diag_listed_as: \%d better written as $%d */
3066                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3067                 *--s = '$';
3068                 break;
3069             }
3070
3071             /* string-change backslash escapes */
3072             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3073                 --s;
3074                 break;
3075             }
3076             /* In a pattern, process \N, but skip any other backslash escapes.
3077              * This is because we don't want to translate an escape sequence
3078              * into a meta symbol and have the regex compiler use the meta
3079              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3080              * in spite of this, we do have to process \N here while the proper
3081              * charnames handler is in scope.  See bugs #56444 and #62056.
3082              * There is a complication because \N in a pattern may also stand
3083              * for 'match a non-nl', and not mean a charname, in which case its
3084              * processing should be deferred to the regex compiler.  To be a
3085              * charname it must be followed immediately by a '{', and not look
3086              * like \N followed by a curly quantifier, i.e., not something like
3087              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3088              * quantifier */
3089             else if (PL_lex_inpat
3090                     && (*s != 'N'
3091                         || s[1] != '{'
3092                         || regcurly(s + 1)))
3093             {
3094                 *d++ = '\\';
3095                 goto default_action;
3096             }
3097
3098             switch (*s) {
3099
3100             /* quoted - in transliterations */
3101             case '-':
3102                 if (PL_lex_inwhat == OP_TRANS) {
3103                     *d++ = *s++;
3104                     continue;
3105                 }
3106                 /* FALLTHROUGH */
3107             default:
3108                 {
3109                     if ((isALPHANUMERIC(*s)))
3110                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3111                                        "Unrecognized escape \\%c passed through",
3112                                        *s);
3113                     /* default action is to copy the quoted character */
3114                     goto default_action;
3115                 }
3116
3117             /* eg. \132 indicates the octal constant 0132 */
3118             case '0': case '1': case '2': case '3':
3119             case '4': case '5': case '6': case '7':
3120                 {
3121                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3122                     STRLEN len = 3;
3123                     uv = grok_oct(s, &len, &flags, NULL);
3124                     s += len;
3125                     if (len < 3 && s < send && isDIGIT(*s)
3126                         && ckWARN(WARN_MISC))
3127                     {
3128                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3129                                     "%s", form_short_octal_warning(s, len));
3130                     }
3131                 }
3132                 goto NUM_ESCAPE_INSERT;
3133
3134             /* eg. \o{24} indicates the octal constant \024 */
3135             case 'o':
3136                 {
3137                     const char* error;
3138
3139                     bool valid = grok_bslash_o(&s, &uv, &error,
3140                                                TRUE, /* Output warning */
3141                                                FALSE, /* Not strict */
3142                                                TRUE, /* Output warnings for
3143                                                          non-portables */
3144                                                UTF);
3145                     if (! valid) {
3146                         yyerror(error);
3147                         continue;
3148                     }
3149                     goto NUM_ESCAPE_INSERT;
3150                 }
3151
3152             /* eg. \x24 indicates the hex constant 0x24 */
3153             case 'x':
3154                 {
3155                     const char* error;
3156
3157                     bool valid = grok_bslash_x(&s, &uv, &error,
3158                                                TRUE, /* Output warning */
3159                                                FALSE, /* Not strict */
3160                                                TRUE,  /* Output warnings for
3161                                                          non-portables */
3162                                                UTF);
3163                     if (! valid) {
3164                         yyerror(error);
3165                         continue;
3166                     }
3167                 }
3168
3169               NUM_ESCAPE_INSERT:
3170                 /* Insert oct or hex escaped character.  There will always be
3171                  * enough room in sv since such escapes will be longer than any
3172                  * UTF-8 sequence they can end up as, except if they force us
3173                  * to recode the rest of the string into utf8 */
3174
3175                 /* Here uv is the ordinal of the next character being added */
3176                 if (!UVCHR_IS_INVARIANT(uv)) {
3177                     if (!has_utf8 && uv > 255) {
3178                         /* Might need to recode whatever we have accumulated so
3179                          * far if it contains any chars variant in utf8 or
3180                          * utf-ebcdic. */
3181
3182                         SvCUR_set(sv, d - SvPVX_const(sv));
3183                         SvPOK_on(sv);
3184                         *d = '\0';
3185                         /* See Note on sizing above.  */
3186                         sv_utf8_upgrade_flags_grow(sv,
3187                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3188                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3189                         d = SvPVX(sv) + SvCUR(sv);
3190                         has_utf8 = TRUE;
3191                     }
3192
3193                     if (has_utf8) {
3194                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3195                         if (PL_lex_inwhat == OP_TRANS &&
3196                             PL_sublex_info.sub_op) {
3197                             PL_sublex_info.sub_op->op_private |=
3198                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3199                                              : OPpTRANS_TO_UTF);
3200                         }
3201 #ifdef EBCDIC
3202                         if (uv > 255 && !dorange)
3203                             native_range = FALSE;
3204 #endif
3205                     }
3206                     else {
3207                         *d++ = (char)uv;
3208                     }
3209                 }
3210                 else {
3211                     *d++ = (char) uv;
3212                 }
3213                 continue;
3214
3215             case 'N':
3216                 /* In a non-pattern \N must be a named character, like \N{LATIN
3217                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3218                  * mean to match a non-newline.  For non-patterns, named
3219                  * characters are converted to their string equivalents. In
3220                  * patterns, named characters are not converted to their
3221                  * ultimate forms for the same reasons that other escapes
3222                  * aren't.  Instead, they are converted to the \N{U+...} form
3223                  * to get the value from the charnames that is in effect right
3224                  * now, while preserving the fact that it was a named character
3225                  * so that the regex compiler knows this */
3226
3227                 /* The structure of this section of code (besides checking for
3228                  * errors and upgrading to utf8) is:
3229                  *  Further disambiguate between the two meanings of \N, and if
3230                  *      not a charname, go process it elsewhere
3231                  *  If of form \N{U+...}, pass it through if a pattern;
3232                  *      otherwise convert to utf8
3233                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3234                  *  pattern; otherwise convert to utf8 */
3235
3236                 /* Here, s points to the 'N'; the test below is guaranteed to
3237                  * succeed if we are being called on a pattern as we already
3238                  * know from a test above that the next character is a '{'.
3239                  * On a non-pattern \N must mean 'named sequence, which
3240                  * requires braces */
3241                 s++;
3242                 if (*s != '{') {
3243                     yyerror("Missing braces on \\N{}");
3244                     continue;
3245                 }
3246                 s++;
3247
3248                 /* If there is no matching '}', it is an error. */
3249                 if (! (e = strchr(s, '}'))) {
3250                     if (! PL_lex_inpat) {
3251                         yyerror("Missing right brace on \\N{}");
3252                     } else {
3253                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3254                     }
3255                     continue;
3256                 }
3257
3258                 /* Here it looks like a named character */
3259
3260                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3261                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3262                                 | PERL_SCAN_DISALLOW_PREFIX;
3263                     STRLEN len;
3264
3265                     /* For \N{U+...}, the '...' is a unicode value even on
3266                      * EBCDIC machines */
3267                     s += 2;         /* Skip to next char after the 'U+' */
3268                     len = e - s;
3269                     uv = grok_hex(s, &len, &flags, NULL);
3270                     if (len == 0 || len != (STRLEN)(e - s)) {
3271                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3272                         s = e + 1;
3273                         continue;
3274                     }
3275
3276                     if (PL_lex_inpat) {
3277
3278                         /* On non-EBCDIC platforms, pass through to the regex
3279                          * compiler unchanged.  The reason we evaluated the
3280                          * number above is to make sure there wasn't a syntax
3281                          * error.  But on EBCDIC we convert to native so
3282                          * downstream code can continue to assume it's native
3283                          */
3284                         s -= 5;     /* Include the '\N{U+' */
3285 #ifdef EBCDIC
3286                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3287                                                                and the \0 */
3288                                     "\\N{U+%X}",
3289                                     (unsigned int) UNI_TO_NATIVE(uv));
3290 #else
3291                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3292                         d += e - s + 1;
3293 #endif
3294                     }
3295                     else {  /* Not a pattern: convert the hex to string */
3296
3297                          /* If destination is not in utf8, unconditionally
3298                           * recode it to be so.  This is because \N{} implies
3299                           * Unicode semantics, and scalars have to be in utf8
3300                           * to guarantee those semantics */
3301                         if (! has_utf8) {
3302                             SvCUR_set(sv, d - SvPVX_const(sv));
3303                             SvPOK_on(sv);
3304                             *d = '\0';
3305                             /* See Note on sizing above.  */
3306                             sv_utf8_upgrade_flags_grow(
3307                                         sv,
3308                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3309                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3310                             d = SvPVX(sv) + SvCUR(sv);
3311                             has_utf8 = TRUE;
3312                         }
3313
3314                         /* Add the (Unicode) code point to the output. */
3315                         if (UNI_IS_INVARIANT(uv)) {
3316                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3317                         }
3318                         else {
3319                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3320                         }
3321                     }
3322                 }
3323                 else /* Here is \N{NAME} but not \N{U+...}. */
3324                      if ((res = get_and_check_backslash_N_name(s, e)))
3325                 {
3326                     STRLEN len;
3327                     const char *str = SvPV_const(res, len);
3328                     if (PL_lex_inpat) {
3329
3330                         if (! len) { /* The name resolved to an empty string */
3331                             Copy("\\N{}", d, 4, char);
3332                             d += 4;
3333                         }
3334                         else {
3335                             /* In order to not lose information for the regex
3336                             * compiler, pass the result in the specially made
3337                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3338                             * the code points in hex of each character
3339                             * returned by charnames */
3340
3341                             const char *str_end = str + len;
3342                             const STRLEN off = d - SvPVX_const(sv);
3343
3344                             if (! SvUTF8(res)) {
3345                                 /* For the non-UTF-8 case, we can determine the
3346                                  * exact length needed without having to parse
3347                                  * through the string.  Each character takes up
3348                                  * 2 hex digits plus either a trailing dot or
3349                                  * the "}" */
3350                                 d = off + SvGROW(sv, off
3351                                                     + 3 * len
3352                                                     + 6 /* For the "\N{U+", and
3353                                                            trailing NUL */
3354                                                     + (STRLEN)(send - e));
3355                                 Copy("\\N{U+", d, 5, char);
3356                                 d += 5;
3357                                 while (str < str_end) {
3358                                     char hex_string[4];
3359                                     int len =
3360                                         my_snprintf(hex_string,
3361                                                     sizeof(hex_string),
3362                                                     "%02X.", (U8) *str);
3363                                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3364                                     Copy(hex_string, d, 3, char);
3365                                     d += 3;
3366                                     str++;
3367                                 }
3368                                 d--;    /* We will overwrite below the final
3369                                            dot with a right brace */
3370                             }
3371                             else {
3372                                 STRLEN char_length; /* cur char's byte length */
3373
3374                                 /* and the number of bytes after this is
3375                                  * translated into hex digits */
3376                                 STRLEN output_length;
3377
3378                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3379                                  * for max('U+', '.'); and 1 for NUL */
3380                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3381
3382                                 /* Get the first character of the result. */
3383                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3384                                                         len,
3385                                                         &char_length,
3386                                                         UTF8_ALLOW_ANYUV);
3387                                 /* Convert first code point to hex, including
3388                                  * the boiler plate before it. */
3389                                 output_length =
3390                                     my_snprintf(hex_string, sizeof(hex_string),
3391                                                 "\\N{U+%X",
3392                                                 (unsigned int) uv);
3393
3394                                 /* Make sure there is enough space to hold it */
3395                                 d = off + SvGROW(sv, off
3396                                                     + output_length
3397                                                     + (STRLEN)(send - e)
3398                                                     + 2);       /* '}' + NUL */
3399                                 /* And output it */
3400                                 Copy(hex_string, d, output_length, char);
3401                                 d += output_length;
3402
3403                                 /* For each subsequent character, append dot and
3404                                 * its ordinal in hex */
3405                                 while ((str += char_length) < str_end) {
3406                                     const STRLEN off = d - SvPVX_const(sv);
3407                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3408                                                             str_end - str,
3409                                                             &char_length,
3410                                                             UTF8_ALLOW_ANYUV);
3411                                     output_length =
3412                                         my_snprintf(hex_string,
3413                                                     sizeof(hex_string),
3414                                                     ".%X",
3415                                                     (unsigned int) uv);
3416
3417                                     d = off + SvGROW(sv, off
3418                                                         + output_length
3419                                                         + (STRLEN)(send - e)
3420                                                         + 2);   /* '}' +  NUL */
3421                                     Copy(hex_string, d, output_length, char);
3422                                     d += output_length;
3423                                 }
3424                             }
3425
3426                             *d++ = '}'; /* Done.  Add the trailing brace */
3427                         }
3428                     }
3429                     else { /* Here, not in a pattern.  Convert the name to a
3430                             * string. */
3431
3432                          /* If destination is not in utf8, unconditionally
3433                           * recode it to be so.  This is because \N{} implies
3434                           * Unicode semantics, and scalars have to be in utf8
3435                           * to guarantee those semantics */
3436                         if (! has_utf8) {
3437                             SvCUR_set(sv, d - SvPVX_const(sv));
3438                             SvPOK_on(sv);
3439                             *d = '\0';
3440                             /* See Note on sizing above.  */
3441                             sv_utf8_upgrade_flags_grow(sv,
3442                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3443                                                 len + (STRLEN)(send - s) + 1);
3444                             d = SvPVX(sv) + SvCUR(sv);
3445                             has_utf8 = TRUE;
3446                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3447
3448                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3449                              * set correctly here). */
3450                             const STRLEN off = d - SvPVX_const(sv);
3451                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3452                         }
3453                         if (! SvUTF8(res)) {    /* Make sure is \N{} return is UTF-8 */
3454                             sv_utf8_upgrade(res);
3455                             str = SvPV_const(res, len);
3456                         }
3457                         Copy(str, d, len, char);
3458                         d += len;
3459                     }
3460
3461                     SvREFCNT_dec(res);
3462
3463                 } /* End \N{NAME} */
3464 #ifdef EBCDIC
3465                 if (!dorange)
3466                     native_range = FALSE; /* \N{} is defined to be Unicode */
3467 #endif
3468                 s = e + 1;  /* Point to just after the '}' */
3469                 continue;
3470
3471             /* \c is a control character */
3472             case 'c':
3473                 s++;
3474                 if (s < send) {
3475                     *d++ = grok_bslash_c(*s++, 1);
3476                 }
3477                 else {
3478                     yyerror("Missing control char name in \\c");
3479                 }
3480                 continue;
3481
3482             /* printf-style backslashes, formfeeds, newlines, etc */
3483             case 'b':
3484                 *d++ = '\b';
3485                 break;
3486             case 'n':
3487                 *d++ = '\n';
3488                 break;
3489             case 'r':
3490                 *d++ = '\r';
3491                 break;
3492             case 'f':
3493                 *d++ = '\f';
3494                 break;
3495             case 't':
3496                 *d++ = '\t';
3497                 break;
3498             case 'e':
3499                 *d++ = ASCII_TO_NATIVE('\033');
3500                 break;
3501             case 'a':
3502                 *d++ = '\a';
3503                 break;
3504             } /* end switch */
3505
3506             s++;
3507             continue;
3508         } /* end if (backslash) */
3509 #ifdef EBCDIC
3510         else
3511             literal_endpoint++;
3512 #endif
3513
3514     default_action:
3515         /* If we started with encoded form, or already know we want it,
3516            then encode the next character */
3517         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3518             STRLEN len  = 1;
3519
3520
3521             /* One might think that it is wasted effort in the case of the
3522              * source being utf8 (this_utf8 == TRUE) to take the next character
3523              * in the source, convert it to an unsigned value, and then convert
3524              * it back again.  But the source has not been validated here.  The
3525              * routine that does the conversion checks for errors like
3526              * malformed utf8 */
3527
3528             const UV nextuv   = (this_utf8)
3529                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3530                                 : (UV) ((U8) *s);
3531             const STRLEN need = UNISKIP(nextuv);
3532             if (!has_utf8) {
3533                 SvCUR_set(sv, d - SvPVX_const(sv));
3534                 SvPOK_on(sv);
3535                 *d = '\0';
3536                 /* See Note on sizing above.  */
3537                 sv_utf8_upgrade_flags_grow(sv,
3538                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3539                                         need + (STRLEN)(send - s) + 1);
3540                 d = SvPVX(sv) + SvCUR(sv);
3541                 has_utf8 = TRUE;
3542             } else if (need > len) {
3543                 /* encoded value larger than old, may need extra space (NOTE:
3544                  * SvCUR() is not set correctly here).   See Note on sizing
3545                  * above.  */
3546                 const STRLEN off = d - SvPVX_const(sv);
3547                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3548             }
3549             s += len;
3550
3551             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3552 #ifdef EBCDIC
3553             if (uv > 255 && !dorange)
3554                 native_range = FALSE;
3555 #endif
3556         }
3557         else {
3558             *d++ = *s++;
3559         }
3560     } /* while loop to process each character */
3561
3562     /* terminate the string and set up the sv */
3563     *d = '\0';
3564     SvCUR_set(sv, d - SvPVX_const(sv));
3565     if (SvCUR(sv) >= SvLEN(sv))
3566         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3567                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3568
3569     SvPOK_on(sv);
3570     if (PL_encoding && !has_utf8) {
3571         sv_recode_to_utf8(sv, PL_encoding);
3572         if (SvUTF8(sv))
3573             has_utf8 = TRUE;
3574     }
3575     if (has_utf8) {
3576         SvUTF8_on(sv);
3577         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3578             PL_sublex_info.sub_op->op_private |=
3579                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3580         }
3581     }
3582
3583     /* shrink the sv if we allocated more than we used */
3584     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3585         SvPV_shrink_to_cur(sv);
3586     }
3587
3588     /* return the substring (via pl_yylval) only if we parsed anything */
3589     if (s > start) {
3590         char *s2 = start;
3591         for (; s2 < s; s2++) {
3592             if (*s2 == '\n')
3593                 COPLINE_INC_WITH_HERELINES;
3594         }
3595         SvREFCNT_inc_simple_void_NN(sv);
3596         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3597             && ! PL_parser->lex_re_reparsing)
3598         {
3599             const char *const key = PL_lex_inpat ? "qr" : "q";
3600             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3601             const char *type;
3602             STRLEN typelen;
3603
3604             if (PL_lex_inwhat == OP_TRANS) {
3605                 type = "tr";
3606                 typelen = 2;
3607             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3608                 type = "s";
3609                 typelen = 1;
3610             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3611                 type = "q";
3612                 typelen = 1;
3613             } else  {
3614                 type = "qq";
3615                 typelen = 2;
3616             }
3617
3618             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3619                                 type, typelen);
3620         }
3621         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3622     }
3623     LEAVE_with_name("scan_const");
3624     return s;
3625 }
3626
3627 /* S_intuit_more
3628  * Returns TRUE if there's more to the expression (e.g., a subscript),
3629  * FALSE otherwise.
3630  *
3631  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3632  *
3633  * ->[ and ->{ return TRUE
3634  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3635  * { and [ outside a pattern are always subscripts, so return TRUE
3636  * if we're outside a pattern and it's not { or [, then return FALSE
3637  * if we're in a pattern and the first char is a {
3638  *   {4,5} (any digits around the comma) returns FALSE
3639  * if we're in a pattern and the first char is a [
3640  *   [] returns FALSE
3641  *   [SOMETHING] has a funky algorithm to decide whether it's a
3642  *      character class or not.  It has to deal with things like
3643  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3644  * anything else returns TRUE
3645  */
3646
3647 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3648
3649 STATIC int
3650 S_intuit_more(pTHX_ char *s)
3651 {
3652     PERL_ARGS_ASSERT_INTUIT_MORE;
3653
3654     if (PL_lex_brackets)
3655         return TRUE;
3656     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3657         return TRUE;
3658     if (*s == '-' && s[1] == '>'
3659      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3660      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3661         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3662         return TRUE;
3663     if (*s != '{' && *s != '[')
3664         return FALSE;
3665     if (!PL_lex_inpat)
3666         return TRUE;
3667
3668     /* In a pattern, so maybe we have {n,m}. */
3669     if (*s == '{') {
3670         if (regcurly(s)) {
3671             return FALSE;
3672         }
3673         return TRUE;
3674     }
3675
3676     /* On the other hand, maybe we have a character class */
3677
3678     s++;
3679     if (*s == ']' || *s == '^')
3680         return FALSE;
3681     else {
3682         /* this is terrifying, and it works */
3683         int weight;
3684         char seen[256];
3685         const char * const send = strchr(s,']');
3686         unsigned char un_char, last_un_char;
3687         char tmpbuf[sizeof PL_tokenbuf * 4];
3688
3689         if (!send)              /* has to be an expression */
3690             return TRUE;
3691         weight = 2;             /* let's weigh the evidence */
3692
3693         if (*s == '$')
3694             weight -= 3;
3695         else if (isDIGIT(*s)) {
3696             if (s[1] != ']') {
3697                 if (isDIGIT(s[1]) && s[2] == ']')
3698                     weight -= 10;
3699             }
3700             else
3701                 weight -= 100;
3702         }
3703         Zero(seen,256,char);
3704         un_char = 255;
3705         for (; s < send; s++) {
3706             last_un_char = un_char;
3707             un_char = (unsigned char)*s;
3708             switch (*s) {
3709             case '@':
3710             case '&':
3711             case '$':
3712                 weight -= seen[un_char] * 10;
3713                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3714                     int len;
3715                     char *tmp = PL_bufend;
3716                     PL_bufend = (char*)send;
3717                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3718                     PL_bufend = tmp;
3719                     len = (int)strlen(tmpbuf);
3720                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3721                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3722                         weight -= 100;
3723                     else
3724                         weight -= 10;
3725                 }
3726                 else if (*s == '$' && s[1] &&
3727                   strchr("[#!%*<>()-=",s[1])) {
3728                     if (/*{*/ strchr("])} =",s[2]))
3729                         weight -= 10;
3730                     else
3731                         weight -= 1;
3732                 }
3733                 break;
3734             case '\\':
3735                 un_char = 254;
3736                 if (s[1]) {
3737                     if (strchr("wds]",s[1]))
3738                         weight += 100;
3739                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3740                         weight += 1;
3741                     else if (strchr("rnftbxcav",s[1]))
3742                         weight += 40;
3743                     else if (isDIGIT(s[1])) {
3744                         weight += 40;
3745                         while (s[1] && isDIGIT(s[1]))
3746                             s++;
3747                     }
3748                 }
3749                 else
3750                     weight += 100;
3751                 break;
3752             case '-':
3753                 if (s[1] == '\\')
3754                     weight += 50;
3755                 if (strchr("aA01! ",last_un_char))
3756                     weight += 30;
3757                 if (strchr("zZ79~",s[1]))
3758                     weight += 30;
3759                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3760                     weight -= 5;        /* cope with negative subscript */
3761                 break;
3762             default:
3763                 if (!isWORDCHAR(last_un_char)
3764                     && !(last_un_char == '$' || last_un_char == '@'
3765                          || last_un_char == '&')
3766                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3767                     char *d = tmpbuf;
3768                     while (isALPHA(*s))
3769                         *d++ = *s++;
3770                     *d = '\0';
3771                     if (keyword(tmpbuf, d - tmpbuf, 0))
3772                         weight -= 150;
3773                 }
3774                 if (un_char == last_un_char + 1)
3775                     weight += 5;
3776                 weight -= seen[un_char];
3777                 break;
3778             }
3779             seen[un_char]++;
3780         }
3781         if (weight >= 0)        /* probably a character class */
3782             return FALSE;
3783     }
3784
3785     return TRUE;
3786 }
3787
3788 /*
3789  * S_intuit_method
3790  *
3791  * Does all the checking to disambiguate
3792  *   foo bar
3793  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3794  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3795  *
3796  * First argument is the stuff after the first token, e.g. "bar".
3797  *
3798  * Not a method if foo is a filehandle.
3799  * Not a method if foo is a subroutine prototyped to take a filehandle.
3800  * Not a method if it's really "Foo $bar"
3801  * Method if it's "foo $bar"
3802  * Not a method if it's really "print foo $bar"
3803  * Method if it's really "foo package::" (interpreted as package->foo)
3804  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3805  * Not a method if bar is a filehandle or package, but is quoted with
3806  *   =>
3807  */
3808
3809 STATIC int
3810 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3811 {
3812     char *s = start + (*start == '$');
3813     char tmpbuf[sizeof PL_tokenbuf];
3814     STRLEN len;
3815     GV* indirgv;
3816
3817     PERL_ARGS_ASSERT_INTUIT_METHOD;
3818
3819     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3820             return 0;
3821     if (cv && SvPOK(cv)) {
3822         const char *proto = CvPROTO(cv);
3823         if (proto) {
3824             while (*proto && (isSPACE(*proto) || *proto == ';'))
3825                 proto++;
3826             if (*proto == '*')
3827                 return 0;
3828         }
3829     }
3830
3831     if (*start == '$') {
3832         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3833                 isUPPER(*PL_tokenbuf))
3834             return 0;
3835         s = PEEKSPACE(s);
3836         PL_bufptr = start;
3837         PL_expect = XREF;
3838         return *s == '(' ? FUNCMETH : METHOD;
3839     }
3840
3841     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3842     /* start is the beginning of the possible filehandle/object,
3843      * and s is the end of it
3844      * tmpbuf is a copy of it (but with single quotes as double colons)
3845      */
3846
3847     if (!keyword(tmpbuf, len, 0)) {
3848         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3849             len -= 2;
3850             tmpbuf[len] = '\0';
3851             goto bare_package;
3852         }
3853         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3854         if (indirgv && GvCVu(indirgv))
3855             return 0;
3856         /* filehandle or package name makes it a method */
3857         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3858             s = PEEKSPACE(s);
3859             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3860                 return 0;       /* no assumptions -- "=>" quotes bareword */
3861       bare_package:
3862             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3863                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3864             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3865             PL_expect = XTERM;
3866             force_next(WORD);
3867             PL_bufptr = s;
3868             return *s == '(' ? FUNCMETH : METHOD;
3869         }
3870     }
3871     return 0;
3872 }
3873
3874 /* Encoded script support. filter_add() effectively inserts a
3875  * 'pre-processing' function into the current source input stream.
3876  * Note that the filter function only applies to the current source file
3877  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3878  *
3879  * The datasv parameter (which may be NULL) can be used to pass
3880  * private data to this instance of the filter. The filter function
3881  * can recover the SV using the FILTER_DATA macro and use it to
3882  * store private buffers and state information.
3883  *
3884  * The supplied datasv parameter is upgraded to a PVIO type
3885  * and the IoDIRP/IoANY field is used to store the function pointer,
3886  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3887  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3888  * private use must be set using malloc'd pointers.
3889  */
3890
3891 SV *
3892 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3893 {
3894     if (!funcp)
3895         return NULL;
3896
3897     if (!PL_parser)
3898         return NULL;
3899
3900     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3901         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3902
3903     if (!PL_rsfp_filters)
3904         PL_rsfp_filters = newAV();
3905     if (!datasv)
3906         datasv = newSV(0);
3907     SvUPGRADE(datasv, SVt_PVIO);
3908     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3909     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3910     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3911                           FPTR2DPTR(void *, IoANY(datasv)),
3912                           SvPV_nolen(datasv)));
3913     av_unshift(PL_rsfp_filters, 1);
3914     av_store(PL_rsfp_filters, 0, datasv) ;
3915     if (
3916         !PL_parser->filtered
3917      && PL_parser->lex_flags & LEX_EVALBYTES
3918      && PL_bufptr < PL_bufend
3919     ) {
3920         const char *s = PL_bufptr;
3921         while (s < PL_bufend) {
3922             if (*s == '\n') {
3923                 SV *linestr = PL_parser->linestr;
3924                 char *buf = SvPVX(linestr);
3925                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3926                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3927                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3928                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3929                 STRLEN const last_uni_pos =
3930                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3931                 STRLEN const last_lop_pos =
3932                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3933                 av_push(PL_rsfp_filters, linestr);
3934                 PL_parser->linestr =
3935                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3936                 buf = SvPVX(PL_parser->linestr);
3937                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3938                 PL_parser->bufptr = buf + bufptr_pos;
3939                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3940                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3941                 PL_parser->linestart = buf + linestart_pos;
3942                 if (PL_parser->last_uni)
3943                     PL_parser->last_uni = buf + last_uni_pos;
3944                 if (PL_parser->last_lop)
3945                     PL_parser->last_lop = buf + last_lop_pos;
3946                 SvLEN(linestr) = SvCUR(linestr);
3947                 SvCUR(linestr) = s-SvPVX(linestr);
3948                 PL_parser->filtered = 1;
3949                 break;
3950             }
3951             s++;
3952         }
3953     }
3954     return(datasv);
3955 }
3956
3957
3958 /* Delete most recently added instance of this filter function. */
3959 void
3960 Perl_filter_del(pTHX_ filter_t funcp)
3961 {
3962     SV *datasv;
3963
3964     PERL_ARGS_ASSERT_FILTER_DEL;
3965
3966 #ifdef DEBUGGING
3967     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3968                           FPTR2DPTR(void*, funcp)));
3969 #endif
3970     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3971         return;
3972     /* if filter is on top of stack (usual case) just pop it off */
3973     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3974     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3975         sv_free(av_pop(PL_rsfp_filters));
3976
3977         return;
3978     }
3979     /* we need to search for the correct entry and clear it     */
3980     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3981 }
3982
3983
3984 /* Invoke the idxth filter function for the current rsfp.        */
3985 /* maxlen 0 = read one text line */
3986 I32
3987 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3988 {
3989     filter_t funcp;
3990     SV *datasv = NULL;
3991     /* This API is bad. It should have been using unsigned int for maxlen.
3992        Not sure if we want to change the API, but if not we should sanity
3993        check the value here.  */
3994     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
3995
3996     PERL_ARGS_ASSERT_FILTER_READ;
3997
3998     if (!PL_parser || !PL_rsfp_filters)
3999         return -1;
4000     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4001         /* Provide a default input filter to make life easy.    */
4002         /* Note that we append to the line. This is handy.      */
4003         DEBUG_P(PerlIO_printf(Perl_debug_log,
4004                               "filter_read %d: from rsfp\n", idx));
4005         if (correct_length) {
4006             /* Want a block */
4007             int len ;
4008             const int old_len = SvCUR(buf_sv);
4009
4010             /* ensure buf_sv is large enough */
4011             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4012             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4013                                    correct_length)) <= 0) {
4014                 if (PerlIO_error(PL_rsfp))
4015                     return -1;          /* error */
4016                 else
4017                     return 0 ;          /* end of file */
4018             }
4019             SvCUR_set(buf_sv, old_len + len) ;
4020             SvPVX(buf_sv)[old_len + len] = '\0';
4021         } else {
4022             /* Want a line */
4023             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4024                 if (PerlIO_error(PL_rsfp))
4025                     return -1;          /* error */
4026                 else
4027                     return 0 ;          /* end of file */
4028             }
4029         }
4030         return SvCUR(buf_sv);
4031     }
4032     /* Skip this filter slot if filter has been deleted */
4033     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4034         DEBUG_P(PerlIO_printf(Perl_debug_log,
4035                               "filter_read %d: skipped (filter deleted)\n",
4036                               idx));
4037         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4038     }
4039     if (SvTYPE(datasv) != SVt_PVIO) {
4040         if (correct_length) {
4041             /* Want a block */
4042             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4043             if (!remainder) return 0; /* eof */
4044             if (correct_length > remainder) correct_length = remainder;
4045             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4046             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4047         } else {
4048             /* Want a line */
4049             const char *s = SvEND(datasv);
4050             const char *send = SvPVX(datasv) + SvLEN(datasv);
4051             while (s < send) {
4052                 if (*s == '\n') {
4053                     s++;
4054                     break;
4055                 }
4056                 s++;
4057             }
4058             if (s == send) return 0; /* eof */
4059             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4060             SvCUR_set(datasv, s-SvPVX(datasv));
4061         }
4062         return SvCUR(buf_sv);
4063     }
4064     /* Get function pointer hidden within datasv        */
4065     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4066     DEBUG_P(PerlIO_printf(Perl_debug_log,
4067                           "filter_read %d: via function %p (%s)\n",
4068                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4069     /* Call function. The function is expected to       */
4070     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4071     /* Return: <0:error, =0:eof, >0:not eof             */
4072     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4073 }
4074
4075 STATIC char *
4076 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4077 {
4078     PERL_ARGS_ASSERT_FILTER_GETS;
4079
4080 #ifdef PERL_CR_FILTER
4081     if (!PL_rsfp_filters) {
4082         filter_add(S_cr_textfilter,NULL);
4083     }
4084 #endif
4085     if (PL_rsfp_filters) {
4086         if (!append)
4087             SvCUR_set(sv, 0);   /* start with empty line        */
4088         if (FILTER_READ(0, sv, 0) > 0)
4089             return ( SvPVX(sv) ) ;
4090         else
4091             return NULL ;
4092     }
4093     else
4094         return (sv_gets(sv, PL_rsfp, append));
4095 }
4096
4097 STATIC HV *
4098 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4099 {
4100     GV *gv;
4101
4102     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4103
4104     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4105         return PL_curstash;
4106
4107     if (len > 2 &&
4108         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4109         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4110     {
4111         return GvHV(gv);                        /* Foo:: */
4112     }
4113
4114     /* use constant CLASS => 'MyClass' */
4115     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4116     if (gv && GvCV(gv)) {
4117         SV * const sv = cv_const_sv(GvCV(gv));
4118         if (sv)
4119             pkgname = SvPV_const(sv, len);
4120     }
4121
4122     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4123 }
4124
4125
4126 STATIC char *
4127 S_tokenize_use(pTHX_ int is_use, char *s) {
4128     PERL_ARGS_ASSERT_TOKENIZE_USE;
4129
4130     if (PL_expect != XSTATE)
4131         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4132                     is_use ? "use" : "no"));
4133     PL_expect = XTERM;
4134     s = SKIPSPACE1(s);
4135     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4136         s = force_version(s, TRUE);
4137         if (*s == ';' || *s == '}'
4138                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4139             NEXTVAL_NEXTTOKE.opval = NULL;
4140             force_next(WORD);
4141         }
4142         else if (*s == 'v') {
4143             s = force_word(s,WORD,FALSE,TRUE);
4144             s = force_version(s, FALSE);
4145         }
4146     }
4147     else {
4148         s = force_word(s,WORD,FALSE,TRUE);
4149         s = force_version(s, FALSE);
4150     }
4151     pl_yylval.ival = is_use;
4152     return s;
4153 }
4154 #ifdef DEBUGGING
4155     static const char* const exp_name[] =
4156         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4157           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4158           "TERMORDORDOR"
4159         };
4160 #endif
4161
4162 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4163 STATIC bool
4164 S_word_takes_any_delimeter(char *p, STRLEN len)
4165 {
4166     return (len == 1 && strchr("msyq", p[0])) ||
4167            (len == 2 && (
4168             (p[0] == 't' && p[1] == 'r') ||
4169             (p[0] == 'q' && strchr("qwxr", p[1]))));
4170 }
4171
4172 static void
4173 S_check_scalar_slice(pTHX_ char *s)
4174 {
4175     s++;
4176     while (*s == ' ' || *s == '\t') s++;
4177     if (*s == 'q' && s[1] == 'w'
4178      && !isWORDCHAR_lazy_if(s+2,UTF))
4179         return;
4180     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4181         s += UTF ? UTF8SKIP(s) : 1;
4182     if (*s == '}' || *s == ']')
4183         pl_yylval.ival = OPpSLICEWARNING;
4184 }
4185
4186 /*
4187   yylex
4188
4189   Works out what to call the token just pulled out of the input
4190   stream.  The yacc parser takes care of taking the ops we return and
4191   stitching them into a tree.
4192
4193   Returns:
4194     The type of the next token
4195
4196   Structure:
4197       Switch based on the current state:
4198           - if we already built the token before, use it
4199           - if we have a case modifier in a string, deal with that
4200           - handle other cases of interpolation inside a string
4201           - scan the next line if we are inside a format
4202       In the normal state switch on the next character:
4203           - default:
4204             if alphabetic, go to key lookup
4205             unrecoginized character - croak
4206           - 0/4/26: handle end-of-line or EOF
4207           - cases for whitespace
4208           - \n and #: handle comments and line numbers
4209           - various operators, brackets and sigils
4210           - numbers
4211           - quotes
4212           - 'v': vstrings (or go to key lookup)
4213           - 'x' repetition operator (or go to key lookup)
4214           - other ASCII alphanumerics (key lookup begins here):
4215               word before => ?
4216               keyword plugin
4217               scan built-in keyword (but do nothing with it yet)
4218               check for statement label
4219               check for lexical subs
4220                   goto just_a_word if there is one
4221               see whether built-in keyword is overridden
4222               switch on keyword number:
4223                   - default: just_a_word:
4224                       not a built-in keyword; handle bareword lookup
4225                       disambiguate between method and sub call
4226                       fall back to bareword
4227                   - cases for built-in keywords
4228 */
4229
4230
4231 int
4232 Perl_yylex(pTHX)
4233 {
4234     dVAR;
4235     char *s = PL_bufptr;
4236     char *d;
4237     STRLEN len;
4238     bool bof = FALSE;
4239     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4240     U8 formbrack = 0;
4241     U32 fake_eof = 0;
4242
4243     /* orig_keyword, gvp, and gv are initialized here because
4244      * jump to the label just_a_word_zero can bypass their
4245      * initialization later. */
4246     I32 orig_keyword = 0;
4247     GV *gv = NULL;
4248     GV **gvp = NULL;
4249
4250     DEBUG_T( {
4251         SV* tmp = newSVpvs("");
4252         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4253             (IV)CopLINE(PL_curcop),
4254             lex_state_names[PL_lex_state],
4255             exp_name[PL_expect],
4256             pv_display(tmp, s, strlen(s), 0, 60));
4257         SvREFCNT_dec(tmp);
4258     } );
4259
4260     switch (PL_lex_state) {
4261     case LEX_NORMAL:
4262     case LEX_INTERPNORMAL:
4263         break;
4264
4265     /* when we've already built the next token, just pull it out of the queue */
4266     case LEX_KNOWNEXT:
4267         PL_nexttoke--;
4268         pl_yylval = PL_nextval[PL_nexttoke];
4269         if (!PL_nexttoke) {
4270             PL_lex_state = PL_lex_defer;
4271             PL_expect = PL_lex_expect;
4272             PL_lex_defer = LEX_NORMAL;
4273         }
4274         {
4275             I32 next_type;
4276             next_type = PL_nexttype[PL_nexttoke];
4277             if (next_type & (7<<24)) {
4278                 if (next_type & (1<<24)) {
4279                     if (PL_lex_brackets > 100)
4280                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4281                     PL_lex_brackstack[PL_lex_brackets++] =
4282                         (char) ((next_type >> 16) & 0xff);
4283                 }
4284                 if (next_type & (2<<24))
4285                     PL_lex_allbrackets++;
4286                 if (next_type & (4<<24))
4287                     PL_lex_allbrackets--;
4288                 next_type &= 0xffff;
4289             }
4290             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4291         }
4292
4293     /* interpolated case modifiers like \L \U, including \Q and \E.
4294        when we get here, PL_bufptr is at the \
4295     */
4296     case LEX_INTERPCASEMOD:
4297 #ifdef DEBUGGING
4298         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4299             Perl_croak(aTHX_
4300                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4301                        PL_bufptr, PL_bufend, *PL_bufptr);
4302 #endif
4303         /* handle \E or end of string */
4304         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4305             /* if at a \E */
4306             if (PL_lex_casemods) {
4307                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4308                 PL_lex_casestack[PL_lex_casemods] = '\0';
4309
4310                 if (PL_bufptr != PL_bufend
4311                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4312                         || oldmod == 'F')) {
4313                     PL_bufptr += 2;
4314                     PL_lex_state = LEX_INTERPCONCAT;
4315                 }
4316                 PL_lex_allbrackets--;
4317                 return REPORT(')');
4318             }
4319             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4320                /* Got an unpaired \E */
4321                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4322                         "Useless use of \\E");
4323             }
4324             if (PL_bufptr != PL_bufend)
4325                 PL_bufptr += 2;
4326             PL_lex_state = LEX_INTERPCONCAT;
4327             return yylex();
4328         }
4329         else {
4330             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4331               "### Saw case modifier\n"); });
4332             s = PL_bufptr + 1;
4333             if (s[1] == '\\' && s[2] == 'E') {
4334                 PL_bufptr = s + 3;
4335                 PL_lex_state = LEX_INTERPCONCAT;
4336                 return yylex();
4337             }
4338             else {
4339                 I32 tmp;
4340                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4341                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4342                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4343                     (strchr(PL_lex_casestack, 'L')
4344                         || strchr(PL_lex_casestack, 'U')
4345                         || strchr(PL_lex_casestack, 'F'))) {
4346                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4347                     PL_lex_allbrackets--;
4348                     return REPORT(')');
4349                 }
4350                 if (PL_lex_casemods > 10)
4351                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4352                 PL_lex_casestack[PL_lex_casemods++] = *s;
4353                 PL_lex_casestack[PL_lex_casemods] = '\0';
4354                 PL_lex_state = LEX_INTERPCONCAT;
4355                 NEXTVAL_NEXTTOKE.ival = 0;
4356                 force_next((2<<24)|'(');
4357                 if (*s == 'l')
4358                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4359                 else if (*s == 'u')
4360                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4361                 else if (*s == 'L')
4362                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4363                 else if (*s == 'U')
4364                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4365                 else if (*s == 'Q')
4366                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4367                 else if (*s == 'F')
4368                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4369                 else
4370                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4371                 PL_bufptr = s + 1;
4372             }
4373             force_next(FUNC);
4374             if (PL_lex_starts) {
4375                 s = PL_bufptr;
4376                 PL_lex_starts = 0;
4377                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4378                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4379                     OPERATOR(',');
4380                 else
4381                     Aop(OP_CONCAT);
4382             }
4383             else
4384                 return yylex();
4385         }
4386
4387     case LEX_INTERPPUSH:
4388         return REPORT(sublex_push());
4389
4390     case LEX_INTERPSTART:
4391         if (PL_bufptr == PL_bufend)
4392             return REPORT(sublex_done());
4393         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4394               "### Interpolated variable\n"); });
4395         PL_expect = XTERM;
4396         /* for /@a/, we leave the joining for the regex engine to do
4397          * (unless we're within \Q etc) */
4398         PL_lex_dojoin = (*PL_bufptr == '@'
4399                             && (!PL_lex_inpat || PL_lex_casemods));
4400         PL_lex_state = LEX_INTERPNORMAL;
4401         if (PL_lex_dojoin) {
4402             NEXTVAL_NEXTTOKE.ival = 0;
4403             force_next(',');
4404             force_ident("\"", '$');
4405             NEXTVAL_NEXTTOKE.ival = 0;
4406             force_next('$');
4407             NEXTVAL_NEXTTOKE.ival = 0;
4408             force_next((2<<24)|'(');
4409             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4410             force_next(FUNC);
4411         }
4412         /* Convert (?{...}) and friends to 'do {...}' */
4413         if (PL_lex_inpat && *PL_bufptr == '(') {
4414             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4415             PL_bufptr += 2;
4416             if (*PL_bufptr != '{')
4417                 PL_bufptr++;
4418             PL_expect = XTERMBLOCK;
4419             force_next(DO);
4420         }
4421
4422         if (PL_lex_starts++) {
4423             s = PL_bufptr;
4424             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4425             if (!PL_lex_casemods && PL_lex_inpat)
4426                 OPERATOR(',');
4427             else
4428                 Aop(OP_CONCAT);
4429         }
4430         return yylex();
4431
4432     case LEX_INTERPENDMAYBE:
4433         if (intuit_more(PL_bufptr)) {
4434             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4435             break;
4436         }
4437         /* FALLTHROUGH */
4438
4439     case LEX_INTERPEND:
4440         if (PL_lex_dojoin) {
4441             const U8 dojoin_was = PL_lex_dojoin;
4442             PL_lex_dojoin = FALSE;
4443             PL_lex_state = LEX_INTERPCONCAT;
4444             PL_lex_allbrackets--;
4445             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4446         }
4447         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4448             && SvEVALED(PL_lex_repl))
4449         {
4450             if (PL_bufptr != PL_bufend)
4451                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4452             PL_lex_repl = NULL;
4453         }
4454         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4455            re_eval_str.  If the here-doc body’s length equals the previous
4456            value of re_eval_start, re_eval_start will now be null.  So
4457            check re_eval_str as well. */
4458         if (PL_parser->lex_shared->re_eval_start
4459          || PL_parser->lex_shared->re_eval_str) {
4460             SV *sv;
4461             if (*PL_bufptr != ')')
4462                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4463             PL_bufptr++;
4464             /* having compiled a (?{..}) expression, return the original
4465              * text too, as a const */
4466             if (PL_parser->lex_shared->re_eval_str) {
4467                 sv = PL_parser->lex_shared->re_eval_str;
4468                 PL_parser->lex_shared->re_eval_str = NULL;
4469                 SvCUR_set(sv,
4470                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4471                 SvPV_shrink_to_cur(sv);
4472             }
4473             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4474                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4475             NEXTVAL_NEXTTOKE.opval =
4476                     (OP*)newSVOP(OP_CONST, 0,
4477                                  sv);
4478             force_next(THING);
4479             PL_parser->lex_shared->re_eval_start = NULL;
4480             PL_expect = XTERM;
4481             return REPORT(',');
4482         }
4483
4484         /* FALLTHROUGH */
4485     case LEX_INTERPCONCAT:
4486 #ifdef DEBUGGING
4487         if (PL_lex_brackets)
4488             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4489                        (long) PL_lex_brackets);
4490 #endif
4491         if (PL_bufptr == PL_bufend)
4492             return REPORT(sublex_done());
4493
4494         /* m'foo' still needs to be parsed for possible (?{...}) */
4495         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4496             SV *sv = newSVsv(PL_linestr);
4497             sv = tokeq(sv);
4498             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4499             s = PL_bufend;
4500         }
4501         else {
4502             s = scan_const(PL_bufptr);
4503             if (*s == '\\')
4504                 PL_lex_state = LEX_INTERPCASEMOD;
4505             else
4506                 PL_lex_state = LEX_INTERPSTART;
4507         }
4508
4509         if (s != PL_bufptr) {
4510             NEXTVAL_NEXTTOKE = pl_yylval;
4511             PL_expect = XTERM;
4512             force_next(THING);
4513             if (PL_lex_starts++) {
4514                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4515                 if (!PL_lex_casemods && PL_lex_inpat)
4516                     OPERATOR(',');
4517                 else
4518                     Aop(OP_CONCAT);
4519             }
4520             else {
4521                 PL_bufptr = s;
4522                 return yylex();
4523             }
4524         }
4525
4526         return yylex();
4527     case LEX_FORMLINE:
4528         s = scan_formline(PL_bufptr);
4529         if (!PL_lex_formbrack)
4530         {
4531             formbrack = 1;
4532             goto rightbracket;
4533         }
4534         PL_bufptr = s;
4535         return yylex();
4536     }
4537
4538     /* We really do *not* want PL_linestr ever becoming a COW. */
4539     assert (!SvIsCOW(PL_linestr));
4540     s = PL_bufptr;
4541     PL_oldoldbufptr = PL_oldbufptr;
4542     PL_oldbufptr = s;
4543     PL_parser->saw_infix_sigil = 0;
4544
4545   retry:
4546     switch (*s) {
4547     default:
4548         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4549             goto keylookup;
4550         {
4551         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4552         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4553                                                     UTF8SKIP(s),
4554                                                     SVs_TEMP | SVf_UTF8),
4555                                             10, UNI_DISPLAY_ISPRINT)
4556                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4557         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4558         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4559             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4560         } else {
4561             d = PL_linestart;
4562         }
4563         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4564                           UTF8fARG(UTF, (s - d), d),
4565                          (int) len + 1);
4566     }
4567     case 4:
4568     case 26:
4569         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4570     case 0:
4571         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4572             PL_last_uni = 0;
4573             PL_last_lop = 0;
4574             if (PL_lex_brackets &&
4575                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4576                 yyerror((const char *)
4577                         (PL_lex_formbrack
4578                          ? "Format not terminated"
4579                          : "Missing right curly or square bracket"));
4580             }
4581             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4582                         "### Tokener got EOF\n");
4583             } );
4584             TOKEN(0);
4585         }
4586         if (s++ < PL_bufend)
4587             goto retry;                 /* ignore stray nulls */
4588         PL_last_uni = 0;
4589         PL_last_lop = 0;
4590         if (!PL_in_eval && !PL_preambled) {
4591             PL_preambled = TRUE;
4592             if (PL_perldb) {
4593                 /* Generate a string of Perl code to load the debugger.
4594                  * If PERL5DB is set, it will return the contents of that,
4595                  * otherwise a compile-time require of perl5db.pl.  */
4596
4597                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4598
4599                 if (pdb) {
4600                     sv_setpv(PL_linestr, pdb);
4601                     sv_catpvs(PL_linestr,";");
4602                 } else {
4603                     SETERRNO(0,SS_NORMAL);
4604                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4605                 }
4606                 PL_parser->preambling = CopLINE(PL_curcop);
4607             } else
4608                 sv_setpvs(PL_linestr,"");
4609             if (PL_preambleav) {
4610                 SV **svp = AvARRAY(PL_preambleav);
4611                 SV **const end = svp + AvFILLp(PL_preambleav);
4612                 while(svp <= end) {
4613                     sv_catsv(PL_linestr, *svp);
4614                     ++svp;
4615                     sv_catpvs(PL_linestr, ";");
4616                 }
4617                 sv_free(MUTABLE_SV(PL_preambleav));
4618                 PL_preambleav = NULL;
4619             }
4620             if (PL_minus_E)
4621                 sv_catpvs(PL_linestr,
4622                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4623             if (PL_minus_n || PL_minus_p) {
4624                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4625                 if (PL_minus_l)
4626                     sv_catpvs(PL_linestr,"chomp;");
4627                 if (PL_minus_a) {
4628                     if (PL_minus_F) {
4629                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4630                              || *PL_splitstr == '"')
4631                               && strchr(PL_splitstr + 1, *PL_splitstr))
4632                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4633                         else {
4634                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4635                                bytes can be used as quoting characters.  :-) */
4636                             const char *splits = PL_splitstr;
4637                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4638                             do {
4639                                 /* Need to \ \s  */
4640                                 if (*splits == '\\')
4641                                     sv_catpvn(PL_linestr, splits, 1);
4642                                 sv_catpvn(PL_linestr, splits, 1);
4643                             } while (*splits++);
4644                             /* This loop will embed the trailing NUL of
4645                                PL_linestr as the last thing it does before
4646                                terminating.  */
4647                             sv_catpvs(PL_linestr, ");");
4648                         }
4649                     }
4650                     else
4651                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4652                 }
4653             }
4654             sv_catpvs(PL_linestr, "\n");
4655             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4656             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4657             PL_last_lop = PL_last_uni = NULL;
4658             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4659                 update_debugger_info(PL_linestr, NULL, 0);
4660             goto retry;
4661         }
4662         do {
4663             fake_eof = 0;
4664             bof = PL_rsfp ? TRUE : FALSE;
4665             if (0) {
4666               fake_eof:
4667                 fake_eof = LEX_FAKE_EOF;
4668             }
4669             PL_bufptr = PL_bufend;
4670             COPLINE_INC_WITH_HERELINES;
4671             if (!lex_next_chunk(fake_eof)) {
4672                 CopLINE_dec(PL_curcop);
4673                 s = PL_bufptr;
4674                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4675             }
4676             CopLINE_dec(PL_curcop);
4677             s = PL_bufptr;
4678             /* If it looks like the start of a BOM or raw UTF-16,
4679              * check if it in fact is. */
4680             if (bof && PL_rsfp &&
4681                      (*s == 0 ||
4682                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4683                       *(U8*)s >= 0xFE ||
4684                       s[1] == 0)) {
4685                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4686                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4687 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4688                 /* offset may include swallowed CR */
4689                 if (!bof)
4690                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4691 #endif
4692                 if (bof) {
4693                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4694                     s = swallow_bom((U8*)s);
4695                 }
4696             }
4697             if (PL_parser->in_pod) {
4698                 /* Incest with pod. */
4699                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4700                     sv_setpvs(PL_linestr, "");
4701                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4702                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4703                     PL_last_lop = PL_last_uni = NULL;
4704                     PL_parser->in_pod = 0;
4705                 }
4706             }
4707             if (PL_rsfp || PL_parser->filtered)
4708                 incline(s);
4709         } while (PL_parser->in_pod);
4710         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4711         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4712         PL_last_lop = PL_last_uni = NULL;
4713         if (CopLINE(PL_curcop) == 1) {
4714             while (s < PL_bufend && isSPACE(*s))
4715                 s++;
4716             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4717                 s++;
4718             d = NULL;
4719             if (!PL_in_eval) {
4720                 if (*s == '#' && *(s+1) == '!')
4721                     d = s + 2;
4722 #ifdef ALTERNATE_SHEBANG
4723                 else {
4724                     static char const as[] = ALTERNATE_SHEBANG;
4725                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4726                         d = s + (sizeof(as) - 1);
4727                 }
4728 #endif /* ALTERNATE_SHEBANG */
4729             }
4730             if (d) {
4731                 char *ipath;
4732                 char *ipathend;
4733
4734                 while (isSPACE(*d))
4735                     d++;
4736                 ipath = d;
4737                 while (*d && !isSPACE(*d))
4738                     d++;
4739                 ipathend = d;
4740
4741 #ifdef ARG_ZERO_IS_SCRIPT
4742                 if (ipathend > ipath) {
4743                     /*
4744                      * HP-UX (at least) sets argv[0] to the script name,
4745                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4746                      * at least, set argv[0] to the basename of the Perl
4747                      * interpreter. So, having found "#!", we'll set it right.
4748                      */
4749                     SV* copfilesv = CopFILESV(PL_curcop);
4750                     if (copfilesv) {
4751                         SV * const x =
4752                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4753                                              SVt_PV)); /* $^X */
4754                         assert(SvPOK(x) || SvGMAGICAL(x));
4755                         if (sv_eq(x, copfilesv)) {
4756                             sv_setpvn(x, ipath, ipathend - ipath);
4757                             SvSETMAGIC(x);
4758                         }
4759                         else {
4760                             STRLEN blen;
4761                             STRLEN llen;
4762                             const char *bstart = SvPV_const(copfilesv, blen);
4763                             const char * const lstart = SvPV_const(x, llen);
4764                             if (llen < blen) {
4765                                 bstart += blen - llen;
4766                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
4767                                     sv_setpvn(x, ipath, ipathend - ipath);
4768                                     SvSETMAGIC(x);
4769                                 }
4770                             }
4771                         }
4772                     }
4773                     else {
4774                         /* Anything to do if no copfilesv? */
4775                     }
4776                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4777                 }
4778 #endif /* ARG_ZERO_IS_SCRIPT */
4779
4780                 /*
4781                  * Look for options.
4782                  */
4783                 d = instr(s,"perl -");
4784                 if (!d) {
4785                     d = instr(s,"perl");
4786 #if defined(DOSISH)
4787                     /* avoid getting into infinite loops when shebang
4788                      * line contains "Perl" rather than "perl" */
4789                     if (!d) {
4790                         for (d = ipathend-4; d >= ipath; --d) {
4791                             if ((*d == 'p' || *d == 'P')
4792                                 && !ibcmp(d, "perl", 4))
4793                             {
4794                                 break;
4795                             }
4796                         }
4797                         if (d < ipath)
4798                             d = NULL;
4799                     }
4800 #endif
4801                 }
4802 #ifdef ALTERNATE_SHEBANG
4803                 /*
4804                  * If the ALTERNATE_SHEBANG on this system starts with a
4805                  * character that can be part of a Perl expression, then if
4806                  * we see it but not "perl", we're probably looking at the
4807                  * start of Perl code, not a request to hand off to some
4808                  * other interpreter.  Similarly, if "perl" is there, but
4809                  * not in the first 'word' of the line, we assume the line
4810                  * contains the start of the Perl program.
4811                  */
4812                 if (d && *s != '#') {
4813                     const char *c = ipath;
4814                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4815                         c++;
4816                     if (c < d)
4817                         d = NULL;       /* "perl" not in first word; ignore */
4818                     else
4819                         *s = '#';       /* Don't try to parse shebang line */
4820                 }
4821 #endif /* ALTERNATE_SHEBANG */
4822                 if (!d &&
4823                     *s == '#' &&
4824                     ipathend > ipath &&
4825                     !PL_minus_c &&
4826                     !instr(s,"indir") &&
4827                     instr(PL_origargv[0],"perl"))
4828                 {
4829                     dVAR;
4830                     char **newargv;
4831
4832                     *ipathend = '\0';
4833                     s = ipathend + 1;
4834                     while (s < PL_bufend && isSPACE(*s))
4835                         s++;
4836                     if (s < PL_bufend) {
4837                         Newx(newargv,PL_origargc+3,char*);
4838                         newargv[1] = s;
4839                         while (s < PL_bufend && !isSPACE(*s))
4840                             s++;
4841                         *s = '\0';
4842                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4843                     }
4844                     else
4845                         newargv = PL_origargv;
4846                     newargv[0] = ipath;
4847                     PERL_FPU_PRE_EXEC
4848                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4849                     PERL_FPU_POST_EXEC
4850                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4851                 }
4852                 if (d) {
4853                     while (*d && !isSPACE(*d))
4854                         d++;
4855                     while (SPACE_OR_TAB(*d))
4856                         d++;
4857
4858                     if (*d++ == '-') {
4859                         const bool switches_done = PL_doswitches;
4860                         const U32 oldpdb = PL_perldb;
4861                         const bool oldn = PL_minus_n;
4862                         const bool oldp = PL_minus_p;
4863                         const char *d1 = d;
4864
4865                         do {
4866                             bool baduni = FALSE;
4867                             if (*d1 == 'C') {
4868                                 const char *d2 = d1 + 1;
4869                                 if (parse_unicode_opts((const char **)&d2)
4870                                     != PL_unicode)
4871                                     baduni = TRUE;
4872                             }
4873                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4874                                 const char * const m = d1;
4875                                 while (*d1 && !isSPACE(*d1))
4876                                     d1++;
4877                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4878                                       (int)(d1 - m), m);
4879                             }
4880                             d1 = moreswitches(d1);
4881                         } while (d1);
4882                         if (PL_doswitches && !switches_done) {
4883                             int argc = PL_origargc;
4884                             char **argv = PL_origargv;
4885                             do {
4886                                 argc--,argv++;
4887                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4888                             init_argv_symbols(argc,argv);
4889                         }
4890                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4891                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4892                               /* if we have already added "LINE: while (<>) {",
4893                                  we must not do it again */
4894                         {
4895                             sv_setpvs(PL_linestr, "");
4896                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4897                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4898                             PL_last_lop = PL_last_uni = NULL;
4899                             PL_preambled = FALSE;
4900                             if (PERLDB_LINE || PERLDB_SAVESRC)
4901                                 (void)gv_fetchfile(PL_origfilename);
4902                             goto retry;
4903                         }
4904                     }
4905                 }
4906             }
4907         }
4908         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4909             PL_lex_state = LEX_FORMLINE;
4910             NEXTVAL_NEXTTOKE.ival = 0;
4911             force_next(FORMRBRACK);
4912             TOKEN(';');
4913         }
4914         goto retry;
4915     case '\r':
4916 #ifdef PERL_STRICT_CR
4917         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4918         Perl_croak(aTHX_
4919       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4920 #endif
4921     case ' ': case '\t': case '\f': case 013:
4922         s++;
4923         goto retry;
4924     case '#':
4925     case '\n':
4926         if (PL_lex_state != LEX_NORMAL ||
4927              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4928             const bool in_comment = *s == '#';
4929             if (*s == '#' && s == PL_linestart && PL_in_eval
4930              && !PL_rsfp && !PL_parser->filtered) {
4931                 /* handle eval qq[#line 1 "foo"\n ...] */
4932                 CopLINE_dec(PL_curcop);
4933                 incline(s);
4934             }
4935             d = s;
4936             while (d < PL_bufend && *d != '\n')
4937                 d++;
4938             if (d < PL_bufend)
4939                 d++;
4940             else if (d > PL_bufend)
4941                 /* Found by Ilya: feed random input to Perl. */
4942                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
4943                            d, PL_bufend);
4944             s = d;
4945             if (in_comment && d == PL_bufend
4946                 && PL_lex_state == LEX_INTERPNORMAL
4947                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
4948                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
4949             else
4950                 incline(s);
4951             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4952                 PL_lex_state = LEX_FORMLINE;
4953                 NEXTVAL_NEXTTOKE.ival = 0;
4954                 force_next(FORMRBRACK);
4955                 TOKEN(';');
4956             }
4957         }
4958         else {
4959             while (s < PL_bufend && *s != '\n')
4960                 s++;
4961             if (s < PL_bufend)
4962                 {
4963                     s++;
4964                     if (s < PL_bufend)
4965                         incline(s);
4966                 }
4967             else if (s > PL_bufend)
4968                 /* Found by Ilya: feed random input to Perl. */
4969                 Perl_croak(aTHX_ "panic: input overflow");
4970         }
4971         goto retry;
4972     case '-':
4973         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
4974             I32 ftst = 0;
4975             char tmp;
4976
4977             s++;
4978             PL_bufptr = s;
4979             tmp = *s++;
4980
4981             while (s < PL_bufend && SPACE_OR_TAB(*s))
4982                 s++;
4983
4984             if (strnEQ(s,"=>",2)) {
4985                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
4986                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4987                 OPERATOR('-');          /* unary minus */
4988             }
4989             switch (tmp) {
4990             case 'r': ftst = OP_FTEREAD;        break;
4991             case 'w': ftst = OP_FTEWRITE;       break;
4992             case 'x': ftst = OP_FTEEXEC;        break;
4993             case 'o': ftst = OP_FTEOWNED;       break;
4994             case 'R': ftst = OP_FTRREAD;        break;
4995             case 'W': ftst = OP_FTRWRITE;       break;
4996             case 'X': ftst = OP_FTREXEC;        break;
4997             case 'O': ftst = OP_FTROWNED;       break;
4998             case 'e': ftst = OP_FTIS;           break;
4999             case 'z': ftst = OP_FTZERO;         break;
5000             case 's': ftst = OP_FTSIZE;         break;
5001             case 'f': ftst = OP_FTFILE;         break;
5002             case 'd': ftst = OP_FTDIR;          break;
5003             case 'l': ftst = OP_FTLINK;         break;
5004             case 'p': ftst = OP_FTPIPE;         break;
5005             case 'S': ftst = OP_FTSOCK;         break;
5006             case 'u': ftst = OP_FTSUID;         break;
5007             case 'g': ftst = OP_FTSGID;         break;
5008             case 'k': ftst = OP_FTSVTX;         break;
5009             case 'b': ftst = OP_FTBLK;          break;
5010             case 'c': ftst = OP_FTCHR;          break;
5011             case 't': ftst = OP_FTTTY;          break;
5012             case 'T': ftst = OP_FTTEXT;         break;
5013             case 'B': ftst = OP_FTBINARY;       break;
5014             case 'M': case 'A': case 'C':
5015                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5016                 switch (tmp) {
5017                 case 'M': ftst = OP_FTMTIME;    break;
5018                 case 'A': ftst = OP_FTATIME;    break;
5019                 case 'C': ftst = OP_FTCTIME;    break;
5020                 default:                        break;
5021                 }
5022                 break;
5023             default:
5024                 break;
5025             }
5026             if (ftst) {
5027                 PL_last_uni = PL_oldbufptr;
5028                 PL_last_lop_op = (OPCODE)ftst;
5029                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5030                         "### Saw file test %c\n", (int)tmp);
5031                 } );
5032                 FTST(ftst);
5033             }
5034             else {
5035                 /* Assume it was a minus followed by a one-letter named
5036                  * subroutine call (or a -bareword), then. */
5037                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5038                         "### '-%c' looked like a file test but was not\n",
5039                         (int) tmp);
5040                 } );
5041                 s = --PL_bufptr;
5042             }
5043         }
5044         {
5045             const char tmp = *s++;
5046             if (*s == tmp) {
5047                 s++;
5048                 if (PL_expect == XOPERATOR)
5049                     TERM(POSTDEC);
5050                 else
5051                     OPERATOR(PREDEC);
5052             }
5053             else if (*s == '>') {
5054                 s++;
5055                 s = SKIPSPACE1(s);
5056                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5057                     ((*s == '$' || *s == '&') && s[1] == '*')
5058                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5059                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5060                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5061                  ))
5062                 {
5063                     Perl_ck_warner_d(aTHX_
5064                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5065                         "Postfix dereference is experimental"
5066                     );
5067                     PL_expect = XPOSTDEREF;
5068                     TOKEN(ARROW);
5069                 }
5070                 if (isIDFIRST_lazy_if(s,UTF)) {
5071                     s = force_word(s,METHOD,FALSE,TRUE);
5072                     TOKEN(ARROW);
5073                 }
5074                 else if (*s == '$')
5075                     OPERATOR(ARROW);
5076                 else
5077                     TERM(ARROW);
5078             }
5079             if (PL_expect == XOPERATOR) {
5080                 if (*s == '=' && !PL_lex_allbrackets &&
5081                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5082                     s--;
5083                     TOKEN(0);
5084                 }
5085                 Aop(OP_SUBTRACT);
5086             }
5087             else {
5088                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5089                     check_uni();
5090                 OPERATOR('-');          /* unary minus */
5091             }
5092         }
5093
5094     case '+':
5095         {
5096             const char tmp = *s++;
5097             if (*s == tmp) {
5098                 s++;
5099                 if (PL_expect == XOPERATOR)
5100                     TERM(POSTINC);
5101                 else
5102                     OPERATOR(PREINC);
5103             }
5104             if (PL_expect == XOPERATOR) {
5105                 if (*s == '=' && !PL_lex_allbrackets &&
5106                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5107                     s--;
5108                     TOKEN(0);
5109                 }
5110                 Aop(OP_ADD);
5111             }
5112             else {
5113                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5114                     check_uni();
5115                 OPERATOR('+');
5116             }
5117         }
5118
5119     case '*':
5120         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5121         if (PL_expect != XOPERATOR) {
5122             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5123             PL_expect = XOPERATOR;
5124             force_ident(PL_tokenbuf, '*');
5125             if (!*PL_tokenbuf)
5126                 PREREF('*');
5127             TERM('*');
5128         }
5129         s++;
5130         if (*s == '*') {
5131             s++;
5132             if (*s == '=' && !PL_lex_allbrackets &&
5133                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5134                 s -= 2;
5135                 TOKEN(0);
5136             }
5137             PWop(OP_POW);
5138         }
5139         if (*s == '=' && !PL_lex_allbrackets &&
5140                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5141             s--;
5142             TOKEN(0);
5143         }
5144         PL_parser->saw_infix_sigil = 1;
5145         Mop(OP_MULTIPLY);
5146
5147     case '%':
5148     {
5149         if (PL_expect == XOPERATOR) {
5150             if (s[1] == '=' && !PL_lex_allbrackets &&
5151                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5152                 TOKEN(0);
5153             ++s;
5154             PL_parser->saw_infix_sigil = 1;
5155             Mop(OP_MODULO);
5156         }
5157         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5158         PL_tokenbuf[0] = '%';
5159         s = scan_ident(s, PL_tokenbuf + 1,
5160                 sizeof PL_tokenbuf - 1, FALSE);
5161         pl_yylval.ival = 0;
5162         if (!PL_tokenbuf[1]) {
5163             PREREF('%');
5164         }
5165         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5166             if (*s == '[')
5167                 PL_tokenbuf[0] = '@';
5168         }
5169         PL_expect = XOPERATOR;
5170         force_ident_maybe_lex('%');
5171         TERM('%');
5172     }
5173     case '^':
5174         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5175                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5176             TOKEN(0);
5177         s++;
5178         BOop(OP_BIT_XOR);
5179     case '[':
5180         if (PL_lex_brackets > 100)
5181             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5182         PL_lex_brackstack[PL_lex_brackets++] = 0;
5183         PL_lex_allbrackets++;
5184         {
5185             const char tmp = *s++;
5186             OPERATOR(tmp);
5187         }
5188     case '~':
5189         if (s[1] == '~'
5190             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5191         {
5192             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5193                 TOKEN(0);
5194             s += 2;
5195             Perl_ck_warner_d(aTHX_
5196                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5197                 "Smartmatch is experimental");
5198             Eop(OP_SMARTMATCH);
5199         }
5200         s++;
5201         OPERATOR('~');
5202     case ',':
5203         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5204             TOKEN(0);
5205         s++;
5206         OPERATOR(',');
5207     case ':':
5208         if (s[1] == ':') {
5209             len = 0;
5210             goto just_a_word_zero_gv;
5211         }
5212         s++;
5213         {
5214         OP *attrs;
5215
5216         switch (PL_expect) {
5217         case XOPERATOR:
5218             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5219                 break;
5220             PL_bufptr = s;      /* update in case we back off */
5221             if (*s == '=') {
5222                 Perl_croak(aTHX_
5223                            "Use of := for an empty attribute list is not allowed");
5224             }
5225             goto grabattrs;
5226         case XATTRBLOCK:
5227             PL_expect = XBLOCK;
5228             goto grabattrs;
5229         case XATTRTERM:
5230             PL_expect = XTERMBLOCK;
5231          grabattrs:
5232             s = PEEKSPACE(s);
5233             attrs = NULL;
5234             while (isIDFIRST_lazy_if(s,UTF)) {
5235                 I32 tmp;
5236                 SV *sv;
5237                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5238                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5239                     if (tmp < 0) tmp = -tmp;
5240                     switch (tmp) {
5241                     case KEY_or:
5242                     case KEY_and:
5243                     case KEY_for:
5244                     case KEY_foreach:
5245                     case KEY_unless:
5246                     case KEY_if:
5247                     case KEY_while:
5248                     case KEY_until:
5249                         goto got_attrs;
5250                     default:
5251                         break;
5252                     }
5253                 }
5254                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5255                 if (*d == '(') {
5256                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5257                     COPLINE_SET_FROM_MULTI_END;
5258                     if (!d) {
5259                         /* MUST advance bufptr here to avoid bogus
5260                            "at end of line" context messages from yyerror().
5261                          */
5262                         PL_bufptr = s + len;
5263                         yyerror("Unterminated attribute parameter in attribute list");
5264                         if (attrs)
5265                             op_free(attrs);
5266                         sv_free(sv);
5267                         return REPORT(0);       /* EOF indicator */
5268                     }
5269                 }
5270                 if (PL_lex_stuff) {
5271                     sv_catsv(sv, PL_lex_stuff);
5272                     attrs = op_append_elem(OP_LIST, attrs,
5273                                         newSVOP(OP_CONST, 0, sv));
5274                     SvREFCNT_dec(PL_lex_stuff);
5275                     PL_lex_stuff = NULL;
5276                 }
5277                 else {
5278                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5279                         sv_free(sv);
5280                         if (PL_in_my == KEY_our) {
5281                             deprecate(":unique");
5282                         }
5283                         else
5284                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5285                     }
5286
5287                     /* NOTE: any CV attrs applied here need to be part of
5288                        the CVf_BUILTIN_ATTRS define in cv.h! */
5289                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5290                         sv_free(sv);
5291                         CvLVALUE_on(PL_compcv);
5292                     }
5293                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5294                         sv_free(sv);
5295                         deprecate(":locked");
5296                     }
5297                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5298                         sv_free(sv);
5299                         CvMETHOD_on(PL_compcv);
5300                     }
5301                     /* After we've set the flags, it could be argued that
5302                        we don't need to do the attributes.pm-based setting
5303                        process, and shouldn't bother appending recognized
5304                        flags.  To experiment with that, uncomment the
5305                        following "else".  (Note that's already been
5306                        uncommented.  That keeps the above-applied built-in
5307                        attributes from being intercepted (and possibly
5308                        rejected) by a package's attribute routines, but is
5309                        justified by the performance win for the common case
5310                        of applying only built-in attributes.) */
5311                     else
5312                         attrs = op_append_elem(OP_LIST, attrs,
5313                                             newSVOP(OP_CONST, 0,
5314                                                     sv));
5315                 }
5316                 s = PEEKSPACE(d);
5317                 if (*s == ':' && s[1] != ':')
5318                     s = PEEKSPACE(s+1);
5319                 else if (s == d)
5320                     break;      /* require real whitespace or :'s */
5321                 /* XXX losing whitespace on sequential attributes here */
5322             }
5323             {
5324                 if (*s != ';' && *s != '}' &&
5325                     !(PL_expect == XOPERATOR
5326                         ? (*s == '=' ||  *s == ')')
5327                         : (*s == '{' ||  *s == '('))) {
5328                     const char q = ((*s == '\'') ? '"' : '\'');
5329                     /* If here for an expression, and parsed no attrs, back
5330                        off. */
5331                     if (PL_expect == XOPERATOR && !attrs) {
5332                         s = PL_bufptr;
5333                         break;
5334                     }
5335                     /* MUST advance bufptr here to avoid bogus "at end of line"
5336                        context messages from yyerror().
5337                     */
5338                     PL_bufptr = s;
5339                     yyerror( (const char *)
5340                              (*s
5341                               ? Perl_form(aTHX_ "Invalid separator character "
5342                                           "%c%c%c in attribute list", q, *s, q)
5343                               : "Unterminated attribute list" ) );
5344                     if (attrs)
5345                         op_free(attrs);
5346                     OPERATOR(':');
5347                 }
5348             }
5349         got_attrs:
5350             if (attrs) {
5351                 NEXTVAL_NEXTTOKE.opval = attrs;
5352                 force_next(THING);
5353             }
5354             TOKEN(COLONATTR);
5355         }
5356         }
5357         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5358             s--;
5359             TOKEN(0);
5360         }
5361         PL_lex_allbrackets--;
5362         OPERATOR(':');
5363     case '(':
5364         s++;
5365         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5366             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5367         else
5368             PL_expect = XTERM;
5369         s = SKIPSPACE1(s);
5370         PL_lex_allbrackets++;
5371         TOKEN('(');
5372     case ';':
5373         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5374             TOKEN(0);
5375         CLINE;
5376         s++;
5377         OPERATOR(';');
5378     case ')':
5379         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5380             TOKEN(0);
5381         s++;
5382         PL_lex_allbrackets--;
5383         s = SKIPSPACE1(s);
5384         if (*s == '{')
5385             PREBLOCK(')');
5386         TERM(')');
5387     case ']':
5388         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5389             TOKEN(0);
5390         s++;
5391         if (PL_lex_brackets <= 0)
5392             /* diag_listed_as: Unmatched right %s bracket */
5393             yyerror("Unmatched right square bracket");
5394         else
5395             --PL_lex_brackets;
5396         PL_lex_allbrackets--;
5397         if (PL_lex_state == LEX_INTERPNORMAL) {
5398             if (PL_lex_brackets == 0) {
5399                 if (*s == '-' && s[1] == '>')
5400                     PL_lex_state = LEX_INTERPENDMAYBE;
5401                 else if (*s != '[' && *s != '{')
5402                     PL_lex_state = LEX_INTERPEND;
5403             }
5404         }
5405         TERM(']');
5406     case '{':
5407         s++;
5408       leftbracket:
5409         if (PL_lex_brackets > 100) {
5410             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5411         }
5412         switch (PL_expect) {
5413         case XTERM:
5414             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5415             PL_lex_allbrackets++;
5416             OPERATOR(HASHBRACK);
5417         case XOPERATOR:
5418             while (s < PL_bufend && SPACE_OR_TAB(*s))
5419                 s++;
5420             d = s;
5421             PL_tokenbuf[0] = '\0';
5422             if (d < PL_bufend && *d == '-') {
5423                 PL_tokenbuf[0] = '-';
5424                 d++;
5425                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5426                     d++;
5427             }
5428             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5429                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5430                               FALSE, &len);
5431                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5432                     d++;
5433                 if (*d == '}') {
5434                     const char minus = (PL_tokenbuf[0] == '-');
5435                     s = force_word(s + minus, WORD, FALSE, TRUE);
5436                     if (minus)
5437                         force_next('-');
5438                 }
5439             }
5440             /* FALLTHROUGH */
5441         case XATTRBLOCK:
5442         case XBLOCK:
5443             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5444             PL_lex_allbrackets++;
5445             PL_expect = XSTATE;
5446             break;
5447         case XATTRTERM:
5448         case XTERMBLOCK:
5449             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5450             PL_lex_allbrackets++;
5451             PL_expect = XSTATE;
5452             break;
5453         case XBLOCKTERM:
5454             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5455             PL_lex_allbrackets++;
5456             PL_expect = XSTATE;
5457             break;
5458         default: {
5459                 const char *t;
5460                 if (PL_oldoldbufptr == PL_last_lop)
5461                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5462                 else
5463                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5464                 PL_lex_allbrackets++;
5465                 s = SKIPSPACE1(s);
5466                 if (*s == '}') {
5467                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5468                         PL_expect = XTERM;
5469                         /* This hack is to get the ${} in the message. */
5470                         PL_bufptr = s+1;
5471                         yyerror("syntax error");
5472                         break;
5473                     }
5474                     OPERATOR(HASHBRACK);
5475                 }
5476                 /* This hack serves to disambiguate a pair of curlies
5477                  * as being a block or an anon hash.  Normally, expectation
5478                  * determines that, but in cases where we're not in a
5479                  * position to expect anything in particular (like inside
5480                  * eval"") we have to resolve the ambiguity.  This code
5481                  * covers the case where the first term in the curlies is a
5482                  * quoted string.  Most other cases need to be explicitly
5483                  * disambiguated by prepending a "+" before the opening
5484                  * curly in order to force resolution as an anon hash.
5485                  *
5486                  * XXX should probably propagate the outer expectation
5487                  * into eval"" to rely less on this hack, but that could
5488                  * potentially break current behavior of eval"".
5489                  * GSAR 97-07-21
5490                  */
5491                 t = s;
5492                 if (*s == '\'' || *s == '"' || *s == '`') {
5493                     /* common case: get past first string, handling escapes */
5494                     for (t++; t < PL_bufend && *t != *s;)
5495                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5496                             t++;
5497                     t++;
5498                 }
5499                 else if (*s == 'q') {
5500                     if (++t < PL_bufend
5501                         && (!isWORDCHAR(*t)
5502                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5503                                 && !isWORDCHAR(*t))))
5504                     {
5505                         /* skip q//-like construct */
5506                         const char *tmps;
5507                         char open, close, term;
5508                         I32 brackets = 1;
5509
5510                         while (t < PL_bufend && isSPACE(*t))
5511                             t++;
5512                         /* check for q => */
5513                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5514                             OPERATOR(HASHBRACK);
5515                         }
5516                         term = *t;
5517                         open = term;
5518                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5519                             term = tmps[5];
5520                         close = term;
5521                         if (open == close)
5522                             for (t++; t < PL_bufend; t++) {
5523                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5524                                     t++;
5525                                 else if (*t == open)
5526                                     break;
5527                             }
5528                         else {
5529                             for (t++; t < PL_bufend; t++) {
5530                                 if (*t == '\\' && t+1 < PL_bufend)
5531                                     t++;
5532                                 else if (*t == close && --brackets <= 0)
5533                                     break;
5534                                 else if (*t == open)
5535                                     brackets++;
5536                             }
5537                         }
5538                         t++;
5539                     }
5540                     else
5541                         /* skip plain q word */
5542                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5543                              t += UTF8SKIP(t);
5544                 }
5545                 else if (isWORDCHAR_lazy_if(t,UTF)) {
5546                     t += UTF8SKIP(t);
5547                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5548                          t += UTF8SKIP(t);
5549                 }
5550                 while (t < PL_bufend && isSPACE(*t))
5551                     t++;
5552                 /* if comma follows first term, call it an anon hash */
5553                 /* XXX it could be a comma expression with loop modifiers */
5554                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5555                                    || (*t == '=' && t[1] == '>')))
5556                     OPERATOR(HASHBRACK);
5557                 if (PL_expect == XREF)
5558                     PL_expect = XTERM;
5559                 else {
5560                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5561                     PL_expect = XSTATE;
5562                 }
5563             }
5564             break;
5565         }
5566         pl_yylval.ival = CopLINE(PL_curcop);
5567         if (isSPACE(*s) || *s == '#')
5568             PL_copline = NOLINE;   /* invalidate current command line number */
5569         TOKEN(formbrack ? '=' : '{');
5570     case '}':
5571         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5572             TOKEN(0);
5573       rightbracket:
5574         s++;
5575         if (PL_lex_brackets <= 0)
5576             /* diag_listed_as: Unmatched right %s bracket */
5577             yyerror("Unmatched right curly bracket");
5578         else
5579             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5580         PL_lex_allbrackets--;
5581         if (PL_lex_state == LEX_INTERPNORMAL) {
5582             if (PL_lex_brackets == 0) {
5583                 if (PL_expect & XFAKEBRACK) {
5584                     PL_expect &= XENUMMASK;
5585                     PL_lex_state = LEX_INTERPEND;
5586                     PL_bufptr = s;
5587                     return yylex();     /* ignore fake brackets */
5588                 }
5589                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5590                  && SvEVALED(PL_lex_repl))
5591                     PL_lex_state = LEX_INTERPEND;
5592                 else if (*s == '-' && s[1] == '>')
5593                     PL_lex_state = LEX_INTERPENDMAYBE;
5594                 else if (*s != '[' && *s != '{')
5595                     PL_lex_state = LEX_INTERPEND;
5596             }
5597         }
5598         if (PL_expect & XFAKEBRACK) {
5599             PL_expect &= XENUMMASK;
5600             PL_bufptr = s;
5601             return yylex();             /* ignore fake brackets */
5602         }
5603         force_next(formbrack ? '.' : '}');
5604         if (formbrack) LEAVE;
5605         if (formbrack == 2) { /* means . where arguments were expected */
5606             force_next(';');
5607             TOKEN(FORMRBRACK);
5608         }
5609         TOKEN(';');
5610     case '&':
5611         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5612         s++;
5613         if (*s++ == '&') {
5614             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5615                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5616                 s -= 2;
5617                 TOKEN(0);
5618             }
5619             AOPERATOR(ANDAND);
5620         }
5621         s--;
5622         if (PL_expect == XOPERATOR) {
5623             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5624                 && isIDFIRST_lazy_if(s,UTF))
5625             {
5626                 CopLINE_dec(PL_curcop);
5627                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5628                 CopLINE_inc(PL_curcop);
5629             }
5630             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5631                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5632                 s--;
5633                 TOKEN(0);
5634             }
5635             PL_parser->saw_infix_sigil = 1;
5636             BAop(OP_BIT_AND);
5637         }
5638
5639         PL_tokenbuf[0] = '&';
5640         s = scan_ident(s - 1, PL_tokenbuf + 1,
5641                        sizeof PL_tokenbuf - 1, TRUE);
5642         if (PL_tokenbuf[1]) {
5643             PL_expect = XOPERATOR;
5644             force_ident_maybe_lex('&');
5645         }
5646         else
5647             PREREF('&');
5648         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5649         TERM('&');
5650
5651     case '|':
5652         s++;
5653         if (*s++ == '|') {
5654             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5655                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5656                 s -= 2;
5657                 TOKEN(0);
5658             }
5659             AOPERATOR(OROR);
5660         }
5661         s--;
5662         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5663                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5664             s--;
5665             TOKEN(0);
5666         }
5667         BOop(OP_BIT_OR);
5668     case '=':
5669         s++;
5670         {
5671             const char tmp = *s++;
5672             if (tmp == '=') {
5673                 if (!PL_lex_allbrackets &&
5674                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5675                     s -= 2;
5676                     TOKEN(0);
5677                 }
5678                 Eop(OP_EQ);
5679             }
5680             if (tmp == '>') {
5681                 if (!PL_lex_allbrackets &&
5682                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5683                     s -= 2;
5684                     TOKEN(0);
5685                 }
5686                 OPERATOR(',');
5687             }
5688             if (tmp == '~')
5689                 PMop(OP_MATCH);
5690             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5691                 && strchr("+-*/%.^&|<",tmp))
5692                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5693                             "Reversed %c= operator",(int)tmp);
5694             s--;
5695             if (PL_expect == XSTATE && isALPHA(tmp) &&
5696                 (s == PL_linestart+1 || s[-2] == '\n') )
5697                 {
5698                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5699                         || PL_lex_state != LEX_NORMAL) {
5700                         d = PL_bufend;
5701                         while (s < d) {
5702                             if (*s++ == '\n') {
5703                                 incline(s);
5704                                 if (strnEQ(s,"=cut",4)) {
5705                                     s = strchr(s,'\n');
5706                                     if (s)
5707                                         s++;
5708                                     else
5709                                         s = d;
5710                                     incline(s);
5711                                     goto retry;
5712                                 }
5713                             }
5714                         }
5715                         goto retry;
5716                     }
5717                     s = PL_bufend;
5718                     PL_parser->in_pod = 1;
5719                     goto retry;
5720                 }
5721         }
5722         if (PL_expect == XBLOCK) {
5723             const char *t = s;
5724 #ifdef PERL_STRICT_CR
5725             while (SPACE_OR_TAB(*t))
5726 #else
5727             while (SPACE_OR_TAB(*t) || *t == '\r')
5728 #endif
5729                 t++;
5730             if (*t == '\n' || *t == '#') {
5731                 formbrack = 1;
5732                 ENTER;
5733                 SAVEI8(PL_parser->form_lex_state);
5734                 SAVEI32(PL_lex_formbrack);
5735                 PL_parser->form_lex_state = PL_lex_state;
5736                 PL_lex_formbrack = PL_lex_brackets + 1;
5737                 goto leftbracket;
5738             }
5739         }
5740         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5741             s--;
5742             TOKEN(0);
5743         }
5744         pl_yylval.ival = 0;
5745         OPERATOR(ASSIGNOP);
5746     case '!':
5747         s++;
5748         {
5749             const char tmp = *s++;
5750             if (tmp == '=') {
5751                 /* was this !=~ where !~ was meant?
5752                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5753
5754                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5755                     const char *t = s+1;
5756
5757                     while (t < PL_bufend && isSPACE(*t))
5758                         ++t;
5759
5760                     if (*t == '/' || *t == '?' ||
5761                         ((*t == 'm' || *t == 's' || *t == 'y')
5762                          && !isWORDCHAR(t[1])) ||
5763                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5764                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5765                                     "!=~ should be !~");
5766                 }
5767                 if (!PL_lex_allbrackets &&
5768                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5769                     s -= 2;
5770                     TOKEN(0);
5771                 }
5772                 Eop(OP_NE);
5773             }
5774             if (tmp == '~')
5775                 PMop(OP_NOT);
5776         }
5777         s--;
5778         OPERATOR('!');
5779     case '<':
5780         if (PL_expect != XOPERATOR) {
5781             if (s[1] != '<' && !strchr(s,'>'))
5782                 check_uni();
5783             if (s[1] == '<')
5784                 s = scan_heredoc(s);
5785             else
5786                 s = scan_inputsymbol(s);
5787             PL_expect = XOPERATOR;
5788             TOKEN(sublex_start());
5789         }
5790         s++;
5791         {
5792             char tmp = *s++;
5793             if (tmp == '<') {
5794                 if (*s == '=' && !PL_lex_allbrackets &&
5795                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5796                     s -= 2;
5797                     TOKEN(0);
5798                 }
5799                 SHop(OP_LEFT_SHIFT);
5800             }
5801             if (tmp == '=') {
5802                 tmp = *s++;
5803                 if (tmp == '>') {
5804                     if (!PL_lex_allbrackets &&
5805                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5806                         s -= 3;
5807                         TOKEN(0);
5808                     }
5809                     Eop(OP_NCMP);
5810                 }
5811                 s--;
5812                 if (!PL_lex_allbrackets &&
5813                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5814                     s -= 2;
5815                     TOKEN(0);
5816                 }
5817                 Rop(OP_LE);
5818             }
5819         }
5820         s--;
5821         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5822             s--;
5823             TOKEN(0);
5824         }
5825         Rop(OP_LT);
5826     case '>':
5827         s++;
5828         {
5829             const char tmp = *s++;
5830             if (tmp == '>') {
5831                 if (*s == '=' && !PL_lex_allbrackets &&
5832                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5833                     s -= 2;
5834                     TOKEN(0);
5835                 }
5836                 SHop(OP_RIGHT_SHIFT);
5837             }
5838             else if (tmp == '=') {
5839                 if (!PL_lex_allbrackets &&
5840                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5841                     s -= 2;
5842                     TOKEN(0);
5843                 }
5844                 Rop(OP_GE);
5845             }
5846         }
5847         s--;
5848         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5849             s--;
5850             TOKEN(0);
5851         }
5852         Rop(OP_GT);
5853
5854     case '$':
5855         CLINE;
5856
5857         if (PL_expect == XOPERATOR) {
5858             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5859                 return deprecate_commaless_var_list();
5860             }
5861         }
5862         else if (PL_expect == XPOSTDEREF) {
5863             if (s[1] == '#') {
5864                 s++;
5865                 POSTDEREF(DOLSHARP);
5866             }
5867             POSTDEREF('$');
5868         }
5869
5870         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5871             PL_tokenbuf[0] = '@';
5872             s = scan_ident(s + 1, PL_tokenbuf + 1,
5873                            sizeof PL_tokenbuf - 1, FALSE);
5874             if (PL_expect == XOPERATOR)
5875                 no_op("Array length", s);
5876             if (!PL_tokenbuf[1])
5877                 PREREF(DOLSHARP);
5878             PL_expect = XOPERATOR;
5879             force_ident_maybe_lex('#');
5880             TOKEN(DOLSHARP);
5881         }
5882
5883         PL_tokenbuf[0] = '$';
5884         s = scan_ident(s, PL_tokenbuf + 1,
5885                        sizeof PL_tokenbuf - 1, FALSE);
5886         if (PL_expect == XOPERATOR)
5887             no_op("Scalar", s);
5888         if (!PL_tokenbuf[1]) {
5889             if (s == PL_bufend)
5890                 yyerror("Final $ should be \\$ or $name");
5891             PREREF('$');
5892         }
5893
5894         d = s;
5895         {
5896             const char tmp = *s;
5897             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5898                 s = SKIPSPACE1(s);
5899
5900             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5901                 && intuit_more(s)) {
5902                 if (*s == '[') {
5903                     PL_tokenbuf[0] = '@';
5904                     if (ckWARN(WARN_SYNTAX)) {
5905                         char *t = s+1;
5906
5907                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
5908                             t++;
5909                         if (*t++ == ',') {
5910                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5911                             while (t < PL_bufend && *t != ']')
5912                                 t++;
5913                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5914                                         "Multidimensional syntax %.*s not supported",
5915                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5916                         }
5917                     }
5918                 }
5919                 else if (*s == '{') {
5920                     char *t;
5921                     PL_tokenbuf[0] = '%';
5922                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5923                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5924                         {
5925                             char tmpbuf[sizeof PL_tokenbuf];
5926                             do {
5927                                 t++;
5928                             } while (isSPACE(*t));
5929                             if (isIDFIRST_lazy_if(t,UTF)) {
5930                                 STRLEN len;
5931                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5932                                               &len);
5933                                 while (isSPACE(*t))
5934                                     t++;
5935                                 if (*t == ';'
5936                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
5937                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5938                                         "You need to quote \"%"UTF8f"\"",
5939                                          UTF8fARG(UTF, len, tmpbuf));
5940                             }
5941                         }
5942                 }
5943             }
5944
5945             PL_expect = XOPERATOR;
5946             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5947                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5948                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5949                     PL_expect = XOPERATOR;
5950                 else if (strchr("$@\"'`q", *s))
5951                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5952                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5953                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5954                 else if (isIDFIRST_lazy_if(s,UTF)) {
5955                     char tmpbuf[sizeof PL_tokenbuf];
5956                     int t2;
5957                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5958                     if ((t2 = keyword(tmpbuf, len, 0))) {
5959                         /* binary operators exclude handle interpretations */
5960                         switch (t2) {
5961                         case -KEY_x:
5962                         case -KEY_eq:
5963                         case -KEY_ne:
5964                         case -KEY_gt:
5965                         case -KEY_lt:
5966                         case -KEY_ge:
5967                         case -KEY_le:
5968                         case -KEY_cmp:
5969                             break;
5970                         default:
5971                             PL_expect = XTERM;  /* e.g. print $fh length() */
5972                             break;
5973                         }
5974                     }
5975                     else {
5976                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5977                     }
5978                 }
5979                 else if (isDIGIT(*s))
5980                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5981                 else if (*s == '.' && isDIGIT(s[1]))
5982                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5983                 else if ((*s == '?' || *s == '-' || *s == '+')
5984                          && !isSPACE(s[1]) && s[1] != '=')
5985                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5986                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5987                          && s[1] != '/')
5988                     PL_expect = XTERM;          /* e.g. print $fh /.../
5989                                                    XXX except DORDOR operator
5990                                                 */
5991                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5992                          && s[2] != '=')
5993                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5994             }
5995         }
5996         force_ident_maybe_lex('$');
5997         TOKEN('$');
5998
5999     case '@':
6000         if (PL_expect == XOPERATOR)
6001             no_op("Array", s);
6002         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6003         PL_tokenbuf[0] = '@';
6004         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6005         pl_yylval.ival = 0;
6006         if (!PL_tokenbuf[1]) {
6007             PREREF('@');
6008         }
6009         if (PL_lex_state == LEX_NORMAL)
6010             s = SKIPSPACE1(s);
6011         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6012             if (*s == '{')
6013                 PL_tokenbuf[0] = '%';
6014
6015             /* Warn about @ where they meant $. */
6016             if (*s == '[' || *s == '{') {
6017                 if (ckWARN(WARN_SYNTAX)) {
6018                     S_check_scalar_slice(aTHX_ s);
6019                 }
6020             }
6021         }
6022         PL_expect = XOPERATOR;
6023         force_ident_maybe_lex('@');
6024         TERM('@');
6025
6026      case '/':                  /* may be division, defined-or, or pattern */
6027         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6028             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6029                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6030                 TOKEN(0);
6031             s += 2;
6032             AOPERATOR(DORDOR);
6033         }
6034         else if (PL_expect == XOPERATOR) {
6035             s++;
6036             if (*s == '=' && !PL_lex_allbrackets &&
6037                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6038                 s--;
6039                 TOKEN(0);
6040             }
6041             Mop(OP_DIVIDE);
6042         }
6043         else {
6044             /* Disable warning on "study /blah/" */
6045             if (PL_oldoldbufptr == PL_last_uni
6046              && (*PL_last_uni != 's' || s - PL_last_uni < 5
6047                  || memNE(PL_last_uni, "study", 5)
6048                  || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6049              ))
6050                 check_uni();
6051             s = scan_pat(s,OP_MATCH);
6052             TERM(sublex_start());
6053         }
6054
6055      case '?':                  /* conditional */
6056         s++;
6057         if (!PL_lex_allbrackets &&
6058             PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6059             s--;
6060             TOKEN(0);
6061         }
6062         PL_lex_allbrackets++;
6063         OPERATOR('?');
6064
6065     case '.':
6066         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6067 #ifdef PERL_STRICT_CR
6068             && s[1] == '\n'
6069 #else
6070             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6071 #endif
6072             && (s == PL_linestart || s[-1] == '\n') )
6073         {
6074             PL_expect = XSTATE;
6075             formbrack = 2; /* dot seen where arguments expected */
6076             goto rightbracket;
6077         }
6078         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6079             s += 3;
6080             OPERATOR(YADAYADA);
6081         }
6082         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6083             char tmp = *s++;
6084             if (*s == tmp) {
6085                 if (!PL_lex_allbrackets &&
6086                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6087                     s--;
6088                     TOKEN(0);
6089                 }
6090                 s++;
6091                 if (*s == tmp) {
6092                     s++;
6093                     pl_yylval.ival = OPf_SPECIAL;
6094                 }
6095                 else
6096                     pl_yylval.ival = 0;
6097                 OPERATOR(DOTDOT);
6098             }
6099             if (*s == '=' && !PL_lex_allbrackets &&
6100                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6101                 s--;
6102                 TOKEN(0);
6103             }
6104             Aop(OP_CONCAT);
6105         }
6106         /* FALLTHROUGH */
6107     case '0': case '1': case '2': case '3': case '4':
6108     case '5': case '6': case '7': case '8': case '9':
6109         s = scan_num(s, &pl_yylval);
6110         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6111         if (PL_expect == XOPERATOR)
6112             no_op("Number",s);
6113         TERM(THING);
6114
6115     case '\'':
6116         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6117         if (!s)
6118             missingterm(NULL);
6119         COPLINE_SET_FROM_MULTI_END;
6120         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6121         if (PL_expect == XOPERATOR) {
6122             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6123                 return deprecate_commaless_var_list();
6124             }
6125             else
6126                 no_op("String",s);
6127         }
6128         pl_yylval.ival = OP_CONST;
6129         TERM(sublex_start());
6130
6131     case '"':
6132         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6133         DEBUG_T( {
6134             if (s)
6135                 printbuf("### Saw string before %s\n", s);
6136             else
6137                 PerlIO_printf(Perl_debug_log,
6138                              "### Saw unterminated string\n");
6139         } );
6140         if (PL_expect == XOPERATOR) {
6141             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6142                 return deprecate_commaless_var_list();
6143             }
6144             else
6145                 no_op("String",s);
6146         }
6147         if (!s)
6148             missingterm(NULL);
6149         pl_yylval.ival = OP_CONST;
6150         /* FIXME. I think that this can be const if char *d is replaced by
6151            more localised variables.  */
6152         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6153             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154                 pl_yylval.ival = OP_STRINGIFY;
6155                 break;
6156             }
6157         }
6158         if (pl_yylval.ival == OP_CONST)
6159             COPLINE_SET_FROM_MULTI_END;
6160         TERM(sublex_start());
6161
6162     case '`':
6163         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6164         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6165         if (PL_expect == XOPERATOR)
6166             no_op("Backticks",s);
6167         if (!s)
6168             missingterm(NULL);
6169         pl_yylval.ival = OP_BACKTICK;
6170         TERM(sublex_start());
6171
6172     case '\\':
6173         s++;
6174         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6175          && isDIGIT(*s))
6176             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6177                            *s, *s);
6178         if (PL_expect == XOPERATOR)
6179             no_op("Backslash",s);
6180         OPERATOR(REFGEN);
6181
6182     case 'v':
6183         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6184             char *start = s + 2;
6185             while (isDIGIT(*start) || *start == '_')
6186                 start++;
6187             if (*start == '.' && isDIGIT(start[1])) {
6188                 s = scan_num(s, &pl_yylval);
6189                 TERM(THING);
6190             }
6191             else if ((*start == ':' && start[1] == ':')
6192                   || (PL_expect == XSTATE && *start == ':'))
6193                 goto keylookup;
6194             else if (PL_expect == XSTATE) {
6195                 d = start;
6196                 while (d < PL_bufend && isSPACE(*d)) d++;
6197                 if (*d == ':') goto keylookup;
6198             }
6199             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6200             if (!isALPHA(*start) && (PL_expect == XTERM
6201                         || PL_expect == XREF || PL_expect == XSTATE
6202                         || PL_expect == XTERMORDORDOR)) {
6203                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6204                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6205                 if (!gv) {
6206                     s = scan_num(s, &pl_yylval);
6207                     TERM(THING);
6208                 }
6209             }
6210         }
6211         goto keylookup;
6212     case 'x':
6213         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6214             s++;
6215             Mop(OP_REPEAT);
6216         }
6217         goto keylookup;
6218
6219     case '_':
6220     case 'a': case 'A':
6221     case 'b': case 'B':
6222     case 'c': case 'C':
6223     case 'd': case 'D':
6224     case 'e': case 'E':
6225     case 'f': case 'F':
6226     case 'g': case 'G':
6227     case 'h': case 'H':
6228     case 'i': case 'I':
6229     case 'j': case 'J':
6230     case 'k': case 'K':
6231     case 'l': case 'L':
6232     case 'm': case 'M':
6233     case 'n': case 'N':
6234     case 'o': case 'O':
6235     case 'p': case 'P':
6236     case 'q': case 'Q':
6237     case 'r': case 'R':
6238     case 's': case 'S':
6239     case 't': case 'T':
6240     case 'u': case 'U':
6241               case 'V':
6242     case 'w': case 'W':
6243               case 'X':
6244     case 'y': case 'Y':
6245     case 'z': case 'Z':
6246
6247       keylookup: {
6248         bool anydelim;
6249         bool lex;
6250         I32 tmp;
6251         SV *sv;
6252         CV *cv;
6253         PADOFFSET off;
6254         OP *rv2cv_op;
6255
6256         lex = FALSE;
6257         orig_keyword = 0;
6258         off = 0;
6259         sv = NULL;
6260         cv = NULL;
6261         gv = NULL;
6262         gvp = NULL;
6263         rv2cv_op = NULL;
6264
6265         PL_bufptr = s;
6266         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6267
6268         /* Some keywords can be followed by any delimiter, including ':' */
6269         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6270
6271         /* x::* is just a word, unless x is "CORE" */
6272         if (!anydelim && *s == ':' && s[1] == ':') {
6273             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6274             goto just_a_word;
6275         }
6276
6277         d = s;
6278         while (d < PL_bufend && isSPACE(*d))
6279                 d++;    /* no comments skipped here, or s### is misparsed */
6280
6281         /* Is this a word before a => operator? */
6282         if (*d == '=' && d[1] == '>') {
6283           fat_arrow:
6284             CLINE;
6285             pl_yylval.opval
6286                 = (OP*)newSVOP(OP_CONST, 0,
6287                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6288             pl_yylval.opval->op_private = OPpCONST_BARE;
6289             TERM(WORD);
6290         }
6291
6292         /* Check for plugged-in keyword */
6293         {
6294             OP *o;
6295             int result;
6296             char *saved_bufptr = PL_bufptr;
6297             PL_bufptr = s;
6298             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6299             s = PL_bufptr;
6300             if (result == KEYWORD_PLUGIN_DECLINE) {
6301                 /* not a plugged-in keyword */
6302                 PL_bufptr = saved_bufptr;
6303             } else if (result == KEYWORD_PLUGIN_STMT) {
6304                 pl_yylval.opval = o;
6305                 CLINE;
6306                 PL_expect = XSTATE;
6307                 return REPORT(PLUGSTMT);
6308             } else if (result == KEYWORD_PLUGIN_EXPR) {
6309                 pl_yylval.opval = o;
6310                 CLINE;
6311                 PL_expect = XOPERATOR;
6312                 return REPORT(PLUGEXPR);
6313             } else {
6314                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6315                                         PL_tokenbuf);
6316             }
6317         }
6318
6319         /* Check for built-in keyword */
6320         tmp = keyword(PL_tokenbuf, len, 0);
6321
6322         /* Is this a label? */
6323         if (!anydelim && PL_expect == XSTATE
6324               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6325             s = d + 1;
6326             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6327             pl_yylval.pval[len] = '\0';
6328             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6329             CLINE;
6330             TOKEN(LABEL);
6331         }
6332
6333         /* Check for lexical sub */
6334         if (PL_expect != XOPERATOR) {
6335             char tmpbuf[sizeof PL_tokenbuf + 1];
6336             *tmpbuf = '&';
6337             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6338             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6339             if (off != NOT_IN_PAD) {
6340                 assert(off); /* we assume this is boolean-true below */
6341                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6342                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6343                     HEK * const stashname = HvNAME_HEK(stash);
6344                     sv = newSVhek(stashname);
6345                     sv_catpvs(sv, "::");
6346                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6347                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6348                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6349                                     SVt_PVCV);
6350                     off = 0;
6351                     if (!gv) {
6352                         sv_free(sv);
6353                         sv = NULL;
6354                         goto just_a_word;
6355                     }
6356                 }
6357                 else {
6358                     rv2cv_op = newOP(OP_PADANY, 0);
6359                     rv2cv_op->op_targ = off;
6360                     cv = find_lexical_cv(off);
6361                 }
6362                 lex = TRUE;
6363                 goto just_a_word;
6364             }
6365             off = 0;
6366         }
6367
6368         if (tmp < 0) {                  /* second-class keyword? */
6369             GV *ogv = NULL;     /* override (winner) */
6370             GV *hgv = NULL;     /* hidden (loser) */
6371             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6372                 CV *cv;
6373                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6374                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6375                                             SVt_PVCV)) &&
6376                     (cv = GvCVu(gv)))
6377                 {
6378                     if (GvIMPORTED_CV(gv))
6379                         ogv = gv;
6380                     else if (! CvMETHOD(cv))
6381                         hgv = gv;
6382                 }
6383                 if (!ogv &&
6384                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6385                                           len, FALSE)) &&
6386                     (gv = *gvp) && (
6387                         isGV_with_GP(gv)
6388                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
6389                             :   SvPCS_IMPORTED(gv)
6390                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6391                                          len, 0), 1)
6392                    ))
6393                 {
6394                     ogv = gv;
6395                 }
6396             }
6397             if (ogv) {
6398                 orig_keyword = tmp;
6399                 tmp = 0;                /* overridden by import or by GLOBAL */
6400             }
6401             else if (gv && !gvp
6402                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6403                      && GvCVu(gv))
6404             {
6405                 tmp = 0;                /* any sub overrides "weak" keyword */
6406             }
6407             else {                      /* no override */
6408                 tmp = -tmp;
6409                 if (tmp == KEY_dump) {
6410                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6411                                    "dump() better written as CORE::dump()");
6412                 }
6413                 gv = NULL;
6414                 gvp = 0;
6415                 if (hgv && tmp != KEY_x)        /* never ambiguous */
6416                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6417                                    "Ambiguous call resolved as CORE::%s(), "
6418                                    "qualify as such or use &",
6419                                    GvENAME(hgv));
6420             }
6421         }
6422
6423         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6424          && (!anydelim || *s != '#')) {
6425             /* no override, and not s### either; skipspace is safe here
6426              * check for => on following line */
6427             bool arrow;
6428             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6429             STRLEN   soff = s         - SvPVX(PL_linestr);
6430             s = skipspace_flags(s, LEX_NO_INCLINE);
6431             arrow = *s == '=' && s[1] == '>';
6432             PL_bufptr = SvPVX(PL_linestr) + bufoff;
6433             s         = SvPVX(PL_linestr) +   soff;
6434             if (arrow)
6435                 goto fat_arrow;
6436         }
6437
6438       reserved_word:
6439         switch (tmp) {
6440
6441         default:                        /* not a keyword */
6442             /* Trade off - by using this evil construction we can pull the
6443                variable gv into the block labelled keylookup. If not, then
6444                we have to give it function scope so that the goto from the
6445                earlier ':' case doesn't bypass the initialisation.  */
6446             if (0) {
6447             just_a_word_zero_gv:
6448                 sv = NULL;
6449                 cv = NULL;
6450                 gv = NULL;
6451                 gvp = NULL;
6452                 rv2cv_op = NULL;
6453                 orig_keyword = 0;
6454                 lex = 0;
6455                 off = 0;
6456             }
6457           just_a_word: {
6458                 int pkgname = 0;
6459                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6460                 const char penultchar =
6461                     lastchar && PL_bufptr - 2 >= PL_linestart
6462                          ? PL_bufptr[-2]
6463                          : 0;
6464
6465
6466                 /* Get the rest if it looks like a package qualifier */
6467
6468                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6469                     STRLEN morelen;
6470                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6471                                   TRUE, &morelen);
6472                     if (!morelen)
6473                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6474                                 UTF8fARG(UTF, len, PL_tokenbuf),
6475                                 *s == '\'' ? "'" : "::");
6476                     len += morelen;
6477                     pkgname = 1;
6478                 }
6479
6480                 if (PL_expect == XOPERATOR) {
6481                     if (PL_bufptr == PL_linestart) {
6482                         CopLINE_dec(PL_curcop);
6483                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6484                         CopLINE_inc(PL_curcop);
6485                     }
6486                     else
6487                         no_op("Bareword",s);
6488                 }
6489
6490                 /* Look for a subroutine with this name in current package,
6491                    unless this is a lexical sub, or name is "Foo::",
6492                    in which case Foo is a bareword
6493                    (and a package name). */
6494
6495                 if (len > 2 &&
6496                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6497                 {
6498                     if (ckWARN(WARN_BAREWORD)
6499                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6500                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6501                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
6502                            UTF8fARG(UTF, len, PL_tokenbuf));
6503                     len -= 2;
6504                     PL_tokenbuf[len] = '\0';
6505                     gv = NULL;
6506                     gvp = 0;
6507                 }
6508                 else {
6509                     if (!lex && !gv) {
6510                         /* Mustn't actually add anything to a symbol table.
6511                            But also don't want to "initialise" any placeholder
6512                            constants that might already be there into full
6513                            blown PVGVs with attached PVCV.  */
6514                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6515                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6516                                                SVt_PVCV);
6517                     }
6518                     len = 0;
6519                 }
6520
6521                 /* if we saw a global override before, get the right name */
6522
6523                 if (!sv)
6524                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6525                     len ? len : strlen(PL_tokenbuf));
6526                 if (gvp) {
6527                     SV * const tmp_sv = sv;
6528                     sv = newSVpvs("CORE::GLOBAL::");
6529                     sv_catsv(sv, tmp_sv);
6530                     SvREFCNT_dec(tmp_sv);
6531                 }
6532
6533
6534                 /* Presume this is going to be a bareword of some sort. */
6535                 CLINE;
6536                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6537                 pl_yylval.opval->op_private = OPpCONST_BARE;
6538
6539                 /* And if "Foo::", then that's what it certainly is. */
6540                 if (len)
6541                     goto safe_bareword;
6542
6543                 if (!off)
6544                 {
6545                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6546                     const_op->op_private = OPpCONST_BARE;
6547                     rv2cv_op = newCVREF(0, const_op);
6548                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6549                 }
6550
6551                 /* See if it's the indirect object for a list operator. */
6552
6553                 if (PL_oldoldbufptr &&
6554                     PL_oldoldbufptr < PL_bufptr &&
6555                     (PL_oldoldbufptr == PL_last_lop
6556                      || PL_oldoldbufptr == PL_last_uni) &&
6557                     /* NO SKIPSPACE BEFORE HERE! */
6558                     (PL_expect == XREF ||
6559                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6560                 {
6561                     bool immediate_paren = *s == '(';
6562
6563                     /* (Now we can afford to cross potential line boundary.) */
6564                     s = SKIPSPACE2(s,nextPL_nextwhite);
6565
6566                     /* Two barewords in a row may indicate method call. */
6567
6568                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6569                         (tmp = intuit_method(s, gv, cv))) {
6570                         op_free(rv2cv_op);
6571                         if (tmp == METHOD && !PL_lex_allbrackets &&
6572                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6573                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6574                         return REPORT(tmp);
6575                     }
6576
6577                     /* If not a declared subroutine, it's an indirect object. */
6578                     /* (But it's an indir obj regardless for sort.) */
6579                     /* Also, if "_" follows a filetest operator, it's a bareword */
6580
6581                     if (
6582                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6583                          (!cv &&
6584                         (PL_last_lop_op != OP_MAPSTART &&
6585                          PL_last_lop_op != OP_GREPSTART))))
6586                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6587                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6588                        )
6589                     {
6590                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6591                         goto bareword;
6592                     }
6593                 }
6594
6595                 PL_expect = XOPERATOR;
6596                 s = skipspace(s);
6597
6598                 /* Is this a word before a => operator? */
6599                 if (*s == '=' && s[1] == '>' && !pkgname) {
6600                     op_free(rv2cv_op);
6601                     CLINE;
6602                     /* This is our own scalar, created a few lines above,
6603                        so this is safe. */
6604                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
6605                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6606                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6607                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6608                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
6609                     TERM(WORD);
6610                 }
6611
6612                 /* If followed by a paren, it's certainly a subroutine. */
6613                 if (*s == '(') {
6614                     CLINE;
6615                     if (cv) {
6616                         d = s + 1;
6617                         while (SPACE_OR_TAB(*d))
6618                             d++;
6619                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6620                             s = d + 1;
6621                             goto its_constant;
6622                         }
6623                     }
6624                     NEXTVAL_NEXTTOKE.opval =
6625                         off ? rv2cv_op : pl_yylval.opval;
6626                     PL_expect = XOPERATOR;
6627                     if (off)
6628                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
6629                     else op_free(rv2cv_op),        force_next(WORD);
6630                     pl_yylval.ival = 0;
6631                     TOKEN('&');
6632                 }
6633
6634                 /* If followed by var or block, call it a method (unless sub) */
6635
6636                 if ((*s == '$' || *s == '{') && !cv) {
6637                     op_free(rv2cv_op);
6638                     PL_last_lop = PL_oldbufptr;
6639                     PL_last_lop_op = OP_METHOD;
6640                     if (!PL_lex_allbrackets &&
6641                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6642                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6643                     PL_expect = XBLOCKTERM;
6644                     PL_bufptr = s;
6645                     return REPORT(METHOD);
6646                 }
6647
6648                 /* If followed by a bareword, see if it looks like indir obj. */
6649
6650                 if (!orig_keyword
6651                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6652                         && (tmp = intuit_method(s, gv, cv))) {
6653                     op_free(rv2cv_op);
6654                     if (tmp == METHOD && !PL_lex_allbrackets &&
6655                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6656                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6657                     return REPORT(tmp);
6658                 }
6659
6660                 /* Not a method, so call it a subroutine (if defined) */
6661
6662                 if (cv) {
6663                     if (lastchar == '-' && penultchar != '-') {
6664                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
6665                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6666                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
6667                              UTF8fARG(UTF, l, PL_tokenbuf),
6668                              UTF8fARG(UTF, l, PL_tokenbuf));
6669                     }
6670                     /* Check for a constant sub */
6671                     if ((sv = cv_const_sv_or_av(cv))) {
6672                   its_constant:
6673                         op_free(rv2cv_op);
6674                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6675                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6676                         if (SvTYPE(sv) == SVt_PVAV)
6677                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6678                                                       pl_yylval.opval);
6679                         else {
6680                             pl_yylval.opval->op_private = 0;
6681                             pl_yylval.opval->op_folded = 1;
6682                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
6683                         }
6684                         TOKEN(WORD);
6685                     }
6686
6687                     op_free(pl_yylval.opval);
6688                     pl_yylval.opval =
6689                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6690                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6691                     PL_last_lop = PL_oldbufptr;
6692                     PL_last_lop_op = OP_ENTERSUB;
6693                     /* Is there a prototype? */
6694                     if (
6695                         SvPOK(cv))
6696                     {
6697                         STRLEN protolen = CvPROTOLEN(cv);
6698                         const char *proto = CvPROTO(cv);
6699                         bool optional;
6700                         proto = S_strip_spaces(aTHX_ proto, &protolen);
6701                         if (!protolen)
6702                             TERM(FUNC0SUB);
6703                         if ((optional = *proto == ';'))
6704                           do
6705                             proto++;
6706                           while (*proto == ';');
6707                         if (
6708                             (
6709                                 (
6710                                     *proto == '$' || *proto == '_'
6711                                  || *proto == '*' || *proto == '+'
6712                                 )
6713                              && proto[1] == '\0'
6714                             )
6715                          || (
6716                              *proto == '\\' && proto[1] && proto[2] == '\0'
6717                             )
6718                         )
6719                             UNIPROTO(UNIOPSUB,optional);
6720                         if (*proto == '\\' && proto[1] == '[') {
6721                             const char *p = proto + 2;
6722                             while(*p && *p != ']')
6723                                 ++p;
6724                             if(*p == ']' && !p[1])
6725                                 UNIPROTO(UNIOPSUB,optional);
6726                         }
6727                         if (*proto == '&' && *s == '{') {
6728                             if (PL_curstash)
6729                                 sv_setpvs(PL_subname, "__ANON__");
6730                             else
6731                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6732                             if (!PL_lex_allbrackets &&
6733                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6734                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6735                             PREBLOCK(LSTOPSUB);
6736                         }
6737                     }
6738                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6739                     PL_expect = XTERM;
6740                     force_next(off ? PRIVATEREF : WORD);
6741                     if (!PL_lex_allbrackets &&
6742                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6743                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6744                     TOKEN(NOAMP);
6745                 }
6746
6747                 /* Call it a bare word */
6748
6749                 if (PL_hints & HINT_STRICT_SUBS)
6750                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6751                 else {
6752                 bareword:
6753                     /* after "print" and similar functions (corresponding to
6754                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6755                      * a filehandle should be subject to "strict subs".
6756                      * Likewise for the optional indirect-object argument to system
6757                      * or exec, which can't be a bareword */
6758                     if ((PL_last_lop_op == OP_PRINT
6759                             || PL_last_lop_op == OP_PRTF
6760                             || PL_last_lop_op == OP_SAY
6761                             || PL_last_lop_op == OP_SYSTEM
6762                             || PL_last_lop_op == OP_EXEC)
6763                             && (PL_hints & HINT_STRICT_SUBS))
6764                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6765                     if (lastchar != '-') {
6766                         if (ckWARN(WARN_RESERVED)) {
6767                             d = PL_tokenbuf;
6768                             while (isLOWER(*d))
6769                                 d++;
6770                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6771                             {
6772                                 /* PL_warn_reserved is constant */
6773                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6774                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6775                                        PL_tokenbuf);
6776                                 GCC_DIAG_RESTORE;
6777                             }
6778                         }
6779                     }
6780                 }
6781                 op_free(rv2cv_op);
6782
6783             safe_bareword:
6784                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6785                  && saw_infix_sigil) {
6786                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6787                                      "Operator or semicolon missing before %c%"UTF8f,
6788                                      lastchar,
6789                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
6790                                               PL_tokenbuf));
6791                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6792                                      "Ambiguous use of %c resolved as operator %c",
6793                                      lastchar, lastchar);
6794                 }
6795                 TOKEN(WORD);
6796             }
6797
6798         case KEY___FILE__:
6799             FUN0OP(
6800                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6801             );
6802
6803         case KEY___LINE__:
6804             FUN0OP(
6805                 (OP*)newSVOP(OP_CONST, 0,
6806                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6807             );
6808
6809         case KEY___PACKAGE__:
6810             FUN0OP(
6811                 (OP*)newSVOP(OP_CONST, 0,
6812                                         (PL_curstash
6813                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6814                                          : &PL_sv_undef))
6815             );
6816
6817         case KEY___DATA__:
6818         case KEY___END__: {
6819             GV *gv;
6820             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6821                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6822                                         ? PL_curstash
6823                                         : PL_defstash;
6824                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6825                 if (!isGV(gv))
6826                     gv_init(gv,stash,"DATA",4,0);
6827                 GvMULTI_on(gv);
6828                 if (!GvIO(gv))
6829                     GvIOp(gv) = newIO();
6830                 IoIFP(GvIOp(gv)) = PL_rsfp;
6831 #if defined(HAS_FCNTL) && defined(F_SETFD)
6832                 {
6833                     const int fd = PerlIO_fileno(PL_rsfp);
6834                     fcntl(fd,F_SETFD,fd >= 3);
6835                 }
6836 #endif
6837                 /* Mark this internal pseudo-handle as clean */
6838                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6839                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6840                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6841                 else
6842                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6843 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6844                 /* if the script was opened in binmode, we need to revert
6845                  * it to text mode for compatibility; but only iff it has CRs
6846                  * XXX this is a questionable hack at best. */
6847                 if (PL_bufend-PL_bufptr > 2
6848                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6849                 {
6850                     Off_t loc = 0;
6851                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6852                         loc = PerlIO_tell(PL_rsfp);
6853                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6854                     }
6855 #ifdef NETWARE
6856                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6857 #else
6858                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6859 #endif  /* NETWARE */
6860                         if (loc > 0)
6861                             PerlIO_seek(PL_rsfp, loc, 0);
6862                     }
6863                 }
6864 #endif
6865 #ifdef PERLIO_LAYERS
6866                 if (!IN_BYTES) {
6867                     if (UTF)
6868                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6869                     else if (PL_encoding) {
6870                         SV *name;
6871                         dSP;
6872                         ENTER;
6873                         SAVETMPS;
6874                         PUSHMARK(sp);
6875                         XPUSHs(PL_encoding);
6876                         PUTBACK;
6877                         call_method("name", G_SCALAR);
6878                         SPAGAIN;
6879                         name = POPs;
6880                         PUTBACK;
6881                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6882                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6883                                                       SVfARG(name)));
6884                         FREETMPS;
6885                         LEAVE;
6886                     }
6887                 }
6888 #endif
6889                 PL_rsfp = NULL;
6890             }
6891             goto fake_eof;
6892         }
6893
6894         case KEY___SUB__:
6895             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
6896
6897         case KEY_AUTOLOAD:
6898         case KEY_DESTROY:
6899         case KEY_BEGIN:
6900         case KEY_UNITCHECK:
6901         case KEY_CHECK:
6902         case KEY_INIT:
6903         case KEY_END:
6904             if (PL_expect == XSTATE) {
6905                 s = PL_bufptr;
6906                 goto really_sub;
6907             }
6908             goto just_a_word;
6909
6910         case_KEY_CORE:
6911             {
6912                 STRLEN olen = len;
6913                 d = s;
6914                 s += 2;
6915                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6916                 if ((*s == ':' && s[1] == ':')
6917                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
6918                 {
6919                     s = d;
6920                     len = olen;
6921                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
6922                     goto just_a_word;
6923                 }
6924                 if (!tmp)
6925                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
6926                                       UTF8fARG(UTF, len, PL_tokenbuf));
6927                 if (tmp < 0)
6928                     tmp = -tmp;
6929                 else if (tmp == KEY_require || tmp == KEY_do
6930                       || tmp == KEY_glob)
6931                     /* that's a way to remember we saw "CORE::" */
6932                     orig_keyword = tmp;
6933                 goto reserved_word;
6934             }
6935
6936         case KEY_abs:
6937             UNI(OP_ABS);
6938
6939         case KEY_alarm:
6940             UNI(OP_ALARM);
6941
6942         case KEY_accept:
6943             LOP(OP_ACCEPT,XTERM);
6944
6945         case KEY_and:
6946             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
6947                 return REPORT(0);
6948             OPERATOR(ANDOP);
6949
6950         case KEY_atan2:
6951             LOP(OP_ATAN2,XTERM);
6952
6953         case KEY_bind:
6954             LOP(OP_BIND,XTERM);
6955
6956         case KEY_binmode:
6957             LOP(OP_BINMODE,XTERM);
6958
6959         case KEY_bless:
6960             LOP(OP_BLESS,XTERM);
6961
6962         case KEY_break:
6963             FUN0(OP_BREAK);
6964
6965         case KEY_chop:
6966             UNI(OP_CHOP);
6967
6968         case KEY_continue:
6969                     /* We have to disambiguate the two senses of
6970                       "continue". If the next token is a '{' then
6971                       treat it as the start of a continue block;
6972                       otherwise treat it as a control operator.
6973                      */
6974                     s = skipspace(s);
6975                     if (*s == '{')
6976             PREBLOCK(CONTINUE);
6977                     else
6978                         FUN0(OP_CONTINUE);
6979
6980         case KEY_chdir:
6981             /* may use HOME */
6982             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6983             UNI(OP_CHDIR);
6984
6985         case KEY_close:
6986             UNI(OP_CLOSE);
6987
6988         case KEY_closedir:
6989             UNI(OP_CLOSEDIR);
6990
6991         case KEY_cmp:
6992             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6993                 return REPORT(0);
6994             Eop(OP_SCMP);
6995
6996         case KEY_caller:
6997             UNI(OP_CALLER);
6998
6999         case KEY_crypt:
7000 #ifdef FCRYPT
7001             if (!PL_cryptseen) {
7002                 PL_cryptseen = TRUE;
7003                 init_des();
7004             }
7005 #endif
7006             LOP(OP_CRYPT,XTERM);
7007
7008         case KEY_chmod:
7009             LOP(OP_CHMOD,XTERM);
7010
7011         case KEY_chown:
7012             LOP(OP_CHOWN,XTERM);
7013
7014         case KEY_connect:
7015             LOP(OP_CONNECT,XTERM);
7016
7017         case KEY_chr:
7018             UNI(OP_CHR);
7019
7020         case KEY_cos:
7021             UNI(OP_COS);
7022
7023         case KEY_chroot:
7024             UNI(OP_CHROOT);
7025
7026         case KEY_default:
7027             PREBLOCK(DEFAULT);
7028
7029         case KEY_do:
7030             s = SKIPSPACE1(s);
7031             if (*s == '{')
7032                 PRETERMBLOCK(DO);
7033             if (*s != '\'') {
7034                 *PL_tokenbuf = '&';
7035                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7036                               1, &len);
7037                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7038                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7039                     d = SKIPSPACE1(d);
7040                     if (*d == '(') {
7041                         force_ident_maybe_lex('&');
7042                         s = d;
7043                     }
7044                 }
7045             }
7046             if (orig_keyword == KEY_do) {
7047                 orig_keyword = 0;
7048                 pl_yylval.ival = 1;
7049             }
7050             else
7051                 pl_yylval.ival = 0;
7052             OPERATOR(DO);
7053
7054         case KEY_die:
7055             PL_hints |= HINT_BLOCK_SCOPE;
7056             LOP(OP_DIE,XTERM);
7057
7058         case KEY_defined:
7059             UNI(OP_DEFINED);
7060
7061         case KEY_delete:
7062             UNI(OP_DELETE);
7063
7064         case KEY_dbmopen:
7065             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7066                               STR_WITH_LEN("NDBM_File::"),
7067                               STR_WITH_LEN("DB_File::"),
7068                               STR_WITH_LEN("GDBM_File::"),
7069                               STR_WITH_LEN("SDBM_File::"),
7070                               STR_WITH_LEN("ODBM_File::"),
7071                               NULL);
7072             LOP(OP_DBMOPEN,XTERM);
7073
7074         case KEY_dbmclose:
7075             UNI(OP_DBMCLOSE);
7076
7077         case KEY_dump:
7078             PL_expect = XOPERATOR;
7079             s = force_word(s,WORD,TRUE,FALSE);
7080             LOOPX(OP_DUMP);
7081
7082         case KEY_else:
7083             PREBLOCK(ELSE);
7084
7085         case KEY_elsif:
7086             pl_yylval.ival = CopLINE(PL_curcop);
7087             OPERATOR(ELSIF);
7088
7089         case KEY_eq:
7090             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7091                 return REPORT(0);
7092             Eop(OP_SEQ);
7093
7094         case KEY_exists:
7095             UNI(OP_EXISTS);
7096
7097         case KEY_exit:
7098             UNI(OP_EXIT);
7099
7100         case KEY_eval:
7101             s = SKIPSPACE1(s);
7102             if (*s == '{') { /* block eval */
7103                 PL_expect = XTERMBLOCK;
7104                 UNIBRACK(OP_ENTERTRY);
7105             }
7106             else { /* string eval */
7107                 PL_expect = XTERM;
7108                 UNIBRACK(OP_ENTEREVAL);
7109             }
7110
7111         case KEY_evalbytes:
7112             PL_expect = XTERM;
7113             UNIBRACK(-OP_ENTEREVAL);
7114
7115         case KEY_eof:
7116             UNI(OP_EOF);
7117
7118         case KEY_exp:
7119             UNI(OP_EXP);
7120
7121         case KEY_each:
7122             UNI(OP_EACH);
7123
7124         case KEY_exec:
7125             LOP(OP_EXEC,XREF);
7126
7127         case KEY_endhostent:
7128             FUN0(OP_EHOSTENT);
7129
7130         case KEY_endnetent:
7131             FUN0(OP_ENETENT);
7132
7133         case KEY_endservent:
7134             FUN0(OP_ESERVENT);
7135
7136         case KEY_endprotoent:
7137             FUN0(OP_EPROTOENT);
7138
7139         case KEY_endpwent:
7140             FUN0(OP_EPWENT);
7141
7142         case KEY_endgrent:
7143             FUN0(OP_EGRENT);
7144
7145         case KEY_for:
7146         case KEY_foreach:
7147             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7148                 return REPORT(0);
7149             pl_yylval.ival = CopLINE(PL_curcop);
7150             s = SKIPSPACE1(s);
7151             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7152                 char *p = s;
7153
7154                 if ((PL_bufend - p) >= 3 &&
7155                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7156                     p += 2;
7157                 else if ((PL_bufend - p) >= 4 &&
7158                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7159                     p += 3;
7160                 p = PEEKSPACE(p);
7161                 /* skip optional package name, as in "for my abc $x (..)" */
7162                 if (isIDFIRST_lazy_if(p,UTF)) {
7163                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7164                     p = PEEKSPACE(p);
7165                 }
7166                 if (*p != '$')
7167                     Perl_croak(aTHX_ "Missing $ on loop variable");
7168             }
7169             OPERATOR(FOR);
7170
7171         case KEY_formline:
7172             LOP(OP_FORMLINE,XTERM);
7173
7174         case KEY_fork:
7175             FUN0(OP_FORK);
7176
7177         case KEY_fc:
7178             UNI(OP_FC);
7179
7180         case KEY_fcntl:
7181             LOP(OP_FCNTL,XTERM);
7182
7183         case KEY_fileno:
7184             UNI(OP_FILENO);
7185
7186         case KEY_flock:
7187             LOP(OP_FLOCK,XTERM);
7188
7189         case KEY_gt:
7190             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7191                 return REPORT(0);
7192             Rop(OP_SGT);
7193
7194         case KEY_ge:
7195             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7196                 return REPORT(0);
7197             Rop(OP_SGE);
7198
7199         case KEY_grep:
7200             LOP(OP_GREPSTART, XREF);
7201
7202         case KEY_goto:
7203             PL_expect = XOPERATOR;
7204             s = force_word(s,WORD,TRUE,FALSE);
7205             LOOPX(OP_GOTO);
7206
7207         case KEY_gmtime:
7208             UNI(OP_GMTIME);
7209
7210         case KEY_getc:
7211             UNIDOR(OP_GETC);
7212
7213         case KEY_getppid:
7214             FUN0(OP_GETPPID);
7215
7216         case KEY_getpgrp:
7217             UNI(OP_GETPGRP);
7218
7219         case KEY_getpriority:
7220             LOP(OP_GETPRIORITY,XTERM);
7221
7222         case KEY_getprotobyname:
7223             UNI(OP_GPBYNAME);
7224
7225         case KEY_getprotobynumber:
7226             LOP(OP_GPBYNUMBER,XTERM);
7227
7228         case KEY_getprotoent:
7229             FUN0(OP_GPROTOENT);
7230
7231         case KEY_getpwent:
7232             FUN0(OP_GPWENT);
7233
7234         case KEY_getpwnam:
7235             UNI(OP_GPWNAM);
7236
7237         case KEY_getpwuid:
7238             UNI(OP_GPWUID);
7239
7240         case KEY_getpeername:
7241             UNI(OP_GETPEERNAME);
7242
7243         case KEY_gethostbyname:
7244             UNI(OP_GHBYNAME);
7245
7246         case KEY_gethostbyaddr:
7247             LOP(OP_GHBYADDR,XTERM);
7248
7249         case KEY_gethostent:
7250             FUN0(OP_GHOSTENT);
7251
7252         case KEY_getnetbyname:
7253             UNI(OP_GNBYNAME);
7254
7255         case KEY_getnetbyaddr:
7256             LOP(OP_GNBYADDR,XTERM);
7257
7258         case KEY_getnetent:
7259             FUN0(OP_GNETENT);
7260
7261         case KEY_getservbyname:
7262             LOP(OP_GSBYNAME,XTERM);
7263
7264         case KEY_getservbyport:
7265             LOP(OP_GSBYPORT,XTERM);
7266
7267         case KEY_getservent:
7268             FUN0(OP_GSERVENT);
7269
7270         case KEY_getsockname:
7271             UNI(OP_GETSOCKNAME);
7272
7273         case KEY_getsockopt:
7274             LOP(OP_GSOCKOPT,XTERM);
7275
7276         case KEY_getgrent:
7277             FUN0(OP_GGRENT);
7278
7279         case KEY_getgrnam:
7280             UNI(OP_GGRNAM);
7281
7282         case KEY_getgrgid:
7283             UNI(OP_GGRGID);
7284
7285         case KEY_getlogin:
7286             FUN0(OP_GETLOGIN);
7287
7288         case KEY_given:
7289             pl_yylval.ival = CopLINE(PL_curcop);
7290             Perl_ck_warner_d(aTHX_
7291                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7292                 "given is experimental");
7293             OPERATOR(GIVEN);
7294
7295         case KEY_glob:
7296             LOP(
7297              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7298              XTERM
7299             );
7300
7301         case KEY_hex:
7302             UNI(OP_HEX);
7303
7304         case KEY_if:
7305             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7306                 return REPORT(0);
7307             pl_yylval.ival = CopLINE(PL_curcop);
7308             OPERATOR(IF);
7309
7310         case KEY_index:
7311             LOP(OP_INDEX,XTERM);
7312
7313         case KEY_int:
7314             UNI(OP_INT);
7315
7316         case KEY_ioctl:
7317             LOP(OP_IOCTL,XTERM);
7318
7319         case KEY_join:
7320             LOP(OP_JOIN,XTERM);
7321
7322         case KEY_keys:
7323             UNI(OP_KEYS);
7324
7325         case KEY_kill:
7326             LOP(OP_KILL,XTERM);
7327
7328         case KEY_last:
7329             PL_expect = XOPERATOR;
7330             s = force_word(s,WORD,TRUE,FALSE);
7331             LOOPX(OP_LAST);
7332
7333         case KEY_lc:
7334             UNI(OP_LC);
7335
7336         case KEY_lcfirst:
7337             UNI(OP_LCFIRST);
7338
7339         case KEY_local:
7340             pl_yylval.ival = 0;
7341             OPERATOR(LOCAL);
7342
7343         case KEY_length:
7344             UNI(OP_LENGTH);
7345
7346         case KEY_lt:
7347             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7348                 return REPORT(0);
7349             Rop(OP_SLT);
7350
7351         case KEY_le:
7352             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7353                 return REPORT(0);
7354             Rop(OP_SLE);
7355
7356         case KEY_localtime:
7357             UNI(OP_LOCALTIME);
7358
7359         case KEY_log:
7360             UNI(OP_LOG);
7361
7362         case KEY_link:
7363             LOP(OP_LINK,XTERM);
7364
7365         case KEY_listen:
7366             LOP(OP_LISTEN,XTERM);
7367
7368         case KEY_lock:
7369             UNI(OP_LOCK);
7370
7371         case KEY_lstat:
7372             UNI(OP_LSTAT);
7373
7374         case KEY_m:
7375             s = scan_pat(s,OP_MATCH);
7376             TERM(sublex_start());
7377
7378         case KEY_map:
7379             LOP(OP_MAPSTART, XREF);
7380
7381         case KEY_mkdir:
7382             LOP(OP_MKDIR,XTERM);
7383
7384         case KEY_msgctl:
7385             LOP(OP_MSGCTL,XTERM);
7386
7387         case KEY_msgget:
7388             LOP(OP_MSGGET,XTERM);
7389
7390         case KEY_msgrcv:
7391             LOP(OP_MSGRCV,XTERM);
7392
7393         case KEY_msgsnd:
7394             LOP(OP_MSGSND,XTERM);
7395
7396         case KEY_our:
7397         case KEY_my:
7398         case KEY_state:
7399             PL_in_my = (U16)tmp;
7400             s = SKIPSPACE1(s);
7401             if (isIDFIRST_lazy_if(s,UTF)) {
7402                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7403                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7404                 {
7405                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7406                         Perl_croak(aTHX_
7407                                   "Experimental \"%s\" subs not enabled",
7408                                    tmp == KEY_my    ? "my"    :
7409                                    tmp == KEY_state ? "state" : "our");
7410                     Perl_ck_warner_d(aTHX_
7411                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7412                         "The lexical_subs feature is experimental");
7413                     goto really_sub;
7414                 }
7415                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7416                 if (!PL_in_my_stash) {
7417                     char tmpbuf[1024];
7418                     int len;
7419                     PL_bufptr = s;
7420                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7421                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7422                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7423                 }
7424             }
7425             pl_yylval.ival = 1;
7426             OPERATOR(MY);
7427
7428         case KEY_next:
7429             PL_expect = XOPERATOR;
7430             s = force_word(s,WORD,TRUE,FALSE);
7431             LOOPX(OP_NEXT);
7432
7433         case KEY_ne:
7434             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7435                 return REPORT(0);
7436             Eop(OP_SNE);
7437
7438         case KEY_no:
7439             s = tokenize_use(0, s);
7440             TERM(USE);
7441
7442         case KEY_not:
7443             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7444                 FUN1(OP_NOT);
7445             else {
7446                 if (!PL_lex_allbrackets &&
7447                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7448                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7449                 OPERATOR(NOTOP);
7450             }
7451
7452         case KEY_open:
7453             s = SKIPSPACE1(s);
7454             if (isIDFIRST_lazy_if(s,UTF)) {
7455           const char *t;
7456           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7457               &len);
7458                 for (t=d; isSPACE(*t);)
7459                     t++;
7460                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7461                     /* [perl #16184] */
7462                     && !(t[0] == '=' && t[1] == '>')
7463                     && !(t[0] == ':' && t[1] == ':')
7464                     && !keyword(s, d-s, 0)
7465                 ) {
7466                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7467                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7468                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7469                 }
7470             }
7471             LOP(OP_OPEN,XTERM);
7472
7473         case KEY_or:
7474             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7475                 return REPORT(0);
7476             pl_yylval.ival = OP_OR;
7477             OPERATOR(OROP);
7478
7479         case KEY_ord:
7480             UNI(OP_ORD);
7481
7482         case KEY_oct:
7483             UNI(OP_OCT);
7484
7485         case KEY_opendir:
7486             LOP(OP_OPEN_DIR,XTERM);
7487
7488         case KEY_print:
7489             checkcomma(s,PL_tokenbuf,"filehandle");
7490             LOP(OP_PRINT,XREF);
7491
7492         case KEY_printf:
7493             checkcomma(s,PL_tokenbuf,"filehandle");
7494             LOP(OP_PRTF,XREF);
7495
7496         case KEY_prototype:
7497             UNI(OP_PROTOTYPE);
7498
7499         case KEY_push:
7500             LOP(OP_PUSH,XTERM);
7501
7502         case KEY_pop:
7503             UNIDOR(OP_POP);
7504
7505         case KEY_pos:
7506             UNIDOR(OP_POS);
7507
7508         case KEY_pack:
7509             LOP(OP_PACK,XTERM);
7510
7511         case KEY_package:
7512             s = force_word(s,WORD,FALSE,TRUE);
7513             s = SKIPSPACE1(s);
7514             s = force_strict_version(s);
7515             PL_lex_expect = XBLOCK;
7516             OPERATOR(PACKAGE);
7517
7518         case KEY_pipe:
7519             LOP(OP_PIPE_OP,XTERM);
7520
7521         case KEY_q:
7522             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7523             if (!s)
7524                 missingterm(NULL);
7525             COPLINE_SET_FROM_MULTI_END;
7526             pl_yylval.ival = OP_CONST;
7527             TERM(sublex_start());
7528
7529         case KEY_quotemeta:
7530             UNI(OP_QUOTEMETA);
7531
7532         case KEY_qw: {
7533             OP *words = NULL;
7534             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7535             if (!s)
7536                 missingterm(NULL);
7537             COPLINE_SET_FROM_MULTI_END;
7538             PL_expect = XOPERATOR;
7539             if (SvCUR(PL_lex_stuff)) {
7540                 int warned_comma = !ckWARN(WARN_QW);
7541                 int warned_comment = warned_comma;
7542                 d = SvPV_force(PL_lex_stuff, len);
7543                 while (len) {
7544                     for (; isSPACE(*d) && len; --len, ++d)
7545                         /**/;
7546                     if (len) {
7547                         SV *sv;
7548                         const char *b = d;
7549                         if (!warned_comma || !warned_comment) {
7550                             for (; !isSPACE(*d) && len; --len, ++d) {
7551                                 if (!warned_comma && *d == ',') {
7552                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7553                                         "Possible attempt to separate words with commas");
7554                                     ++warned_comma;
7555                                 }
7556                                 else if (!warned_comment && *d == '#') {
7557                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7558                                         "Possible attempt to put comments in qw() list");
7559                                     ++warned_comment;
7560                                 }
7561                             }
7562                         }
7563                         else {
7564                             for (; !isSPACE(*d) && len; --len, ++d)
7565                                 /**/;
7566                         }
7567                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7568                         words = op_append_elem(OP_LIST, words,
7569                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7570                     }
7571                 }
7572             }
7573             if (!words)
7574                 words = newNULLLIST();
7575             if (PL_lex_stuff) {
7576                 SvREFCNT_dec(PL_lex_stuff);
7577                 PL_lex_stuff = NULL;
7578             }
7579             PL_expect = XOPERATOR;
7580             pl_yylval.opval = sawparens(words);
7581             TOKEN(QWLIST);
7582         }
7583
7584         case KEY_qq:
7585             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7586             if (!s)
7587                 missingterm(NULL);
7588             pl_yylval.ival = OP_STRINGIFY;
7589             if (SvIVX(PL_lex_stuff) == '\'')
7590                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7591             TERM(sublex_start());
7592
7593         case KEY_qr:
7594             s = scan_pat(s,OP_QR);
7595             TERM(sublex_start());
7596
7597         case KEY_qx:
7598             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7599             if (!s)
7600                 missingterm(NULL);
7601             pl_yylval.ival = OP_BACKTICK;
7602             TERM(sublex_start());
7603
7604         case KEY_return:
7605             OLDLOP(OP_RETURN);
7606
7607         case KEY_require:
7608             s = SKIPSPACE1(s);
7609             PL_expect = XOPERATOR;
7610             if (isDIGIT(*s)) {
7611                 s = force_version(s, FALSE);
7612             }
7613             else if (*s != 'v' || !isDIGIT(s[1])
7614                     || (s = force_version(s, TRUE), *s == 'v'))
7615             {
7616                 *PL_tokenbuf = '\0';
7617                 s = force_word(s,WORD,TRUE,TRUE);
7618                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7619                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7620                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7621                 else if (*s == '<')
7622                     yyerror("<> at require-statement should be quotes");
7623             }
7624             if (orig_keyword == KEY_require) {
7625                 orig_keyword = 0;
7626                 pl_yylval.ival = 1;
7627             }
7628             else
7629                 pl_yylval.ival = 0;
7630             PL_expect = XTERM;
7631             PL_bufptr = s;
7632             PL_last_uni = PL_oldbufptr;
7633             PL_last_lop_op = OP_REQUIRE;
7634             s = skipspace(s);
7635             return REPORT( (int)REQUIRE );
7636
7637         case KEY_reset:
7638             UNI(OP_RESET);
7639
7640         case KEY_redo:
7641             PL_expect = XOPERATOR;
7642             s = force_word(s,WORD,TRUE,FALSE);
7643             LOOPX(OP_REDO);
7644
7645         case KEY_rename:
7646             LOP(OP_RENAME,XTERM);
7647
7648         case KEY_rand:
7649             UNI(OP_RAND);
7650
7651         case KEY_rmdir:
7652             UNI(OP_RMDIR);
7653
7654         case KEY_rindex:
7655             LOP(OP_RINDEX,XTERM);
7656
7657         case KEY_read:
7658             LOP(OP_READ,XTERM);
7659
7660         case KEY_readdir:
7661             UNI(OP_READDIR);
7662
7663         case KEY_readline:
7664             UNIDOR(OP_READLINE);
7665
7666         case KEY_readpipe:
7667             UNIDOR(OP_BACKTICK);
7668
7669         case KEY_rewinddir:
7670             UNI(OP_REWINDDIR);
7671
7672         case KEY_recv:
7673             LOP(OP_RECV,XTERM);
7674
7675         case KEY_reverse:
7676             LOP(OP_REVERSE,XTERM);
7677
7678         case KEY_readlink:
7679             UNIDOR(OP_READLINK);
7680
7681         case KEY_ref:
7682             UNI(OP_REF);
7683
7684         case KEY_s:
7685             s = scan_subst(s);
7686             if (pl_yylval.opval)
7687                 TERM(sublex_start());
7688             else
7689                 TOKEN(1);       /* force error */
7690
7691         case KEY_say:
7692             checkcomma(s,PL_tokenbuf,"filehandle");
7693             LOP(OP_SAY,XREF);
7694
7695         case KEY_chomp:
7696             UNI(OP_CHOMP);
7697
7698         case KEY_scalar:
7699             UNI(OP_SCALAR);
7700
7701         case KEY_select:
7702             LOP(OP_SELECT,XTERM);
7703
7704         case KEY_seek:
7705             LOP(OP_SEEK,XTERM);
7706
7707         case KEY_semctl:
7708             LOP(OP_SEMCTL,XTERM);
7709
7710         case KEY_semget:
7711             LOP(OP_SEMGET,XTERM);
7712
7713         case KEY_semop:
7714             LOP(OP_SEMOP,XTERM);
7715
7716         case KEY_send:
7717             LOP(OP_SEND,XTERM);
7718
7719         case KEY_setpgrp:
7720             LOP(OP_SETPGRP,XTERM);
7721
7722         case KEY_setpriority:
7723             LOP(OP_SETPRIORITY,XTERM);
7724
7725         case KEY_sethostent:
7726             UNI(OP_SHOSTENT);
7727
7728         case KEY_setnetent:
7729             UNI(OP_SNETENT);
7730
7731         case KEY_setservent:
7732             UNI(OP_SSERVENT);
7733
7734         case KEY_setprotoent:
7735             UNI(OP_SPROTOENT);
7736
7737         case KEY_setpwent:
7738             FUN0(OP_SPWENT);
7739
7740         case KEY_setgrent:
7741             FUN0(OP_SGRENT);
7742
7743         case KEY_seekdir:
7744             LOP(OP_SEEKDIR,XTERM);
7745
7746         case KEY_setsockopt:
7747             LOP(OP_SSOCKOPT,XTERM);
7748
7749         case KEY_shift:
7750             UNIDOR(OP_SHIFT);
7751
7752         case KEY_shmctl:
7753             LOP(OP_SHMCTL,XTERM);
7754
7755         case KEY_shmget:
7756             LOP(OP_SHMGET,XTERM);
7757
7758         case KEY_shmread:
7759             LOP(OP_SHMREAD,XTERM);
7760
7761         case KEY_shmwrite:
7762             LOP(OP_SHMWRITE,XTERM);
7763
7764         case KEY_shutdown:
7765             LOP(OP_SHUTDOWN,XTERM);
7766
7767         case KEY_sin:
7768             UNI(OP_SIN);
7769
7770         case KEY_sleep:
7771             UNI(OP_SLEEP);
7772
7773         case KEY_socket:
7774             LOP(OP_SOCKET,XTERM);
7775
7776         case KEY_socketpair:
7777             LOP(OP_SOCKPAIR,XTERM);
7778
7779         case KEY_sort:
7780             checkcomma(s,PL_tokenbuf,"subroutine name");
7781             s = SKIPSPACE1(s);
7782             PL_expect = XTERM;
7783             s = force_word(s,WORD,TRUE,TRUE);
7784             LOP(OP_SORT,XREF);
7785
7786         case KEY_split:
7787             LOP(OP_SPLIT,XTERM);
7788
7789         case KEY_sprintf:
7790             LOP(OP_SPRINTF,XTERM);
7791
7792         case KEY_splice:
7793             LOP(OP_SPLICE,XTERM);
7794
7795         case KEY_sqrt:
7796             UNI(OP_SQRT);
7797
7798         case KEY_srand:
7799             UNI(OP_SRAND);
7800
7801         case KEY_stat:
7802             UNI(OP_STAT);
7803
7804         case KEY_study:
7805             UNI(OP_STUDY);
7806
7807         case KEY_substr:
7808             LOP(OP_SUBSTR,XTERM);
7809
7810         case KEY_format:
7811         case KEY_sub:
7812           really_sub:
7813             {
7814                 char * const tmpbuf = PL_tokenbuf + 1;
7815                 expectation attrful;
7816                 bool have_name, have_proto;
7817                 const int key = tmp;
7818                 SV *format_name = NULL;
7819
7820                 d = s;
7821                 s = skipspace(s);
7822
7823                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7824                     (*s == ':' && s[1] == ':'))
7825                 {
7826
7827                     PL_expect = XBLOCK;
7828                     attrful = XATTRBLOCK;
7829                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7830                                   &len);
7831                     if (key == KEY_format)
7832                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7833                     *PL_tokenbuf = '&';
7834                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
7835                      || pad_findmy_pvn(
7836                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
7837                         ) != NOT_IN_PAD)
7838                         sv_setpvn(PL_subname, tmpbuf, len);
7839                     else {
7840                         sv_setsv(PL_subname,PL_curstname);
7841                         sv_catpvs(PL_subname,"::");
7842                         sv_catpvn(PL_subname,tmpbuf,len);
7843                     }
7844                     if (SvUTF8(PL_linestr))
7845                         SvUTF8_on(PL_subname);
7846                     have_name = TRUE;
7847
7848
7849                     s = skipspace(d);
7850                 }
7851                 else {
7852                     if (key == KEY_my || key == KEY_our || key==KEY_state)
7853                     {
7854                         *d = '\0';
7855                         /* diag_listed_as: Missing name in "%s sub" */
7856                         Perl_croak(aTHX_
7857                                   "Missing name in \"%s\"", PL_bufptr);
7858                     }
7859                     PL_expect = XTERMBLOCK;
7860                     attrful = XATTRTERM;
7861                     sv_setpvs(PL_subname,"?");
7862                     have_name = FALSE;
7863                 }
7864
7865                 if (key == KEY_format) {
7866                     if (format_name) {
7867                         NEXTVAL_NEXTTOKE.opval
7868                             = (OP*)newSVOP(OP_CONST,0, format_name);
7869                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7870                         force_next(WORD);
7871                     }
7872                     PREBLOCK(FORMAT);
7873                 }
7874
7875                 /* Look for a prototype */
7876                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
7877                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7878                     COPLINE_SET_FROM_MULTI_END;
7879                     if (!s)
7880                         Perl_croak(aTHX_ "Prototype not terminated");
7881                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
7882                     have_proto = TRUE;
7883
7884                     s = skipspace(s);
7885                 }
7886                 else
7887                     have_proto = FALSE;
7888
7889                 if (*s == ':' && s[1] != ':')
7890                     PL_expect = attrful;
7891                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
7892                     if (!have_name)
7893                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7894                     else if (*s != ';' && *s != '}')
7895                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7896                 }
7897
7898                 if (have_proto) {
7899                     NEXTVAL_NEXTTOKE.opval =
7900                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7901                     PL_lex_stuff = NULL;
7902                     force_next(THING);
7903                 }
7904                 if (!have_name) {
7905                     if (PL_curstash)
7906                         sv_setpvs(PL_subname, "__ANON__");
7907                     else
7908                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7909                     TOKEN(ANONSUB);
7910                 }
7911                 force_ident_maybe_lex('&');
7912                 TOKEN(SUB);
7913             }
7914
7915         case KEY_system:
7916             LOP(OP_SYSTEM,XREF);
7917
7918         case KEY_symlink:
7919             LOP(OP_SYMLINK,XTERM);
7920
7921         case KEY_syscall:
7922             LOP(OP_SYSCALL,XTERM);
7923
7924         case KEY_sysopen:
7925             LOP(OP_SYSOPEN,XTERM);
7926
7927         case KEY_sysseek:
7928             LOP(OP_SYSSEEK,XTERM);
7929
7930         case KEY_sysread:
7931             LOP(OP_SYSREAD,XTERM);
7932
7933         case KEY_syswrite:
7934             LOP(OP_SYSWRITE,XTERM);
7935
7936         case KEY_tr:
7937         case KEY_y:
7938             s = scan_trans(s);
7939             TERM(sublex_start());
7940
7941         case KEY_tell:
7942             UNI(OP_TELL);
7943
7944         case KEY_telldir:
7945             UNI(OP_TELLDIR);
7946
7947         case KEY_tie:
7948             LOP(OP_TIE,XTERM);
7949
7950         case KEY_tied:
7951             UNI(OP_TIED);
7952
7953         case KEY_time:
7954             FUN0(OP_TIME);
7955
7956         case KEY_times:
7957             FUN0(OP_TMS);
7958
7959         case KEY_truncate:
7960             LOP(OP_TRUNCATE,XTERM);
7961
7962         case KEY_uc:
7963             UNI(OP_UC);
7964
7965         case KEY_ucfirst:
7966             UNI(OP_UCFIRST);
7967
7968         case KEY_untie:
7969             UNI(OP_UNTIE);
7970
7971         case KEY_until:
7972             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7973                 return REPORT(0);
7974             pl_yylval.ival = CopLINE(PL_curcop);
7975             OPERATOR(UNTIL);
7976
7977         case KEY_unless:
7978             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7979                 return REPORT(0);
7980             pl_yylval.ival = CopLINE(PL_curcop);
7981             OPERATOR(UNLESS);
7982
7983         case KEY_unlink:
7984             LOP(OP_UNLINK,XTERM);
7985
7986         case KEY_undef:
7987             UNIDOR(OP_UNDEF);
7988
7989         case KEY_unpack:
7990             LOP(OP_UNPACK,XTERM);
7991
7992         case KEY_utime:
7993             LOP(OP_UTIME,XTERM);
7994
7995         case KEY_umask:
7996             UNIDOR(OP_UMASK);
7997
7998         case KEY_unshift:
7999             LOP(OP_UNSHIFT,XTERM);
8000
8001         case KEY_use:
8002             s = tokenize_use(1, s);
8003             OPERATOR(USE);
8004
8005         case KEY_values:
8006             UNI(OP_VALUES);
8007
8008         case KEY_vec:
8009             LOP(OP_VEC,XTERM);
8010
8011         case KEY_when:
8012             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8013                 return REPORT(0);
8014             pl_yylval.ival = CopLINE(PL_curcop);
8015             Perl_ck_warner_d(aTHX_
8016                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8017                 "when is experimental");
8018             OPERATOR(WHEN);
8019
8020         case KEY_while:
8021             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8022                 return REPORT(0);
8023             pl_yylval.ival = CopLINE(PL_curcop);
8024             OPERATOR(WHILE);
8025
8026         case KEY_warn:
8027             PL_hints |= HINT_BLOCK_SCOPE;
8028             LOP(OP_WARN,XTERM);
8029
8030         case KEY_wait:
8031             FUN0(OP_WAIT);
8032
8033         case KEY_waitpid:
8034             LOP(OP_WAITPID,XTERM);
8035
8036         case KEY_wantarray:
8037             FUN0(OP_WANTARRAY);
8038
8039         case KEY_write:
8040             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8041              * we use the same number on EBCDIC */
8042             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8043             UNI(OP_ENTERWRITE);
8044
8045         case KEY_x:
8046             if (PL_expect == XOPERATOR) {
8047                 if (*s == '=' && !PL_lex_allbrackets &&
8048                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8049                     return REPORT(0);
8050                 Mop(OP_REPEAT);
8051             }
8052             check_uni();
8053             goto just_a_word;
8054
8055         case KEY_xor:
8056             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8057                 return REPORT(0);
8058             pl_yylval.ival = OP_XOR;
8059             OPERATOR(OROP);
8060         }
8061     }}
8062 }
8063
8064 /*
8065   S_pending_ident
8066
8067   Looks up an identifier in the pad or in a package
8068
8069   Returns:
8070     PRIVATEREF if this is a lexical name.
8071     WORD       if this belongs to a package.
8072
8073   Structure:
8074       if we're in a my declaration
8075           croak if they tried to say my($foo::bar)
8076           build the ops for a my() declaration
8077       if it's an access to a my() variable
8078           build ops for access to a my() variable
8079       if in a dq string, and they've said @foo and we can't find @foo
8080           warn
8081       build ops for a bareword
8082 */
8083
8084 static int
8085 S_pending_ident(pTHX)
8086 {
8087     PADOFFSET tmp = 0;
8088     const char pit = (char)pl_yylval.ival;
8089     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8090     /* All routes through this function want to know if there is a colon.  */
8091     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8092
8093     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8094           "### Pending identifier '%s'\n", PL_tokenbuf); });
8095
8096     /* if we're in a my(), we can't allow dynamics here.
8097        $foo'bar has already been turned into $foo::bar, so
8098        just check for colons.
8099
8100        if it's a legal name, the OP is a PADANY.
8101     */
8102     if (PL_in_my) {
8103         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8104             if (has_colon)
8105                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8106                                   "variable %s in \"our\"",
8107                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8108             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8109         }
8110         else {
8111             if (has_colon) {
8112                 /* PL_no_myglob is constant */
8113                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8114                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8115                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8116                             UTF ? SVf_UTF8 : 0);
8117                 GCC_DIAG_RESTORE;
8118             }
8119
8120             pl_yylval.opval = newOP(OP_PADANY, 0);
8121             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8122                                                         UTF ? SVf_UTF8 : 0);
8123             return PRIVATEREF;
8124         }
8125     }
8126
8127     /*
8128        build the ops for accesses to a my() variable.
8129     */
8130
8131     if (!has_colon) {
8132         if (!PL_in_my)
8133             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8134                                     UTF ? SVf_UTF8 : 0);
8135         if (tmp != NOT_IN_PAD) {
8136             /* might be an "our" variable" */
8137             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8138                 /* build ops for a bareword */
8139                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8140                 HEK * const stashname = HvNAME_HEK(stash);
8141                 SV *  const sym = newSVhek(stashname);
8142                 sv_catpvs(sym, "::");
8143                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8144                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8145                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8146                 if (pit != '&')
8147                   gv_fetchsv(sym,
8148                     (PL_in_eval
8149                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8150                         : GV_ADDMULTI
8151                     ),
8152                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8153                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8154                      : SVt_PVHV));
8155                 return WORD;
8156             }
8157
8158             pl_yylval.opval = newOP(OP_PADANY, 0);
8159             pl_yylval.opval->op_targ = tmp;
8160             return PRIVATEREF;
8161         }
8162     }
8163
8164     /*
8165        Whine if they've said @foo in a doublequoted string,
8166        and @foo isn't a variable we can find in the symbol
8167        table.
8168     */
8169     if (ckWARN(WARN_AMBIGUOUS) &&
8170         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8171         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8172                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8173         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8174                 /* DO NOT warn for @- and @+ */
8175                 && !( PL_tokenbuf[2] == '\0' &&
8176                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8177            )
8178         {
8179             /* Downgraded from fatal to warning 20000522 mjd */
8180             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8181                         "Possible unintended interpolation of %"UTF8f
8182                         " in string",
8183                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8184         }
8185     }
8186
8187     /* build ops for a bareword */
8188     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8189                                    newSVpvn_flags(PL_tokenbuf + 1,
8190                                                       tokenbuf_len - 1,
8191                                                       UTF ? SVf_UTF8 : 0 ));
8192     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8193     if (pit != '&')
8194         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8195                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8196                      | ( UTF ? SVf_UTF8 : 0 ),
8197                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8198                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8199                       : SVt_PVHV));
8200     return WORD;
8201 }
8202
8203 STATIC void
8204 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8205 {
8206     PERL_ARGS_ASSERT_CHECKCOMMA;
8207
8208     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8209         if (ckWARN(WARN_SYNTAX)) {
8210             int level = 1;
8211             const char *w;
8212             for (w = s+2; *w && level; w++) {
8213                 if (*w == '(')
8214                     ++level;
8215                 else if (*w == ')')
8216                     --level;
8217             }
8218             while (isSPACE(*w))
8219                 ++w;
8220             /* the list of chars below is for end of statements or
8221              * block / parens, boolean operators (&&, ||, //) and branch
8222              * constructs (or, and, if, until, unless, while, err, for).
8223              * Not a very solid hack... */
8224             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8225                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8226                             "%s (...) interpreted as function",name);
8227         }
8228     }
8229     while (s < PL_bufend && isSPACE(*s))
8230         s++;
8231     if (*s == '(')
8232         s++;
8233     while (s < PL_bufend && isSPACE(*s))
8234         s++;
8235     if (isIDFIRST_lazy_if(s,UTF)) {
8236         const char * const w = s;
8237         s += UTF ? UTF8SKIP(s) : 1;
8238         while (isWORDCHAR_lazy_if(s,UTF))
8239             s += UTF ? UTF8SKIP(s) : 1;
8240         while (s < PL_bufend && isSPACE(*s))
8241             s++;
8242         if (*s == ',') {
8243             GV* gv;
8244             if (keyword(w, s - w, 0))
8245                 return;
8246
8247             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8248             if (gv && GvCVu(gv))
8249                 return;
8250             Perl_croak(aTHX_ "No comma allowed after %s", what);
8251         }
8252     }
8253 }
8254
8255 /* S_new_constant(): do any overload::constant lookup.
8256
8257    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8258    Best used as sv=new_constant(..., sv, ...).
8259    If s, pv are NULL, calls subroutine with one argument,
8260    and <type> is used with error messages only.
8261    <type> is assumed to be well formed UTF-8 */
8262
8263 STATIC SV *
8264 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8265                SV *sv, SV *pv, const char *type, STRLEN typelen)
8266 {
8267     dSP;
8268     HV * table = GvHV(PL_hintgv);                /* ^H */
8269     SV *res;
8270     SV *errsv = NULL;
8271     SV **cvp;
8272     SV *cv, *typesv;
8273     const char *why1 = "", *why2 = "", *why3 = "";
8274
8275     PERL_ARGS_ASSERT_NEW_CONSTANT;
8276     /* We assume that this is true: */
8277     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8278     assert(type || s);
8279
8280     /* charnames doesn't work well if there have been errors found */
8281     if (PL_error_count > 0 && *key == 'c')
8282     {
8283         SvREFCNT_dec_NN(sv);
8284         return &PL_sv_undef;
8285     }
8286
8287     sv_2mortal(sv);                     /* Parent created it permanently */
8288     if (!table
8289         || ! (PL_hints & HINT_LOCALIZE_HH)
8290         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8291         || ! SvOK(*cvp))
8292     {
8293         char *msg;
8294
8295         /* Here haven't found what we're looking for.  If it is charnames,
8296          * perhaps it needs to be loaded.  Try doing that before giving up */
8297         if (*key == 'c') {
8298             Perl_load_module(aTHX_
8299                             0,
8300                             newSVpvs("_charnames"),
8301                              /* version parameter; no need to specify it, as if
8302                               * we get too early a version, will fail anyway,
8303                               * not being able to find '_charnames' */
8304                             NULL,
8305                             newSVpvs(":full"),
8306                             newSVpvs(":short"),
8307                             NULL);
8308             assert(sp == PL_stack_sp);
8309             table = GvHV(PL_hintgv);
8310             if (table
8311                 && (PL_hints & HINT_LOCALIZE_HH)
8312                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8313                 && SvOK(*cvp))
8314             {
8315                 goto now_ok;
8316             }
8317         }
8318         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8319             msg = Perl_form(aTHX_
8320                                "Constant(%.*s) unknown",
8321                                 (int)(type ? typelen : len),
8322                                 (type ? type: s));
8323         }
8324         else {
8325             why1 = "$^H{";
8326             why2 = key;
8327             why3 = "} is not defined";
8328         report:
8329             if (*key == 'c') {
8330                 msg = Perl_form(aTHX_
8331                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8332                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8333                       );
8334             }
8335             else {
8336                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8337                                     (int)(type ? typelen : len),
8338                                     (type ? type: s), why1, why2, why3);
8339             }
8340         }
8341         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8342         return SvREFCNT_inc_simple_NN(sv);
8343     }
8344 now_ok:
8345     cv = *cvp;
8346     if (!pv && s)
8347         pv = newSVpvn_flags(s, len, SVs_TEMP);
8348     if (type && pv)
8349         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8350     else
8351         typesv = &PL_sv_undef;
8352
8353     PUSHSTACKi(PERLSI_OVERLOAD);
8354     ENTER ;
8355     SAVETMPS;
8356
8357     PUSHMARK(SP) ;
8358     EXTEND(sp, 3);
8359     if (pv)
8360         PUSHs(pv);
8361     PUSHs(sv);
8362     if (pv)
8363         PUSHs(typesv);
8364     PUTBACK;
8365     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8366
8367     SPAGAIN ;
8368
8369     /* Check the eval first */
8370     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8371         STRLEN errlen;
8372         const char * errstr;
8373         sv_catpvs(errsv, "Propagated");
8374         errstr = SvPV_const(errsv, errlen);
8375         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8376         (void)POPs;
8377         res = SvREFCNT_inc_simple_NN(sv);
8378     }
8379     else {
8380         res = POPs;
8381         SvREFCNT_inc_simple_void_NN(res);
8382     }
8383
8384     PUTBACK ;
8385     FREETMPS ;
8386     LEAVE ;
8387     POPSTACK;
8388
8389     if (!SvOK(res)) {
8390         why1 = "Call to &{$^H{";
8391         why2 = key;
8392         why3 = "}} did not return a defined value";
8393         sv = res;
8394         (void)sv_2mortal(sv);
8395         goto report;
8396     }
8397
8398     return res;
8399 }
8400
8401 PERL_STATIC_INLINE void
8402 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8403     PERL_ARGS_ASSERT_PARSE_IDENT;
8404
8405     for (;;) {
8406         if (*d >= e)
8407             Perl_croak(aTHX_ "%s", ident_too_long);
8408         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8409              /* The UTF-8 case must come first, otherwise things
8410              * like c\N{COMBINING TILDE} would start failing, as the
8411              * isWORDCHAR_A case below would gobble the 'c' up.
8412              */
8413
8414             char *t = *s + UTF8SKIP(*s);
8415             while (isIDCONT_utf8((U8*)t))
8416                 t += UTF8SKIP(t);
8417             if (*d + (t - *s) > e)
8418                 Perl_croak(aTHX_ "%s", ident_too_long);
8419             Copy(*s, *d, t - *s, char);
8420             *d += t - *s;
8421             *s = t;
8422         }
8423         else if ( isWORDCHAR_A(**s) ) {
8424             do {
8425                 *(*d)++ = *(*s)++;
8426             } while (isWORDCHAR_A(**s) && *d < e);
8427         }
8428         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8429             *(*d)++ = ':';
8430             *(*d)++ = ':';
8431             (*s)++;
8432         }
8433         else if (allow_package && **s == ':' && (*s)[1] == ':'
8434            /* Disallow things like Foo::$bar. For the curious, this is
8435             * the code path that triggers the "Bad name after" warning
8436             * when looking for barewords.
8437             */
8438            && (*s)[2] != '$') {
8439             *(*d)++ = *(*s)++;
8440             *(*d)++ = *(*s)++;
8441         }
8442         else
8443             break;
8444     }
8445     return;
8446 }
8447
8448 /* Returns a NUL terminated string, with the length of the string written to
8449    *slp
8450    */
8451 STATIC char *
8452 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8453 {
8454     char *d = dest;
8455     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8456     bool is_utf8 = cBOOL(UTF);
8457
8458     PERL_ARGS_ASSERT_SCAN_WORD;
8459
8460     parse_ident(&s, &d, e, allow_package, is_utf8);
8461     *d = '\0';
8462     *slp = d - dest;
8463     return s;
8464 }
8465
8466 STATIC char *
8467 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8468 {
8469     I32 herelines = PL_parser->herelines;
8470     SSize_t bracket = -1;
8471     char funny = *s++;
8472     char *d = dest;
8473     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8474     bool is_utf8 = cBOOL(UTF);
8475     I32 orig_copline = 0, tmp_copline = 0;
8476
8477     PERL_ARGS_ASSERT_SCAN_IDENT;
8478
8479     if (isSPACE(*s))
8480         s = PEEKSPACE(s);
8481     if (isDIGIT(*s)) {
8482         while (isDIGIT(*s)) {
8483             if (d >= e)
8484                 Perl_croak(aTHX_ "%s", ident_too_long);
8485             *d++ = *s++;
8486         }
8487     }
8488     else {
8489         parse_ident(&s, &d, e, 1, is_utf8);
8490     }
8491     *d = '\0';
8492     d = dest;
8493     if (*d) {
8494         /* Either a digit variable, or parse_ident() found an identifier
8495            (anything valid as a bareword), so job done and return.  */
8496         if (PL_lex_state != LEX_NORMAL)
8497             PL_lex_state = LEX_INTERPENDMAYBE;
8498         return s;
8499     }
8500     if (*s == '$' && s[1] &&
8501       (isIDFIRST_lazy_if(s+1,is_utf8)
8502          || isDIGIT_A((U8)s[1])
8503          || s[1] == '$'
8504          || s[1] == '{'
8505          || strnEQ(s+1,"::",2)) )
8506     {
8507         /* Dereferencing a value in a scalar variable.
8508            The alternatives are different syntaxes for a scalar variable.
8509            Using ' as a leading package separator isn't allowed. :: is.   */
8510         return s;
8511     }
8512     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8513     if (*s == '{') {
8514         bracket = s - SvPVX(PL_linestr);
8515         s++;
8516         orig_copline = CopLINE(PL_curcop);
8517         if (s < PL_bufend && isSPACE(*s)) {
8518             s = PEEKSPACE(s);
8519         }
8520     }
8521
8522 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8523  * iff Unicode semantics are to be used.  The legal ones are any of:
8524  *  a) ASCII digits
8525  *  b) ASCII punctuation
8526  *  c) When not under Unicode rules, any upper Latin1 character
8527  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
8528  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
8529  *     the \s ones. */
8530 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
8531                                    || isDIGIT_A((U8)(d))                    \
8532                                    || (!(u) && !isASCII((U8)(d)))           \
8533                                    || ((((U8)(d)) < 32)                     \
8534                                        && (((((U8)(d)) >= 14)               \
8535                                            || (((U8)(d)) <= 8 && (d) != 0) \
8536                                            || (((U8)(d)) == 13))))          \
8537                                    || (((U8)(d)) == toCTRL('?')))
8538     if (s < PL_bufend
8539         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
8540     {
8541         if ( isCNTRL_A((U8)*s) ) {
8542             deprecate("literal control characters in variable names");
8543         }
8544
8545         if (is_utf8) {
8546             const STRLEN skip = UTF8SKIP(s);
8547             STRLEN i;
8548             d[skip] = '\0';
8549             for ( i = 0; i < skip; i++ )
8550                 d[i] = *s++;
8551         }
8552         else {
8553             *d = *s++;
8554             d[1] = '\0';
8555         }
8556     }
8557     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8558     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8559         *d = toCTRL(*s);
8560         s++;
8561     }
8562     /* Warn about ambiguous code after unary operators if {...} notation isn't
8563        used.  There's no difference in ambiguity; it's merely a heuristic
8564        about when not to warn.  */
8565     else if (ck_uni && bracket == -1)
8566         check_uni();
8567     if (bracket != -1) {
8568         /* If we were processing {...} notation then...  */
8569         if (isIDFIRST_lazy_if(d,is_utf8)) {
8570             /* if it starts as a valid identifier, assume that it is one.
8571                (the later check for } being at the expected point will trap
8572                cases where this doesn't pan out.)  */
8573         d += is_utf8 ? UTF8SKIP(d) : 1;
8574         parse_ident(&s, &d, e, 1, is_utf8);
8575             *d = '\0';
8576             tmp_copline = CopLINE(PL_curcop);
8577             if (s < PL_bufend && isSPACE(*s)) {
8578                 s = PEEKSPACE(s);
8579             }
8580             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8581                 /* ${foo[0]} and ${foo{bar}} notation.  */
8582                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8583                     const char * const brack =
8584                         (const char *)
8585                         ((*s == '[') ? "[...]" : "{...}");
8586                     orig_copline = CopLINE(PL_curcop);
8587                     CopLINE_set(PL_curcop, tmp_copline);
8588    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8589                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8590                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8591                         funny, dest, brack, funny, dest, brack);
8592                     CopLINE_set(PL_curcop, orig_copline);
8593                 }
8594                 bracket++;
8595                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8596                 PL_lex_allbrackets++;
8597                 return s;
8598             }
8599         }
8600         /* Handle extended ${^Foo} variables
8601          * 1999-02-27 mjd-perl-patch@plover.com */
8602         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8603                  && isWORDCHAR(*s))
8604         {
8605             d++;
8606             while (isWORDCHAR(*s) && d < e) {
8607                 *d++ = *s++;
8608             }
8609             if (d >= e)
8610                 Perl_croak(aTHX_ "%s", ident_too_long);
8611             *d = '\0';
8612         }
8613
8614         if ( !tmp_copline )
8615             tmp_copline = CopLINE(PL_curcop);
8616         if (s < PL_bufend && isSPACE(*s)) {
8617             s = PEEKSPACE(s);
8618         }
8619
8620         /* Expect to find a closing } after consuming any trailing whitespace.
8621          */
8622         if (*s == '}') {
8623             s++;
8624             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8625                 PL_lex_state = LEX_INTERPEND;
8626                 PL_expect = XREF;
8627             }
8628             if (PL_lex_state == LEX_NORMAL) {
8629                 if (ckWARN(WARN_AMBIGUOUS) &&
8630                     (keyword(dest, d - dest, 0)
8631                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8632                 {
8633                     SV *tmp = newSVpvn_flags( dest, d - dest,
8634                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8635                     if (funny == '#')
8636                         funny = '@';
8637                     orig_copline = CopLINE(PL_curcop);
8638                     CopLINE_set(PL_curcop, tmp_copline);
8639                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8640                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8641                         funny, SVfARG(tmp), funny, SVfARG(tmp));
8642                     CopLINE_set(PL_curcop, orig_copline);
8643                 }
8644             }
8645         }
8646         else {
8647             /* Didn't find the closing } at the point we expected, so restore
8648                state such that the next thing to process is the opening { and */
8649             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8650             CopLINE_set(PL_curcop, orig_copline);
8651             PL_parser->herelines = herelines;
8652             *dest = '\0';
8653         }
8654     }
8655     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8656         PL_lex_state = LEX_INTERPEND;
8657     return s;
8658 }
8659
8660 static bool
8661 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8662
8663     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8664      * the parse starting at 's', based on the subset that are valid in this
8665      * context input to this routine in 'valid_flags'. Advances s.  Returns
8666      * TRUE if the input should be treated as a valid flag, so the next char
8667      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
8668      * first call on the current regex.  This routine will set it to any
8669      * charset modifier found.  The caller shouldn't change it.  This way,
8670      * another charset modifier encountered in the parse can be detected as an
8671      * error, as we have decided to allow only one */
8672
8673     const char c = **s;
8674     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8675
8676     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8677         if (isWORDCHAR_lazy_if(*s, UTF)) {
8678             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8679                        UTF ? SVf_UTF8 : 0);
8680             (*s) += charlen;
8681             /* Pretend that it worked, so will continue processing before
8682              * dieing */
8683             return TRUE;
8684         }
8685         return FALSE;
8686     }
8687
8688     switch (c) {
8689
8690         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8691         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8692         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8693         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8694         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8695         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8696         case LOCALE_PAT_MOD:
8697             if (*charset) {
8698                 goto multiple_charsets;
8699             }
8700             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8701             *charset = c;
8702             break;
8703         case UNICODE_PAT_MOD:
8704             if (*charset) {
8705                 goto multiple_charsets;
8706             }
8707             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8708             *charset = c;
8709             break;
8710         case ASCII_RESTRICT_PAT_MOD:
8711             if (! *charset) {
8712                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8713             }
8714             else {
8715
8716                 /* Error if previous modifier wasn't an 'a', but if it was, see
8717                  * if, and accept, a second occurrence (only) */
8718                 if (*charset != 'a'
8719                     || get_regex_charset(*pmfl)
8720                         != REGEX_ASCII_RESTRICTED_CHARSET)
8721                 {
8722                         goto multiple_charsets;
8723                 }
8724                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8725             }
8726             *charset = c;
8727             break;
8728         case DEPENDS_PAT_MOD:
8729             if (*charset) {
8730                 goto multiple_charsets;
8731             }
8732             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8733             *charset = c;
8734             break;
8735     }
8736
8737     (*s)++;
8738     return TRUE;
8739
8740     multiple_charsets:
8741         if (*charset != c) {
8742             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8743         }
8744         else if (c == 'a') {
8745   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8746             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8747         }
8748         else {
8749             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8750         }
8751
8752         /* Pretend that it worked, so will continue processing before dieing */
8753         (*s)++;
8754         return TRUE;
8755 }
8756
8757 STATIC char *
8758 S_scan_pat(pTHX_ char *start, I32 type)
8759 {
8760     PMOP *pm;
8761     char *s;
8762     const char * const valid_flags =
8763         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8764     char charset = '\0';    /* character set modifier */
8765
8766     PERL_ARGS_ASSERT_SCAN_PAT;
8767
8768     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8769     if (!s)
8770         Perl_croak(aTHX_ "Search pattern not terminated");
8771
8772     pm = (PMOP*)newPMOP(type, 0);
8773     if (PL_multi_open == '?') {
8774         /* This is the only point in the code that sets PMf_ONCE:  */
8775         pm->op_pmflags |= PMf_ONCE;
8776
8777         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8778            allows us to restrict the list needed by reset to just the ??
8779            matches.  */
8780         assert(type != OP_TRANS);
8781         if (PL_curstash) {
8782             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8783             U32 elements;
8784             if (!mg) {
8785                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8786                                  0);
8787             }
8788             elements = mg->mg_len / sizeof(PMOP**);
8789             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8790             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8791             mg->mg_len = elements * sizeof(PMOP**);
8792             PmopSTASH_set(pm,PL_curstash);
8793         }
8794     }
8795
8796     /* if qr/...(?{..}).../, then need to parse the pattern within a new
8797      * anon CV. False positives like qr/[(?{]/ are harmless */
8798
8799     if (type == OP_QR) {
8800         STRLEN len;
8801         char *e, *p = SvPV(PL_lex_stuff, len);
8802         e = p + len;
8803         for (; p < e; p++) {
8804             if (p[0] == '(' && p[1] == '?'
8805                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8806             {
8807                 pm->op_pmflags |= PMf_HAS_CV;
8808                 break;
8809             }
8810         }
8811         pm->op_pmflags |= PMf_IS_QR;
8812     }
8813
8814     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8815     /* issue a warning if /c is specified,but /g is not */
8816     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8817     {
8818         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8819                        "Use of /c modifier is meaningless without /g" );
8820     }
8821
8822     PL_lex_op = (OP*)pm;
8823     pl_yylval.ival = OP_MATCH;
8824     return s;
8825 }
8826
8827 STATIC char *
8828 S_scan_subst(pTHX_ char *start)
8829 {
8830     char *s;
8831     PMOP *pm;
8832     I32 first_start;
8833     line_t first_line;
8834     I32 es = 0;
8835     char charset = '\0';    /* character set modifier */
8836     char *t;
8837
8838     PERL_ARGS_ASSERT_SCAN_SUBST;
8839
8840     pl_yylval.ival = OP_NULL;
8841
8842     s = scan_str(start, TRUE, FALSE, FALSE, &t);
8843
8844     if (!s)
8845         Perl_croak(aTHX_ "Substitution pattern not terminated");
8846
8847     s = t;
8848
8849     first_start = PL_multi_start;
8850     first_line = CopLINE(PL_curcop);
8851     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8852     if (!s) {
8853         if (PL_lex_stuff) {
8854             SvREFCNT_dec(PL_lex_stuff);
8855             PL_lex_stuff = NULL;
8856         }
8857         Perl_croak(aTHX_ "Substitution replacement not terminated");
8858     }
8859     PL_multi_start = first_start;       /* so whole substitution is taken together */
8860
8861     pm = (PMOP*)newPMOP(OP_SUBST, 0);
8862
8863
8864     while (*s) {
8865         if (*s == EXEC_PAT_MOD) {
8866             s++;
8867             es++;
8868         }
8869         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
8870         {
8871             break;
8872         }
8873     }
8874
8875     if ((pm->op_pmflags & PMf_CONTINUE)) {
8876         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
8877     }
8878
8879     if (es) {
8880         SV * const repl = newSVpvs("");
8881
8882         PL_multi_end = 0;
8883         pm->op_pmflags |= PMf_EVAL;
8884         while (es-- > 0) {
8885             if (es)
8886                 sv_catpvs(repl, "eval ");
8887             else
8888                 sv_catpvs(repl, "do ");
8889         }
8890         sv_catpvs(repl, "{");
8891         sv_catsv(repl, PL_sublex_info.repl);
8892         sv_catpvs(repl, "}");
8893         SvEVALED_on(repl);
8894         SvREFCNT_dec(PL_sublex_info.repl);
8895         PL_sublex_info.repl = repl;
8896     }
8897     if (CopLINE(PL_curcop) != first_line) {
8898         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
8899         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
8900             CopLINE(PL_curcop) - first_line;
8901         CopLINE_set(PL_curcop, first_line);
8902     }
8903
8904     PL_lex_op = (OP*)pm;
8905     pl_yylval.ival = OP_SUBST;
8906     return s;
8907 }
8908
8909 STATIC char *
8910 S_scan_trans(pTHX_ char *start)
8911 {
8912     char* s;
8913     OP *o;
8914     U8 squash;
8915     U8 del;
8916     U8 complement;
8917     bool nondestruct = 0;
8918     char *t;
8919
8920     PERL_ARGS_ASSERT_SCAN_TRANS;
8921
8922     pl_yylval.ival = OP_NULL;
8923
8924     s = scan_str(start,FALSE,FALSE,FALSE,&t);
8925     if (!s)
8926         Perl_croak(aTHX_ "Transliteration pattern not terminated");
8927
8928     s = t;
8929
8930     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8931     if (!s) {
8932         if (PL_lex_stuff) {
8933             SvREFCNT_dec(PL_lex_stuff);
8934             PL_lex_stuff = NULL;
8935         }
8936         Perl_croak(aTHX_ "Transliteration replacement not terminated");
8937     }
8938
8939     complement = del = squash = 0;
8940     while (1) {
8941         switch (*s) {
8942         case 'c':
8943             complement = OPpTRANS_COMPLEMENT;
8944             break;
8945         case 'd':
8946             del = OPpTRANS_DELETE;
8947             break;
8948         case 's':
8949             squash = OPpTRANS_SQUASH;
8950             break;
8951         case 'r':
8952             nondestruct = 1;
8953             break;
8954         default:
8955             goto no_more;
8956         }
8957         s++;
8958     }
8959   no_more:
8960
8961     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
8962     o->op_private &= ~OPpTRANS_ALL;
8963     o->op_private |= del|squash|complement|
8964       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
8965       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
8966
8967     PL_lex_op = o;
8968     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
8969
8970
8971     return s;
8972 }
8973
8974 /* scan_heredoc
8975    Takes a pointer to the first < in <<FOO.
8976    Returns a pointer to the byte following <<FOO.
8977
8978    This function scans a heredoc, which involves different methods
8979    depending on whether we are in a string eval, quoted construct, etc.
8980    This is because PL_linestr could containing a single line of input, or
8981    a whole string being evalled, or the contents of the current quote-
8982    like operator.
8983
8984    The two basic methods are:
8985     - Steal lines from the input stream
8986     - Scan the heredoc in PL_linestr and remove it therefrom
8987
8988    In a file scope or filtered eval, the first method is used; in a
8989    string eval, the second.
8990
8991    In a quote-like operator, we have to choose between the two,
8992    depending on where we can find a newline.  We peek into outer lex-
8993    ing scopes until we find one with a newline in it.  If we reach the
8994    outermost lexing scope and it is a file, we use the stream method.
8995    Otherwise it is treated as an eval.
8996 */
8997
8998 STATIC char *
8999 S_scan_heredoc(pTHX_ char *s)
9000 {
9001     I32 op_type = OP_SCALAR;
9002     I32 len;
9003     SV *tmpstr;
9004     char term;
9005     char *d;
9006     char *e;
9007     char *peek;
9008     const bool infile = PL_rsfp || PL_parser->filtered;
9009     const line_t origline = CopLINE(PL_curcop);
9010     LEXSHARED *shared = PL_parser->lex_shared;
9011
9012     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9013
9014     s += 2;
9015     d = PL_tokenbuf + 1;
9016     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9017     *PL_tokenbuf = '\n';
9018     peek = s;
9019     while (SPACE_OR_TAB(*peek))
9020         peek++;
9021     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9022         s = peek;
9023         term = *s++;
9024         s = delimcpy(d, e, s, PL_bufend, term, &len);
9025         if (s == PL_bufend)
9026             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9027         d += len;
9028         s++;
9029     }
9030     else {
9031         if (*s == '\\')
9032             /* <<\FOO is equivalent to <<'FOO' */
9033             s++, term = '\'';
9034         else
9035             term = '"';
9036         if (!isWORDCHAR_lazy_if(s,UTF))
9037             deprecate("bare << to mean <<\"\"");
9038         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9039             if (d < e)
9040                 *d++ = *s;
9041         }
9042     }
9043     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9044         Perl_croak(aTHX_ "Delimiter for here document is too long");
9045     *d++ = '\n';
9046     *d = '\0';
9047     len = d - PL_tokenbuf;
9048
9049 #ifndef PERL_STRICT_CR
9050     d = strchr(s, '\r');
9051     if (d) {
9052         char * const olds = s;
9053         s = d;
9054         while (s < PL_bufend) {
9055             if (*s == '\r') {
9056                 *d++ = '\n';
9057                 if (*++s == '\n')
9058                     s++;
9059             }
9060             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9061                 *d++ = *s++;
9062                 s++;
9063             }
9064             else
9065                 *d++ = *s++;
9066         }
9067         *d = '\0';
9068         PL_bufend = d;
9069         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9070         s = olds;
9071     }
9072 #endif
9073
9074     tmpstr = newSV_type(SVt_PVIV);
9075     SvGROW(tmpstr, 80);
9076     if (term == '\'') {
9077         op_type = OP_CONST;
9078         SvIV_set(tmpstr, -1);
9079     }
9080     else if (term == '`') {
9081         op_type = OP_BACKTICK;
9082         SvIV_set(tmpstr, '\\');
9083     }
9084
9085     PL_multi_start = origline + 1 + PL_parser->herelines;
9086     PL_multi_open = PL_multi_close = '<';
9087     /* inside a string eval or quote-like operator */
9088     if (!infile || PL_lex_inwhat) {
9089         SV *linestr;
9090         char *bufend;
9091         char * const olds = s;
9092         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9093         /* These two fields are not set until an inner lexing scope is
9094            entered.  But we need them set here. */
9095         shared->ls_bufptr  = s;
9096         shared->ls_linestr = PL_linestr;
9097         if (PL_lex_inwhat)
9098           /* Look for a newline.  If the current buffer does not have one,
9099              peek into the line buffer of the parent lexing scope, going
9100              up as many levels as necessary to find one with a newline
9101              after bufptr.
9102            */
9103           while (!(s = (char *)memchr(
9104                     (void *)shared->ls_bufptr, '\n',
9105                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9106                 ))) {
9107             shared = shared->ls_prev;
9108             /* shared is only null if we have gone beyond the outermost
9109                lexing scope.  In a file, we will have broken out of the
9110                loop in the previous iteration.  In an eval, the string buf-
9111                fer ends with "\n;", so the while condition above will have
9112                evaluated to false.  So shared can never be null. */
9113             assert(shared);
9114             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9115                most lexing scope.  In a file, shared->ls_linestr at that
9116                level is just one line, so there is no body to steal. */
9117             if (infile && !shared->ls_prev) {
9118                 s = olds;
9119                 goto streaming;
9120             }
9121           }
9122         else {  /* eval */
9123             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9124             assert(s);
9125         }
9126         linestr = shared->ls_linestr;
9127         bufend = SvEND(linestr);
9128         d = s;
9129         while (s < bufend - len + 1 &&
9130           memNE(s,PL_tokenbuf,len) ) {
9131             if (*s++ == '\n')
9132                 ++PL_parser->herelines;
9133         }
9134         if (s >= bufend - len + 1) {
9135             goto interminable;
9136         }
9137         sv_setpvn(tmpstr,d+1,s-d);
9138         s += len - 1;
9139         /* the preceding stmt passes a newline */
9140         PL_parser->herelines++;
9141
9142         /* s now points to the newline after the heredoc terminator.
9143            d points to the newline before the body of the heredoc.
9144          */
9145
9146         /* We are going to modify linestr in place here, so set
9147            aside copies of the string if necessary for re-evals or
9148            (caller $n)[6]. */
9149         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9150            check shared->re_eval_str. */
9151         if (shared->re_eval_start || shared->re_eval_str) {
9152             /* Set aside the rest of the regexp */
9153             if (!shared->re_eval_str)
9154                 shared->re_eval_str =
9155                        newSVpvn(shared->re_eval_start,
9156                                 bufend - shared->re_eval_start);
9157             shared->re_eval_start -= s-d;
9158         }
9159         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9160             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9161             cx->blk_eval.cur_text == linestr)
9162         {
9163             cx->blk_eval.cur_text = newSVsv(linestr);
9164             SvSCREAM_on(cx->blk_eval.cur_text);
9165         }
9166         /* Copy everything from s onwards back to d. */
9167         Move(s,d,bufend-s + 1,char);
9168         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9169         /* Setting PL_bufend only applies when we have not dug deeper
9170            into other scopes, because sublex_done sets PL_bufend to
9171            SvEND(PL_linestr). */
9172         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9173         s = olds;
9174     }
9175     else
9176     {
9177       SV *linestr_save;
9178      streaming:
9179       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9180       term = PL_tokenbuf[1];
9181       len--;
9182       linestr_save = PL_linestr; /* must restore this afterwards */
9183       d = s;                     /* and this */
9184       PL_linestr = newSVpvs("");
9185       PL_bufend = SvPVX(PL_linestr);
9186       while (1) {
9187         PL_bufptr = PL_bufend;
9188         CopLINE_set(PL_curcop,
9189                     origline + 1 + PL_parser->herelines);
9190         if (!lex_next_chunk(LEX_NO_TERM)
9191          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9192             SvREFCNT_dec(linestr_save);
9193             goto interminable;
9194         }
9195         CopLINE_set(PL_curcop, origline);
9196         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9197             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9198             /* ^That should be enough to avoid this needing to grow:  */
9199             sv_catpvs(PL_linestr, "\n\0");
9200             assert(s == SvPVX(PL_linestr));
9201             PL_bufend = SvEND(PL_linestr);
9202         }
9203         s = PL_bufptr;
9204         PL_parser->herelines++;
9205         PL_last_lop = PL_last_uni = NULL;
9206 #ifndef PERL_STRICT_CR
9207         if (PL_bufend - PL_linestart >= 2) {
9208             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9209                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9210             {
9211                 PL_bufend[-2] = '\n';
9212                 PL_bufend--;
9213                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9214             }
9215             else if (PL_bufend[-1] == '\r')
9216                 PL_bufend[-1] = '\n';
9217         }
9218         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9219             PL_bufend[-1] = '\n';
9220 #endif
9221         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9222             SvREFCNT_dec(PL_linestr);
9223             PL_linestr = linestr_save;
9224             PL_linestart = SvPVX(linestr_save);
9225             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9226             s = d;
9227             break;
9228         }
9229         else {
9230             sv_catsv(tmpstr,PL_linestr);
9231         }
9232       }
9233     }
9234     PL_multi_end = origline + PL_parser->herelines;
9235     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9236         SvPV_shrink_to_cur(tmpstr);
9237     }
9238     if (!IN_BYTES) {
9239         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9240             SvUTF8_on(tmpstr);
9241         else if (PL_encoding)
9242             sv_recode_to_utf8(tmpstr, PL_encoding);
9243     }
9244     PL_lex_stuff = tmpstr;
9245     pl_yylval.ival = op_type;
9246     return s;
9247
9248   interminable:
9249     SvREFCNT_dec(tmpstr);
9250     CopLINE_set(PL_curcop, origline);
9251     missingterm(PL_tokenbuf + 1);
9252 }
9253
9254 /* scan_inputsymbol
9255    takes: current position in input buffer
9256    returns: new position in input buffer
9257    side-effects: pl_yylval and lex_op are set.
9258
9259    This code handles:
9260
9261    <>           read from ARGV
9262    <FH>         read from filehandle
9263    <pkg::FH>    read from package qualified filehandle
9264    <pkg'FH>     read from package qualified filehandle
9265    <$fh>        read from filehandle in $fh
9266    <*.h>        filename glob
9267
9268 */
9269
9270 STATIC char *
9271 S_scan_inputsymbol(pTHX_ char *start)
9272 {
9273     char *s = start;            /* current position in buffer */
9274     char *end;
9275     I32 len;
9276     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9277     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9278
9279     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9280
9281     end = strchr(s, '\n');
9282     if (!end)
9283         end = PL_bufend;
9284     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9285
9286     /* die if we didn't have space for the contents of the <>,
9287        or if it didn't end, or if we see a newline
9288     */
9289
9290     if (len >= (I32)sizeof PL_tokenbuf)
9291         Perl_croak(aTHX_ "Excessively long <> operator");
9292     if (s >= end)
9293         Perl_croak(aTHX_ "Unterminated <> operator");
9294
9295     s++;
9296
9297     /* check for <$fh>
9298        Remember, only scalar variables are interpreted as filehandles by
9299        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9300        treated as a glob() call.
9301        This code makes use of the fact that except for the $ at the front,
9302        a scalar variable and a filehandle look the same.
9303     */
9304     if (*d == '$' && d[1]) d++;
9305
9306     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9307     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9308         d += UTF ? UTF8SKIP(d) : 1;
9309
9310     /* If we've tried to read what we allow filehandles to look like, and
9311        there's still text left, then it must be a glob() and not a getline.
9312        Use scan_str to pull out the stuff between the <> and treat it
9313        as nothing more than a string.
9314     */
9315
9316     if (d - PL_tokenbuf != len) {
9317         pl_yylval.ival = OP_GLOB;
9318         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9319         if (!s)
9320            Perl_croak(aTHX_ "Glob not terminated");
9321         return s;
9322     }
9323     else {
9324         bool readline_overriden = FALSE;
9325         GV *gv_readline;
9326         /* we're in a filehandle read situation */
9327         d = PL_tokenbuf;
9328
9329         /* turn <> into <ARGV> */
9330         if (!len)
9331             Copy("ARGV",d,5,char);
9332
9333         /* Check whether readline() is overriden */
9334         if ((gv_readline = gv_override("readline",8)))
9335             readline_overriden = TRUE;
9336
9337         /* if <$fh>, create the ops to turn the variable into a
9338            filehandle
9339         */
9340         if (*d == '$') {
9341             /* try to find it in the pad for this block, otherwise find
9342                add symbol table ops
9343             */
9344             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9345             if (tmp != NOT_IN_PAD) {
9346                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9347                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9348                     HEK * const stashname = HvNAME_HEK(stash);
9349                     SV * const sym = sv_2mortal(newSVhek(stashname));
9350                     sv_catpvs(sym, "::");
9351                     sv_catpv(sym, d+1);
9352                     d = SvPVX(sym);
9353                     goto intro_sym;
9354                 }
9355                 else {
9356                     OP * const o = newOP(OP_PADSV, 0);
9357                     o->op_targ = tmp;
9358                     PL_lex_op = readline_overriden
9359                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9360                                 op_append_elem(OP_LIST, o,
9361                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9362                         : (OP*)newUNOP(OP_READLINE, 0, o);
9363                 }
9364             }
9365             else {
9366                 GV *gv;
9367                 ++d;
9368 intro_sym:
9369                 gv = gv_fetchpv(d,
9370                                 (PL_in_eval
9371                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9372                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9373                                 SVt_PV);
9374                 PL_lex_op = readline_overriden
9375                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9376                             op_append_elem(OP_LIST,
9377                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9378                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9379                     : (OP*)newUNOP(OP_READLINE, 0,
9380                             newUNOP(OP_RV2SV, 0,
9381                                 newGVOP(OP_GV, 0, gv)));
9382             }
9383             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9384             pl_yylval.ival = OP_NULL;
9385         }
9386
9387         /* If it's none of the above, it must be a literal filehandle
9388            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9389         else {
9390             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9391             PL_lex_op = readline_overriden
9392                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9393                         op_append_elem(OP_LIST,
9394                             newGVOP(OP_GV, 0, gv),
9395                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9396                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9397             pl_yylval.ival = OP_NULL;
9398         }
9399     }
9400
9401     return s;
9402 }
9403
9404
9405 /* scan_str
9406    takes:
9407         start                   position in buffer
9408         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9409                                 only if they are of the open/close form
9410         keep_delims             preserve the delimiters around the string
9411         re_reparse              compiling a run-time /(?{})/:
9412                                    collapse // to /,  and skip encoding src
9413         delimp                  if non-null, this is set to the position of
9414                                 the closing delimiter, or just after it if
9415                                 the closing and opening delimiters differ
9416                                 (i.e., the opening delimiter of a substitu-
9417                                 tion replacement)
9418    returns: position to continue reading from buffer
9419    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9420         updates the read buffer.
9421
9422    This subroutine pulls a string out of the input.  It is called for:
9423         q               single quotes           q(literal text)
9424         '               single quotes           'literal text'
9425         qq              double quotes           qq(interpolate $here please)
9426         "               double quotes           "interpolate $here please"
9427         qx              backticks               qx(/bin/ls -l)
9428         `               backticks               `/bin/ls -l`
9429         qw              quote words             @EXPORT_OK = qw( func() $spam )
9430         m//             regexp match            m/this/
9431         s///            regexp substitute       s/this/that/
9432         tr///           string transliterate    tr/this/that/
9433         y///            string transliterate    y/this/that/
9434         ($*@)           sub prototypes          sub foo ($)
9435         (stuff)         sub attr parameters     sub foo : attr(stuff)
9436         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9437
9438    In most of these cases (all but <>, patterns and transliterate)
9439    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9440    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9441    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9442    calls scan_str().
9443
9444    It skips whitespace before the string starts, and treats the first
9445    character as the delimiter.  If the delimiter is one of ([{< then
9446    the corresponding "close" character )]}> is used as the closing
9447    delimiter.  It allows quoting of delimiters, and if the string has
9448    balanced delimiters ([{<>}]) it allows nesting.
9449
9450    On success, the SV with the resulting string is put into lex_stuff or,
9451    if that is already non-NULL, into lex_repl. The second case occurs only
9452    when parsing the RHS of the special constructs s/// and tr/// (y///).
9453    For convenience, the terminating delimiter character is stuffed into
9454    SvIVX of the SV.
9455 */
9456
9457 STATIC char *
9458 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9459                  char **delimp
9460     )
9461 {
9462     SV *sv;                     /* scalar value: string */
9463     const char *tmps;           /* temp string, used for delimiter matching */
9464     char *s = start;            /* current position in the buffer */
9465     char term;                  /* terminating character */
9466     char *to;                   /* current position in the sv's data */
9467     I32 brackets = 1;           /* bracket nesting level */
9468     bool has_utf8 = FALSE;      /* is there any utf8 content? */
9469     I32 termcode;               /* terminating char. code */
9470     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
9471     STRLEN termlen;             /* length of terminating string */
9472     int last_off = 0;           /* last position for nesting bracket */
9473     line_t herelines;
9474
9475     PERL_ARGS_ASSERT_SCAN_STR;
9476
9477     /* skip space before the delimiter */
9478     if (isSPACE(*s)) {
9479         s = PEEKSPACE(s);
9480     }
9481
9482     /* mark where we are, in case we need to report errors */
9483     CLINE;
9484
9485     /* after skipping whitespace, the next character is the terminator */
9486     term = *s;
9487     if (!UTF) {
9488         termcode = termstr[0] = term;
9489         termlen = 1;
9490     }
9491     else {
9492         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9493         Copy(s, termstr, termlen, U8);
9494         if (!UTF8_IS_INVARIANT(term))
9495             has_utf8 = TRUE;
9496     }
9497
9498     /* mark where we are */
9499     PL_multi_start = CopLINE(PL_curcop);
9500     PL_multi_open = term;
9501     herelines = PL_parser->herelines;
9502
9503     /* find corresponding closing delimiter */
9504     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9505         termcode = termstr[0] = term = tmps[5];
9506
9507     PL_multi_close = term;
9508
9509     if (PL_multi_open == PL_multi_close) {
9510         keep_bracketed_quoted = FALSE;
9511     }
9512
9513     /* create a new SV to hold the contents.  79 is the SV's initial length.
9514        What a random number. */
9515     sv = newSV_type(SVt_PVIV);
9516     SvGROW(sv, 80);
9517     SvIV_set(sv, termcode);
9518     (void)SvPOK_only(sv);               /* validate pointer */
9519
9520     /* move past delimiter and try to read a complete string */
9521     if (keep_delims)
9522         sv_catpvn(sv, s, termlen);
9523     s += termlen;
9524     for (;;) {
9525         if (PL_encoding && !UTF && !re_reparse) {
9526             bool cont = TRUE;
9527
9528             while (cont) {
9529                 int offset = s - SvPVX_const(PL_linestr);
9530                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9531                                            &offset, (char*)termstr, termlen);
9532                 const char *ns;
9533                 char *svlast;
9534
9535                 if (SvIsCOW(PL_linestr)) {
9536                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9537                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9538                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9539                     char *buf = SvPVX(PL_linestr);
9540                     bufend_pos = PL_parser->bufend - buf;
9541                     bufptr_pos = PL_parser->bufptr - buf;
9542                     oldbufptr_pos = PL_parser->oldbufptr - buf;
9543                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9544                     linestart_pos = PL_parser->linestart - buf;
9545                     last_uni_pos = PL_parser->last_uni
9546                         ? PL_parser->last_uni - buf
9547                         : 0;
9548                     last_lop_pos = PL_parser->last_lop
9549                         ? PL_parser->last_lop - buf
9550                         : 0;
9551                     re_eval_start_pos =
9552                         PL_parser->lex_shared->re_eval_start ?
9553                             PL_parser->lex_shared->re_eval_start - buf : 0;
9554                     s_pos = s - buf;
9555
9556                     sv_force_normal(PL_linestr);
9557
9558                     buf = SvPVX(PL_linestr);
9559                     PL_parser->bufend = buf + bufend_pos;
9560                     PL_parser->bufptr = buf + bufptr_pos;
9561                     PL_parser->oldbufptr = buf + oldbufptr_pos;
9562                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9563                     PL_parser->linestart = buf + linestart_pos;
9564                     if (PL_parser->last_uni)
9565                         PL_parser->last_uni = buf + last_uni_pos;
9566                     if (PL_parser->last_lop)
9567                         PL_parser->last_lop = buf + last_lop_pos;
9568                     if (PL_parser->lex_shared->re_eval_start)
9569                         PL_parser->lex_shared->re_eval_start  =
9570                             buf + re_eval_start_pos;
9571                     s = buf + s_pos;
9572                 }
9573                 ns = SvPVX_const(PL_linestr) + offset;
9574                 svlast = SvEND(sv) - 1;
9575
9576                 for (; s < ns; s++) {
9577                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9578                         COPLINE_INC_WITH_HERELINES;
9579                 }
9580                 if (!found)
9581                     goto read_more_line;
9582                 else {
9583                     /* handle quoted delimiters */
9584                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9585                         const char *t;
9586                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9587                             t--;
9588                         if ((svlast-1 - t) % 2) {
9589                             if (!keep_bracketed_quoted) {
9590                                 *(svlast-1) = term;
9591                                 *svlast = '\0';
9592                                 SvCUR_set(sv, SvCUR(sv) - 1);
9593                             }
9594                             continue;
9595                         }
9596                     }
9597                     if (PL_multi_open == PL_multi_close) {
9598                         cont = FALSE;
9599                     }
9600                     else {
9601                         const char *t;
9602                         char *w;
9603                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9604                             /* At here, all closes are "was quoted" one,
9605                                so we don't check PL_multi_close. */
9606                             if (*t == '\\') {
9607                                 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9608                                     t++;
9609                                 else
9610                                     *w++ = *t++;
9611                             }
9612                             else if (*t == PL_multi_open)
9613                                 brackets++;
9614
9615                             *w = *t;
9616                         }
9617                         if (w < t) {
9618                             *w++ = term;
9619                             *w = '\0';
9620                             SvCUR_set(sv, w - SvPVX_const(sv));
9621                         }
9622                         last_off = w - SvPVX(sv);
9623                         if (--brackets <= 0)
9624                             cont = FALSE;
9625                     }
9626                 }
9627             }
9628             if (!keep_delims) {
9629                 SvCUR_set(sv, SvCUR(sv) - 1);
9630                 *SvEND(sv) = '\0';
9631             }
9632             break;
9633         }
9634
9635         /* extend sv if need be */
9636         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9637         /* set 'to' to the next character in the sv's string */
9638         to = SvPVX(sv)+SvCUR(sv);
9639
9640         /* if open delimiter is the close delimiter read unbridle */
9641         if (PL_multi_open == PL_multi_close) {
9642             for (; s < PL_bufend; s++,to++) {
9643                 /* embedded newlines increment the current line number */
9644                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9645                     COPLINE_INC_WITH_HERELINES;
9646                 /* handle quoted delimiters */
9647                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9648                     if (!keep_bracketed_quoted
9649                         && (s[1] == term
9650                             || (re_reparse && s[1] == '\\'))
9651                     )
9652                         s++;
9653                     else /* any other quotes are simply copied straight through */
9654                         *to++ = *s++;
9655                 }
9656                 /* terminate when run out of buffer (the for() condition), or
9657                    have found the terminator */
9658                 else if (*s == term) {
9659                     if (termlen == 1)
9660                         break;
9661                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9662                         break;
9663                 }
9664                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9665                     has_utf8 = TRUE;
9666                 *to = *s;
9667             }
9668         }
9669
9670         /* if the terminator isn't the same as the start character (e.g.,
9671            matched brackets), we have to allow more in the quoting, and
9672            be prepared for nested brackets.
9673         */
9674         else {
9675             /* read until we run out of string, or we find the terminator */
9676             for (; s < PL_bufend; s++,to++) {
9677                 /* embedded newlines increment the line count */
9678                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9679                     COPLINE_INC_WITH_HERELINES;
9680                 /* backslashes can escape the open or closing characters */
9681                 if (*s == '\\' && s+1 < PL_bufend) {
9682                     if (!keep_bracketed_quoted &&
9683                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9684                     {
9685                         s++;
9686                     }
9687                     else
9688                         *to++ = *s++;
9689                 }
9690                 /* allow nested opens and closes */
9691                 else if (*s == PL_multi_close && --brackets <= 0)
9692                     break;
9693                 else if (*s == PL_multi_open)
9694                     brackets++;
9695                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9696                     has_utf8 = TRUE;
9697                 *to = *s;
9698             }
9699         }
9700         /* terminate the copied string and update the sv's end-of-string */
9701         *to = '\0';
9702         SvCUR_set(sv, to - SvPVX_const(sv));
9703
9704         /*
9705          * this next chunk reads more into the buffer if we're not done yet
9706          */
9707
9708         if (s < PL_bufend)
9709             break;              /* handle case where we are done yet :-) */
9710
9711 #ifndef PERL_STRICT_CR
9712         if (to - SvPVX_const(sv) >= 2) {
9713             if ((to[-2] == '\r' && to[-1] == '\n') ||
9714                 (to[-2] == '\n' && to[-1] == '\r'))
9715             {
9716                 to[-2] = '\n';
9717                 to--;
9718                 SvCUR_set(sv, to - SvPVX_const(sv));
9719             }
9720             else if (to[-1] == '\r')
9721                 to[-1] = '\n';
9722         }
9723         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9724             to[-1] = '\n';
9725 #endif
9726
9727      read_more_line:
9728         /* if we're out of file, or a read fails, bail and reset the current
9729            line marker so we can report where the unterminated string began
9730         */
9731         COPLINE_INC_WITH_HERELINES;
9732         PL_bufptr = PL_bufend;
9733         if (!lex_next_chunk(0)) {
9734             sv_free(sv);
9735             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9736             return NULL;
9737         }
9738         s = PL_bufptr;
9739     }
9740
9741     /* at this point, we have successfully read the delimited string */
9742
9743     if (!PL_encoding || UTF || re_reparse) {
9744
9745         if (keep_delims)
9746             sv_catpvn(sv, s, termlen);
9747         s += termlen;
9748     }
9749     if (has_utf8 || (PL_encoding && !re_reparse))
9750         SvUTF8_on(sv);
9751
9752     PL_multi_end = CopLINE(PL_curcop);
9753     CopLINE_set(PL_curcop, PL_multi_start);
9754     PL_parser->herelines = herelines;
9755
9756     /* if we allocated too much space, give some back */
9757     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9758         SvLEN_set(sv, SvCUR(sv) + 1);
9759         SvPV_renew(sv, SvLEN(sv));
9760     }
9761
9762     /* decide whether this is the first or second quoted string we've read
9763        for this op
9764     */
9765
9766     if (PL_lex_stuff)
9767         PL_sublex_info.repl = sv;
9768     else
9769         PL_lex_stuff = sv;
9770     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9771     return s;
9772 }
9773
9774 /*
9775   scan_num
9776   takes: pointer to position in buffer
9777   returns: pointer to new position in buffer
9778   side-effects: builds ops for the constant in pl_yylval.op
9779
9780   Read a number in any of the formats that Perl accepts:
9781
9782   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9783   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9784   0b[01](_?[01])*
9785   0[0-7](_?[0-7])*
9786   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9787
9788   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9789   thing it reads.
9790
9791   If it reads a number without a decimal point or an exponent, it will
9792   try converting the number to an integer and see if it can do so
9793   without loss of precision.
9794 */
9795
9796 char *
9797 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9798 {
9799     const char *s = start;      /* current position in buffer */
9800     char *d;                    /* destination in temp buffer */
9801     char *e;                    /* end of temp buffer */
9802     NV nv;                              /* number read, as a double */
9803     SV *sv = NULL;                      /* place to put the converted number */
9804     bool floatit;                       /* boolean: int or float? */
9805     const char *lastub = NULL;          /* position of last underbar */
9806     static const char* const number_too_long = "Number too long";
9807     /* Hexadecimal floating point.
9808      *
9809      * In many places (where we have quads and NV is IEEE 754 double)
9810      * we can fit the mantissa bits of a NV into an unsigned quad.
9811      * (Note that UVs might not be quads even when we have quads.)
9812      * This will not work everywhere, though (either no quads, or
9813      * using long doubles), in which case we have to resort to NV,
9814      * which will probably mean horrible loss of precision due to
9815      * multiple fp operations. */
9816     bool hexfp = FALSE;
9817     int total_bits = 0;
9818 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
9819 #  define HEXFP_UQUAD
9820     Uquad_t hexfp_uquad = 0;
9821     int hexfp_frac_bits = 0;
9822 #else
9823 #  define HEXFP_NV
9824     NV hexfp_nv = 0.0;
9825 #endif
9826     NV hexfp_mult = 1.0;
9827     UV high_non_zero = 0; /* highest digit */
9828
9829     PERL_ARGS_ASSERT_SCAN_NUM;
9830
9831     /* We use the first character to decide what type of number this is */
9832
9833     switch (*s) {
9834     default:
9835         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
9836
9837     /* if it starts with a 0, it could be an octal number, a decimal in
9838        0.13 disguise, or a hexadecimal number, or a binary number. */
9839     case '0':
9840         {
9841           /* variables:
9842              u          holds the "number so far"
9843              shift      the power of 2 of the base
9844                         (hex == 4, octal == 3, binary == 1)
9845              overflowed was the number more than we can hold?
9846
9847              Shift is used when we add a digit.  It also serves as an "are
9848              we in octal/hex/binary?" indicator to disallow hex characters
9849              when in octal mode.
9850            */
9851             NV n = 0.0;
9852             UV u = 0;
9853             I32 shift;
9854             bool overflowed = FALSE;
9855             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
9856             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
9857             static const char* const bases[5] =
9858               { "", "binary", "", "octal", "hexadecimal" };
9859             static const char* const Bases[5] =
9860               { "", "Binary", "", "Octal", "Hexadecimal" };
9861             static const char* const maxima[5] =
9862               { "",
9863                 "0b11111111111111111111111111111111",
9864                 "",
9865                 "037777777777",
9866                 "0xffffffff" };
9867             const char *base, *Base, *max;
9868
9869             /* check for hex */
9870             if (s[1] == 'x' || s[1] == 'X') {
9871                 shift = 4;
9872                 s += 2;
9873                 just_zero = FALSE;
9874             } else if (s[1] == 'b' || s[1] == 'B') {
9875                 shift = 1;
9876                 s += 2;
9877                 just_zero = FALSE;
9878             }
9879             /* check for a decimal in disguise */
9880             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
9881                 goto decimal;
9882             /* so it must be octal */
9883             else {
9884                 shift = 3;
9885                 s++;
9886             }
9887
9888             if (*s == '_') {
9889                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9890                                "Misplaced _ in number");
9891                lastub = s++;
9892             }
9893
9894             base = bases[shift];
9895             Base = Bases[shift];
9896             max  = maxima[shift];
9897
9898             /* read the rest of the number */
9899             for (;;) {
9900                 /* x is used in the overflow test,
9901                    b is the digit we're adding on. */
9902                 UV x, b;
9903
9904                 switch (*s) {
9905
9906                 /* if we don't mention it, we're done */
9907                 default:
9908                     goto out;
9909
9910                 /* _ are ignored -- but warned about if consecutive */
9911                 case '_':
9912                     if (lastub && s == lastub + 1)
9913                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9914                                        "Misplaced _ in number");
9915                     lastub = s++;
9916                     break;
9917
9918                 /* 8 and 9 are not octal */
9919                 case '8': case '9':
9920                     if (shift == 3)
9921                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
9922                     /* FALLTHROUGH */
9923
9924                 /* octal digits */
9925                 case '2': case '3': case '4':
9926                 case '5': case '6': case '7':
9927                     if (shift == 1)
9928                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
9929                     /* FALLTHROUGH */
9930
9931                 case '0': case '1':
9932                     b = *s++ & 15;              /* ASCII digit -> value of digit */
9933                     goto digit;
9934
9935                 /* hex digits */
9936                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
9937                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
9938                     /* make sure they said 0x */
9939                     if (shift != 4)
9940                         goto out;
9941                     b = (*s++ & 7) + 9;
9942
9943                     /* Prepare to put the digit we have onto the end
9944                        of the number so far.  We check for overflows.
9945                     */
9946
9947                   digit:
9948                     just_zero = FALSE;
9949                     if (!overflowed) {
9950                         x = u << shift; /* make room for the digit */
9951
9952                         total_bits += shift;
9953
9954                         if ((x >> shift) != u
9955                             && !(PL_hints & HINT_NEW_BINARY)) {
9956                             overflowed = TRUE;
9957                             n = (NV) u;
9958                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
9959                                              "Integer overflow in %s number",
9960                                              base);
9961                         } else
9962                             u = x | b;          /* add the digit to the end */
9963                     }
9964                     if (overflowed) {
9965                         n *= nvshift[shift];
9966                         /* If an NV has not enough bits in its
9967                          * mantissa to represent an UV this summing of
9968                          * small low-order numbers is a waste of time
9969                          * (because the NV cannot preserve the
9970                          * low-order bits anyway): we could just
9971                          * remember when did we overflow and in the
9972                          * end just multiply n by the right
9973                          * amount. */
9974                         n += (NV) b;
9975                     }
9976
9977                     if (high_non_zero == 0 && b > 0)
9978                         high_non_zero = b;
9979
9980                     /* this could be hexfp, but peek ahead
9981                      * to avoid matching ".." */
9982 #define HEXFP_PEEK(s) \
9983         (((s[0] == '.') && \
9984           (isXDIGIT(s[1]) || s[1] == 'p' || s[1] == 'P')) \
9985          || s[0] == 'p' || s[0] == 'P')
9986                     if (UNLIKELY(HEXFP_PEEK(s))) {
9987                         goto out;
9988                     }
9989
9990                     break;
9991                 }
9992             }
9993
9994           /* if we get here, we had success: make a scalar value from
9995              the number.
9996           */
9997           out:
9998
9999             /* final misplaced underbar check */
10000             if (s[-1] == '_') {
10001                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10002             }
10003
10004             if (UNLIKELY(HEXFP_PEEK(s))) {
10005                 /* Do sloppy (on the underbars) but quick detection
10006                  * (and value construction) for hexfp, the decimal
10007                  * detection will shortly be more thorough with the
10008                  * underbar checks. */
10009                 const char* h = s;
10010 #ifdef HEXFP_UQUAD
10011                 hexfp_uquad = u;
10012 #else /* HEXFP_NV */
10013                 hexfp_nv = u;
10014 #endif
10015                 if (*h == '.') {
10016 #ifdef HEXFP_NV
10017                     NV mult = 1 / 16.0;
10018 #endif
10019                     h++;
10020                     while (isXDIGIT(*h) || *h == '_') {
10021                         if (isXDIGIT(*h)) {
10022                             U8 b = XDIGIT_VALUE(*h);
10023                             total_bits += shift;
10024 #ifdef HEXFP_UQUAD
10025                             hexfp_uquad <<= shift;
10026                             hexfp_uquad |= b;
10027                             hexfp_frac_bits += shift;
10028 #else /* HEXFP_NV */
10029                             hexfp_nv += b * mult;
10030                             mult /= 16.0;
10031 #endif
10032                         }
10033                         h++;
10034                     }
10035                 }
10036
10037                 if (total_bits >= 4) {
10038                     if (high_non_zero < 0x8)
10039                         total_bits--;
10040                     if (high_non_zero < 0x4)
10041                         total_bits--;
10042                     if (high_non_zero < 0x2)
10043                         total_bits--;
10044                 }
10045
10046                 if (total_bits > 0 && (*h == 'p' || *h == 'P')) {
10047                     bool negexp = FALSE;
10048                     h++;
10049                     if (*h == '+')
10050                         h++;
10051                     else if (*h == '-') {
10052                         negexp = TRUE;
10053                         h++;
10054                     }
10055                     if (isDIGIT(*h)) {
10056                         I32 hexfp_exp = 0;
10057                         while (isDIGIT(*h) || *h == '_') {
10058                             if (isDIGIT(*h)) {
10059                                 hexfp_exp *= 10;
10060                                 hexfp_exp += *h - '0';
10061 #ifdef NV_MIN_EXP
10062                                 if (negexp &&
10063                                     -hexfp_exp < NV_MIN_EXP - 1) {
10064                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10065                                                    "Hexadecimal float: exponent underflow");
10066 #endif
10067                                     break;
10068                                 }
10069                                 else {
10070 #ifdef NV_MAX_EXP
10071                                     if (!negexp &&
10072                                         hexfp_exp > NV_MAX_EXP - 1) {
10073                                         Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10074                                                    "Hexadecimal float: exponent overflow");
10075                                         break;
10076                                     }
10077 #endif
10078                                 }
10079                             }
10080                             h++;
10081                         }
10082                         if (negexp)
10083                             hexfp_exp = -hexfp_exp;
10084 #ifdef HEXFP_UQUAD
10085                         hexfp_exp -= hexfp_frac_bits;
10086 #endif
10087                         hexfp_mult = pow(2.0, hexfp_exp);
10088                         hexfp = TRUE;
10089                         goto decimal;
10090                     }
10091                 }
10092             }
10093
10094             if (overflowed) {
10095                 if (n > 4294967295.0)
10096                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10097                                    "%s number > %s non-portable",
10098                                    Base, max);
10099                 sv = newSVnv(n);
10100             }
10101             else {
10102 #if UVSIZE > 4
10103                 if (u > 0xffffffff)
10104                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10105                                    "%s number > %s non-portable",
10106                                    Base, max);
10107 #endif
10108                 sv = newSVuv(u);
10109             }
10110             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10111                 sv = new_constant(start, s - start, "integer",
10112                                   sv, NULL, NULL, 0);
10113             else if (PL_hints & HINT_NEW_BINARY)
10114                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10115         }
10116         break;
10117
10118     /*
10119       handle decimal numbers.
10120       we're also sent here when we read a 0 as the first digit
10121     */
10122     case '1': case '2': case '3': case '4': case '5':
10123     case '6': case '7': case '8': case '9': case '.':
10124       decimal:
10125         d = PL_tokenbuf;
10126         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10127         floatit = FALSE;
10128         if (hexfp) {
10129             floatit = TRUE;
10130             *d++ = '0';
10131             *d++ = 'x';
10132             s = start + 2;
10133         }
10134
10135         /* read next group of digits and _ and copy into d */
10136         while (isDIGIT(*s) || *s == '_' ||
10137                UNLIKELY(hexfp && isXDIGIT(*s))) {
10138             /* skip underscores, checking for misplaced ones
10139                if -w is on
10140             */
10141             if (*s == '_') {
10142                 if (lastub && s == lastub + 1)
10143                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10144                                    "Misplaced _ in number");
10145                 lastub = s++;
10146             }
10147             else {
10148                 /* check for end of fixed-length buffer */
10149                 if (d >= e)
10150                     Perl_croak(aTHX_ "%s", number_too_long);
10151                 /* if we're ok, copy the character */
10152                 *d++ = *s++;
10153             }
10154         }
10155
10156         /* final misplaced underbar check */
10157         if (lastub && s == lastub + 1) {
10158             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10159         }
10160
10161         /* read a decimal portion if there is one.  avoid
10162            3..5 being interpreted as the number 3. followed
10163            by .5
10164         */
10165         if (*s == '.' && s[1] != '.') {
10166             floatit = TRUE;
10167             *d++ = *s++;
10168
10169             if (*s == '_') {
10170                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10171                                "Misplaced _ in number");
10172                 lastub = s;
10173             }
10174
10175             /* copy, ignoring underbars, until we run out of digits.
10176             */
10177             for (; isDIGIT(*s) || *s == '_' ||
10178                      UNLIKELY(hexfp && isXDIGIT(*s));
10179                  s++) {
10180                 /* fixed length buffer check */
10181                 if (d >= e)
10182                     Perl_croak(aTHX_ "%s", number_too_long);
10183                 if (*s == '_') {
10184                    if (lastub && s == lastub + 1)
10185                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10186                                       "Misplaced _ in number");
10187                    lastub = s;
10188                 }
10189                 else
10190                     *d++ = *s;
10191             }
10192             /* fractional part ending in underbar? */
10193             if (s[-1] == '_') {
10194                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10195                                "Misplaced _ in number");
10196             }
10197             if (*s == '.' && isDIGIT(s[1])) {
10198                 /* oops, it's really a v-string, but without the "v" */
10199                 s = start;
10200                 goto vstring;
10201             }
10202         }
10203
10204         /* read exponent part, if present */
10205         if (((*s == 'e' || *s == 'E') ||
10206              UNLIKELY(hexfp && (*s == 'p' || *s == 'P'))) &&
10207             strchr("+-0123456789_", s[1])) {
10208             floatit = TRUE;
10209
10210             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10211                ditto for p (hexfloats) */
10212             if ((*s == 'e' || *s == 'E')) {
10213                 /* At least some Mach atof()s don't grok 'E' */
10214                 *d++ = 'e';
10215             }
10216             else if (UNLIKELY(hexfp && (*s == 'p' || *s == 'P'))) {
10217                 *d++ = 'p';
10218             }
10219
10220             s++;
10221
10222
10223             /* stray preinitial _ */
10224             if (*s == '_') {
10225                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10226                                "Misplaced _ in number");
10227                 lastub = s++;
10228             }
10229
10230             /* allow positive or negative exponent */
10231             if (*s == '+' || *s == '-')
10232                 *d++ = *s++;
10233
10234             /* stray initial _ */
10235             if (*s == '_') {
10236                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10237                                "Misplaced _ in number");
10238                 lastub = s++;
10239             }
10240
10241             /* read digits of exponent */
10242             while (isDIGIT(*s) || *s == '_') {
10243                 if (isDIGIT(*s)) {
10244                     if (d >= e)
10245                         Perl_croak(aTHX_ "%s", number_too_long);
10246                     *d++ = *s++;
10247                 }
10248                 else {
10249                    if (((lastub && s == lastub + 1) ||
10250                         (!isDIGIT(s[1]) && s[1] != '_')))
10251                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10252                                       "Misplaced _ in number");
10253                    lastub = s++;
10254                 }
10255             }
10256         }
10257
10258
10259         /*
10260            We try to do an integer conversion first if no characters
10261            indicating "float" have been found.
10262          */
10263
10264         if (!floatit) {
10265             UV uv;
10266             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10267
10268             if (flags == IS_NUMBER_IN_UV) {
10269               if (uv <= IV_MAX)
10270                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10271               else
10272                 sv = newSVuv(uv);
10273             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10274               if (uv <= (UV) IV_MIN)
10275                 sv = newSViv(-(IV)uv);
10276               else
10277                 floatit = TRUE;
10278             } else
10279               floatit = TRUE;
10280         }
10281         if (floatit) {
10282             STORE_NUMERIC_LOCAL_SET_STANDARD();
10283             /* terminate the string */
10284             *d = '\0';
10285             if (UNLIKELY(hexfp)) {
10286 #  ifdef NV_MANT_DIG
10287                 if (total_bits > NV_MANT_DIG)
10288                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10289                                    "Hexadecimal float: mantissa overflow");
10290 #  endif
10291 #ifdef HEXFP_UQUAD
10292                 nv = hexfp_uquad * hexfp_mult;
10293 #else /* HEXFP_NV */
10294                 nv = hexfp_nv * hexfp_mult;
10295 #endif
10296             } else {
10297                 nv = Atof(PL_tokenbuf);
10298             }
10299             RESTORE_NUMERIC_LOCAL();
10300             sv = newSVnv(nv);
10301         }
10302
10303         if ( floatit
10304              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10305             const char *const key = floatit ? "float" : "integer";
10306             const STRLEN keylen = floatit ? 5 : 7;
10307             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10308                                 key, keylen, sv, NULL, NULL, 0);
10309         }
10310         break;
10311
10312     /* if it starts with a v, it could be a v-string */
10313     case 'v':
10314 vstring:
10315                 sv = newSV(5); /* preallocate storage space */
10316                 ENTER_with_name("scan_vstring");
10317                 SAVEFREESV(sv);
10318                 s = scan_vstring(s, PL_bufend, sv);
10319                 SvREFCNT_inc_simple_void_NN(sv);
10320                 LEAVE_with_name("scan_vstring");
10321         break;
10322     }
10323
10324     /* make the op for the constant and return */
10325
10326     if (sv)
10327         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10328     else
10329         lvalp->opval = NULL;
10330
10331     return (char *)s;
10332 }
10333
10334 STATIC char *
10335 S_scan_formline(pTHX_ char *s)
10336 {
10337     char *eol;
10338     char *t;
10339     SV * const stuff = newSVpvs("");
10340     bool needargs = FALSE;
10341     bool eofmt = FALSE;
10342
10343     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10344
10345     while (!needargs) {
10346         if (*s == '.') {
10347             t = s+1;
10348 #ifdef PERL_STRICT_CR
10349             while (SPACE_OR_TAB(*t))
10350                 t++;
10351 #else
10352             while (SPACE_OR_TAB(*t) || *t == '\r')
10353                 t++;
10354 #endif
10355             if (*t == '\n' || t == PL_bufend) {
10356                 eofmt = TRUE;
10357                 break;
10358             }
10359         }
10360         eol = (char *) memchr(s,'\n',PL_bufend-s);
10361         if (!eol++)
10362                 eol = PL_bufend;
10363         if (*s != '#') {
10364             for (t = s; t < eol; t++) {
10365                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10366                     needargs = FALSE;
10367                     goto enough;        /* ~~ must be first line in formline */
10368                 }
10369                 if (*t == '@' || *t == '^')
10370                     needargs = TRUE;
10371             }
10372             if (eol > s) {
10373                 sv_catpvn(stuff, s, eol-s);
10374 #ifndef PERL_STRICT_CR
10375                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10376                     char *end = SvPVX(stuff) + SvCUR(stuff);
10377                     end[-2] = '\n';
10378                     end[-1] = '\0';
10379                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10380                 }
10381 #endif
10382             }
10383             else
10384               break;
10385         }
10386         s = (char*)eol;
10387         if ((PL_rsfp || PL_parser->filtered)
10388          && PL_parser->form_lex_state == LEX_NORMAL) {
10389             bool got_some;
10390             PL_bufptr = PL_bufend;
10391             COPLINE_INC_WITH_HERELINES;
10392             got_some = lex_next_chunk(0);
10393             CopLINE_dec(PL_curcop);
10394             s = PL_bufptr;
10395             if (!got_some)
10396                 break;
10397         }
10398         incline(s);
10399     }
10400   enough:
10401     if (!SvCUR(stuff) || needargs)
10402         PL_lex_state = PL_parser->form_lex_state;
10403     if (SvCUR(stuff)) {
10404         PL_expect = XSTATE;
10405         if (needargs) {
10406             const char *s2 = s;
10407             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10408                 || *s2 == 013)
10409                 s2++;
10410             if (*s2 == '{') {
10411                 PL_expect = XTERMBLOCK;
10412                 NEXTVAL_NEXTTOKE.ival = 0;
10413                 force_next(DO);
10414             }
10415             NEXTVAL_NEXTTOKE.ival = 0;
10416             force_next(FORMLBRACK);
10417         }
10418         if (!IN_BYTES) {
10419             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10420                 SvUTF8_on(stuff);
10421             else if (PL_encoding)
10422                 sv_recode_to_utf8(stuff, PL_encoding);
10423         }
10424         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10425         force_next(THING);
10426     }
10427     else {
10428         SvREFCNT_dec(stuff);
10429         if (eofmt)
10430             PL_lex_formbrack = 0;
10431     }
10432     return s;
10433 }
10434
10435 I32
10436 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10437 {
10438     const I32 oldsavestack_ix = PL_savestack_ix;
10439     CV* const outsidecv = PL_compcv;
10440
10441     SAVEI32(PL_subline);
10442     save_item(PL_subname);
10443     SAVESPTR(PL_compcv);
10444
10445     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10446     CvFLAGS(PL_compcv) |= flags;
10447
10448     PL_subline = CopLINE(PL_curcop);
10449     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10450     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10451     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10452     if (outsidecv && CvPADLIST(outsidecv))
10453         CvPADLIST(PL_compcv)->xpadl_outid =
10454             PadlistNAMES(CvPADLIST(outsidecv));
10455
10456     return oldsavestack_ix;
10457 }
10458
10459 static int
10460 S_yywarn(pTHX_ const char *const s, U32 flags)
10461 {
10462     PERL_ARGS_ASSERT_YYWARN;
10463
10464     PL_in_eval |= EVAL_WARNONLY;
10465     yyerror_pv(s, flags);
10466     PL_in_eval &= ~EVAL_WARNONLY;
10467     return 0;
10468 }
10469
10470 int
10471 Perl_yyerror(pTHX_ const char *const s)
10472 {
10473     PERL_ARGS_ASSERT_YYERROR;
10474     return yyerror_pvn(s, strlen(s), 0);
10475 }
10476
10477 int
10478 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10479 {
10480     PERL_ARGS_ASSERT_YYERROR_PV;
10481     return yyerror_pvn(s, strlen(s), flags);
10482 }
10483
10484 int
10485 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10486 {
10487     const char *context = NULL;
10488     int contlen = -1;
10489     SV *msg;
10490     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10491     int yychar  = PL_parser->yychar;
10492
10493     PERL_ARGS_ASSERT_YYERROR_PVN;
10494
10495     if (!yychar || (yychar == ';' && !PL_rsfp))
10496         sv_catpvs(where_sv, "at EOF");
10497     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10498       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10499       PL_oldbufptr != PL_bufptr) {
10500         /*
10501                 Only for NetWare:
10502                 The code below is removed for NetWare because it abends/crashes on NetWare
10503                 when the script has error such as not having the closing quotes like:
10504                     if ($var eq "value)
10505                 Checking of white spaces is anyway done in NetWare code.
10506         */
10507 #ifndef NETWARE
10508         while (isSPACE(*PL_oldoldbufptr))
10509             PL_oldoldbufptr++;
10510 #endif
10511         context = PL_oldoldbufptr;
10512         contlen = PL_bufptr - PL_oldoldbufptr;
10513     }
10514     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10515       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10516         /*
10517                 Only for NetWare:
10518                 The code below is removed for NetWare because it abends/crashes on NetWare
10519                 when the script has error such as not having the closing quotes like:
10520                     if ($var eq "value)
10521                 Checking of white spaces is anyway done in NetWare code.
10522         */
10523 #ifndef NETWARE
10524         while (isSPACE(*PL_oldbufptr))
10525             PL_oldbufptr++;
10526 #endif
10527         context = PL_oldbufptr;
10528         contlen = PL_bufptr - PL_oldbufptr;
10529     }
10530     else if (yychar > 255)
10531         sv_catpvs(where_sv, "next token ???");
10532     else if (yychar == -2) { /* YYEMPTY */
10533         if (PL_lex_state == LEX_NORMAL ||
10534            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10535             sv_catpvs(where_sv, "at end of line");
10536         else if (PL_lex_inpat)
10537             sv_catpvs(where_sv, "within pattern");
10538         else
10539             sv_catpvs(where_sv, "within string");
10540     }
10541     else {
10542         sv_catpvs(where_sv, "next char ");
10543         if (yychar < 32)
10544             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10545         else if (isPRINT_LC(yychar)) {
10546             const char string = yychar;
10547             sv_catpvn(where_sv, &string, 1);
10548         }
10549         else
10550             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10551     }
10552     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10553     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10554         OutCopFILE(PL_curcop),
10555         (IV)(PL_parser->preambling == NOLINE
10556                ? CopLINE(PL_curcop)
10557                : PL_parser->preambling));
10558     if (context)
10559         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10560                              UTF8fARG(UTF, contlen, context));
10561     else
10562         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10563     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10564         Perl_sv_catpvf(aTHX_ msg,
10565         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10566                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10567         PL_multi_end = 0;
10568     }
10569     if (PL_in_eval & EVAL_WARNONLY) {
10570         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10571     }
10572     else
10573         qerror(msg);
10574     if (PL_error_count >= 10) {
10575         SV * errsv;
10576         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10577             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10578                        SVfARG(errsv), OutCopFILE(PL_curcop));
10579         else
10580             Perl_croak(aTHX_ "%s has too many errors.\n",
10581             OutCopFILE(PL_curcop));
10582     }
10583     PL_in_my = 0;
10584     PL_in_my_stash = NULL;
10585     return 0;
10586 }
10587
10588 STATIC char*
10589 S_swallow_bom(pTHX_ U8 *s)
10590 {
10591     const STRLEN slen = SvCUR(PL_linestr);
10592
10593     PERL_ARGS_ASSERT_SWALLOW_BOM;
10594
10595     switch (s[0]) {
10596     case 0xFF:
10597         if (s[1] == 0xFE) {
10598             /* UTF-16 little-endian? (or UTF-32LE?) */
10599             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10600                 /* diag_listed_as: Unsupported script encoding %s */
10601                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10602 #ifndef PERL_NO_UTF16_FILTER
10603             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10604             s += 2;
10605             if (PL_bufend > (char*)s) {
10606                 s = add_utf16_textfilter(s, TRUE);
10607             }
10608 #else
10609             /* diag_listed_as: Unsupported script encoding %s */
10610             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10611 #endif
10612         }
10613         break;
10614     case 0xFE:
10615         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10616 #ifndef PERL_NO_UTF16_FILTER
10617             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10618             s += 2;
10619             if (PL_bufend > (char *)s) {
10620                 s = add_utf16_textfilter(s, FALSE);
10621             }
10622 #else
10623             /* diag_listed_as: Unsupported script encoding %s */
10624             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10625 #endif
10626         }
10627         break;
10628     case BOM_UTF8_FIRST_BYTE: {
10629         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10630         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10631             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10632             s += len + 1;                      /* UTF-8 */
10633         }
10634         break;
10635     }
10636     case 0:
10637         if (slen > 3) {
10638              if (s[1] == 0) {
10639                   if (s[2] == 0xFE && s[3] == 0xFF) {
10640                        /* UTF-32 big-endian */
10641                        /* diag_listed_as: Unsupported script encoding %s */
10642                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10643                   }
10644              }
10645              else if (s[2] == 0 && s[3] != 0) {
10646                   /* Leading bytes
10647                    * 00 xx 00 xx
10648                    * are a good indicator of UTF-16BE. */
10649 #ifndef PERL_NO_UTF16_FILTER
10650                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10651                   s = add_utf16_textfilter(s, FALSE);
10652 #else
10653                   /* diag_listed_as: Unsupported script encoding %s */
10654                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10655 #endif
10656              }
10657         }
10658         break;
10659
10660     default:
10661          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10662                   /* Leading bytes
10663                    * xx 00 xx 00
10664                    * are a good indicator of UTF-16LE. */
10665 #ifndef PERL_NO_UTF16_FILTER
10666               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10667               s = add_utf16_textfilter(s, TRUE);
10668 #else
10669               /* diag_listed_as: Unsupported script encoding %s */
10670               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10671 #endif
10672          }
10673     }
10674     return (char*)s;
10675 }
10676
10677
10678 #ifndef PERL_NO_UTF16_FILTER
10679 static I32
10680 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10681 {
10682     SV *const filter = FILTER_DATA(idx);
10683     /* We re-use this each time round, throwing the contents away before we
10684        return.  */
10685     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10686     SV *const utf8_buffer = filter;
10687     IV status = IoPAGE(filter);
10688     const bool reverse = cBOOL(IoLINES(filter));
10689     I32 retval;
10690
10691     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10692
10693     /* As we're automatically added, at the lowest level, and hence only called
10694        from this file, we can be sure that we're not called in block mode. Hence
10695        don't bother writing code to deal with block mode.  */
10696     if (maxlen) {
10697         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10698     }
10699     if (status < 0) {
10700         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10701     }
10702     DEBUG_P(PerlIO_printf(Perl_debug_log,
10703                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10704                           FPTR2DPTR(void *, S_utf16_textfilter),
10705                           reverse ? 'l' : 'b', idx, maxlen, status,
10706                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10707
10708     while (1) {
10709         STRLEN chars;
10710         STRLEN have;
10711         I32 newlen;
10712         U8 *end;
10713         /* First, look in our buffer of existing UTF-8 data:  */
10714         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10715
10716         if (nl) {
10717             ++nl;
10718         } else if (status == 0) {
10719             /* EOF */
10720             IoPAGE(filter) = 0;
10721             nl = SvEND(utf8_buffer);
10722         }
10723         if (nl) {
10724             STRLEN got = nl - SvPVX(utf8_buffer);
10725             /* Did we have anything to append?  */
10726             retval = got != 0;
10727             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10728             /* Everything else in this code works just fine if SVp_POK isn't
10729                set.  This, however, needs it, and we need it to work, else
10730                we loop infinitely because the buffer is never consumed.  */
10731             sv_chop(utf8_buffer, nl);
10732             break;
10733         }
10734
10735         /* OK, not a complete line there, so need to read some more UTF-16.
10736            Read an extra octect if the buffer currently has an odd number. */
10737         while (1) {
10738             if (status <= 0)
10739                 break;
10740             if (SvCUR(utf16_buffer) >= 2) {
10741                 /* Location of the high octet of the last complete code point.
10742                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10743                    *coupled* with all the benefits of partial reads and
10744                    endianness.  */
10745                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10746                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10747
10748                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10749                     break;
10750                 }
10751
10752                 /* We have the first half of a surrogate. Read more.  */
10753                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10754             }
10755
10756             status = FILTER_READ(idx + 1, utf16_buffer,
10757                                  160 + (SvCUR(utf16_buffer) & 1));
10758             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10759             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10760             if (status < 0) {
10761                 /* Error */
10762                 IoPAGE(filter) = status;
10763                 return status;
10764             }
10765         }
10766
10767         chars = SvCUR(utf16_buffer) >> 1;
10768         have = SvCUR(utf8_buffer);
10769         SvGROW(utf8_buffer, have + chars * 3 + 1);
10770
10771         if (reverse) {
10772             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10773                                          (U8*)SvPVX_const(utf8_buffer) + have,
10774                                          chars * 2, &newlen);
10775         } else {
10776             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10777                                 (U8*)SvPVX_const(utf8_buffer) + have,
10778                                 chars * 2, &newlen);
10779         }
10780         SvCUR_set(utf8_buffer, have + newlen);
10781         *end = '\0';
10782
10783         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10784            it's private to us, and utf16_to_utf8{,reversed} take a
10785            (pointer,length) pair, rather than a NUL-terminated string.  */
10786         if(SvCUR(utf16_buffer) & 1) {
10787             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10788             SvCUR_set(utf16_buffer, 1);
10789         } else {
10790             SvCUR_set(utf16_buffer, 0);
10791         }
10792     }
10793     DEBUG_P(PerlIO_printf(Perl_debug_log,
10794                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10795                           status,
10796                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10797     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10798     return retval;
10799 }
10800
10801 static U8 *
10802 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10803 {
10804     SV *filter = filter_add(S_utf16_textfilter, NULL);
10805
10806     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10807
10808     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10809     sv_setpvs(filter, "");
10810     IoLINES(filter) = reversed;
10811     IoPAGE(filter) = 1; /* Not EOF */
10812
10813     /* Sadly, we have to return a valid pointer, come what may, so we have to
10814        ignore any error return from this.  */
10815     SvCUR_set(PL_linestr, 0);
10816     if (FILTER_READ(0, PL_linestr, 0)) {
10817         SvUTF8_on(PL_linestr);
10818     } else {
10819         SvUTF8_on(PL_linestr);
10820     }
10821     PL_bufend = SvEND(PL_linestr);
10822     return (U8*)SvPVX(PL_linestr);
10823 }
10824 #endif
10825
10826 /*
10827 Returns a pointer to the next character after the parsed
10828 vstring, as well as updating the passed in sv.
10829
10830 Function must be called like
10831
10832         sv = sv_2mortal(newSV(5));
10833         s = scan_vstring(s,e,sv);
10834
10835 where s and e are the start and end of the string.
10836 The sv should already be large enough to store the vstring
10837 passed in, for performance reasons.
10838
10839 This function may croak if fatal warnings are enabled in the
10840 calling scope, hence the sv_2mortal in the example (to prevent
10841 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
10842 sv_2mortal.
10843
10844 */
10845
10846 char *
10847 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10848 {
10849     const char *pos = s;
10850     const char *start = s;
10851
10852     PERL_ARGS_ASSERT_SCAN_VSTRING;
10853
10854     if (*pos == 'v') pos++;  /* get past 'v' */
10855     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10856         pos++;
10857     if ( *pos != '.') {
10858         /* this may not be a v-string if followed by => */
10859         const char *next = pos;
10860         while (next < e && isSPACE(*next))
10861             ++next;
10862         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10863             /* return string not v-string */
10864             sv_setpvn(sv,(char *)s,pos-s);
10865             return (char *)pos;
10866         }
10867     }
10868
10869     if (!isALPHA(*pos)) {
10870         U8 tmpbuf[UTF8_MAXBYTES+1];
10871
10872         if (*s == 'v')
10873             s++;  /* get past 'v' */
10874
10875         sv_setpvs(sv, "");
10876
10877         for (;;) {
10878             /* this is atoi() that tolerates underscores */
10879             U8 *tmpend;
10880             UV rev = 0;
10881             const char *end = pos;
10882             UV mult = 1;
10883             while (--end >= s) {
10884                 if (*end != '_') {
10885                     const UV orev = rev;
10886                     rev += (*end - '0') * mult;
10887                     mult *= 10;
10888                     if (orev > rev)
10889                         /* diag_listed_as: Integer overflow in %s number */
10890                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10891                                          "Integer overflow in decimal number");
10892                 }
10893             }
10894 #ifdef EBCDIC
10895             if (rev > 0x7FFFFFFF)
10896                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10897 #endif
10898             /* Append native character for the rev point */
10899             tmpend = uvchr_to_utf8(tmpbuf, rev);
10900             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10901             if (!UVCHR_IS_INVARIANT(rev))
10902                  SvUTF8_on(sv);
10903             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10904                  s = ++pos;
10905             else {
10906                  s = pos;
10907                  break;
10908             }
10909             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10910                  pos++;
10911         }
10912         SvPOK_on(sv);
10913         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10914         SvRMAGICAL_on(sv);
10915     }
10916     return (char *)s;
10917 }
10918
10919 int
10920 Perl_keyword_plugin_standard(pTHX_
10921         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10922 {
10923     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10924     PERL_UNUSED_CONTEXT;
10925     PERL_UNUSED_ARG(keyword_ptr);
10926     PERL_UNUSED_ARG(keyword_len);
10927     PERL_UNUSED_ARG(op_ptr);
10928     return KEYWORD_PLUGIN_DECLINE;
10929 }
10930
10931 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10932 static void
10933 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
10934 {
10935     SAVEI32(PL_lex_brackets);
10936     if (PL_lex_brackets > 100)
10937         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10938     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
10939     SAVEI32(PL_lex_allbrackets);
10940     PL_lex_allbrackets = 0;
10941     SAVEI8(PL_lex_fakeeof);
10942     PL_lex_fakeeof = (U8)fakeeof;
10943     if(yyparse(gramtype) && !PL_parser->error_count)
10944         qerror(Perl_mess(aTHX_ "Parse error"));
10945 }
10946
10947 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
10948 static OP *
10949 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
10950 {
10951     OP *o;
10952     ENTER;
10953     SAVEVPTR(PL_eval_root);
10954     PL_eval_root = NULL;
10955     parse_recdescent(gramtype, fakeeof);
10956     o = PL_eval_root;
10957     LEAVE;
10958     return o;
10959 }
10960
10961 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
10962 static OP *
10963 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
10964 {
10965     OP *exprop;
10966     if (flags & ~PARSE_OPTIONAL)
10967         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
10968     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
10969     if (!exprop && !(flags & PARSE_OPTIONAL)) {
10970         if (!PL_parser->error_count)
10971             qerror(Perl_mess(aTHX_ "Parse error"));
10972         exprop = newOP(OP_NULL, 0);
10973     }
10974     return exprop;
10975 }
10976
10977 /*
10978 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
10979
10980 Parse a Perl arithmetic expression.  This may contain operators of precedence
10981 down to the bit shift operators.  The expression must be followed (and thus
10982 terminated) either by a comparison or lower-precedence operator or by
10983 something that would normally terminate an expression such as semicolon.
10984 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10985 otherwise it is mandatory.  It is up to the caller to ensure that the
10986 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10987 the source of the code to be parsed and the lexical context for the
10988 expression.
10989
10990 The op tree representing the expression is returned.  If an optional
10991 expression is absent, a null pointer is returned, otherwise the pointer
10992 will be non-null.
10993
10994 If an error occurs in parsing or compilation, in most cases a valid op
10995 tree is returned anyway.  The error is reflected in the parser state,
10996 normally resulting in a single exception at the top level of parsing
10997 which covers all the compilation errors that occurred.  Some compilation
10998 errors, however, will throw an exception immediately.
10999
11000 =cut
11001 */
11002
11003 OP *
11004 Perl_parse_arithexpr(pTHX_ U32 flags)
11005 {
11006     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11007 }
11008
11009 /*
11010 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11011
11012 Parse a Perl term expression.  This may contain operators of precedence
11013 down to the assignment operators.  The expression must be followed (and thus
11014 terminated) either by a comma or lower-precedence operator or by
11015 something that would normally terminate an expression such as semicolon.
11016 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11017 otherwise it is mandatory.  It is up to the caller to ensure that the
11018 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11019 the source of the code to be parsed and the lexical context for the
11020 expression.
11021
11022 The op tree representing the expression is returned.  If an optional
11023 expression is absent, a null pointer is returned, otherwise the pointer
11024 will be non-null.
11025
11026 If an error occurs in parsing or compilation, in most cases a valid op
11027 tree is returned anyway.  The error is reflected in the parser state,
11028 normally resulting in a single exception at the top level of parsing
11029 which covers all the compilation errors that occurred.  Some compilation
11030 errors, however, will throw an exception immediately.
11031
11032 =cut
11033 */
11034
11035 OP *
11036 Perl_parse_termexpr(pTHX_ U32 flags)
11037 {
11038     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11039 }
11040
11041 /*
11042 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11043
11044 Parse a Perl list expression.  This may contain operators of precedence
11045 down to the comma operator.  The expression must be followed (and thus
11046 terminated) either by a low-precedence logic operator such as C<or> or by
11047 something that would normally terminate an expression such as semicolon.
11048 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11049 otherwise it is mandatory.  It is up to the caller to ensure that the
11050 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11051 the source of the code to be parsed and the lexical context for the
11052 expression.
11053
11054 The op tree representing the expression is returned.  If an optional
11055 expression is absent, a null pointer is returned, otherwise the pointer
11056 will be non-null.
11057
11058 If an error occurs in parsing or compilation, in most cases a valid op
11059 tree is returned anyway.  The error is reflected in the parser state,
11060 normally resulting in a single exception at the top level of parsing
11061 which covers all the compilation errors that occurred.  Some compilation
11062 errors, however, will throw an exception immediately.
11063
11064 =cut
11065 */
11066
11067 OP *
11068 Perl_parse_listexpr(pTHX_ U32 flags)
11069 {
11070     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11071 }
11072
11073 /*
11074 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11075
11076 Parse a single complete Perl expression.  This allows the full
11077 expression grammar, including the lowest-precedence operators such
11078 as C<or>.  The expression must be followed (and thus terminated) by a
11079 token that an expression would normally be terminated by: end-of-file,
11080 closing bracketing punctuation, semicolon, or one of the keywords that
11081 signals a postfix expression-statement modifier.  If I<flags> includes
11082 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11083 mandatory.  It is up to the caller to ensure that the dynamic parser
11084 state (L</PL_parser> et al) is correctly set to reflect the source of
11085 the code to be parsed and the lexical context for the expression.
11086
11087 The op tree representing the expression is returned.  If an optional
11088 expression is absent, a null pointer is returned, otherwise the pointer
11089 will be non-null.
11090
11091 If an error occurs in parsing or compilation, in most cases a valid op
11092 tree is returned anyway.  The error is reflected in the parser state,
11093 normally resulting in a single exception at the top level of parsing
11094 which covers all the compilation errors that occurred.  Some compilation
11095 errors, however, will throw an exception immediately.
11096
11097 =cut
11098 */
11099
11100 OP *
11101 Perl_parse_fullexpr(pTHX_ U32 flags)
11102 {
11103     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11104 }
11105
11106 /*
11107 =for apidoc Amx|OP *|parse_block|U32 flags
11108
11109 Parse a single complete Perl code block.  This consists of an opening
11110 brace, a sequence of statements, and a closing brace.  The block
11111 constitutes a lexical scope, so C<my> variables and various compile-time
11112 effects can be contained within it.  It is up to the caller to ensure
11113 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11114 reflect the source of the code to be parsed and the lexical context for
11115 the statement.
11116
11117 The op tree representing the code block is returned.  This is always a
11118 real op, never a null pointer.  It will normally be a C<lineseq> list,
11119 including C<nextstate> or equivalent ops.  No ops to construct any kind
11120 of runtime scope are included by virtue of it being a block.
11121
11122 If an error occurs in parsing or compilation, in most cases a valid op
11123 tree (most likely null) is returned anyway.  The error is reflected in
11124 the parser state, normally resulting in a single exception at the top
11125 level of parsing which covers all the compilation errors that occurred.
11126 Some compilation errors, however, will throw an exception immediately.
11127
11128 The I<flags> parameter is reserved for future use, and must always
11129 be zero.
11130
11131 =cut
11132 */
11133
11134 OP *
11135 Perl_parse_block(pTHX_ U32 flags)
11136 {
11137     if (flags)
11138         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11139     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11140 }
11141
11142 /*
11143 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11144
11145 Parse a single unadorned Perl statement.  This may be a normal imperative
11146 statement or a declaration that has compile-time effect.  It does not
11147 include any label or other affixture.  It is up to the caller to ensure
11148 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11149 reflect the source of the code to be parsed and the lexical context for
11150 the statement.
11151
11152 The op tree representing the statement is returned.  This may be a
11153 null pointer if the statement is null, for example if it was actually
11154 a subroutine definition (which has compile-time side effects).  If not
11155 null, it will be ops directly implementing the statement, suitable to
11156 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11157 equivalent op (except for those embedded in a scope contained entirely
11158 within the statement).
11159
11160 If an error occurs in parsing or compilation, in most cases a valid op
11161 tree (most likely null) is returned anyway.  The error is reflected in
11162 the parser state, normally resulting in a single exception at the top
11163 level of parsing which covers all the compilation errors that occurred.
11164 Some compilation errors, however, will throw an exception immediately.
11165
11166 The I<flags> parameter is reserved for future use, and must always
11167 be zero.
11168
11169 =cut
11170 */
11171
11172 OP *
11173 Perl_parse_barestmt(pTHX_ U32 flags)
11174 {
11175     if (flags)
11176         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11177     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11178 }
11179
11180 /*
11181 =for apidoc Amx|SV *|parse_label|U32 flags
11182
11183 Parse a single label, possibly optional, of the type that may prefix a
11184 Perl statement.  It is up to the caller to ensure that the dynamic parser
11185 state (L</PL_parser> et al) is correctly set to reflect the source of
11186 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11187 label is optional, otherwise it is mandatory.
11188
11189 The name of the label is returned in the form of a fresh scalar.  If an
11190 optional label is absent, a null pointer is returned.
11191
11192 If an error occurs in parsing, which can only occur if the label is
11193 mandatory, a valid label is returned anyway.  The error is reflected in
11194 the parser state, normally resulting in a single exception at the top
11195 level of parsing which covers all the compilation errors that occurred.
11196
11197 =cut
11198 */
11199
11200 SV *
11201 Perl_parse_label(pTHX_ U32 flags)
11202 {
11203     if (flags & ~PARSE_OPTIONAL)
11204         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11205     if (PL_lex_state == LEX_KNOWNEXT) {
11206         PL_parser->yychar = yylex();
11207         if (PL_parser->yychar == LABEL) {
11208             char * const lpv = pl_yylval.pval;
11209             STRLEN llen = strlen(lpv);
11210             PL_parser->yychar = YYEMPTY;
11211             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11212         } else {
11213             yyunlex();
11214             goto no_label;
11215         }
11216     } else {
11217         char *s, *t;
11218         STRLEN wlen, bufptr_pos;
11219         lex_read_space(0);
11220         t = s = PL_bufptr;
11221         if (!isIDFIRST_lazy_if(s, UTF))
11222             goto no_label;
11223         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11224         if (word_takes_any_delimeter(s, wlen))
11225             goto no_label;
11226         bufptr_pos = s - SvPVX(PL_linestr);
11227         PL_bufptr = t;
11228         lex_read_space(LEX_KEEP_PREVIOUS);
11229         t = PL_bufptr;
11230         s = SvPVX(PL_linestr) + bufptr_pos;
11231         if (t[0] == ':' && t[1] != ':') {
11232             PL_oldoldbufptr = PL_oldbufptr;
11233             PL_oldbufptr = s;
11234             PL_bufptr = t+1;
11235             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11236         } else {
11237             PL_bufptr = s;
11238             no_label:
11239             if (flags & PARSE_OPTIONAL) {
11240                 return NULL;
11241             } else {
11242                 qerror(Perl_mess(aTHX_ "Parse error"));
11243                 return newSVpvs("x");
11244             }
11245         }
11246     }
11247 }
11248
11249 /*
11250 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11251
11252 Parse a single complete Perl statement.  This may be a normal imperative
11253 statement or a declaration that has compile-time effect, and may include
11254 optional labels.  It is up to the caller to ensure that the dynamic
11255 parser state (L</PL_parser> et al) is correctly set to reflect the source
11256 of the code to be parsed and the lexical context for the statement.
11257
11258 The op tree representing the statement is returned.  This may be a
11259 null pointer if the statement is null, for example if it was actually
11260 a subroutine definition (which has compile-time side effects).  If not
11261 null, it will be the result of a L</newSTATEOP> call, normally including
11262 a C<nextstate> or equivalent op.
11263
11264 If an error occurs in parsing or compilation, in most cases a valid op
11265 tree (most likely null) is returned anyway.  The error is reflected in
11266 the parser state, normally resulting in a single exception at the top
11267 level of parsing which covers all the compilation errors that occurred.
11268 Some compilation errors, however, will throw an exception immediately.
11269
11270 The I<flags> parameter is reserved for future use, and must always
11271 be zero.
11272
11273 =cut
11274 */
11275
11276 OP *
11277 Perl_parse_fullstmt(pTHX_ U32 flags)
11278 {
11279     if (flags)
11280         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11281     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11282 }
11283
11284 /*
11285 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11286
11287 Parse a sequence of zero or more Perl statements.  These may be normal
11288 imperative statements, including optional labels, or declarations
11289 that have compile-time effect, or any mixture thereof.  The statement
11290 sequence ends when a closing brace or end-of-file is encountered in a
11291 place where a new statement could have validly started.  It is up to
11292 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11293 is correctly set to reflect the source of the code to be parsed and the
11294 lexical context for the statements.
11295
11296 The op tree representing the statement sequence is returned.  This may
11297 be a null pointer if the statements were all null, for example if there
11298 were no statements or if there were only subroutine definitions (which
11299 have compile-time side effects).  If not null, it will be a C<lineseq>
11300 list, normally including C<nextstate> or equivalent ops.
11301
11302 If an error occurs in parsing or compilation, in most cases a valid op
11303 tree is returned anyway.  The error is reflected in the parser state,
11304 normally resulting in a single exception at the top level of parsing
11305 which covers all the compilation errors that occurred.  Some compilation
11306 errors, however, will throw an exception immediately.
11307
11308 The I<flags> parameter is reserved for future use, and must always
11309 be zero.
11310
11311 =cut
11312 */
11313
11314 OP *
11315 Perl_parse_stmtseq(pTHX_ U32 flags)
11316 {
11317     OP *stmtseqop;
11318     I32 c;
11319     if (flags)
11320         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11321     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11322     c = lex_peek_unichar(0);
11323     if (c != -1 && c != /*{*/'}')
11324         qerror(Perl_mess(aTHX_ "Parse error"));
11325     return stmtseqop;
11326 }
11327
11328 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11329 static void
11330 S_lex_token_boundary(pTHX)
11331 {
11332     PL_oldoldbufptr = PL_oldbufptr;
11333     PL_oldbufptr = PL_bufptr;
11334 }
11335
11336 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11337 static OP *
11338 S_parse_opt_lexvar(pTHX)
11339 {
11340     I32 sigil, c;
11341     char *s, *d;
11342     OP *var;
11343     lex_token_boundary();
11344     sigil = lex_read_unichar(0);
11345     if (lex_peek_unichar(0) == '#') {
11346         qerror(Perl_mess(aTHX_ "Parse error"));
11347         return NULL;
11348     }
11349     lex_read_space(0);
11350     c = lex_peek_unichar(0);
11351     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11352         return NULL;
11353     s = PL_bufptr;
11354     d = PL_tokenbuf + 1;
11355     PL_tokenbuf[0] = (char)sigil;
11356     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11357     PL_bufptr = s;
11358     if (d == PL_tokenbuf+1)
11359         return NULL;
11360     *d = 0;
11361     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11362                 OPf_MOD | (OPpLVAL_INTRO<<8));
11363     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11364     return var;
11365 }
11366
11367 OP *
11368 Perl_parse_subsignature(pTHX)
11369 {
11370     I32 c;
11371     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11372     OP *initops = NULL;
11373     lex_read_space(0);
11374     c = lex_peek_unichar(0);
11375     while (c != /*(*/')') {
11376         switch (c) {
11377             case '$': {
11378                 OP *var, *expr;
11379                 if (prev_type == 2)
11380                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11381                 var = parse_opt_lexvar();
11382                 expr = var ?
11383                     newBINOP(OP_AELEM, 0,
11384                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11385                             OP_RV2AV),
11386                         newSVOP(OP_CONST, 0, newSViv(pos))) :
11387                     NULL;
11388                 lex_read_space(0);
11389                 c = lex_peek_unichar(0);
11390                 if (c == '=') {
11391                     lex_token_boundary();
11392                     lex_read_unichar(0);
11393                     lex_read_space(0);
11394                     c = lex_peek_unichar(0);
11395                     if (c == ',' || c == /*(*/')') {
11396                         if (var)
11397                             qerror(Perl_mess(aTHX_ "Optional parameter "
11398                                     "lacks default expression"));
11399                     } else {
11400                         OP *defexpr = parse_termexpr(0);
11401                         if (defexpr->op_type == OP_UNDEF &&
11402                                 !(defexpr->op_flags & OPf_KIDS)) {
11403                             op_free(defexpr);
11404                         } else {
11405                             OP *ifop =
11406                                 newBINOP(OP_GE, 0,
11407                                     scalar(newUNOP(OP_RV2AV, 0,
11408                                             newGVOP(OP_GV, 0, PL_defgv))),
11409                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
11410                             expr = var ?
11411                                 newCONDOP(0, ifop, expr, defexpr) :
11412                                 newLOGOP(OP_OR, 0, ifop, defexpr);
11413                         }
11414                     }
11415                     prev_type = 1;
11416                 } else {
11417                     if (prev_type == 1)
11418                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
11419                                 "follows optional parameter"));
11420                     prev_type = 0;
11421                     min_arity = pos + 1;
11422                 }
11423                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11424                 if (expr)
11425                     initops = op_append_list(OP_LINESEQ, initops,
11426                                 newSTATEOP(0, NULL, expr));
11427                 max_arity = ++pos;
11428             } break;
11429             case '@':
11430             case '%': {
11431                 OP *var;
11432                 if (prev_type == 2)
11433                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11434                 var = parse_opt_lexvar();
11435                 if (c == '%') {
11436                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11437                             newBINOP(OP_BIT_AND, 0,
11438                                 scalar(newUNOP(OP_RV2AV, 0,
11439                                     newGVOP(OP_GV, 0, PL_defgv))),
11440                                 newSVOP(OP_CONST, 0, newSViv(1))),
11441                             newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11442                                 newSVOP(OP_CONST, 0,
11443                                     newSVpvs("Odd name/value argument "
11444                                         "for subroutine"))));
11445                     if (pos != min_arity)
11446                         chkop = newLOGOP(OP_AND, 0,
11447                                     newBINOP(OP_GT, 0,
11448                                         scalar(newUNOP(OP_RV2AV, 0,
11449                                             newGVOP(OP_GV, 0, PL_defgv))),
11450                                         newSVOP(OP_CONST, 0, newSViv(pos))),
11451                                     chkop);
11452                     initops = op_append_list(OP_LINESEQ,
11453                                 newSTATEOP(0, NULL, chkop),
11454                                 initops);
11455                 }
11456                 if (var) {
11457                     OP *slice = pos ?
11458                         op_prepend_elem(OP_ASLICE,
11459                             newOP(OP_PUSHMARK, 0),
11460                             newLISTOP(OP_ASLICE, 0,
11461                                 list(newRANGE(0,
11462                                     newSVOP(OP_CONST, 0, newSViv(pos)),
11463                                     newUNOP(OP_AV2ARYLEN, 0,
11464                                         ref(newUNOP(OP_RV2AV, 0,
11465                                                 newGVOP(OP_GV, 0, PL_defgv)),
11466                                             OP_AV2ARYLEN)))),
11467                                 ref(newUNOP(OP_RV2AV, 0,
11468                                         newGVOP(OP_GV, 0, PL_defgv)),
11469                                     OP_ASLICE))) :
11470                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11471                     initops = op_append_list(OP_LINESEQ, initops,
11472                         newSTATEOP(0, NULL,
11473                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
11474                 }
11475                 prev_type = 2;
11476                 max_arity = -1;
11477             } break;
11478             default:
11479                 parse_error:
11480                 qerror(Perl_mess(aTHX_ "Parse error"));
11481                 return NULL;
11482         }
11483         lex_read_space(0);
11484         c = lex_peek_unichar(0);
11485         switch (c) {
11486             case /*(*/')': break;
11487             case ',':
11488                 do {
11489                     lex_token_boundary();
11490                     lex_read_unichar(0);
11491                     lex_read_space(0);
11492                     c = lex_peek_unichar(0);
11493                 } while (c == ',');
11494                 break;
11495             default:
11496                 goto parse_error;
11497         }
11498     }
11499     if (min_arity != 0) {
11500         initops = op_append_list(OP_LINESEQ,
11501             newSTATEOP(0, NULL,
11502                 newLOGOP(OP_OR, 0,
11503                     newBINOP(OP_GE, 0,
11504                         scalar(newUNOP(OP_RV2AV, 0,
11505                             newGVOP(OP_GV, 0, PL_defgv))),
11506                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
11507                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11508                         newSVOP(OP_CONST, 0,
11509                             newSVpvs("Too few arguments for subroutine"))))),
11510             initops);
11511     }
11512     if (max_arity != -1) {
11513         initops = op_append_list(OP_LINESEQ,
11514             newSTATEOP(0, NULL,
11515                 newLOGOP(OP_OR, 0,
11516                     newBINOP(OP_LE, 0,
11517                         scalar(newUNOP(OP_RV2AV, 0,
11518                             newGVOP(OP_GV, 0, PL_defgv))),
11519                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
11520                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11521                         newSVOP(OP_CONST, 0,
11522                             newSVpvs("Too many arguments for subroutine"))))),
11523             initops);
11524     }
11525     return initops;
11526 }
11527
11528 /*
11529  * Local variables:
11530  * c-indentation-style: bsd
11531  * c-basic-offset: 4
11532  * indent-tabs-mode: nil
11533  * End:
11534  *
11535  * ex: set ts=8 sts=4 sw=4 et:
11536  */