toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char ident_too_long[] = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN0OP       : zero-argument function, with its op created in this file
 225  * FUN1         : not used, except for not, which isn't a UNIOP
 226  * BOop         : bitwise or or xor
 227  * BAop         : bitwise and
 228  * SHop         : shift operator
 229  * PWop         : power operator
 230  * PMop         : pattern-matching operator
 231  * Aop          : addition-level operator
 232  * Mop          : multiplication-level operator
 233  * Eop          : equality-testing operator
 234  * Rop          : relational operator <= != gt
 235  *
 236  * Also see LOP and lop() below.
 237  */
 238
 239 #ifdef DEBUGGING /* Serve -DT. */
 240 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 241 #else
 242 #   define REPORT(retval) (retval)
 243 #endif
 244
 245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 253 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 254 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 255 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI3(f,x,have_x) { \
 273         pl_yylval.ival = f; \
 274         if (have_x) PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI3(f,XTERM,1)
 284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 285 #define UNIPROTO(f,optional) { \
 286         if (optional) PL_last_uni = PL_oldbufptr; \
 287         OPERATOR(f); \
 288         }
 289
 290 #define UNIBRACK(f) UNI3(f,0,0)
 291
 292 /* grandfather return to old style */
 293 #define OLDLOP(f) \
 294         do { \
 295             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 296                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 297             pl_yylval.ival = (f); \
 298             PL_expect = XTERM; \
 299             PL_bufptr = s; \
 300             return (int)LSTOP; \
 301         } while(0)
 302
 303 #define COPLINE_INC_WITH_HERELINES                  \
 304     STMT_START {                                     \
 305         CopLINE_inc(PL_curcop);                       \
 306         if (PL_parser->lex_shared->herelines)          \
 307             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
 308             PL_parser->lex_shared->herelines = 0;                    \
 309     } STMT_END
 310
 311
 312 #ifdef DEBUGGING
 313
 314 /* how to interpret the pl_yylval associated with the token */
 315 enum token_type {
 316     TOKENTYPE_NONE,
 317     TOKENTYPE_IVAL,
 318     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 319     TOKENTYPE_PVAL,
 320     TOKENTYPE_OPVAL
 321 };
 322
 323 static struct debug_tokens {
 324     const int token;
 325     enum token_type type;
 326     const char *name;
 327 } const debug_tokens[] =
 328 {
 329     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 330     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 331     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 332     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 333     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 334     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 335     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 336     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 337     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 338     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 339     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 340     { DO,               TOKENTYPE_NONE,         "DO" },
 341     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 342     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 343     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 344     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 345     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 346     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 347     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 348     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 349     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 350     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 351     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 352     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 353     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 354     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 355     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 356     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 357     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 358     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 359     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 360     { IF,               TOKENTYPE_IVAL,         "IF" },
 361     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 362     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 363     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 364     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 365     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 366     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 367     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 368     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 369     { MY,               TOKENTYPE_IVAL,         "MY" },
 370     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 371     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 372     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 373     { OROR,             TOKENTYPE_NONE,         "OROR" },
 374     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 375     { PEG,              TOKENTYPE_NONE,         "PEG" },
 376     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 377     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 378     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 379     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 380     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 381     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 382     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 383     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 384     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 385     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 388     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 389     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 390     { SUB,              TOKENTYPE_NONE,         "SUB" },
 391     { THING,            TOKENTYPE_OPVAL,        "THING" },
 392     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 393     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 394     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 395     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 396     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 397     { USE,              TOKENTYPE_IVAL,         "USE" },
 398     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 399     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 400     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 401     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 402     { 0,                TOKENTYPE_NONE,         NULL }
 403 };
 404
 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 406
 407 STATIC int
 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 409 {
 410     dVAR;
 411
 412     PERL_ARGS_ASSERT_TOKEREPORT;
 413
 414     if (DEBUG_T_TEST) {
 415         const char *name = NULL;
 416         enum token_type type = TOKENTYPE_NONE;
 417         const struct debug_tokens *p;
 418         SV* const report = newSVpvs("<== ");
 419
 420         for (p = debug_tokens; p->token; p++) {
 421             if (p->token == (int)rv) {
 422                 name = p->name;
 423                 type = p->type;
 424                 break;
 425             }
 426         }
 427         if (name)
 428             Perl_sv_catpv(aTHX_ report, name);
 429         else if ((char)rv > ' ' && (char)rv <= '~')
 430             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 431         else if (!rv)
 432             sv_catpvs(report, "EOF");
 433         else
 434             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 435         switch (type) {
 436         case TOKENTYPE_NONE:
 437             break;
 438         case TOKENTYPE_IVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 440             break;
 441         case TOKENTYPE_OPNUM:
 442             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 443                                     PL_op_name[lvalp->ival]);
 444             break;
 445         case TOKENTYPE_PVAL:
 446             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 447             break;
 448         case TOKENTYPE_OPVAL:
 449             if (lvalp->opval) {
 450                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 451                                     PL_op_name[lvalp->opval->op_type]);
 452                 if (lvalp->opval->op_type == OP_CONST) {
 453                     Perl_sv_catpvf(aTHX_ report, " %s",
 454                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 455                 }
 456
 457             }
 458             else
 459                 sv_catpvs(report, "(opval=null)");
 460             break;
 461         }
 462         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 463     };
 464     return (int)rv;
 465 }
 466
 467
 468 /* print the buffer with suitable escapes */
 469
 470 STATIC void
 471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 472 {
 473     SV* const tmp = newSVpvs("");
 474
 475     PERL_ARGS_ASSERT_PRINTBUF;
 476
 477     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 478     SvREFCNT_dec(tmp);
 479 }
 480
 481 #endif
 482
 483 static int
 484 S_deprecate_commaless_var_list(pTHX) {
 485     PL_expect = XTERM;
 486     deprecate("comma-less variable list");
 487     return REPORT(','); /* grandfather non-comma-format format */
 488 }
 489
 490 /*
 491  * S_ao
 492  *
 493  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 494  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 495  */
 496
 497 STATIC int
 498 S_ao(pTHX_ int toketype)
 499 {
 500     dVAR;
 501     if (*PL_bufptr == '=') {
 502         PL_bufptr++;
 503         if (toketype == ANDAND)
 504             pl_yylval.ival = OP_ANDASSIGN;
 505         else if (toketype == OROR)
 506             pl_yylval.ival = OP_ORASSIGN;
 507         else if (toketype == DORDOR)
 508             pl_yylval.ival = OP_DORASSIGN;
 509         toketype = ASSIGNOP;
 510     }
 511     return toketype;
 512 }
 513
 514 /*
 515  * S_no_op
 516  * When Perl expects an operator and finds something else, no_op
 517  * prints the warning.  It always prints "<something> found where
 518  * operator expected.  It prints "Missing semicolon on previous line?"
 519  * if the surprise occurs at the start of the line.  "do you need to
 520  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 521  * where the compiler doesn't know if foo is a method call or a function.
 522  * It prints "Missing operator before end of line" if there's nothing
 523  * after the missing operator, or "... before <...>" if there is something
 524  * after the missing operator.
 525  */
 526
 527 STATIC void
 528 S_no_op(pTHX_ const char *const what, char *s)
 529 {
 530     dVAR;
 531     char * const oldbp = PL_bufptr;
 532     const bool is_first = (PL_oldbufptr == PL_linestart);
 533
 534     PERL_ARGS_ASSERT_NO_OP;
 535
 536     if (!s)
 537         s = oldbp;
 538     else
 539         PL_bufptr = s;
 540     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 541     if (ckWARN_d(WARN_SYNTAX)) {
 542         if (is_first)
 543             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                     "\t(Missing semicolon on previous line?)\n");
 545         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 546             const char *t;
 547             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 548                                                             t += UTF ? UTF8SKIP(t) : 1)
 549                 NOOP;
 550             if (t < PL_bufptr && isSPACE(*t))
 551                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 552                         "\t(Do you need to predeclare %"SVf"?)\n",
 553                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 554                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 555         }
 556         else {
 557             assert(s >= oldbp);
 558             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 559                     "\t(Missing operator before %"SVf"?)\n",
 560                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 561                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 562         }
 563     }
 564     PL_bufptr = oldbp;
 565 }
 566
 567 /*
 568  * S_missingterm
 569  * Complain about missing quote/regexp/heredoc terminator.
 570  * If it's called with NULL then it cauterizes the line buffer.
 571  * If we're in a delimited string and the delimiter is a control
 572  * character, it's reformatted into a two-char sequence like ^C.
 573  * This is fatal.
 574  */
 575
 576 STATIC void
 577 S_missingterm(pTHX_ char *s)
 578 {
 579     dVAR;
 580     char tmpbuf[3];
 581     char q;
 582     if (s) {
 583         char * const nl = strrchr(s,'\n');
 584         if (nl)
 585             *nl = '\0';
 586     }
 587     else if (isCNTRL(PL_multi_close)) {
 588         *tmpbuf = '^';
 589         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 590         tmpbuf[2] = '\0';
 591         s = tmpbuf;
 592     }
 593     else {
 594         *tmpbuf = (char)PL_multi_close;
 595         tmpbuf[1] = '\0';
 596         s = tmpbuf;
 597     }
 598     q = strchr(s,'"') ? '\'' : '"';
 599     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 600 }
 601
 602 #include "feature.h"
 603
 604 /*
 605  * Check whether the named feature is enabled.
 606  */
 607 bool
 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 609 {
 610     dVAR;
 611     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 612
 613     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 614
 615     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 616
 617     if (namelen > MAX_FEATURE_LEN)
 618         return FALSE;
 619     memcpy(&he_name[8], name, namelen);
 620
 621     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 622                                      REFCOUNTED_HE_EXISTS));
 623 }
 624
 625 /*
 626  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 627  * utf16-to-utf8-reversed.
 628  */
 629
 630 #ifdef PERL_CR_FILTER
 631 static void
 632 strip_return(SV *sv)
 633 {
 634     const char *s = SvPVX_const(sv);
 635     const char * const e = s + SvCUR(sv);
 636
 637     PERL_ARGS_ASSERT_STRIP_RETURN;
 638
 639     /* outer loop optimized to do nothing if there are no CR-LFs */
 640     while (s < e) {
 641         if (*s++ == '\r' && *s == '\n') {
 642             /* hit a CR-LF, need to copy the rest */
 643             char *d = s - 1;
 644             *d++ = *s++;
 645             while (s < e) {
 646                 if (*s == '\r' && s[1] == '\n')
 647                     s++;
 648                 *d++ = *s++;
 649             }
 650             SvCUR(sv) -= s - d;
 651             return;
 652         }
 653     }
 654 }
 655
 656 STATIC I32
 657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 658 {
 659     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 660     if (count > 0 && !maxlen)
 661         strip_return(sv);
 662     return count;
 663 }
 664 #endif
 665
 666 /*
 667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 668
 669 Creates and initialises a new lexer/parser state object, supplying
 670 a context in which to lex and parse from a new source of Perl code.
 671 A pointer to the new state object is placed in L</PL_parser>.  An entry
 672 is made on the save stack so that upon unwinding the new state object
 673 will be destroyed and the former value of L</PL_parser> will be restored.
 674 Nothing else need be done to clean up the parsing context.
 675
 676 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 677 non-null, provides a string (in SV form) containing code to be parsed.
 678 A copy of the string is made, so subsequent modification of I<line>
 679 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 680 from which code will be read to be parsed.  If both are non-null, the
 681 code in I<line> comes first and must consist of complete lines of input,
 682 and I<rsfp> supplies the remainder of the source.
 683
 684 The I<flags> parameter is reserved for future use.  Currently it is only
 685 used by perl internally, so extensions should always pass zero.
 686
 687 =cut
 688 */
 689
 690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 691    can share filters with the current parser.
 692    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 693    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 694    destruction. This is used to handle the case of defaulting to reading the
 695    script from the standard input because no filename was given on the command
 696    line (without getting confused by situation where STDIN has been closed, so
 697    the script handle is opened on fd 0)  */
 698
 699 void
 700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 701 {
 702     dVAR;
 703     const char *s = NULL;
 704     yy_parser *parser, *oparser;
 705     if (flags && flags & ~LEX_START_FLAGS)
 706         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 707
 708     /* create and initialise a parser */
 709
 710     Newxz(parser, 1, yy_parser);
 711     parser->old_parser = oparser = PL_parser;
 712     PL_parser = parser;
 713
 714     parser->stack = NULL;
 715     parser->ps = NULL;
 716     parser->stack_size = 0;
 717
 718     /* on scope exit, free this parser and restore any outer one */
 719     SAVEPARSER(parser);
 720     parser->saved_curcop = PL_curcop;
 721
 722     /* initialise lexer state */
 723
 724 #ifdef PERL_MAD
 725     parser->curforce = -1;
 726 #else
 727     parser->nexttoke = 0;
 728 #endif
 729     parser->error_count = oparser ? oparser->error_count : 0;
 730     parser->copline = NOLINE;
 731     parser->lex_state = LEX_NORMAL;
 732     parser->expect = XSTATE;
 733     parser->rsfp = rsfp;
 734     parser->rsfp_filters =
 735       !(flags & LEX_START_SAME_FILTER) || !oparser
 736         ? NULL
 737         : MUTABLE_AV(SvREFCNT_inc(
 738             oparser->rsfp_filters
 739              ? oparser->rsfp_filters
 740              : (oparser->rsfp_filters = newAV())
 741           ));
 742
 743     Newx(parser->lex_brackstack, 120, char);
 744     Newx(parser->lex_casestack, 12, char);
 745     *parser->lex_casestack = '\0';
 746     Newxz(parser->lex_shared, 1, LEXSHARED);
 747
 748     if (line) {
 749         STRLEN len;
 750         s = SvPV_const(line, len);
 751         parser->linestr = flags & LEX_START_COPIED
 752                             ? SvREFCNT_inc_simple_NN(line)
 753                             : newSVpvn_flags(s, len, SvUTF8(line));
 754         sv_catpvs(parser->linestr, "\n;");
 755     } else {
 756         parser->linestr = newSVpvs("\n;");
 757     }
 758     parser->oldoldbufptr =
 759         parser->oldbufptr =
 760         parser->bufptr =
 761         parser->linestart = SvPVX(parser->linestr);
 762     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 763     parser->last_lop = parser->last_uni = NULL;
 764     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 765                                  |LEX_DONT_CLOSE_RSFP);
 766
 767     parser->in_pod = parser->filtered = 0;
 768 }
 769
 770
 771 /* delete a parser object */
 772
 773 void
 774 Perl_parser_free(pTHX_  const yy_parser *parser)
 775 {
 776     PERL_ARGS_ASSERT_PARSER_FREE;
 777
 778     PL_curcop = parser->saved_curcop;
 779     SvREFCNT_dec(parser->linestr);
 780
 781     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 782         PerlIO_clearerr(parser->rsfp);
 783     else if (parser->rsfp && (!parser->old_parser ||
 784                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 785         PerlIO_close(parser->rsfp);
 786     SvREFCNT_dec(parser->rsfp_filters);
 787
 788     Safefree(parser->lex_brackstack);
 789     Safefree(parser->lex_casestack);
 790     Safefree(parser->lex_shared);
 791     PL_parser = parser->old_parser;
 792     Safefree(parser);
 793 }
 794
 795
 796 /*
 797 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 798
 799 Buffer scalar containing the chunk currently under consideration of the
 800 text currently being lexed.  This is always a plain string scalar (for
 801 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 802 normal scalar means; instead refer to the buffer directly by the pointer
 803 variables described below.
 804
 805 The lexer maintains various C<char*> pointers to things in the
 806 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 807 reallocated, all of these pointers must be updated.  Don't attempt to
 808 do this manually, but rather use L</lex_grow_linestr> if you need to
 809 reallocate the buffer.
 810
 811 The content of the text chunk in the buffer is commonly exactly one
 812 complete line of input, up to and including a newline terminator,
 813 but there are situations where it is otherwise.  The octets of the
 814 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 815 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 816 flag on this scalar, which may disagree with it.
 817
 818 For direct examination of the buffer, the variable
 819 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 820 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 821 of these pointers is usually preferable to examination of the scalar
 822 through normal scalar means.
 823
 824 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 825
 826 Direct pointer to the end of the chunk of text currently being lexed, the
 827 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 828 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 829 always located at the end of the buffer, and does not count as part of
 830 the buffer's contents.
 831
 832 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 833
 834 Points to the current position of lexing inside the lexer buffer.
 835 Characters around this point may be freely examined, within
 836 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 837 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 838 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 839
 840 Lexing code (whether in the Perl core or not) moves this pointer past
 841 the characters that it consumes.  It is also expected to perform some
 842 bookkeeping whenever a newline character is consumed.  This movement
 843 can be more conveniently performed by the function L</lex_read_to>,
 844 which handles newlines appropriately.
 845
 846 Interpretation of the buffer's octets can be abstracted out by
 847 using the slightly higher-level functions L</lex_peek_unichar> and
 848 L</lex_read_unichar>.
 849
 850 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 851
 852 Points to the start of the current line inside the lexer buffer.
 853 This is useful for indicating at which column an error occurred, and
 854 not much else.  This must be updated by any lexing code that consumes
 855 a newline; the function L</lex_read_to> handles this detail.
 856
 857 =cut
 858 */
 859
 860 /*
 861 =for apidoc Amx|bool|lex_bufutf8
 862
 863 Indicates whether the octets in the lexer buffer
 864 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 865 of Unicode characters.  If not, they should be interpreted as Latin-1
 866 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 867
 868 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 869 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 870 encoding.
 871
 872 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 873 is significant, but not the whole story regarding the input character
 874 encoding.  Normally, when a file is being read, the scalar contains octets
 875 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 876 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 877 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 878 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 879 is in effect.  This logic may change in the future; use this function
 880 instead of implementing the logic yourself.
 881
 882 =cut
 883 */
 884
 885 bool
 886 Perl_lex_bufutf8(pTHX)
 887 {
 888     return UTF;
 889 }
 890
 891 /*
 892 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 893
 894 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 895 at least I<len> octets (including terminating NUL).  Returns a
 896 pointer to the reallocated buffer.  This is necessary before making
 897 any direct modification of the buffer that would increase its length.
 898 L</lex_stuff_pvn> provides a more convenient way to insert text into
 899 the buffer.
 900
 901 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 902 this function updates all of the lexer's variables that point directly
 903 into the buffer.
 904
 905 =cut
 906 */
 907
 908 char *
 909 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 910 {
 911     SV *linestr;
 912     char *buf;
 913     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 914     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 915     linestr = PL_parser->linestr;
 916     buf = SvPVX(linestr);
 917     if (len <= SvLEN(linestr))
 918         return buf;
 919     bufend_pos = PL_parser->bufend - buf;
 920     bufptr_pos = PL_parser->bufptr - buf;
 921     oldbufptr_pos = PL_parser->oldbufptr - buf;
 922     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 923     linestart_pos = PL_parser->linestart - buf;
 924     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 925     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 926     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 927                             PL_parser->lex_shared->re_eval_start - buf : 0;
 928
 929     buf = sv_grow(linestr, len);
 930
 931     PL_parser->bufend = buf + bufend_pos;
 932     PL_parser->bufptr = buf + bufptr_pos;
 933     PL_parser->oldbufptr = buf + oldbufptr_pos;
 934     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 935     PL_parser->linestart = buf + linestart_pos;
 936     if (PL_parser->last_uni)
 937         PL_parser->last_uni = buf + last_uni_pos;
 938     if (PL_parser->last_lop)
 939         PL_parser->last_lop = buf + last_lop_pos;
 940     if (PL_parser->lex_shared->re_eval_start)
 941         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 942     return buf;
 943 }
 944
 945 /*
 946 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 947
 948 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 949 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 950 reallocating the buffer if necessary.  This means that lexing code that
 951 runs later will see the characters as if they had appeared in the input.
 952 It is not recommended to do this as part of normal parsing, and most
 953 uses of this facility run the risk of the inserted characters being
 954 interpreted in an unintended manner.
 955
 956 The string to be inserted is represented by I<len> octets starting
 957 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 958 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 959 The characters are recoded for the lexer buffer, according to how the
 960 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 961 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 962 function is more convenient.
 963
 964 =cut
 965 */
 966
 967 void
 968 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 969 {
 970     dVAR;
 971     char *bufptr;
 972     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 973     if (flags & ~(LEX_STUFF_UTF8))
 974         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 975     if (UTF) {
 976         if (flags & LEX_STUFF_UTF8) {
 977             goto plain_copy;
 978         } else {
 979             STRLEN highhalf = 0;
 980             const char *p, *e = pv+len;
 981             for (p = pv; p != e; p++)
 982                 highhalf += !!(((U8)*p) & 0x80);
 983             if (!highhalf)
 984                 goto plain_copy;
 985             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 986             bufptr = PL_parser->bufptr;
 987             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 988             SvCUR_set(PL_parser->linestr,
 989                 SvCUR(PL_parser->linestr) + len+highhalf);
 990             PL_parser->bufend += len+highhalf;
 991             for (p = pv; p != e; p++) {
 992                 U8 c = (U8)*p;
 993                 if (c & 0x80) {
 994                     *bufptr++ = (char)(0xc0 | (c >> 6));
 995                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 996                 } else {
 997                     *bufptr++ = (char)c;
 998                 }
 999             }
1000         }
1001     } else {
1002         if (flags & LEX_STUFF_UTF8) {
1003             STRLEN highhalf = 0;
1004             const char *p, *e = pv+len;
1005             for (p = pv; p != e; p++) {
1006                 U8 c = (U8)*p;
1007                 if (c >= 0xc4) {
1008                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1009                                 "non-Latin-1 character into Latin-1 input");
1010                 } else if (c >= 0xc2 && p+1 != e &&
1011                             (((U8)p[1]) & 0xc0) == 0x80) {
1012                     p++;
1013                     highhalf++;
1014                 } else if (c >= 0x80) {
1015                     /* malformed UTF-8 */
1016                     ENTER;
1017                     SAVESPTR(PL_warnhook);
1018                     PL_warnhook = PERL_WARNHOOK_FATAL;
1019                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1020                     LEAVE;
1021                 }
1022             }
1023             if (!highhalf)
1024                 goto plain_copy;
1025             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1026             bufptr = PL_parser->bufptr;
1027             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1028             SvCUR_set(PL_parser->linestr,
1029                 SvCUR(PL_parser->linestr) + len-highhalf);
1030             PL_parser->bufend += len-highhalf;
1031             for (p = pv; p != e; p++) {
1032                 U8 c = (U8)*p;
1033                 if (c & 0x80) {
1034                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1035                     p++;
1036                 } else {
1037                     *bufptr++ = (char)c;
1038                 }
1039             }
1040         } else {
1041             plain_copy:
1042             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043             bufptr = PL_parser->bufptr;
1044             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046             PL_parser->bufend += len;
1047             Copy(pv, bufptr, len, char);
1048         }
1049     }
1050 }
1051
1052 /*
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1054
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary.  This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1062
1063 The string to be inserted is represented by octets starting at I<pv>
1064 and continuing to the first nul.  These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in I<flags>.  The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1076 {
1077     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078     lex_stuff_pvn(pv, strlen(pv), flags);
1079 }
1080
1081 /*
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1083
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary.  This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1091
1092 The string to be inserted is the string value of I<sv>.  The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1097
1098 =cut
1099 */
1100
1101 void
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 {
1104     char *pv;
1105     STRLEN len;
1106     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1107     if (flags)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1109     pv = SvPV(sv, len);
1110     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 }
1112
1113 /*
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1115
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1120
1121 This is not the normal way to consume lexed text.  For that, use
1122 L</lex_read_to>.
1123
1124 =cut
1125 */
1126
1127 void
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1129 {
1130     char *buf, *bufend;
1131     STRLEN unstuff_len;
1132     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133     buf = PL_parser->bufptr;
1134     if (ptr < buf)
1135         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1136     if (ptr == buf)
1137         return;
1138     bufend = PL_parser->bufend;
1139     if (ptr > bufend)
1140         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141     unstuff_len = ptr - buf;
1142     Move(ptr, buf, bufend+1-ptr, char);
1143     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144     PL_parser->bufend = bufend - unstuff_len;
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1149
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1154
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_read_to(pTHX_ char *ptr)
1164 {
1165     char *s;
1166     PERL_ARGS_ASSERT_LEX_READ_TO;
1167     s = PL_parser->bufptr;
1168     if (ptr < s || ptr > PL_parser->bufend)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170     for (; s != ptr; s++)
1171         if (*s == '\n') {
1172             COPLINE_INC_WITH_HERELINES;
1173             PL_parser->linestart = s+1;
1174         }
1175     PL_parser->bufptr = ptr;
1176 }
1177
1178 /*
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1180
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to I<ptr>.  The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately.  I<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1186
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it.  However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1194
1195 =cut
1196 */
1197
1198 void
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1200 {
1201     char *buf;
1202     STRLEN discard_len;
1203     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204     buf = SvPVX(PL_parser->linestr);
1205     if (ptr < buf)
1206         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1207     if (ptr == buf)
1208         return;
1209     if (ptr > PL_parser->bufptr)
1210         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211     discard_len = ptr - buf;
1212     if (PL_parser->oldbufptr < ptr)
1213         PL_parser->oldbufptr = ptr;
1214     if (PL_parser->oldoldbufptr < ptr)
1215         PL_parser->oldoldbufptr = ptr;
1216     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217         PL_parser->last_uni = NULL;
1218     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219         PL_parser->last_lop = NULL;
1220     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222     PL_parser->bufend -= discard_len;
1223     PL_parser->bufptr -= discard_len;
1224     PL_parser->oldbufptr -= discard_len;
1225     PL_parser->oldoldbufptr -= discard_len;
1226     if (PL_parser->last_uni)
1227         PL_parser->last_uni -= discard_len;
1228     if (PL_parser->last_lop)
1229         PL_parser->last_lop -= discard_len;
1230 }
1231
1232 /*
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1234
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more.  It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1240
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245 will not be discarded.  If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1247
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1250
1251 =cut
1252 */
1253
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM  0x40000000
1256
1257 bool
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1259 {
1260     SV *linestr;
1261     char *buf;
1262     STRLEN old_bufend_pos, new_bufend_pos;
1263     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265     bool got_some_for_debugger = 0;
1266     bool got_some;
1267     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269     linestr = PL_parser->linestr;
1270     buf = SvPVX(linestr);
1271     if (!(flags & LEX_KEEP_PREVIOUS) &&
1272             PL_parser->bufptr == PL_parser->bufend) {
1273         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1274         linestart_pos = 0;
1275         if (PL_parser->last_uni != PL_parser->bufend)
1276             PL_parser->last_uni = NULL;
1277         if (PL_parser->last_lop != PL_parser->bufend)
1278             PL_parser->last_lop = NULL;
1279         last_uni_pos = last_lop_pos = 0;
1280         *buf = 0;
1281         SvCUR(linestr) = 0;
1282     } else {
1283         old_bufend_pos = PL_parser->bufend - buf;
1284         bufptr_pos = PL_parser->bufptr - buf;
1285         oldbufptr_pos = PL_parser->oldbufptr - buf;
1286         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1287         linestart_pos = PL_parser->linestart - buf;
1288         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1289         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1290     }
1291     if (flags & LEX_FAKE_EOF) {
1292         goto eof;
1293     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1294         got_some = 0;
1295     } else if (filter_gets(linestr, old_bufend_pos)) {
1296         got_some = 1;
1297         got_some_for_debugger = 1;
1298     } else if (flags & LEX_NO_TERM) {
1299         got_some = 0;
1300     } else {
1301         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1302             sv_setpvs(linestr, "");
1303         eof:
1304         /* End of real input.  Close filehandle (unless it was STDIN),
1305          * then add implicit termination.
1306          */
1307         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1308             PerlIO_clearerr(PL_parser->rsfp);
1309         else if (PL_parser->rsfp)
1310             (void)PerlIO_close(PL_parser->rsfp);
1311         PL_parser->rsfp = NULL;
1312         PL_parser->in_pod = PL_parser->filtered = 0;
1313 #ifdef PERL_MAD
1314         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1315             PL_faketokens = 1;
1316 #endif
1317         if (!PL_in_eval && PL_minus_p) {
1318             sv_catpvs(linestr,
1319                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1320             PL_minus_n = PL_minus_p = 0;
1321         } else if (!PL_in_eval && PL_minus_n) {
1322             sv_catpvs(linestr, /*{*/";}");
1323             PL_minus_n = 0;
1324         } else
1325             sv_catpvs(linestr, ";");
1326         got_some = 1;
1327     }
1328     buf = SvPVX(linestr);
1329     new_bufend_pos = SvCUR(linestr);
1330     PL_parser->bufend = buf + new_bufend_pos;
1331     PL_parser->bufptr = buf + bufptr_pos;
1332     PL_parser->oldbufptr = buf + oldbufptr_pos;
1333     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1334     PL_parser->linestart = buf + linestart_pos;
1335     if (PL_parser->last_uni)
1336         PL_parser->last_uni = buf + last_uni_pos;
1337     if (PL_parser->last_lop)
1338         PL_parser->last_lop = buf + last_lop_pos;
1339     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1340             PL_curstash != PL_debstash) {
1341         /* debugger active and we're not compiling the debugger code,
1342          * so store the line into the debugger's array of lines
1343          */
1344         update_debugger_info(NULL, buf+old_bufend_pos,
1345             new_bufend_pos-old_bufend_pos);
1346     }
1347     return got_some;
1348 }
1349
1350 /*
1351 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1352
1353 Looks ahead one (Unicode) character in the text currently being lexed.
1354 Returns the codepoint (unsigned integer value) of the next character,
1355 or -1 if lexing has reached the end of the input text.  To consume the
1356 peeked character, use L</lex_read_unichar>.
1357
1358 If the next character is in (or extends into) the next chunk of input
1359 text, the next chunk will be read in.  Normally the current chunk will be
1360 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1361 then the current chunk will not be discarded.
1362
1363 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1364 is encountered, an exception is generated.
1365
1366 =cut
1367 */
1368
1369 I32
1370 Perl_lex_peek_unichar(pTHX_ U32 flags)
1371 {
1372     dVAR;
1373     char *s, *bufend;
1374     if (flags & ~(LEX_KEEP_PREVIOUS))
1375         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1376     s = PL_parser->bufptr;
1377     bufend = PL_parser->bufend;
1378     if (UTF) {
1379         U8 head;
1380         I32 unichar;
1381         STRLEN len, retlen;
1382         if (s == bufend) {
1383             if (!lex_next_chunk(flags))
1384                 return -1;
1385             s = PL_parser->bufptr;
1386             bufend = PL_parser->bufend;
1387         }
1388         head = (U8)*s;
1389         if (!(head & 0x80))
1390             return head;
1391         if (head & 0x40) {
1392             len = PL_utf8skip[head];
1393             while ((STRLEN)(bufend-s) < len) {
1394                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1395                     break;
1396                 s = PL_parser->bufptr;
1397                 bufend = PL_parser->bufend;
1398             }
1399         }
1400         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1401         if (retlen == (STRLEN)-1) {
1402             /* malformed UTF-8 */
1403             ENTER;
1404             SAVESPTR(PL_warnhook);
1405             PL_warnhook = PERL_WARNHOOK_FATAL;
1406             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1407             LEAVE;
1408         }
1409         return unichar;
1410     } else {
1411         if (s == bufend) {
1412             if (!lex_next_chunk(flags))
1413                 return -1;
1414             s = PL_parser->bufptr;
1415         }
1416         return (U8)*s;
1417     }
1418 }
1419
1420 /*
1421 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1422
1423 Reads the next (Unicode) character in the text currently being lexed.
1424 Returns the codepoint (unsigned integer value) of the character read,
1425 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1426 if lexing has reached the end of the input text.  To non-destructively
1427 examine the next character, use L</lex_peek_unichar> instead.
1428
1429 If the next character is in (or extends into) the next chunk of input
1430 text, the next chunk will be read in.  Normally the current chunk will be
1431 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1432 then the current chunk will not be discarded.
1433
1434 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1435 is encountered, an exception is generated.
1436
1437 =cut
1438 */
1439
1440 I32
1441 Perl_lex_read_unichar(pTHX_ U32 flags)
1442 {
1443     I32 c;
1444     if (flags & ~(LEX_KEEP_PREVIOUS))
1445         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1446     c = lex_peek_unichar(flags);
1447     if (c != -1) {
1448         if (c == '\n')
1449             COPLINE_INC_WITH_HERELINES;
1450         if (UTF)
1451             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1452         else
1453             ++(PL_parser->bufptr);
1454     }
1455     return c;
1456 }
1457
1458 /*
1459 =for apidoc Amx|void|lex_read_space|U32 flags
1460
1461 Reads optional spaces, in Perl style, in the text currently being
1462 lexed.  The spaces may include ordinary whitespace characters and
1463 Perl-style comments.  C<#line> directives are processed if encountered.
1464 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1465 at a non-space character (or the end of the input text).
1466
1467 If spaces extend into the next chunk of input text, the next chunk will
1468 be read in.  Normally the current chunk will be discarded at the same
1469 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1470 chunk will not be discarded.
1471
1472 =cut
1473 */
1474
1475 #define LEX_NO_NEXT_CHUNK 0x80000000
1476
1477 void
1478 Perl_lex_read_space(pTHX_ U32 flags)
1479 {
1480     char *s, *bufend;
1481     bool need_incline = 0;
1482     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1483         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1484 #ifdef PERL_MAD
1485     if (PL_skipwhite) {
1486         sv_free(PL_skipwhite);
1487         PL_skipwhite = NULL;
1488     }
1489     if (PL_madskills)
1490         PL_skipwhite = newSVpvs("");
1491 #endif /* PERL_MAD */
1492     s = PL_parser->bufptr;
1493     bufend = PL_parser->bufend;
1494     while (1) {
1495         char c = *s;
1496         if (c == '#') {
1497             do {
1498                 c = *++s;
1499             } while (!(c == '\n' || (c == 0 && s == bufend)));
1500         } else if (c == '\n') {
1501             s++;
1502             PL_parser->linestart = s;
1503             if (s == bufend)
1504                 need_incline = 1;
1505             else
1506                 incline(s);
1507         } else if (isSPACE(c)) {
1508             s++;
1509         } else if (c == 0 && s == bufend) {
1510             bool got_more;
1511 #ifdef PERL_MAD
1512             if (PL_madskills)
1513                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1514 #endif /* PERL_MAD */
1515             if (flags & LEX_NO_NEXT_CHUNK)
1516                 break;
1517             PL_parser->bufptr = s;
1518             COPLINE_INC_WITH_HERELINES;
1519             got_more = lex_next_chunk(flags);
1520             CopLINE_dec(PL_curcop);
1521             s = PL_parser->bufptr;
1522             bufend = PL_parser->bufend;
1523             if (!got_more)
1524                 break;
1525             if (need_incline && PL_parser->rsfp) {
1526                 incline(s);
1527                 need_incline = 0;
1528             }
1529         } else {
1530             break;
1531         }
1532     }
1533 #ifdef PERL_MAD
1534     if (PL_madskills)
1535         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1536 #endif /* PERL_MAD */
1537     PL_parser->bufptr = s;
1538 }
1539
1540 /*
1541  * S_incline
1542  * This subroutine has nothing to do with tilting, whether at windmills
1543  * or pinball tables.  Its name is short for "increment line".  It
1544  * increments the current line number in CopLINE(PL_curcop) and checks
1545  * to see whether the line starts with a comment of the form
1546  *    # line 500 "foo.pm"
1547  * If so, it sets the current line number and file to the values in the comment.
1548  */
1549
1550 STATIC void
1551 S_incline(pTHX_ const char *s)
1552 {
1553     dVAR;
1554     const char *t;
1555     const char *n;
1556     const char *e;
1557     line_t line_num;
1558
1559     PERL_ARGS_ASSERT_INCLINE;
1560
1561     COPLINE_INC_WITH_HERELINES;
1562     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1563      && s+1 == PL_bufend && *s == ';') {
1564         /* fake newline in string eval */
1565         CopLINE_dec(PL_curcop);
1566         return;
1567     }
1568     if (*s++ != '#')
1569         return;
1570     while (SPACE_OR_TAB(*s))
1571         s++;
1572     if (strnEQ(s, "line", 4))
1573         s += 4;
1574     else
1575         return;
1576     if (SPACE_OR_TAB(*s))
1577         s++;
1578     else
1579         return;
1580     while (SPACE_OR_TAB(*s))
1581         s++;
1582     if (!isDIGIT(*s))
1583         return;
1584
1585     n = s;
1586     while (isDIGIT(*s))
1587         s++;
1588     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1589         return;
1590     while (SPACE_OR_TAB(*s))
1591         s++;
1592     if (*s == '"' && (t = strchr(s+1, '"'))) {
1593         s++;
1594         e = t + 1;
1595     }
1596     else {
1597         t = s;
1598         while (!isSPACE(*t))
1599             t++;
1600         e = t;
1601     }
1602     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1603         e++;
1604     if (*e != '\n' && *e != '\0')
1605         return;         /* false alarm */
1606
1607     line_num = atoi(n)-1;
1608
1609     if (t - s > 0) {
1610         const STRLEN len = t - s;
1611         SV *const temp_sv = CopFILESV(PL_curcop);
1612         const char *cf;
1613         STRLEN tmplen;
1614
1615         if (temp_sv) {
1616             cf = SvPVX(temp_sv);
1617             tmplen = SvCUR(temp_sv);
1618         } else {
1619             cf = NULL;
1620             tmplen = 0;
1621         }
1622
1623         if (!PL_rsfp && !PL_parser->filtered) {
1624             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1625              * to *{"::_<newfilename"} */
1626             /* However, the long form of evals is only turned on by the
1627                debugger - usually they're "(eval %lu)" */
1628             char smallbuf[128];
1629             char *tmpbuf;
1630             GV **gvp;
1631             STRLEN tmplen2 = len;
1632             if (tmplen + 2 <= sizeof smallbuf)
1633                 tmpbuf = smallbuf;
1634             else
1635                 Newx(tmpbuf, tmplen + 2, char);
1636             tmpbuf[0] = '_';
1637             tmpbuf[1] = '<';
1638             memcpy(tmpbuf + 2, cf, tmplen);
1639             tmplen += 2;
1640             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1641             if (gvp) {
1642                 char *tmpbuf2;
1643                 GV *gv2;
1644
1645                 if (tmplen2 + 2 <= sizeof smallbuf)
1646                     tmpbuf2 = smallbuf;
1647                 else
1648                     Newx(tmpbuf2, tmplen2 + 2, char);
1649
1650                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1651                     /* Either they malloc'd it, or we malloc'd it,
1652                        so no prefix is present in ours.  */
1653                     tmpbuf2[0] = '_';
1654                     tmpbuf2[1] = '<';
1655                 }
1656
1657                 memcpy(tmpbuf2 + 2, s, tmplen2);
1658                 tmplen2 += 2;
1659
1660                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1661                 if (!isGV(gv2)) {
1662                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1663                     /* adjust ${"::_<newfilename"} to store the new file name */
1664                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1665                     /* The line number may differ. If that is the case,
1666                        alias the saved lines that are in the array.
1667                        Otherwise alias the whole array. */
1668                     if (CopLINE(PL_curcop) == line_num) {
1669                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1670                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1671                     }
1672                     else if (GvAV(*gvp)) {
1673                         AV * const av = GvAV(*gvp);
1674                         const I32 start = CopLINE(PL_curcop)+1;
1675                         I32 items = AvFILLp(av) - start;
1676                         if (items > 0) {
1677                             AV * const av2 = GvAVn(gv2);
1678                             SV **svp = AvARRAY(av) + start;
1679                             I32 l = (I32)line_num+1;
1680                             while (items--)
1681                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1682                         }
1683                     }
1684                 }
1685
1686                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1687             }
1688             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1689         }
1690         CopFILE_free(PL_curcop);
1691         CopFILE_setn(PL_curcop, s, len);
1692     }
1693     CopLINE_set(PL_curcop, line_num);
1694 }
1695
1696 #ifdef PERL_MAD
1697 /* skip space before PL_thistoken */
1698
1699 STATIC char *
1700 S_skipspace0(pTHX_ register char *s)
1701 {
1702     PERL_ARGS_ASSERT_SKIPSPACE0;
1703
1704     s = skipspace(s);
1705     if (!PL_madskills)
1706         return s;
1707     if (PL_skipwhite) {
1708         if (!PL_thiswhite)
1709             PL_thiswhite = newSVpvs("");
1710         sv_catsv(PL_thiswhite, PL_skipwhite);
1711         sv_free(PL_skipwhite);
1712         PL_skipwhite = 0;
1713     }
1714     PL_realtokenstart = s - SvPVX(PL_linestr);
1715     return s;
1716 }
1717
1718 /* skip space after PL_thistoken */
1719
1720 STATIC char *
1721 S_skipspace1(pTHX_ register char *s)
1722 {
1723     const char *start = s;
1724     I32 startoff = start - SvPVX(PL_linestr);
1725
1726     PERL_ARGS_ASSERT_SKIPSPACE1;
1727
1728     s = skipspace(s);
1729     if (!PL_madskills)
1730         return s;
1731     start = SvPVX(PL_linestr) + startoff;
1732     if (!PL_thistoken && PL_realtokenstart >= 0) {
1733         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1734         PL_thistoken = newSVpvn(tstart, start - tstart);
1735     }
1736     PL_realtokenstart = -1;
1737     if (PL_skipwhite) {
1738         if (!PL_nextwhite)
1739             PL_nextwhite = newSVpvs("");
1740         sv_catsv(PL_nextwhite, PL_skipwhite);
1741         sv_free(PL_skipwhite);
1742         PL_skipwhite = 0;
1743     }
1744     return s;
1745 }
1746
1747 STATIC char *
1748 S_skipspace2(pTHX_ register char *s, SV **svp)
1749 {
1750     char *start;
1751     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1752     const I32 startoff = s - SvPVX(PL_linestr);
1753
1754     PERL_ARGS_ASSERT_SKIPSPACE2;
1755
1756     s = skipspace(s);
1757     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1758     if (!PL_madskills || !svp)
1759         return s;
1760     start = SvPVX(PL_linestr) + startoff;
1761     if (!PL_thistoken && PL_realtokenstart >= 0) {
1762         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1763         PL_thistoken = newSVpvn(tstart, start - tstart);
1764         PL_realtokenstart = -1;
1765     }
1766     if (PL_skipwhite) {
1767         if (!*svp)
1768             *svp = newSVpvs("");
1769         sv_setsv(*svp, PL_skipwhite);
1770         sv_free(PL_skipwhite);
1771         PL_skipwhite = 0;
1772     }
1773
1774     return s;
1775 }
1776 #endif
1777
1778 STATIC void
1779 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1780 {
1781     AV *av = CopFILEAVx(PL_curcop);
1782     if (av) {
1783         SV * const sv = newSV_type(SVt_PVMG);
1784         if (orig_sv)
1785             sv_setsv(sv, orig_sv);
1786         else
1787             sv_setpvn(sv, buf, len);
1788         (void)SvIOK_on(sv);
1789         SvIV_set(sv, 0);
1790         av_store(av, (I32)CopLINE(PL_curcop), sv);
1791     }
1792 }
1793
1794 /*
1795  * S_skipspace
1796  * Called to gobble the appropriate amount and type of whitespace.
1797  * Skips comments as well.
1798  */
1799
1800 STATIC char *
1801 S_skipspace(pTHX_ register char *s)
1802 {
1803 #ifdef PERL_MAD
1804     char *start = s;
1805 #endif /* PERL_MAD */
1806     PERL_ARGS_ASSERT_SKIPSPACE;
1807 #ifdef PERL_MAD
1808     if (PL_skipwhite) {
1809         sv_free(PL_skipwhite);
1810         PL_skipwhite = NULL;
1811     }
1812 #endif /* PERL_MAD */
1813     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1814         while (s < PL_bufend && SPACE_OR_TAB(*s))
1815             s++;
1816     } else {
1817         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1818         PL_bufptr = s;
1819         lex_read_space(LEX_KEEP_PREVIOUS |
1820                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1821                     LEX_NO_NEXT_CHUNK : 0));
1822         s = PL_bufptr;
1823         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1824         if (PL_linestart > PL_bufptr)
1825             PL_bufptr = PL_linestart;
1826         return s;
1827     }
1828 #ifdef PERL_MAD
1829     if (PL_madskills)
1830         PL_skipwhite = newSVpvn(start, s-start);
1831 #endif /* PERL_MAD */
1832     return s;
1833 }
1834
1835 /*
1836  * S_check_uni
1837  * Check the unary operators to ensure there's no ambiguity in how they're
1838  * used.  An ambiguous piece of code would be:
1839  *     rand + 5
1840  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1841  * the +5 is its argument.
1842  */
1843
1844 STATIC void
1845 S_check_uni(pTHX)
1846 {
1847     dVAR;
1848     const char *s;
1849     const char *t;
1850
1851     if (PL_oldoldbufptr != PL_last_uni)
1852         return;
1853     while (isSPACE(*PL_last_uni))
1854         PL_last_uni++;
1855     s = PL_last_uni;
1856     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1857         s++;
1858     if ((t = strchr(s, '(')) && t < PL_bufptr)
1859         return;
1860
1861     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1862                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1863                      (int)(s - PL_last_uni), PL_last_uni);
1864 }
1865
1866 /*
1867  * LOP : macro to build a list operator.  Its behaviour has been replaced
1868  * with a subroutine, S_lop() for which LOP is just another name.
1869  */
1870
1871 #define LOP(f,x) return lop(f,x,s)
1872
1873 /*
1874  * S_lop
1875  * Build a list operator (or something that might be one).  The rules:
1876  *  - if we have a next token, then it's a list operator [why?]
1877  *  - if the next thing is an opening paren, then it's a function
1878  *  - else it's a list operator
1879  */
1880
1881 STATIC I32
1882 S_lop(pTHX_ I32 f, int x, char *s)
1883 {
1884     dVAR;
1885
1886     PERL_ARGS_ASSERT_LOP;
1887
1888     pl_yylval.ival = f;
1889     CLINE;
1890     PL_expect = x;
1891     PL_bufptr = s;
1892     PL_last_lop = PL_oldbufptr;
1893     PL_last_lop_op = (OPCODE)f;
1894 #ifdef PERL_MAD
1895     if (PL_lasttoke)
1896         goto lstop;
1897 #else
1898     if (PL_nexttoke)
1899         goto lstop;
1900 #endif
1901     if (*s == '(')
1902         return REPORT(FUNC);
1903     s = PEEKSPACE(s);
1904     if (*s == '(')
1905         return REPORT(FUNC);
1906     else {
1907         lstop:
1908         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1909             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1910         return REPORT(LSTOP);
1911     }
1912 }
1913
1914 #ifdef PERL_MAD
1915  /*
1916  * S_start_force
1917  * Sets up for an eventual force_next().  start_force(0) basically does
1918  * an unshift, while start_force(-1) does a push.  yylex removes items
1919  * on the "pop" end.
1920  */
1921
1922 STATIC void
1923 S_start_force(pTHX_ int where)
1924 {
1925     int i;
1926
1927     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1928         where = PL_lasttoke;
1929     assert(PL_curforce < 0 || PL_curforce == where);
1930     if (PL_curforce != where) {
1931         for (i = PL_lasttoke; i > where; --i) {
1932             PL_nexttoke[i] = PL_nexttoke[i-1];
1933         }
1934         PL_lasttoke++;
1935     }
1936     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1937         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1938     PL_curforce = where;
1939     if (PL_nextwhite) {
1940         if (PL_madskills)
1941             curmad('^', newSVpvs(""));
1942         CURMAD('_', PL_nextwhite);
1943     }
1944 }
1945
1946 STATIC void
1947 S_curmad(pTHX_ char slot, SV *sv)
1948 {
1949     MADPROP **where;
1950
1951     if (!sv)
1952         return;
1953     if (PL_curforce < 0)
1954         where = &PL_thismad;
1955     else
1956         where = &PL_nexttoke[PL_curforce].next_mad;
1957
1958     if (PL_faketokens)
1959         sv_setpvs(sv, "");
1960     else {
1961         if (!IN_BYTES) {
1962             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1963                 SvUTF8_on(sv);
1964             else if (PL_encoding) {
1965                 sv_recode_to_utf8(sv, PL_encoding);
1966             }
1967         }
1968     }
1969
1970     /* keep a slot open for the head of the list? */
1971     if (slot != '_' && *where && (*where)->mad_key == '^') {
1972         (*where)->mad_key = slot;
1973         sv_free(MUTABLE_SV(((*where)->mad_val)));
1974         (*where)->mad_val = (void*)sv;
1975     }
1976     else
1977         addmad(newMADsv(slot, sv), where, 0);
1978 }
1979 #else
1980 #  define start_force(where)    NOOP
1981 #  define curmad(slot, sv)      NOOP
1982 #endif
1983
1984 /*
1985  * S_force_next
1986  * When the lexer realizes it knows the next token (for instance,
1987  * it is reordering tokens for the parser) then it can call S_force_next
1988  * to know what token to return the next time the lexer is called.  Caller
1989  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1990  * and possibly PL_expect to ensure the lexer handles the token correctly.
1991  */
1992
1993 STATIC void
1994 S_force_next(pTHX_ I32 type)
1995 {
1996     dVAR;
1997 #ifdef DEBUGGING
1998     if (DEBUG_T_TEST) {
1999         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2000         tokereport(type, &NEXTVAL_NEXTTOKE);
2001     }
2002 #endif
2003     /* Don’t let opslab_force_free snatch it */
2004     if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2005         assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2006         NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2007     }
2008 #ifdef PERL_MAD
2009     if (PL_curforce < 0)
2010         start_force(PL_lasttoke);
2011     PL_nexttoke[PL_curforce].next_type = type;
2012     if (PL_lex_state != LEX_KNOWNEXT)
2013         PL_lex_defer = PL_lex_state;
2014     PL_lex_state = LEX_KNOWNEXT;
2015     PL_lex_expect = PL_expect;
2016     PL_curforce = -1;
2017 #else
2018     PL_nexttype[PL_nexttoke] = type;
2019     PL_nexttoke++;
2020     if (PL_lex_state != LEX_KNOWNEXT) {
2021         PL_lex_defer = PL_lex_state;
2022         PL_lex_expect = PL_expect;
2023         PL_lex_state = LEX_KNOWNEXT;
2024     }
2025 #endif
2026 }
2027
2028 void
2029 Perl_yyunlex(pTHX)
2030 {
2031     int yyc = PL_parser->yychar;
2032     if (yyc != YYEMPTY) {
2033         if (yyc) {
2034             start_force(-1);
2035             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2036             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2037                 PL_lex_allbrackets--;
2038                 PL_lex_brackets--;
2039                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2040             } else if (yyc == '('/*)*/) {
2041                 PL_lex_allbrackets--;
2042                 yyc |= (2<<24);
2043             }
2044             force_next(yyc);
2045         }
2046         PL_parser->yychar = YYEMPTY;
2047     }
2048 }
2049
2050 STATIC SV *
2051 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2052 {
2053     dVAR;
2054     SV * const sv = newSVpvn_utf8(start, len,
2055                                   !IN_BYTES
2056                                   && UTF
2057                                   && !is_ascii_string((const U8*)start, len)
2058                                   && is_utf8_string((const U8*)start, len));
2059     return sv;
2060 }
2061
2062 /*
2063  * S_force_word
2064  * When the lexer knows the next thing is a word (for instance, it has
2065  * just seen -> and it knows that the next char is a word char, then
2066  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2067  * lookahead.
2068  *
2069  * Arguments:
2070  *   char *start : buffer position (must be within PL_linestr)
2071  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2072  *   int check_keyword : if true, Perl checks to make sure the word isn't
2073  *       a keyword (do this if the word is a label, e.g. goto FOO)
2074  *   int allow_pack : if true, : characters will also be allowed (require,
2075  *       use, etc. do this)
2076  *   int allow_initial_tick : used by the "sub" lexer only.
2077  */
2078
2079 STATIC char *
2080 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2081 {
2082     dVAR;
2083     char *s;
2084     STRLEN len;
2085
2086     PERL_ARGS_ASSERT_FORCE_WORD;
2087
2088     start = SKIPSPACE1(start);
2089     s = start;
2090     if (isIDFIRST_lazy_if(s,UTF) ||
2091         (allow_pack && *s == ':') ||
2092         (allow_initial_tick && *s == '\'') )
2093     {
2094         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2095         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2096             return start;
2097         start_force(PL_curforce);
2098         if (PL_madskills)
2099             curmad('X', newSVpvn(start,s-start));
2100         if (token == METHOD) {
2101             s = SKIPSPACE1(s);
2102             if (*s == '(')
2103                 PL_expect = XTERM;
2104             else {
2105                 PL_expect = XOPERATOR;
2106             }
2107         }
2108         if (PL_madskills)
2109             curmad('g', newSVpvs( "forced" ));
2110         NEXTVAL_NEXTTOKE.opval
2111             = (OP*)newSVOP(OP_CONST,0,
2112                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2113         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2114         force_next(token);
2115     }
2116     return s;
2117 }
2118
2119 /*
2120  * S_force_ident
2121  * Called when the lexer wants $foo *foo &foo etc, but the program
2122  * text only contains the "foo" portion.  The first argument is a pointer
2123  * to the "foo", and the second argument is the type symbol to prefix.
2124  * Forces the next token to be a "WORD".
2125  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2126  */
2127
2128 STATIC void
2129 S_force_ident(pTHX_ register const char *s, int kind)
2130 {
2131     dVAR;
2132
2133     PERL_ARGS_ASSERT_FORCE_IDENT;
2134
2135     if (*s) {
2136         const STRLEN len = strlen(s);
2137         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2138                                                                 UTF ? SVf_UTF8 : 0));
2139         start_force(PL_curforce);
2140         NEXTVAL_NEXTTOKE.opval = o;
2141         force_next(WORD);
2142         if (kind) {
2143             o->op_private = OPpCONST_ENTERED;
2144             /* XXX see note in pp_entereval() for why we forgo typo
2145                warnings if the symbol must be introduced in an eval.
2146                GSAR 96-10-12 */
2147             gv_fetchpvn_flags(s, len,
2148                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2149                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2150                               kind == '$' ? SVt_PV :
2151                               kind == '@' ? SVt_PVAV :
2152                               kind == '%' ? SVt_PVHV :
2153                               SVt_PVGV
2154                               );
2155         }
2156     }
2157 }
2158
2159 static void
2160 S_force_ident_maybe_lex(pTHX_ char pit)
2161 {
2162     start_force(PL_curforce);
2163     NEXTVAL_NEXTTOKE.ival = pit;
2164     force_next('p');
2165 }
2166
2167 NV
2168 Perl_str_to_version(pTHX_ SV *sv)
2169 {
2170     NV retval = 0.0;
2171     NV nshift = 1.0;
2172     STRLEN len;
2173     const char *start = SvPV_const(sv,len);
2174     const char * const end = start + len;
2175     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2176
2177     PERL_ARGS_ASSERT_STR_TO_VERSION;
2178
2179     while (start < end) {
2180         STRLEN skip;
2181         UV n;
2182         if (utf)
2183             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2184         else {
2185             n = *(U8*)start;
2186             skip = 1;
2187         }
2188         retval += ((NV)n)/nshift;
2189         start += skip;
2190         nshift *= 1000;
2191     }
2192     return retval;
2193 }
2194
2195 /*
2196  * S_force_version
2197  * Forces the next token to be a version number.
2198  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2199  * and if "guessing" is TRUE, then no new token is created (and the caller
2200  * must use an alternative parsing method).
2201  */
2202
2203 STATIC char *
2204 S_force_version(pTHX_ char *s, int guessing)
2205 {
2206     dVAR;
2207     OP *version = NULL;
2208     char *d;
2209 #ifdef PERL_MAD
2210     I32 startoff = s - SvPVX(PL_linestr);
2211 #endif
2212
2213     PERL_ARGS_ASSERT_FORCE_VERSION;
2214
2215     s = SKIPSPACE1(s);
2216
2217     d = s;
2218     if (*d == 'v')
2219         d++;
2220     if (isDIGIT(*d)) {
2221         while (isDIGIT(*d) || *d == '_' || *d == '.')
2222             d++;
2223 #ifdef PERL_MAD
2224         if (PL_madskills) {
2225             start_force(PL_curforce);
2226             curmad('X', newSVpvn(s,d-s));
2227         }
2228 #endif
2229         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2230             SV *ver;
2231 #ifdef USE_LOCALE_NUMERIC
2232             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2233             setlocale(LC_NUMERIC, "C");
2234 #endif
2235             s = scan_num(s, &pl_yylval);
2236 #ifdef USE_LOCALE_NUMERIC
2237             setlocale(LC_NUMERIC, loc);
2238             Safefree(loc);
2239 #endif
2240             version = pl_yylval.opval;
2241             ver = cSVOPx(version)->op_sv;
2242             if (SvPOK(ver) && !SvNIOK(ver)) {
2243                 SvUPGRADE(ver, SVt_PVNV);
2244                 SvNV_set(ver, str_to_version(ver));
2245                 SvNOK_on(ver);          /* hint that it is a version */
2246             }
2247         }
2248         else if (guessing) {
2249 #ifdef PERL_MAD
2250             if (PL_madskills) {
2251                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2252                 PL_nextwhite = 0;
2253                 s = SvPVX(PL_linestr) + startoff;
2254             }
2255 #endif
2256             return s;
2257         }
2258     }
2259
2260 #ifdef PERL_MAD
2261     if (PL_madskills && !version) {
2262         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2263         PL_nextwhite = 0;
2264         s = SvPVX(PL_linestr) + startoff;
2265     }
2266 #endif
2267     /* NOTE: The parser sees the package name and the VERSION swapped */
2268     start_force(PL_curforce);
2269     NEXTVAL_NEXTTOKE.opval = version;
2270     force_next(WORD);
2271
2272     return s;
2273 }
2274
2275 /*
2276  * S_force_strict_version
2277  * Forces the next token to be a version number using strict syntax rules.
2278  */
2279
2280 STATIC char *
2281 S_force_strict_version(pTHX_ char *s)
2282 {
2283     dVAR;
2284     OP *version = NULL;
2285 #ifdef PERL_MAD
2286     I32 startoff = s - SvPVX(PL_linestr);
2287 #endif
2288     const char *errstr = NULL;
2289
2290     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2291
2292     while (isSPACE(*s)) /* leading whitespace */
2293         s++;
2294
2295     if (is_STRICT_VERSION(s,&errstr)) {
2296         SV *ver = newSV(0);
2297         s = (char *)scan_version(s, ver, 0);
2298         version = newSVOP(OP_CONST, 0, ver);
2299     }
2300     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2301             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2302     {
2303         PL_bufptr = s;
2304         if (errstr)
2305             yyerror(errstr); /* version required */
2306         return s;
2307     }
2308
2309 #ifdef PERL_MAD
2310     if (PL_madskills && !version) {
2311         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2312         PL_nextwhite = 0;
2313         s = SvPVX(PL_linestr) + startoff;
2314     }
2315 #endif
2316     /* NOTE: The parser sees the package name and the VERSION swapped */
2317     start_force(PL_curforce);
2318     NEXTVAL_NEXTTOKE.opval = version;
2319     force_next(WORD);
2320
2321     return s;
2322 }
2323
2324 /*
2325  * S_tokeq
2326  * Tokenize a quoted string passed in as an SV.  It finds the next
2327  * chunk, up to end of string or a backslash.  It may make a new
2328  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2329  * turns \\ into \.
2330  */
2331
2332 STATIC SV *
2333 S_tokeq(pTHX_ SV *sv)
2334 {
2335     dVAR;
2336     char *s;
2337     char *send;
2338     char *d;
2339     STRLEN len = 0;
2340     SV *pv = sv;
2341
2342     PERL_ARGS_ASSERT_TOKEQ;
2343
2344     if (!SvLEN(sv))
2345         goto finish;
2346
2347     s = SvPV_force(sv, len);
2348     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2349         goto finish;
2350     send = s + len;
2351     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2352     while (s < send && !(*s == '\\' && s[1] == '\\'))
2353         s++;
2354     if (s == send)
2355         goto finish;
2356     d = s;
2357     if ( PL_hints & HINT_NEW_STRING ) {
2358         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2359     }
2360     while (s < send) {
2361         if (*s == '\\') {
2362             if (s + 1 < send && (s[1] == '\\'))
2363                 s++;            /* all that, just for this */
2364         }
2365         *d++ = *s++;
2366     }
2367     *d = '\0';
2368     SvCUR_set(sv, d - SvPVX_const(sv));
2369   finish:
2370     if ( PL_hints & HINT_NEW_STRING )
2371        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2372     return sv;
2373 }
2374
2375 /*
2376  * Now come three functions related to double-quote context,
2377  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2378  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2379  * interact with PL_lex_state, and create fake ( ... ) argument lists
2380  * to handle functions and concatenation.
2381  * For example,
2382  *   "foo\lbar"
2383  * is tokenised as
2384  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2385  */
2386
2387 /*
2388  * S_sublex_start
2389  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2390  *
2391  * Pattern matching will set PL_lex_op to the pattern-matching op to
2392  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2393  *
2394  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2395  *
2396  * Everything else becomes a FUNC.
2397  *
2398  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2399  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2400  * call to S_sublex_push().
2401  */
2402
2403 STATIC I32
2404 S_sublex_start(pTHX)
2405 {
2406     dVAR;
2407     const I32 op_type = pl_yylval.ival;
2408
2409     if (op_type == OP_NULL) {
2410         pl_yylval.opval = PL_lex_op;
2411         PL_lex_op = NULL;
2412         return THING;
2413     }
2414     if (op_type == OP_CONST || op_type == OP_READLINE) {
2415         SV *sv = tokeq(PL_lex_stuff);
2416
2417         if (SvTYPE(sv) == SVt_PVIV) {
2418             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2419             STRLEN len;
2420             const char * const p = SvPV_const(sv, len);
2421             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2422             SvREFCNT_dec(sv);
2423             sv = nsv;
2424         }
2425         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2426         PL_lex_stuff = NULL;
2427         /* Allow <FH> // "foo" */
2428         if (op_type == OP_READLINE)
2429             PL_expect = XTERMORDORDOR;
2430         return THING;
2431     }
2432     else if (op_type == OP_BACKTICK && PL_lex_op) {
2433         /* readpipe() vas overriden */
2434         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2435         pl_yylval.opval = PL_lex_op;
2436         PL_lex_op = NULL;
2437         PL_lex_stuff = NULL;
2438         return THING;
2439     }
2440
2441     PL_sublex_info.super_state = PL_lex_state;
2442     PL_sublex_info.sub_inwhat = (U16)op_type;
2443     PL_sublex_info.sub_op = PL_lex_op;
2444     PL_lex_state = LEX_INTERPPUSH;
2445
2446     PL_expect = XTERM;
2447     if (PL_lex_op) {
2448         pl_yylval.opval = PL_lex_op;
2449         PL_lex_op = NULL;
2450         return PMFUNC;
2451     }
2452     else
2453         return FUNC;
2454 }
2455
2456 /*
2457  * S_sublex_push
2458  * Create a new scope to save the lexing state.  The scope will be
2459  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2460  * to the uc, lc, etc. found before.
2461  * Sets PL_lex_state to LEX_INTERPCONCAT.
2462  */
2463
2464 STATIC I32
2465 S_sublex_push(pTHX)
2466 {
2467     dVAR;
2468     LEXSHARED *shared;
2469     ENTER;
2470
2471     PL_lex_state = PL_sublex_info.super_state;
2472     SAVEBOOL(PL_lex_dojoin);
2473     SAVEI32(PL_lex_brackets);
2474     SAVEI32(PL_lex_allbrackets);
2475     SAVEI32(PL_lex_formbrack);
2476     SAVEI8(PL_lex_fakeeof);
2477     SAVEI32(PL_lex_casemods);
2478     SAVEI32(PL_lex_starts);
2479     SAVEI8(PL_lex_state);
2480     SAVESPTR(PL_lex_repl);
2481     SAVEVPTR(PL_lex_inpat);
2482     SAVEI16(PL_lex_inwhat);
2483     SAVECOPLINE(PL_curcop);
2484     SAVEPPTR(PL_bufptr);
2485     SAVEPPTR(PL_bufend);
2486     SAVEPPTR(PL_oldbufptr);
2487     SAVEPPTR(PL_oldoldbufptr);
2488     SAVEPPTR(PL_last_lop);
2489     SAVEPPTR(PL_last_uni);
2490     SAVEPPTR(PL_linestart);
2491     SAVESPTR(PL_linestr);
2492     SAVEGENERICPV(PL_lex_brackstack);
2493     SAVEGENERICPV(PL_lex_casestack);
2494     SAVEGENERICPV(PL_parser->lex_shared);
2495
2496     /* The here-doc parser needs to be able to peek into outer lexing
2497        scopes to find the body of the here-doc.  So we put PL_linestr and
2498        PL_bufptr into lex_shared, to ‘share’ those values.
2499      */
2500     PL_parser->lex_shared->ls_linestr = PL_linestr;
2501     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2502
2503     PL_linestr = PL_lex_stuff;
2504     PL_lex_repl = PL_sublex_info.repl;
2505     PL_lex_stuff = NULL;
2506     PL_sublex_info.repl = NULL;
2507
2508     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2509         = SvPVX(PL_linestr);
2510     PL_bufend += SvCUR(PL_linestr);
2511     PL_last_lop = PL_last_uni = NULL;
2512     SAVEFREESV(PL_linestr);
2513     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2514
2515     PL_lex_dojoin = FALSE;
2516     PL_lex_brackets = PL_lex_formbrack = 0;
2517     PL_lex_allbrackets = 0;
2518     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2519     Newx(PL_lex_brackstack, 120, char);
2520     Newx(PL_lex_casestack, 12, char);
2521     PL_lex_casemods = 0;
2522     *PL_lex_casestack = '\0';
2523     PL_lex_starts = 0;
2524     PL_lex_state = LEX_INTERPCONCAT;
2525     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2526
2527     Newxz(shared, 1, LEXSHARED);
2528     shared->ls_prev = PL_parser->lex_shared;
2529     PL_parser->lex_shared = shared;
2530
2531     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2532     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2533     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2534         PL_lex_inpat = PL_sublex_info.sub_op;
2535     else
2536         PL_lex_inpat = NULL;
2537
2538     return '(';
2539 }
2540
2541 /*
2542  * S_sublex_done
2543  * Restores lexer state after a S_sublex_push.
2544  */
2545
2546 STATIC I32
2547 S_sublex_done(pTHX)
2548 {
2549     dVAR;
2550     if (!PL_lex_starts++) {
2551         SV * const sv = newSVpvs("");
2552         if (SvUTF8(PL_linestr))
2553             SvUTF8_on(sv);
2554         PL_expect = XOPERATOR;
2555         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2556         return THING;
2557     }
2558
2559     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2560         PL_lex_state = LEX_INTERPCASEMOD;
2561         return yylex();
2562     }
2563
2564     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2565     assert(PL_lex_inwhat != OP_TRANSR);
2566     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2567         PL_linestr = PL_lex_repl;
2568         PL_lex_inpat = 0;
2569         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2570         PL_bufend += SvCUR(PL_linestr);
2571         PL_last_lop = PL_last_uni = NULL;
2572         PL_lex_dojoin = FALSE;
2573         PL_lex_brackets = 0;
2574         PL_lex_allbrackets = 0;
2575         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2576         PL_lex_casemods = 0;
2577         *PL_lex_casestack = '\0';
2578         PL_lex_starts = 0;
2579         if (SvEVALED(PL_lex_repl)) {
2580             PL_lex_state = LEX_INTERPNORMAL;
2581             PL_lex_starts++;
2582             /*  we don't clear PL_lex_repl here, so that we can check later
2583                 whether this is an evalled subst; that means we rely on the
2584                 logic to ensure sublex_done() is called again only via the
2585                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2586         }
2587         else {
2588             PL_lex_state = LEX_INTERPCONCAT;
2589             PL_lex_repl = NULL;
2590         }
2591         return ',';
2592     }
2593     else {
2594 #ifdef PERL_MAD
2595         if (PL_madskills) {
2596             if (PL_thiswhite) {
2597                 if (!PL_endwhite)
2598                     PL_endwhite = newSVpvs("");
2599                 sv_catsv(PL_endwhite, PL_thiswhite);
2600                 PL_thiswhite = 0;
2601             }
2602             if (PL_thistoken)
2603                 sv_setpvs(PL_thistoken,"");
2604             else
2605                 PL_realtokenstart = -1;
2606         }
2607 #endif
2608         LEAVE;
2609         PL_bufend = SvPVX(PL_linestr);
2610         PL_bufend += SvCUR(PL_linestr);
2611         PL_expect = XOPERATOR;
2612         PL_sublex_info.sub_inwhat = 0;
2613         return ')';
2614     }
2615 }
2616
2617 /*
2618   scan_const
2619
2620   Extracts the next constant part of a pattern, double-quoted string,
2621   or transliteration.  This is terrifying code.
2622
2623   For example, in parsing the double-quoted string "ab\x63$d", it would
2624   stop at the '$' and return an OP_CONST containing 'abc'.
2625
2626   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2627   processing a pattern (PL_lex_inpat is true), a transliteration
2628   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2629
2630   Returns a pointer to the character scanned up to. If this is
2631   advanced from the start pointer supplied (i.e. if anything was
2632   successfully parsed), will leave an OP_CONST for the substring scanned
2633   in pl_yylval. Caller must intuit reason for not parsing further
2634   by looking at the next characters herself.
2635
2636   In patterns:
2637     expand:
2638       \N{ABC}  => \N{U+41.42.43}
2639
2640     pass through:
2641         all other \-char, including \N and \N{ apart from \N{ABC}
2642
2643     stops on:
2644         @ and $ where it appears to be a var, but not for $ as tail anchor
2645         \l \L \u \U \Q \E
2646         (?{  or  (??{
2647
2648
2649   In transliterations:
2650     characters are VERY literal, except for - not at the start or end
2651     of the string, which indicates a range. If the range is in bytes,
2652     scan_const expands the range to the full set of intermediate
2653     characters. If the range is in utf8, the hyphen is replaced with
2654     a certain range mark which will be handled by pmtrans() in op.c.
2655
2656   In double-quoted strings:
2657     backslashes:
2658       double-quoted style: \r and \n
2659       constants: \x31, etc.
2660       deprecated backrefs: \1 (in substitution replacements)
2661       case and quoting: \U \Q \E
2662     stops on @ and $
2663
2664   scan_const does *not* construct ops to handle interpolated strings.
2665   It stops processing as soon as it finds an embedded $ or @ variable
2666   and leaves it to the caller to work out what's going on.
2667
2668   embedded arrays (whether in pattern or not) could be:
2669       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2670
2671   $ in double-quoted strings must be the symbol of an embedded scalar.
2672
2673   $ in pattern could be $foo or could be tail anchor.  Assumption:
2674   it's a tail anchor if $ is the last thing in the string, or if it's
2675   followed by one of "()| \r\n\t"
2676
2677   \1 (backreferences) are turned into $1 in substitutions
2678
2679   The structure of the code is
2680       while (there's a character to process) {
2681           handle transliteration ranges
2682           skip regexp comments /(?#comment)/ and codes /(?{code})/
2683           skip #-initiated comments in //x patterns
2684           check for embedded arrays
2685           check for embedded scalars
2686           if (backslash) {
2687               deprecate \1 in substitution replacements
2688               handle string-changing backslashes \l \U \Q \E, etc.
2689               switch (what was escaped) {
2690                   handle \- in a transliteration (becomes a literal -)
2691                   if a pattern and not \N{, go treat as regular character
2692                   handle \132 (octal characters)
2693                   handle \x15 and \x{1234} (hex characters)
2694                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2695                   handle \cV (control characters)
2696                   handle printf-style backslashes (\f, \r, \n, etc)
2697               } (end switch)
2698               continue
2699           } (end if backslash)
2700           handle regular character
2701     } (end while character to read)
2702
2703 */
2704
2705 STATIC char *
2706 S_scan_const(pTHX_ char *start)
2707 {
2708     dVAR;
2709     char *send = PL_bufend;             /* end of the constant */
2710     SV *sv = newSV(send - start);               /* sv for the constant.  See
2711                                                    note below on sizing. */
2712     char *s = start;                    /* start of the constant */
2713     char *d = SvPVX(sv);                /* destination for copies */
2714     bool dorange = FALSE;                       /* are we in a translit range? */
2715     bool didrange = FALSE;                      /* did we just finish a range? */
2716     bool in_charclass = FALSE;                  /* within /[...]/ */
2717     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2718     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2719                                                    to be UTF8?  But, this can
2720                                                    show as true when the source
2721                                                    isn't utf8, as for example
2722                                                    when it is entirely composed
2723                                                    of hex constants */
2724
2725     /* Note on sizing:  The scanned constant is placed into sv, which is
2726      * initialized by newSV() assuming one byte of output for every byte of
2727      * input.  This routine expects newSV() to allocate an extra byte for a
2728      * trailing NUL, which this routine will append if it gets to the end of
2729      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2730      * CAPITAL LETTER A}), or more output than input if the constant ends up
2731      * recoded to utf8, but each time a construct is found that might increase
2732      * the needed size, SvGROW() is called.  Its size parameter each time is
2733      * based on the best guess estimate at the time, namely the length used so
2734      * far, plus the length the current construct will occupy, plus room for
2735      * the trailing NUL, plus one byte for every input byte still unscanned */
2736
2737     UV uv;
2738 #ifdef EBCDIC
2739     UV literal_endpoint = 0;
2740     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2741 #endif
2742
2743     PERL_ARGS_ASSERT_SCAN_CONST;
2744
2745     assert(PL_lex_inwhat != OP_TRANSR);
2746     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2747         /* If we are doing a trans and we know we want UTF8 set expectation */
2748         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2749         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2750     }
2751
2752
2753     while (s < send || dorange) {
2754
2755         /* get transliterations out of the way (they're most literal) */
2756         if (PL_lex_inwhat == OP_TRANS) {
2757             /* expand a range A-Z to the full set of characters.  AIE! */
2758             if (dorange) {
2759                 I32 i;                          /* current expanded character */
2760                 I32 min;                        /* first character in range */
2761                 I32 max;                        /* last character in range */
2762
2763 #ifdef EBCDIC
2764                 UV uvmax = 0;
2765 #endif
2766
2767                 if (has_utf8
2768 #ifdef EBCDIC
2769                     && !native_range
2770 #endif
2771                     ) {
2772                     char * const c = (char*)utf8_hop((U8*)d, -1);
2773                     char *e = d++;
2774                     while (e-- > c)
2775                         *(e + 1) = *e;
2776                     *c = (char)UTF_TO_NATIVE(0xff);
2777                     /* mark the range as done, and continue */
2778                     dorange = FALSE;
2779                     didrange = TRUE;
2780                     continue;
2781                 }
2782
2783                 i = d - SvPVX_const(sv);                /* remember current offset */
2784 #ifdef EBCDIC
2785                 SvGROW(sv,
2786                        SvLEN(sv) + (has_utf8 ?
2787                                     (512 - UTF_CONTINUATION_MARK +
2788                                      UNISKIP(0x100))
2789                                     : 256));
2790                 /* How many two-byte within 0..255: 128 in UTF-8,
2791                  * 96 in UTF-8-mod. */
2792 #else
2793                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2794 #endif
2795                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2796 #ifdef EBCDIC
2797                 if (has_utf8) {
2798                     int j;
2799                     for (j = 0; j <= 1; j++) {
2800                         char * const c = (char*)utf8_hop((U8*)d, -1);
2801                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2802                         if (j)
2803                             min = (U8)uv;
2804                         else if (uv < 256)
2805                             max = (U8)uv;
2806                         else {
2807                             max = (U8)0xff; /* only to \xff */
2808                             uvmax = uv; /* \x{100} to uvmax */
2809                         }
2810                         d = c; /* eat endpoint chars */
2811                      }
2812                 }
2813                else {
2814 #endif
2815                    d -= 2;              /* eat the first char and the - */
2816                    min = (U8)*d;        /* first char in range */
2817                    max = (U8)d[1];      /* last char in range  */
2818 #ifdef EBCDIC
2819                }
2820 #endif
2821
2822                 if (min > max) {
2823                     SvREFCNT_dec(sv);
2824                     Perl_croak(aTHX_
2825                                "Invalid range \"%c-%c\" in transliteration operator",
2826                                (char)min, (char)max);
2827                 }
2828
2829 #ifdef EBCDIC
2830                 if (literal_endpoint == 2 &&
2831                     ((isLOWER(min) && isLOWER(max)) ||
2832                      (isUPPER(min) && isUPPER(max)))) {
2833                     if (isLOWER(min)) {
2834                         for (i = min; i <= max; i++)
2835                             if (isLOWER(i))
2836                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2837                     } else {
2838                         for (i = min; i <= max; i++)
2839                             if (isUPPER(i))
2840                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2841                     }
2842                 }
2843                 else
2844 #endif
2845                     for (i = min; i <= max; i++)
2846 #ifdef EBCDIC
2847                         if (has_utf8) {
2848                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2849                             if (UNI_IS_INVARIANT(ch))
2850                                 *d++ = (U8)i;
2851                             else {
2852                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2853                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2854                             }
2855                         }
2856                         else
2857 #endif
2858                             *d++ = (char)i;
2859
2860 #ifdef EBCDIC
2861                 if (uvmax) {
2862                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2863                     if (uvmax > 0x101)
2864                         *d++ = (char)UTF_TO_NATIVE(0xff);
2865                     if (uvmax > 0x100)
2866                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2867                 }
2868 #endif
2869
2870                 /* mark the range as done, and continue */
2871                 dorange = FALSE;
2872                 didrange = TRUE;
2873 #ifdef EBCDIC
2874                 literal_endpoint = 0;
2875 #endif
2876                 continue;
2877             }
2878
2879             /* range begins (ignore - as first or last char) */
2880             else if (*s == '-' && s+1 < send  && s != start) {
2881                 if (didrange) {
2882                     SvREFCNT_dec(sv);
2883                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2884                 }
2885                 if (has_utf8
2886 #ifdef EBCDIC
2887                     && !native_range
2888 #endif
2889                     ) {
2890                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2891                     s++;
2892                     continue;
2893                 }
2894                 dorange = TRUE;
2895                 s++;
2896             }
2897             else {
2898                 didrange = FALSE;
2899 #ifdef EBCDIC
2900                 literal_endpoint = 0;
2901                 native_range = TRUE;
2902 #endif
2903             }
2904         }
2905
2906         /* if we get here, we're not doing a transliteration */
2907
2908         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2909             char *s1 = s-1;
2910             int esc = 0;
2911             while (s1 >= start && *s1-- == '\\')
2912                 esc = !esc;
2913             if (!esc)
2914                 in_charclass = TRUE;
2915         }
2916
2917         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2918             char *s1 = s-1;
2919             int esc = 0;
2920             while (s1 >= start && *s1-- == '\\')
2921                 esc = !esc;
2922             if (!esc)
2923                 in_charclass = FALSE;
2924         }
2925
2926         /* skip for regexp comments /(?#comment)/, except for the last
2927          * char, which will be done separately.
2928          * Stop on (?{..}) and friends */
2929
2930         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2931             if (s[2] == '#') {
2932                 while (s+1 < send && *s != ')')
2933                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2934             }
2935             else if (!PL_lex_casemods && !in_charclass &&
2936                      (    s[2] == '{' /* This should match regcomp.c */
2937                       || (s[2] == '?' && s[3] == '{')))
2938             {
2939                 break;
2940             }
2941         }
2942
2943         /* likewise skip #-initiated comments in //x patterns */
2944         else if (*s == '#' && PL_lex_inpat &&
2945           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2946             while (s+1 < send && *s != '\n')
2947                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2948         }
2949
2950         /* no further processing of single-quoted regex */
2951         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2952             goto default_action;
2953
2954         /* check for embedded arrays
2955            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2956            */
2957         else if (*s == '@' && s[1]) {
2958             if (isALNUM_lazy_if(s+1,UTF))
2959                 break;
2960             if (strchr(":'{$", s[1]))
2961                 break;
2962             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2963                 break; /* in regexp, neither @+ nor @- are interpolated */
2964         }
2965
2966         /* check for embedded scalars.  only stop if we're sure it's a
2967            variable.
2968         */
2969         else if (*s == '$') {
2970             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2971                 break;
2972             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2973                 if (s[1] == '\\') {
2974                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2975                                    "Possible unintended interpolation of $\\ in regex");
2976                 }
2977                 break;          /* in regexp, $ might be tail anchor */
2978             }
2979         }
2980
2981         /* End of else if chain - OP_TRANS rejoin rest */
2982
2983         /* backslashes */
2984         if (*s == '\\' && s+1 < send) {
2985             char* e;    /* Can be used for ending '}', etc. */
2986
2987             s++;
2988
2989             /* warn on \1 - \9 in substitution replacements, but note that \11
2990              * is an octal; and \19 is \1 followed by '9' */
2991             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2992                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2993             {
2994                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2995                 *--s = '$';
2996                 break;
2997             }
2998
2999             /* string-change backslash escapes */
3000             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3001                 --s;
3002                 break;
3003             }
3004             /* In a pattern, process \N, but skip any other backslash escapes.
3005              * This is because we don't want to translate an escape sequence
3006              * into a meta symbol and have the regex compiler use the meta
3007              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3008              * in spite of this, we do have to process \N here while the proper
3009              * charnames handler is in scope.  See bugs #56444 and #62056.
3010              * There is a complication because \N in a pattern may also stand
3011              * for 'match a non-nl', and not mean a charname, in which case its
3012              * processing should be deferred to the regex compiler.  To be a
3013              * charname it must be followed immediately by a '{', and not look
3014              * like \N followed by a curly quantifier, i.e., not something like
3015              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3016              * quantifier */
3017             else if (PL_lex_inpat
3018                     && (*s != 'N'
3019                         || s[1] != '{'
3020                         || regcurly(s + 1)))
3021             {
3022                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3023                 goto default_action;
3024             }
3025
3026             switch (*s) {
3027
3028             /* quoted - in transliterations */
3029             case '-':
3030                 if (PL_lex_inwhat == OP_TRANS) {
3031                     *d++ = *s++;
3032                     continue;
3033                 }
3034                 /* FALL THROUGH */
3035             default:
3036                 {
3037                     if ((isALNUMC(*s)))
3038                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3039                                        "Unrecognized escape \\%c passed through",
3040                                        *s);
3041                     /* default action is to copy the quoted character */
3042                     goto default_action;
3043                 }
3044
3045             /* eg. \132 indicates the octal constant 0132 */
3046             case '0': case '1': case '2': case '3':
3047             case '4': case '5': case '6': case '7':
3048                 {
3049                     I32 flags = 0;
3050                     STRLEN len = 3;
3051                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3052                     s += len;
3053                 }
3054                 goto NUM_ESCAPE_INSERT;
3055
3056             /* eg. \o{24} indicates the octal constant \024 */
3057             case 'o':
3058                 {
3059                     STRLEN len;
3060                     const char* error;
3061
3062                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3063                     s += len;
3064                     if (! valid) {
3065                         yyerror(error);
3066                         continue;
3067                     }
3068                     goto NUM_ESCAPE_INSERT;
3069                 }
3070
3071             /* eg. \x24 indicates the hex constant 0x24 */
3072             case 'x':
3073                 {
3074                     STRLEN len;
3075                     const char* error;
3076
3077                     bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3078                     s += len;
3079                     if (! valid) {
3080                         yyerror(error);
3081                         continue;
3082                     }
3083                 }
3084
3085               NUM_ESCAPE_INSERT:
3086                 /* Insert oct or hex escaped character.  There will always be
3087                  * enough room in sv since such escapes will be longer than any
3088                  * UTF-8 sequence they can end up as, except if they force us
3089                  * to recode the rest of the string into utf8 */
3090
3091                 /* Here uv is the ordinal of the next character being added in
3092                  * unicode (converted from native). */
3093                 if (!UNI_IS_INVARIANT(uv)) {
3094                     if (!has_utf8 && uv > 255) {
3095                         /* Might need to recode whatever we have accumulated so
3096                          * far if it contains any chars variant in utf8 or
3097                          * utf-ebcdic. */
3098
3099                         SvCUR_set(sv, d - SvPVX_const(sv));
3100                         SvPOK_on(sv);
3101                         *d = '\0';
3102                         /* See Note on sizing above.  */
3103                         sv_utf8_upgrade_flags_grow(sv,
3104                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3105                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3106                         d = SvPVX(sv) + SvCUR(sv);
3107                         has_utf8 = TRUE;
3108                     }
3109
3110                     if (has_utf8) {
3111                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3112                         if (PL_lex_inwhat == OP_TRANS &&
3113                             PL_sublex_info.sub_op) {
3114                             PL_sublex_info.sub_op->op_private |=
3115                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3116                                              : OPpTRANS_TO_UTF);
3117                         }
3118 #ifdef EBCDIC
3119                         if (uv > 255 && !dorange)
3120                             native_range = FALSE;
3121 #endif
3122                     }
3123                     else {
3124                         *d++ = (char)uv;
3125                     }
3126                 }
3127                 else {
3128                     *d++ = (char) uv;
3129                 }
3130                 continue;
3131
3132             case 'N':
3133                 /* In a non-pattern \N must be a named character, like \N{LATIN
3134                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3135                  * mean to match a non-newline.  For non-patterns, named
3136                  * characters are converted to their string equivalents. In
3137                  * patterns, named characters are not converted to their
3138                  * ultimate forms for the same reasons that other escapes
3139                  * aren't.  Instead, they are converted to the \N{U+...} form
3140                  * to get the value from the charnames that is in effect right
3141                  * now, while preserving the fact that it was a named character
3142                  * so that the regex compiler knows this */
3143
3144                 /* This section of code doesn't generally use the
3145                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3146                  * a close examination of this macro and determined it is a
3147                  * no-op except on utfebcdic variant characters.  Every
3148                  * character generated by this that would normally need to be
3149                  * enclosed by this macro is invariant, so the macro is not
3150                  * needed, and would complicate use of copy().  XXX There are
3151                  * other parts of this file where the macro is used
3152                  * inconsistently, but are saved by it being a no-op */
3153
3154                 /* The structure of this section of code (besides checking for
3155                  * errors and upgrading to utf8) is:
3156                  *  Further disambiguate between the two meanings of \N, and if
3157                  *      not a charname, go process it elsewhere
3158                  *  If of form \N{U+...}, pass it through if a pattern;
3159                  *      otherwise convert to utf8
3160                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3161                  *  pattern; otherwise convert to utf8 */
3162
3163                 /* Here, s points to the 'N'; the test below is guaranteed to
3164                  * succeed if we are being called on a pattern as we already
3165                  * know from a test above that the next character is a '{'.
3166                  * On a non-pattern \N must mean 'named sequence, which
3167                  * requires braces */
3168                 s++;
3169                 if (*s != '{') {
3170                     yyerror("Missing braces on \\N{}");
3171                     continue;
3172                 }
3173                 s++;
3174
3175                 /* If there is no matching '}', it is an error. */
3176                 if (! (e = strchr(s, '}'))) {
3177                     if (! PL_lex_inpat) {
3178                         yyerror("Missing right brace on \\N{}");
3179                     } else {
3180                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3181                     }
3182                     continue;
3183                 }
3184
3185                 /* Here it looks like a named character */
3186
3187                 if (PL_lex_inpat) {
3188
3189                     /* XXX This block is temporary code.  \N{} implies that the
3190                      * pattern is to have Unicode semantics, and therefore
3191                      * currently has to be encoded in utf8.  By putting it in
3192                      * utf8 now, we save a whole pass in the regular expression
3193                      * compiler.  Once that code is changed so Unicode
3194                      * semantics doesn't necessarily have to be in utf8, this
3195                      * block should be removed.  However, the code that parses
3196                      * the output of this would have to be changed to not
3197                      * necessarily expect utf8 */
3198                     if (!has_utf8) {
3199                         SvCUR_set(sv, d - SvPVX_const(sv));
3200                         SvPOK_on(sv);
3201                         *d = '\0';
3202                         /* See Note on sizing above.  */
3203                         sv_utf8_upgrade_flags_grow(sv,
3204                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3205                                         /* 5 = '\N{' + cur char + NUL */
3206                                         (STRLEN)(send - s) + 5);
3207                         d = SvPVX(sv) + SvCUR(sv);
3208                         has_utf8 = TRUE;
3209                     }
3210                 }
3211
3212                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3213                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3214                                 | PERL_SCAN_DISALLOW_PREFIX;
3215                     STRLEN len;
3216
3217                     /* For \N{U+...}, the '...' is a unicode value even on
3218                      * EBCDIC machines */
3219                     s += 2;         /* Skip to next char after the 'U+' */
3220                     len = e - s;
3221                     uv = grok_hex(s, &len, &flags, NULL);
3222                     if (len == 0 || len != (STRLEN)(e - s)) {
3223                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3224                         s = e + 1;
3225                         continue;
3226                     }
3227
3228                     if (PL_lex_inpat) {
3229
3230                         /* On non-EBCDIC platforms, pass through to the regex
3231                          * compiler unchanged.  The reason we evaluated the
3232                          * number above is to make sure there wasn't a syntax
3233                          * error.  But on EBCDIC we convert to native so
3234                          * downstream code can continue to assume it's native
3235                          */
3236                         s -= 5;     /* Include the '\N{U+' */
3237 #ifdef EBCDIC
3238                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3239                                                                and the \0 */
3240                                     "\\N{U+%X}",
3241                                     (unsigned int) UNI_TO_NATIVE(uv));
3242 #else
3243                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3244                         d += e - s + 1;
3245 #endif
3246                     }
3247                     else {  /* Not a pattern: convert the hex to string */
3248
3249                          /* If destination is not in utf8, unconditionally
3250                           * recode it to be so.  This is because \N{} implies
3251                           * Unicode semantics, and scalars have to be in utf8
3252                           * to guarantee those semantics */
3253                         if (! has_utf8) {
3254                             SvCUR_set(sv, d - SvPVX_const(sv));
3255                             SvPOK_on(sv);
3256                             *d = '\0';
3257                             /* See Note on sizing above.  */
3258                             sv_utf8_upgrade_flags_grow(
3259                                         sv,
3260                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3261                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3262                             d = SvPVX(sv) + SvCUR(sv);
3263                             has_utf8 = TRUE;
3264                         }
3265
3266                         /* Add the string to the output */
3267                         if (UNI_IS_INVARIANT(uv)) {
3268                             *d++ = (char) uv;
3269                         }
3270                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3271                     }
3272                 }
3273                 else { /* Here is \N{NAME} but not \N{U+...}. */
3274
3275                     SV *res;            /* result from charnames */
3276                     const char *str;    /* the string in 'res' */
3277                     STRLEN len;         /* its length */
3278
3279                     /* Get the value for NAME */
3280                     res = newSVpvn(s, e - s);
3281                     res = new_constant( NULL, 0, "charnames",
3282                                         /* includes all of: \N{...} */
3283                                         res, NULL, s - 3, e - s + 4 );
3284
3285                     /* Most likely res will be in utf8 already since the
3286                      * standard charnames uses pack U, but a custom translator
3287                      * can leave it otherwise, so make sure.  XXX This can be
3288                      * revisited to not have charnames use utf8 for characters
3289                      * that don't need it when regexes don't have to be in utf8
3290                      * for Unicode semantics.  If doing so, remember EBCDIC */
3291                     sv_utf8_upgrade(res);
3292                     str = SvPV_const(res, len);
3293
3294                     /* Don't accept malformed input */
3295                     if (! is_utf8_string((U8 *) str, len)) {
3296                         yyerror("Malformed UTF-8 returned by \\N");
3297                     }
3298                     else if (PL_lex_inpat) {
3299
3300                         if (! len) { /* The name resolved to an empty string */
3301                             Copy("\\N{}", d, 4, char);
3302                             d += 4;
3303                         }
3304                         else {
3305                             /* In order to not lose information for the regex
3306                             * compiler, pass the result in the specially made
3307                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3308                             * the code points in hex of each character
3309                             * returned by charnames */
3310
3311                             const char *str_end = str + len;
3312                             STRLEN char_length;     /* cur char's byte length */
3313                             STRLEN output_length;   /* and the number of bytes
3314                                                        after this is translated
3315                                                        into hex digits */
3316                             const STRLEN off = d - SvPVX_const(sv);
3317
3318                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3319                              * max('U+', '.'); and 1 for NUL */
3320                             char hex_string[2 * UTF8_MAXBYTES + 5];
3321
3322                             /* Get the first character of the result. */
3323                             U32 uv = utf8n_to_uvuni((U8 *) str,
3324                                                     len,
3325                                                     &char_length,
3326                                                     UTF8_ALLOW_ANYUV);
3327
3328                             /* The call to is_utf8_string() above hopefully
3329                              * guarantees that there won't be an error.  But
3330                              * it's easy here to make sure.  The function just
3331                              * above warns and returns 0 if invalid utf8, but
3332                              * it can also return 0 if the input is validly a
3333                              * NUL. Disambiguate */
3334                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3335                                 uv = UNICODE_REPLACEMENT;
3336                             }
3337
3338                             /* Convert first code point to hex, including the
3339                              * boiler plate before it.  For all these, we
3340                              * convert to native format so that downstream code
3341                              * can continue to assume the input is native */
3342                             output_length =
3343                                 my_snprintf(hex_string, sizeof(hex_string),
3344                                             "\\N{U+%X",
3345                                             (unsigned int) UNI_TO_NATIVE(uv));
3346
3347                             /* Make sure there is enough space to hold it */
3348                             d = off + SvGROW(sv, off
3349                                                  + output_length
3350                                                  + (STRLEN)(send - e)
3351                                                  + 2);  /* '}' + NUL */
3352                             /* And output it */
3353                             Copy(hex_string, d, output_length, char);
3354                             d += output_length;
3355
3356                             /* For each subsequent character, append dot and
3357                              * its ordinal in hex */
3358                             while ((str += char_length) < str_end) {
3359                                 const STRLEN off = d - SvPVX_const(sv);
3360                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3361                                                         str_end - str,
3362                                                         &char_length,
3363                                                         UTF8_ALLOW_ANYUV);
3364                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3365                                     uv = UNICODE_REPLACEMENT;
3366                                 }
3367
3368                                 output_length =
3369                                     my_snprintf(hex_string, sizeof(hex_string),
3370                                             ".%X",
3371                                             (unsigned int) UNI_TO_NATIVE(uv));
3372
3373                                 d = off + SvGROW(sv, off
3374                                                      + output_length
3375                                                      + (STRLEN)(send - e)
3376                                                      + 2);      /* '}' +  NUL */
3377                                 Copy(hex_string, d, output_length, char);
3378                                 d += output_length;
3379                             }
3380
3381                             *d++ = '}'; /* Done.  Add the trailing brace */
3382                         }
3383                     }
3384                     else { /* Here, not in a pattern.  Convert the name to a
3385                             * string. */
3386
3387                          /* If destination is not in utf8, unconditionally
3388                           * recode it to be so.  This is because \N{} implies
3389                           * Unicode semantics, and scalars have to be in utf8
3390                           * to guarantee those semantics */
3391                         if (! has_utf8) {
3392                             SvCUR_set(sv, d - SvPVX_const(sv));
3393                             SvPOK_on(sv);
3394                             *d = '\0';
3395                             /* See Note on sizing above.  */
3396                             sv_utf8_upgrade_flags_grow(sv,
3397                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3398                                                 len + (STRLEN)(send - s) + 1);
3399                             d = SvPVX(sv) + SvCUR(sv);
3400                             has_utf8 = TRUE;
3401                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3402
3403                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3404                              * set correctly here). */
3405                             const STRLEN off = d - SvPVX_const(sv);
3406                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3407                         }
3408                         Copy(str, d, len, char);
3409                         d += len;
3410                     }
3411                     SvREFCNT_dec(res);
3412
3413                     /* Deprecate non-approved name syntax */
3414                     if (ckWARN_d(WARN_DEPRECATED)) {
3415                         bool problematic = FALSE;
3416                         char* i = s;
3417
3418                         /* For non-ut8 input, look to see that the first
3419                          * character is an alpha, then loop through the rest
3420                          * checking that each is a continuation */
3421                         if (! this_utf8) {
3422                             if (! isALPHAU(*i)) problematic = TRUE;
3423                             else for (i = s + 1; i < e; i++) {
3424                                 if (isCHARNAME_CONT(*i)) continue;
3425                                 problematic = TRUE;
3426                                 break;
3427                             }
3428                         }
3429                         else {
3430                             /* Similarly for utf8.  For invariants can check
3431                              * directly.  We accept anything above the latin1
3432                              * range because it is immaterial to Perl if it is
3433                              * correct or not, and is expensive to check.  But
3434                              * it is fairly easy in the latin1 range to convert
3435                              * the variants into a single character and check
3436                              * those */
3437                             if (UTF8_IS_INVARIANT(*i)) {
3438                                 if (! isALPHAU(*i)) problematic = TRUE;
3439                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3440                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3441                                                                             *(i+1)))))
3442                                 {
3443                                     problematic = TRUE;
3444                                 }
3445                             }
3446                             if (! problematic) for (i = s + UTF8SKIP(s);
3447                                                     i < e;
3448                                                     i+= UTF8SKIP(i))
3449                             {
3450                                 if (UTF8_IS_INVARIANT(*i)) {
3451                                     if (isCHARNAME_CONT(*i)) continue;
3452                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3453                                     continue;
3454                                 } else if (isCHARNAME_CONT(
3455                                             UNI_TO_NATIVE(
3456                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3457                                 {
3458                                     continue;
3459                                 }
3460                                 problematic = TRUE;
3461                                 break;
3462                             }
3463                         }
3464                         if (problematic) {
3465                             /* The e-i passed to the final %.*s makes sure that
3466                              * should the trailing NUL be missing that this
3467                              * print won't run off the end of the string */
3468                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3469                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3470                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3471                         }
3472                     }
3473                 } /* End \N{NAME} */
3474 #ifdef EBCDIC
3475                 if (!dorange)
3476                     native_range = FALSE; /* \N{} is defined to be Unicode */
3477 #endif
3478                 s = e + 1;  /* Point to just after the '}' */
3479                 continue;
3480
3481             /* \c is a control character */
3482             case 'c':
3483                 s++;
3484                 if (s < send) {
3485                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3486                 }
3487                 else {
3488                     yyerror("Missing control char name in \\c");
3489                 }
3490                 continue;
3491
3492             /* printf-style backslashes, formfeeds, newlines, etc */
3493             case 'b':
3494                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3495                 break;
3496             case 'n':
3497                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3498                 break;
3499             case 'r':
3500                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3501                 break;
3502             case 'f':
3503                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3504                 break;
3505             case 't':
3506                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3507                 break;
3508             case 'e':
3509                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3510                 break;
3511             case 'a':
3512                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3513                 break;
3514             } /* end switch */
3515
3516             s++;
3517             continue;
3518         } /* end if (backslash) */
3519 #ifdef EBCDIC
3520         else
3521             literal_endpoint++;
3522 #endif
3523
3524     default_action:
3525         /* If we started with encoded form, or already know we want it,
3526            then encode the next character */
3527         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3528             STRLEN len  = 1;
3529
3530
3531             /* One might think that it is wasted effort in the case of the
3532              * source being utf8 (this_utf8 == TRUE) to take the next character
3533              * in the source, convert it to an unsigned value, and then convert
3534              * it back again.  But the source has not been validated here.  The
3535              * routine that does the conversion checks for errors like
3536              * malformed utf8 */
3537
3538             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3539             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3540             if (!has_utf8) {
3541                 SvCUR_set(sv, d - SvPVX_const(sv));
3542                 SvPOK_on(sv);
3543                 *d = '\0';
3544                 /* See Note on sizing above.  */
3545                 sv_utf8_upgrade_flags_grow(sv,
3546                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3547                                         need + (STRLEN)(send - s) + 1);
3548                 d = SvPVX(sv) + SvCUR(sv);
3549                 has_utf8 = TRUE;
3550             } else if (need > len) {
3551                 /* encoded value larger than old, may need extra space (NOTE:
3552                  * SvCUR() is not set correctly here).   See Note on sizing
3553                  * above.  */
3554                 const STRLEN off = d - SvPVX_const(sv);
3555                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3556             }
3557             s += len;
3558
3559             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3560 #ifdef EBCDIC
3561             if (uv > 255 && !dorange)
3562                 native_range = FALSE;
3563 #endif
3564         }
3565         else {
3566             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3567         }
3568     } /* while loop to process each character */
3569
3570     /* terminate the string and set up the sv */
3571     *d = '\0';
3572     SvCUR_set(sv, d - SvPVX_const(sv));
3573     if (SvCUR(sv) >= SvLEN(sv))
3574         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3575                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3576
3577     SvPOK_on(sv);
3578     if (PL_encoding && !has_utf8) {
3579         sv_recode_to_utf8(sv, PL_encoding);
3580         if (SvUTF8(sv))
3581             has_utf8 = TRUE;
3582     }
3583     if (has_utf8) {
3584         SvUTF8_on(sv);
3585         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3586             PL_sublex_info.sub_op->op_private |=
3587                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3588         }
3589     }
3590
3591     /* shrink the sv if we allocated more than we used */
3592     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3593         SvPV_shrink_to_cur(sv);
3594     }
3595
3596     /* return the substring (via pl_yylval) only if we parsed anything */
3597     if (s > PL_bufptr) {
3598         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3599             const char *const key = PL_lex_inpat ? "qr" : "q";
3600             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3601             const char *type;
3602             STRLEN typelen;
3603
3604             if (PL_lex_inwhat == OP_TRANS) {
3605                 type = "tr";
3606                 typelen = 2;
3607             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3608                 type = "s";
3609                 typelen = 1;
3610             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3611                 type = "q";
3612                 typelen = 1;
3613             } else  {
3614                 type = "qq";
3615                 typelen = 2;
3616             }
3617
3618             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3619                                 type, typelen);
3620         }
3621         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3622     } else
3623         SvREFCNT_dec(sv);
3624     return s;
3625 }
3626
3627 /* S_intuit_more
3628  * Returns TRUE if there's more to the expression (e.g., a subscript),
3629  * FALSE otherwise.
3630  *
3631  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3632  *
3633  * ->[ and ->{ return TRUE
3634  * { and [ outside a pattern are always subscripts, so return TRUE
3635  * if we're outside a pattern and it's not { or [, then return FALSE
3636  * if we're in a pattern and the first char is a {
3637  *   {4,5} (any digits around the comma) returns FALSE
3638  * if we're in a pattern and the first char is a [
3639  *   [] returns FALSE
3640  *   [SOMETHING] has a funky algorithm to decide whether it's a
3641  *      character class or not.  It has to deal with things like
3642  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3643  * anything else returns TRUE
3644  */
3645
3646 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3647
3648 STATIC int
3649 S_intuit_more(pTHX_ register char *s)
3650 {
3651     dVAR;
3652
3653     PERL_ARGS_ASSERT_INTUIT_MORE;
3654
3655     if (PL_lex_brackets)
3656         return TRUE;
3657     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3658         return TRUE;
3659     if (*s != '{' && *s != '[')
3660         return FALSE;
3661     if (!PL_lex_inpat)
3662         return TRUE;
3663
3664     /* In a pattern, so maybe we have {n,m}. */
3665     if (*s == '{') {
3666         if (regcurly(s)) {
3667             return FALSE;
3668         }
3669         return TRUE;
3670     }
3671
3672     /* On the other hand, maybe we have a character class */
3673
3674     s++;
3675     if (*s == ']' || *s == '^')
3676         return FALSE;
3677     else {
3678         /* this is terrifying, and it works */
3679         int weight = 2;         /* let's weigh the evidence */
3680         char seen[256];
3681         unsigned char un_char = 255, last_un_char;
3682         const char * const send = strchr(s,']');
3683         char tmpbuf[sizeof PL_tokenbuf * 4];
3684
3685         if (!send)              /* has to be an expression */
3686             return TRUE;
3687
3688         Zero(seen,256,char);
3689         if (*s == '$')
3690             weight -= 3;
3691         else if (isDIGIT(*s)) {
3692             if (s[1] != ']') {
3693                 if (isDIGIT(s[1]) && s[2] == ']')
3694                     weight -= 10;
3695             }
3696             else
3697                 weight -= 100;
3698         }
3699         for (; s < send; s++) {
3700             last_un_char = un_char;
3701             un_char = (unsigned char)*s;
3702             switch (*s) {
3703             case '@':
3704             case '&':
3705             case '$':
3706                 weight -= seen[un_char] * 10;
3707                 if (isALNUM_lazy_if(s+1,UTF)) {
3708                     int len;
3709                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3710                     len = (int)strlen(tmpbuf);
3711                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3712                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3713                         weight -= 100;
3714                     else
3715                         weight -= 10;
3716                 }
3717                 else if (*s == '$' && s[1] &&
3718                   strchr("[#!%*<>()-=",s[1])) {
3719                     if (/*{*/ strchr("])} =",s[2]))
3720                         weight -= 10;
3721                     else
3722                         weight -= 1;
3723                 }
3724                 break;
3725             case '\\':
3726                 un_char = 254;
3727                 if (s[1]) {
3728                     if (strchr("wds]",s[1]))
3729                         weight += 100;
3730                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3731                         weight += 1;
3732                     else if (strchr("rnftbxcav",s[1]))
3733                         weight += 40;
3734                     else if (isDIGIT(s[1])) {
3735                         weight += 40;
3736                         while (s[1] && isDIGIT(s[1]))
3737                             s++;
3738                     }
3739                 }
3740                 else
3741                     weight += 100;
3742                 break;
3743             case '-':
3744                 if (s[1] == '\\')
3745                     weight += 50;
3746                 if (strchr("aA01! ",last_un_char))
3747                     weight += 30;
3748                 if (strchr("zZ79~",s[1]))
3749                     weight += 30;
3750                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3751                     weight -= 5;        /* cope with negative subscript */
3752                 break;
3753             default:
3754                 if (!isALNUM(last_un_char)
3755                     && !(last_un_char == '$' || last_un_char == '@'
3756                          || last_un_char == '&')
3757                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3758                     char *d = tmpbuf;
3759                     while (isALPHA(*s))
3760                         *d++ = *s++;
3761                     *d = '\0';
3762                     if (keyword(tmpbuf, d - tmpbuf, 0))
3763                         weight -= 150;
3764                 }
3765                 if (un_char == last_un_char + 1)
3766                     weight += 5;
3767                 weight -= seen[un_char];
3768                 break;
3769             }
3770             seen[un_char]++;
3771         }
3772         if (weight >= 0)        /* probably a character class */
3773             return FALSE;
3774     }
3775
3776     return TRUE;
3777 }
3778
3779 /*
3780  * S_intuit_method
3781  *
3782  * Does all the checking to disambiguate
3783  *   foo bar
3784  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3785  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3786  *
3787  * First argument is the stuff after the first token, e.g. "bar".
3788  *
3789  * Not a method if foo is a filehandle.
3790  * Not a method if foo is a subroutine prototyped to take a filehandle.
3791  * Not a method if it's really "Foo $bar"
3792  * Method if it's "foo $bar"
3793  * Not a method if it's really "print foo $bar"
3794  * Method if it's really "foo package::" (interpreted as package->foo)
3795  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3796  * Not a method if bar is a filehandle or package, but is quoted with
3797  *   =>
3798  */
3799
3800 STATIC int
3801 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3802 {
3803     dVAR;
3804     char *s = start + (*start == '$');
3805     char tmpbuf[sizeof PL_tokenbuf];
3806     STRLEN len;
3807     GV* indirgv;
3808 #ifdef PERL_MAD
3809     int soff;
3810 #endif
3811
3812     PERL_ARGS_ASSERT_INTUIT_METHOD;
3813
3814     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3815             return 0;
3816     if (cv && SvPOK(cv)) {
3817                 const char *proto = CvPROTO(cv);
3818                 if (proto) {
3819                     if (*proto == ';')
3820                         proto++;
3821                     if (*proto == '*')
3822                         return 0;
3823                 }
3824     }
3825     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3826     /* start is the beginning of the possible filehandle/object,
3827      * and s is the end of it
3828      * tmpbuf is a copy of it
3829      */
3830
3831     if (*start == '$') {
3832         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3833                 isUPPER(*PL_tokenbuf))
3834             return 0;
3835 #ifdef PERL_MAD
3836         len = start - SvPVX(PL_linestr);
3837 #endif
3838         s = PEEKSPACE(s);
3839 #ifdef PERL_MAD
3840         start = SvPVX(PL_linestr) + len;
3841 #endif
3842         PL_bufptr = start;
3843         PL_expect = XREF;
3844         return *s == '(' ? FUNCMETH : METHOD;
3845     }
3846     if (!keyword(tmpbuf, len, 0)) {
3847         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3848             len -= 2;
3849             tmpbuf[len] = '\0';
3850 #ifdef PERL_MAD
3851             soff = s - SvPVX(PL_linestr);
3852 #endif
3853             goto bare_package;
3854         }
3855         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3856         if (indirgv && GvCVu(indirgv))
3857             return 0;
3858         /* filehandle or package name makes it a method */
3859         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3860 #ifdef PERL_MAD
3861             soff = s - SvPVX(PL_linestr);
3862 #endif
3863             s = PEEKSPACE(s);
3864             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3865                 return 0;       /* no assumptions -- "=>" quotes bareword */
3866       bare_package:
3867             start_force(PL_curforce);
3868             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3869                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3870             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3871             if (PL_madskills)
3872                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3873                                                             ( UTF ? SVf_UTF8 : 0 )));
3874             PL_expect = XTERM;
3875             force_next(WORD);
3876             PL_bufptr = s;
3877 #ifdef PERL_MAD
3878             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3879 #endif
3880             return *s == '(' ? FUNCMETH : METHOD;
3881         }
3882     }
3883     return 0;
3884 }
3885
3886 /* Encoded script support. filter_add() effectively inserts a
3887  * 'pre-processing' function into the current source input stream.
3888  * Note that the filter function only applies to the current source file
3889  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3890  *
3891  * The datasv parameter (which may be NULL) can be used to pass
3892  * private data to this instance of the filter. The filter function
3893  * can recover the SV using the FILTER_DATA macro and use it to
3894  * store private buffers and state information.
3895  *
3896  * The supplied datasv parameter is upgraded to a PVIO type
3897  * and the IoDIRP/IoANY field is used to store the function pointer,
3898  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3899  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3900  * private use must be set using malloc'd pointers.
3901  */
3902
3903 SV *
3904 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3905 {
3906     dVAR;
3907     if (!funcp)
3908         return NULL;
3909
3910     if (!PL_parser)
3911         return NULL;
3912
3913     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3914         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3915
3916     if (!PL_rsfp_filters)
3917         PL_rsfp_filters = newAV();
3918     if (!datasv)
3919         datasv = newSV(0);
3920     SvUPGRADE(datasv, SVt_PVIO);
3921     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3922     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3923     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3924                           FPTR2DPTR(void *, IoANY(datasv)),
3925                           SvPV_nolen(datasv)));
3926     av_unshift(PL_rsfp_filters, 1);
3927     av_store(PL_rsfp_filters, 0, datasv) ;
3928     if (
3929         !PL_parser->filtered
3930      && PL_parser->lex_flags & LEX_EVALBYTES
3931      && PL_bufptr < PL_bufend
3932     ) {
3933         const char *s = PL_bufptr;
3934         while (s < PL_bufend) {
3935             if (*s == '\n') {
3936                 SV *linestr = PL_parser->linestr;
3937                 char *buf = SvPVX(linestr);
3938                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3939                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3940                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3941                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3942                 STRLEN const last_uni_pos =
3943                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3944                 STRLEN const last_lop_pos =
3945                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3946                 av_push(PL_rsfp_filters, linestr);
3947                 PL_parser->linestr =
3948                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3949                 buf = SvPVX(PL_parser->linestr);
3950                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3951                 PL_parser->bufptr = buf + bufptr_pos;
3952                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3953                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3954                 PL_parser->linestart = buf + linestart_pos;
3955                 if (PL_parser->last_uni)
3956                     PL_parser->last_uni = buf + last_uni_pos;
3957                 if (PL_parser->last_lop)
3958                     PL_parser->last_lop = buf + last_lop_pos;
3959                 SvLEN(linestr) = SvCUR(linestr);
3960                 SvCUR(linestr) = s-SvPVX(linestr);
3961                 PL_parser->filtered = 1;
3962                 break;
3963             }
3964             s++;
3965         }
3966     }
3967     return(datasv);
3968 }
3969
3970
3971 /* Delete most recently added instance of this filter function. */
3972 void
3973 Perl_filter_del(pTHX_ filter_t funcp)
3974 {
3975     dVAR;
3976     SV *datasv;
3977
3978     PERL_ARGS_ASSERT_FILTER_DEL;
3979
3980 #ifdef DEBUGGING
3981     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3982                           FPTR2DPTR(void*, funcp)));
3983 #endif
3984     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3985         return;
3986     /* if filter is on top of stack (usual case) just pop it off */
3987     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3988     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3989         sv_free(av_pop(PL_rsfp_filters));
3990
3991         return;
3992     }
3993     /* we need to search for the correct entry and clear it     */
3994     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3995 }
3996
3997
3998 /* Invoke the idxth filter function for the current rsfp.        */
3999 /* maxlen 0 = read one text line */
4000 I32
4001 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4002 {
4003     dVAR;
4004     filter_t funcp;
4005     SV *datasv = NULL;
4006     /* This API is bad. It should have been using unsigned int for maxlen.
4007        Not sure if we want to change the API, but if not we should sanity
4008        check the value here.  */
4009     unsigned int correct_length
4010         = maxlen < 0 ?
4011 #ifdef PERL_MICRO
4012         0x7FFFFFFF
4013 #else
4014         INT_MAX
4015 #endif
4016         : maxlen;
4017
4018     PERL_ARGS_ASSERT_FILTER_READ;
4019
4020     if (!PL_parser || !PL_rsfp_filters)
4021         return -1;
4022     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4023         /* Provide a default input filter to make life easy.    */
4024         /* Note that we append to the line. This is handy.      */
4025         DEBUG_P(PerlIO_printf(Perl_debug_log,
4026                               "filter_read %d: from rsfp\n", idx));
4027         if (correct_length) {
4028             /* Want a block */
4029             int len ;
4030             const int old_len = SvCUR(buf_sv);
4031
4032             /* ensure buf_sv is large enough */
4033             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4034             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4035                                    correct_length)) <= 0) {
4036                 if (PerlIO_error(PL_rsfp))
4037                     return -1;          /* error */
4038                 else
4039                     return 0 ;          /* end of file */
4040             }
4041             SvCUR_set(buf_sv, old_len + len) ;
4042             SvPVX(buf_sv)[old_len + len] = '\0';
4043         } else {
4044             /* Want a line */
4045             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4046                 if (PerlIO_error(PL_rsfp))
4047                     return -1;          /* error */
4048                 else
4049                     return 0 ;          /* end of file */
4050             }
4051         }
4052         return SvCUR(buf_sv);
4053     }
4054     /* Skip this filter slot if filter has been deleted */
4055     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4056         DEBUG_P(PerlIO_printf(Perl_debug_log,
4057                               "filter_read %d: skipped (filter deleted)\n",
4058                               idx));
4059         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4060     }
4061     if (SvTYPE(datasv) != SVt_PVIO) {
4062         if (correct_length) {
4063             /* Want a block */
4064             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4065             if (!remainder) return 0; /* eof */
4066             if (correct_length > remainder) correct_length = remainder;
4067             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4068             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4069         } else {
4070             /* Want a line */
4071             const char *s = SvEND(datasv);
4072             const char *send = SvPVX(datasv) + SvLEN(datasv);
4073             while (s < send) {
4074                 if (*s == '\n') {
4075                     s++;
4076                     break;
4077                 }
4078                 s++;
4079             }
4080             if (s == send) return 0; /* eof */
4081             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4082             SvCUR_set(datasv, s-SvPVX(datasv));
4083         }
4084         return SvCUR(buf_sv);
4085     }
4086     /* Get function pointer hidden within datasv        */
4087     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4088     DEBUG_P(PerlIO_printf(Perl_debug_log,
4089                           "filter_read %d: via function %p (%s)\n",
4090                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4091     /* Call function. The function is expected to       */
4092     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4093     /* Return: <0:error, =0:eof, >0:not eof             */
4094     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4095 }
4096
4097 STATIC char *
4098 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4099 {
4100     dVAR;
4101
4102     PERL_ARGS_ASSERT_FILTER_GETS;
4103
4104 #ifdef PERL_CR_FILTER
4105     if (!PL_rsfp_filters) {
4106         filter_add(S_cr_textfilter,NULL);
4107     }
4108 #endif
4109     if (PL_rsfp_filters) {
4110         if (!append)
4111             SvCUR_set(sv, 0);   /* start with empty line        */
4112         if (FILTER_READ(0, sv, 0) > 0)
4113             return ( SvPVX(sv) ) ;
4114         else
4115             return NULL ;
4116     }
4117     else
4118         return (sv_gets(sv, PL_rsfp, append));
4119 }
4120
4121 STATIC HV *
4122 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4123 {
4124     dVAR;
4125     GV *gv;
4126
4127     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4128
4129     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4130         return PL_curstash;
4131
4132     if (len > 2 &&
4133         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4134         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4135     {
4136         return GvHV(gv);                        /* Foo:: */
4137     }
4138
4139     /* use constant CLASS => 'MyClass' */
4140     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4141     if (gv && GvCV(gv)) {
4142         SV * const sv = cv_const_sv(GvCV(gv));
4143         if (sv)
4144             pkgname = SvPV_const(sv, len);
4145     }
4146
4147     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4148 }
4149
4150 /*
4151  * S_readpipe_override
4152  * Check whether readpipe() is overridden, and generates the appropriate
4153  * optree, provided sublex_start() is called afterwards.
4154  */
4155 STATIC void
4156 S_readpipe_override(pTHX)
4157 {
4158     GV **gvp;
4159     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4160     pl_yylval.ival = OP_BACKTICK;
4161     if ((gv_readpipe
4162                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4163             ||
4164             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4165              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4166              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4167     {
4168         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4169             op_append_elem(OP_LIST,
4170                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4171                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4172     }
4173 }
4174
4175 #ifdef PERL_MAD
4176  /*
4177  * Perl_madlex
4178  * The intent of this yylex wrapper is to minimize the changes to the
4179  * tokener when we aren't interested in collecting madprops.  It remains
4180  * to be seen how successful this strategy will be...
4181  */
4182
4183 int
4184 Perl_madlex(pTHX)
4185 {
4186     int optype;
4187     char *s = PL_bufptr;
4188
4189     /* make sure PL_thiswhite is initialized */
4190     PL_thiswhite = 0;
4191     PL_thismad = 0;
4192
4193     /* previous token ate up our whitespace? */
4194     if (!PL_lasttoke && PL_nextwhite) {
4195         PL_thiswhite = PL_nextwhite;
4196         PL_nextwhite = 0;
4197     }
4198
4199     /* isolate the token, and figure out where it is without whitespace */
4200     PL_realtokenstart = -1;
4201     PL_thistoken = 0;
4202     optype = yylex();
4203     s = PL_bufptr;
4204     assert(PL_curforce < 0);
4205
4206     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4207         if (!PL_thistoken) {
4208             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4209                 PL_thistoken = newSVpvs("");
4210             else {
4211                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4212                 PL_thistoken = newSVpvn(tstart, s - tstart);
4213             }
4214         }
4215         if (PL_thismad) /* install head */
4216             CURMAD('X', PL_thistoken);
4217     }
4218
4219     /* last whitespace of a sublex? */
4220     if (optype == ')' && PL_endwhite) {
4221         CURMAD('X', PL_endwhite);
4222     }
4223
4224     if (!PL_thismad) {
4225
4226         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4227         if (!PL_thiswhite && !PL_endwhite && !optype) {
4228             sv_free(PL_thistoken);
4229             PL_thistoken = 0;
4230             return 0;
4231         }
4232
4233         /* put off final whitespace till peg */
4234         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4235             PL_nextwhite = PL_thiswhite;
4236             PL_thiswhite = 0;
4237         }
4238         else if (PL_thisopen) {
4239             CURMAD('q', PL_thisopen);
4240             if (PL_thistoken)
4241                 sv_free(PL_thistoken);
4242             PL_thistoken = 0;
4243         }
4244         else {
4245             /* Store actual token text as madprop X */
4246             CURMAD('X', PL_thistoken);
4247         }
4248
4249         if (PL_thiswhite) {
4250             /* add preceding whitespace as madprop _ */
4251             CURMAD('_', PL_thiswhite);
4252         }
4253
4254         if (PL_thisstuff) {
4255             /* add quoted material as madprop = */
4256             CURMAD('=', PL_thisstuff);
4257         }
4258
4259         if (PL_thisclose) {
4260             /* add terminating quote as madprop Q */
4261             CURMAD('Q', PL_thisclose);
4262         }
4263     }
4264
4265     /* special processing based on optype */
4266
4267     switch (optype) {
4268
4269     /* opval doesn't need a TOKEN since it can already store mp */
4270     case WORD:
4271     case METHOD:
4272     case FUNCMETH:
4273     case THING:
4274     case PMFUNC:
4275     case PRIVATEREF:
4276     case FUNC0SUB:
4277     case UNIOPSUB:
4278     case LSTOPSUB:
4279     case LABEL:
4280         if (pl_yylval.opval)
4281             append_madprops(PL_thismad, pl_yylval.opval, 0);
4282         PL_thismad = 0;
4283         return optype;
4284
4285     /* fake EOF */
4286     case 0:
4287         optype = PEG;
4288         if (PL_endwhite) {
4289             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4290             PL_endwhite = 0;
4291         }
4292         break;
4293
4294     case ']':
4295     case '}':
4296         if (PL_faketokens)
4297             break;
4298         /* remember any fake bracket that lexer is about to discard */
4299         if (PL_lex_brackets == 1 &&
4300             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4301         {
4302             s = PL_bufptr;
4303             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4304                 s++;
4305             if (*s == '}') {
4306                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4307                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4308                 PL_thiswhite = 0;
4309                 PL_bufptr = s - 1;
4310                 break;  /* don't bother looking for trailing comment */
4311             }
4312             else
4313                 s = PL_bufptr;
4314         }
4315         if (optype == ']')
4316             break;
4317         /* FALLTHROUGH */
4318
4319     /* attach a trailing comment to its statement instead of next token */
4320     case ';':
4321         if (PL_faketokens)
4322             break;
4323         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4324             s = PL_bufptr;
4325             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4326                 s++;
4327             if (*s == '\n' || *s == '#') {
4328                 while (s < PL_bufend && *s != '\n')
4329                     s++;
4330                 if (s < PL_bufend)
4331                     s++;
4332                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4333                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4334                 PL_thiswhite = 0;
4335                 PL_bufptr = s;
4336             }
4337         }
4338         break;
4339
4340     /* ival */
4341     default:
4342         break;
4343
4344     }
4345
4346     /* Create new token struct.  Note: opvals return early above. */
4347     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4348     PL_thismad = 0;
4349     return optype;
4350 }
4351 #endif
4352
4353 STATIC char *
4354 S_tokenize_use(pTHX_ int is_use, char *s) {
4355     dVAR;
4356
4357     PERL_ARGS_ASSERT_TOKENIZE_USE;
4358
4359     if (PL_expect != XSTATE)
4360         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4361                     is_use ? "use" : "no"));
4362     PL_expect = XTERM;
4363     s = SKIPSPACE1(s);
4364     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4365         s = force_version(s, TRUE);
4366         if (*s == ';' || *s == '}'
4367                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4368             start_force(PL_curforce);
4369             NEXTVAL_NEXTTOKE.opval = NULL;
4370             force_next(WORD);
4371         }
4372         else if (*s == 'v') {
4373             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4374             s = force_version(s, FALSE);
4375         }
4376     }
4377     else {
4378         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4379         s = force_version(s, FALSE);
4380     }
4381     pl_yylval.ival = is_use;
4382     return s;
4383 }
4384 #ifdef DEBUGGING
4385     static const char* const exp_name[] =
4386         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4387           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4388         };
4389 #endif
4390
4391 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4392 STATIC bool
4393 S_word_takes_any_delimeter(char *p, STRLEN len)
4394 {
4395     return (len == 1 && strchr("msyq", p[0])) ||
4396            (len == 2 && (
4397             (p[0] == 't' && p[1] == 'r') ||
4398             (p[0] == 'q' && strchr("qwxr", p[1]))));
4399 }
4400
4401 /*
4402   yylex
4403
4404   Works out what to call the token just pulled out of the input
4405   stream.  The yacc parser takes care of taking the ops we return and
4406   stitching them into a tree.
4407
4408   Returns:
4409     PRIVATEREF
4410
4411   Structure:
4412       if read an identifier
4413           if we're in a my declaration
4414               croak if they tried to say my($foo::bar)
4415               build the ops for a my() declaration
4416           if it's an access to a my() variable
4417               are we in a sort block?
4418                   croak if my($a); $a <=> $b
4419               build ops for access to a my() variable
4420           if in a dq string, and they've said @foo and we can't find @foo
4421               croak
4422           build ops for a bareword
4423       if we already built the token before, use it.
4424 */
4425
4426
4427 #ifdef __SC__
4428 #pragma segment Perl_yylex
4429 #endif
4430 int
4431 Perl_yylex(pTHX)
4432 {
4433     dVAR;
4434     char *s = PL_bufptr;
4435     char *d;
4436     STRLEN len;
4437     bool bof = FALSE;
4438     U8 formbrack = 0;
4439     U32 fake_eof = 0;
4440
4441     /* orig_keyword, gvp, and gv are initialized here because
4442      * jump to the label just_a_word_zero can bypass their
4443      * initialization later. */
4444     I32 orig_keyword = 0;
4445     GV *gv = NULL;
4446     GV **gvp = NULL;
4447
4448     DEBUG_T( {
4449         SV* tmp = newSVpvs("");
4450         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4451             (IV)CopLINE(PL_curcop),
4452             lex_state_names[PL_lex_state],
4453             exp_name[PL_expect],
4454             pv_display(tmp, s, strlen(s), 0, 60));
4455         SvREFCNT_dec(tmp);
4456     } );
4457
4458     switch (PL_lex_state) {
4459 #ifdef COMMENTARY
4460     case LEX_NORMAL:            /* Some compilers will produce faster */
4461     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4462         break;
4463 #endif
4464
4465     /* when we've already built the next token, just pull it out of the queue */
4466     case LEX_KNOWNEXT:
4467 #ifdef PERL_MAD
4468         PL_lasttoke--;
4469         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4470         if (PL_madskills) {
4471             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4472             PL_nexttoke[PL_lasttoke].next_mad = 0;
4473             if (PL_thismad && PL_thismad->mad_key == '_') {
4474                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4475                 PL_thismad->mad_val = 0;
4476                 mad_free(PL_thismad);
4477                 PL_thismad = 0;
4478             }
4479         }
4480         if (!PL_lasttoke) {
4481             PL_lex_state = PL_lex_defer;
4482             PL_expect = PL_lex_expect;
4483             PL_lex_defer = LEX_NORMAL;
4484             if (!PL_nexttoke[PL_lasttoke].next_type)
4485                 return yylex();
4486         }
4487 #else
4488         PL_nexttoke--;
4489         pl_yylval = PL_nextval[PL_nexttoke];
4490         if (!PL_nexttoke) {
4491             PL_lex_state = PL_lex_defer;
4492             PL_expect = PL_lex_expect;
4493             PL_lex_defer = LEX_NORMAL;
4494         }
4495 #endif
4496         {
4497             I32 next_type;
4498 #ifdef PERL_MAD
4499             next_type = PL_nexttoke[PL_lasttoke].next_type;
4500 #else
4501             next_type = PL_nexttype[PL_nexttoke];
4502 #endif
4503             if (next_type & (7<<24)) {
4504                 if (next_type & (1<<24)) {
4505                     if (PL_lex_brackets > 100)
4506                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4507                     PL_lex_brackstack[PL_lex_brackets++] =
4508                         (char) ((next_type >> 16) & 0xff);
4509                 }
4510                 if (next_type & (2<<24))
4511                     PL_lex_allbrackets++;
4512                 if (next_type & (4<<24))
4513                     PL_lex_allbrackets--;
4514                 next_type &= 0xffff;
4515             }
4516             if (S_is_opval_token(next_type) && pl_yylval.opval)
4517                 pl_yylval.opval->op_savefree = 0; /* release */
4518             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4519         }
4520
4521     /* interpolated case modifiers like \L \U, including \Q and \E.
4522        when we get here, PL_bufptr is at the \
4523     */
4524     case LEX_INTERPCASEMOD:
4525 #ifdef DEBUGGING
4526         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4527             Perl_croak(aTHX_
4528                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4529                        PL_bufptr, PL_bufend, *PL_bufptr);
4530 #endif
4531         /* handle \E or end of string */
4532         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4533             /* if at a \E */
4534             if (PL_lex_casemods) {
4535                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4536                 PL_lex_casestack[PL_lex_casemods] = '\0';
4537
4538                 if (PL_bufptr != PL_bufend
4539                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4540                         || oldmod == 'F')) {
4541                     PL_bufptr += 2;
4542                     PL_lex_state = LEX_INTERPCONCAT;
4543 #ifdef PERL_MAD
4544                     if (PL_madskills)
4545                         PL_thistoken = newSVpvs("\\E");
4546 #endif
4547                 }
4548                 PL_lex_allbrackets--;
4549                 return REPORT(')');
4550             }
4551             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4552                /* Got an unpaired \E */
4553                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4554                         "Useless use of \\E");
4555             }
4556 #ifdef PERL_MAD
4557             while (PL_bufptr != PL_bufend &&
4558               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4559                 if (!PL_thiswhite)
4560                     PL_thiswhite = newSVpvs("");
4561                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4562                 PL_bufptr += 2;
4563             }
4564 #else
4565             if (PL_bufptr != PL_bufend)
4566                 PL_bufptr += 2;
4567 #endif
4568             PL_lex_state = LEX_INTERPCONCAT;
4569             return yylex();
4570         }
4571         else {
4572             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4573               "### Saw case modifier\n"); });
4574             s = PL_bufptr + 1;
4575             if (s[1] == '\\' && s[2] == 'E') {
4576 #ifdef PERL_MAD
4577                 if (!PL_thiswhite)
4578                     PL_thiswhite = newSVpvs("");
4579                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4580 #endif
4581                 PL_bufptr = s + 3;
4582                 PL_lex_state = LEX_INTERPCONCAT;
4583                 return yylex();
4584             }
4585             else {
4586                 I32 tmp;
4587                 if (!PL_madskills) /* when just compiling don't need correct */
4588                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4589                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4590                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4591                     (strchr(PL_lex_casestack, 'L')
4592                         || strchr(PL_lex_casestack, 'U')
4593                         || strchr(PL_lex_casestack, 'F'))) {
4594                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4595                     PL_lex_allbrackets--;
4596                     return REPORT(')');
4597                 }
4598                 if (PL_lex_casemods > 10)
4599                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4600                 PL_lex_casestack[PL_lex_casemods++] = *s;
4601                 PL_lex_casestack[PL_lex_casemods] = '\0';
4602                 PL_lex_state = LEX_INTERPCONCAT;
4603                 start_force(PL_curforce);
4604                 NEXTVAL_NEXTTOKE.ival = 0;
4605                 force_next((2<<24)|'(');
4606                 start_force(PL_curforce);
4607                 if (*s == 'l')
4608                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4609                 else if (*s == 'u')
4610                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4611                 else if (*s == 'L')
4612                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4613                 else if (*s == 'U')
4614                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4615                 else if (*s == 'Q')
4616                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4617                 else if (*s == 'F')
4618                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4619                 else
4620                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4621                 if (PL_madskills) {
4622                     SV* const tmpsv = newSVpvs("\\ ");
4623                     /* replace the space with the character we want to escape
4624                      */
4625                     SvPVX(tmpsv)[1] = *s;
4626                     curmad('_', tmpsv);
4627                 }
4628                 PL_bufptr = s + 1;
4629             }
4630             force_next(FUNC);
4631             if (PL_lex_starts) {
4632                 s = PL_bufptr;
4633                 PL_lex_starts = 0;
4634 #ifdef PERL_MAD
4635                 if (PL_madskills) {
4636                     if (PL_thistoken)
4637                         sv_free(PL_thistoken);
4638                     PL_thistoken = newSVpvs("");
4639                 }
4640 #endif
4641                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4642                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4643                     OPERATOR(',');
4644                 else
4645                     Aop(OP_CONCAT);
4646             }
4647             else
4648                 return yylex();
4649         }
4650
4651     case LEX_INTERPPUSH:
4652         return REPORT(sublex_push());
4653
4654     case LEX_INTERPSTART:
4655         if (PL_bufptr == PL_bufend)
4656             return REPORT(sublex_done());
4657         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4658               "### Interpolated variable\n"); });
4659         PL_expect = XTERM;
4660         PL_lex_dojoin = (*PL_bufptr == '@');
4661         PL_lex_state = LEX_INTERPNORMAL;
4662         if (PL_lex_dojoin) {
4663             start_force(PL_curforce);
4664             NEXTVAL_NEXTTOKE.ival = 0;
4665             force_next(',');
4666             start_force(PL_curforce);
4667             force_ident("\"", '$');
4668             start_force(PL_curforce);
4669             NEXTVAL_NEXTTOKE.ival = 0;
4670             force_next('$');
4671             start_force(PL_curforce);
4672             NEXTVAL_NEXTTOKE.ival = 0;
4673             force_next((2<<24)|'(');
4674             start_force(PL_curforce);
4675             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4676             force_next(FUNC);
4677         }
4678         /* Convert (?{...}) and friends to 'do {...}' */
4679         if (PL_lex_inpat && *PL_bufptr == '(') {
4680             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4681             PL_bufptr += 2;
4682             if (*PL_bufptr != '{')
4683                 PL_bufptr++;
4684             start_force(PL_curforce);
4685             /* XXX probably need a CURMAD(something) here */
4686             PL_expect = XTERMBLOCK;
4687             force_next(DO);
4688         }
4689
4690         if (PL_lex_starts++) {
4691             s = PL_bufptr;
4692 #ifdef PERL_MAD
4693             if (PL_madskills) {
4694                 if (PL_thistoken)
4695                     sv_free(PL_thistoken);
4696                 PL_thistoken = newSVpvs("");
4697             }
4698 #endif
4699             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4700             if (!PL_lex_casemods && PL_lex_inpat)
4701                 OPERATOR(',');
4702             else
4703                 Aop(OP_CONCAT);
4704         }
4705         return yylex();
4706
4707     case LEX_INTERPENDMAYBE:
4708         if (intuit_more(PL_bufptr)) {
4709             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4710             break;
4711         }
4712         /* FALL THROUGH */
4713
4714     case LEX_INTERPEND:
4715         if (PL_lex_dojoin) {
4716             PL_lex_dojoin = FALSE;
4717             PL_lex_state = LEX_INTERPCONCAT;
4718 #ifdef PERL_MAD
4719             if (PL_madskills) {
4720                 if (PL_thistoken)
4721                     sv_free(PL_thistoken);
4722                 PL_thistoken = newSVpvs("");
4723             }
4724 #endif
4725             PL_lex_allbrackets--;
4726             return REPORT(')');
4727         }
4728         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4729             && SvEVALED(PL_lex_repl))
4730         {
4731             if (PL_bufptr != PL_bufend)
4732                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4733             PL_lex_repl = NULL;
4734         }
4735         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4736            re_eval_str.  If the here-doc body’s length equals the previous
4737            value of re_eval_start, re_eval_start will now be null.  So
4738            check re_eval_str as well. */
4739         if (PL_parser->lex_shared->re_eval_start
4740          || PL_parser->lex_shared->re_eval_str) {
4741             SV *sv;
4742             if (*PL_bufptr != ')')
4743                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4744             PL_bufptr++;
4745             /* having compiled a (?{..}) expression, return the original
4746              * text too, as a const */
4747             if (PL_parser->lex_shared->re_eval_str) {
4748                 sv = PL_parser->lex_shared->re_eval_str;
4749                 PL_parser->lex_shared->re_eval_str = NULL;
4750                 SvCUR_set(sv,
4751                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4752                 SvPV_shrink_to_cur(sv);
4753             }
4754             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4755                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4756             start_force(PL_curforce);
4757             /* XXX probably need a CURMAD(something) here */
4758             NEXTVAL_NEXTTOKE.opval =
4759                     (OP*)newSVOP(OP_CONST, 0,
4760                                  sv);
4761             force_next(THING);
4762             PL_parser->lex_shared->re_eval_start = NULL;
4763             PL_expect = XTERM;
4764             return REPORT(',');
4765         }
4766
4767         /* FALLTHROUGH */
4768     case LEX_INTERPCONCAT:
4769 #ifdef DEBUGGING
4770         if (PL_lex_brackets)
4771             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4772                        (long) PL_lex_brackets);
4773 #endif
4774         if (PL_bufptr == PL_bufend)
4775             return REPORT(sublex_done());
4776
4777         /* m'foo' still needs to be parsed for possible (?{...}) */
4778         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4779             SV *sv = newSVsv(PL_linestr);
4780             sv = tokeq(sv);
4781             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4782             s = PL_bufend;
4783         }
4784         else {
4785             s = scan_const(PL_bufptr);
4786             if (*s == '\\')
4787                 PL_lex_state = LEX_INTERPCASEMOD;
4788             else
4789                 PL_lex_state = LEX_INTERPSTART;
4790         }
4791
4792         if (s != PL_bufptr) {
4793             start_force(PL_curforce);
4794             if (PL_madskills) {
4795                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4796             }
4797             NEXTVAL_NEXTTOKE = pl_yylval;
4798             PL_expect = XTERM;
4799             force_next(THING);
4800             if (PL_lex_starts++) {
4801 #ifdef PERL_MAD
4802                 if (PL_madskills) {
4803                     if (PL_thistoken)
4804                         sv_free(PL_thistoken);
4805                     PL_thistoken = newSVpvs("");
4806                 }
4807 #endif
4808                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4809                 if (!PL_lex_casemods && PL_lex_inpat)
4810                     OPERATOR(',');
4811                 else
4812                     Aop(OP_CONCAT);
4813             }
4814             else {
4815                 PL_bufptr = s;
4816                 return yylex();
4817             }
4818         }
4819
4820         return yylex();
4821     case LEX_FORMLINE:
4822         s = scan_formline(PL_bufptr);
4823         if (!PL_lex_formbrack)
4824         {
4825             formbrack = 1;
4826             goto rightbracket;
4827         }
4828         PL_bufptr = s;
4829         return yylex();
4830     }
4831
4832     s = PL_bufptr;
4833     PL_oldoldbufptr = PL_oldbufptr;
4834     PL_oldbufptr = s;
4835
4836   retry:
4837 #ifdef PERL_MAD
4838     if (PL_thistoken) {
4839         sv_free(PL_thistoken);
4840         PL_thistoken = 0;
4841     }
4842     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4843 #endif
4844     switch (*s) {
4845     default:
4846         if (isIDFIRST_lazy_if(s,UTF))
4847             goto keylookup;
4848         {
4849         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4850         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4851                                                     UTF8SKIP(s),
4852                                                     SVs_TEMP | SVf_UTF8),
4853                                             10, UNI_DISPLAY_ISPRINT))
4854                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4855         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4856         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4857             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4858         } else {
4859             d = PL_linestart;
4860         }
4861         *s = '\0';
4862         sv_setpv(dsv, d);
4863         if (UTF)
4864             SvUTF8_on(dsv);
4865         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4866     }
4867     case 4:
4868     case 26:
4869         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4870     case 0:
4871 #ifdef PERL_MAD
4872         if (PL_madskills)
4873             PL_faketokens = 0;
4874 #endif
4875         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4876             PL_last_uni = 0;
4877             PL_last_lop = 0;
4878             if (PL_lex_brackets &&
4879                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4880                 yyerror((const char *)
4881                         (PL_lex_formbrack
4882                          ? "Format not terminated"
4883                          : "Missing right curly or square bracket"));
4884             }
4885             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4886                         "### Tokener got EOF\n");
4887             } );
4888             TOKEN(0);
4889         }
4890         if (s++ < PL_bufend)
4891             goto retry;                 /* ignore stray nulls */
4892         PL_last_uni = 0;
4893         PL_last_lop = 0;
4894         if (!PL_in_eval && !PL_preambled) {
4895             PL_preambled = TRUE;
4896 #ifdef PERL_MAD
4897             if (PL_madskills)
4898                 PL_faketokens = 1;
4899 #endif
4900             if (PL_perldb) {
4901                 /* Generate a string of Perl code to load the debugger.
4902                  * If PERL5DB is set, it will return the contents of that,
4903                  * otherwise a compile-time require of perl5db.pl.  */
4904
4905                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4906
4907                 if (pdb) {
4908                     sv_setpv(PL_linestr, pdb);
4909                     sv_catpvs(PL_linestr,";");
4910                 } else {
4911                     SETERRNO(0,SS_NORMAL);
4912                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4913                 }
4914             } else
4915                 sv_setpvs(PL_linestr,"");
4916             if (PL_preambleav) {
4917                 SV **svp = AvARRAY(PL_preambleav);
4918                 SV **const end = svp + AvFILLp(PL_preambleav);
4919                 while(svp <= end) {
4920                     sv_catsv(PL_linestr, *svp);
4921                     ++svp;
4922                     sv_catpvs(PL_linestr, ";");
4923                 }
4924                 sv_free(MUTABLE_SV(PL_preambleav));
4925                 PL_preambleav = NULL;
4926             }
4927             if (PL_minus_E)
4928                 sv_catpvs(PL_linestr,
4929                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4930             if (PL_minus_n || PL_minus_p) {
4931                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4932                 if (PL_minus_l)
4933                     sv_catpvs(PL_linestr,"chomp;");
4934                 if (PL_minus_a) {
4935                     if (PL_minus_F) {
4936                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4937                              || *PL_splitstr == '"')
4938                               && strchr(PL_splitstr + 1, *PL_splitstr))
4939                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4940                         else {
4941                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4942                                bytes can be used as quoting characters.  :-) */
4943                             const char *splits = PL_splitstr;
4944                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4945                             do {
4946                                 /* Need to \ \s  */
4947                                 if (*splits == '\\')
4948                                     sv_catpvn(PL_linestr, splits, 1);
4949                                 sv_catpvn(PL_linestr, splits, 1);
4950                             } while (*splits++);
4951                             /* This loop will embed the trailing NUL of
4952                                PL_linestr as the last thing it does before
4953                                terminating.  */
4954                             sv_catpvs(PL_linestr, ");");
4955                         }
4956                     }
4957                     else
4958                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4959                 }
4960             }
4961             sv_catpvs(PL_linestr, "\n");
4962             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4963             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4964             PL_last_lop = PL_last_uni = NULL;
4965             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4966                 update_debugger_info(PL_linestr, NULL, 0);
4967             goto retry;
4968         }
4969         do {
4970             fake_eof = 0;
4971             bof = PL_rsfp ? TRUE : FALSE;
4972             if (0) {
4973               fake_eof:
4974                 fake_eof = LEX_FAKE_EOF;
4975             }
4976             PL_bufptr = PL_bufend;
4977             COPLINE_INC_WITH_HERELINES;
4978             if (!lex_next_chunk(fake_eof)) {
4979                 CopLINE_dec(PL_curcop);
4980                 s = PL_bufptr;
4981                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4982             }
4983             CopLINE_dec(PL_curcop);
4984 #ifdef PERL_MAD
4985             if (!PL_rsfp)
4986                 PL_realtokenstart = -1;
4987 #endif
4988             s = PL_bufptr;
4989             /* If it looks like the start of a BOM or raw UTF-16,
4990              * check if it in fact is. */
4991             if (bof && PL_rsfp &&
4992                      (*s == 0 ||
4993                       *(U8*)s == 0xEF ||
4994                       *(U8*)s >= 0xFE ||
4995                       s[1] == 0)) {
4996                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4997                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4998 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4999                 /* offset may include swallowed CR */
5000                 if (!bof)
5001                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5002 #endif
5003                 if (bof) {
5004                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5005                     s = swallow_bom((U8*)s);
5006                 }
5007             }
5008             if (PL_parser->in_pod) {
5009                 /* Incest with pod. */
5010 #ifdef PERL_MAD
5011                 if (PL_madskills)
5012                     sv_catsv(PL_thiswhite, PL_linestr);
5013 #endif
5014                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5015                     sv_setpvs(PL_linestr, "");
5016                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5017                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5018                     PL_last_lop = PL_last_uni = NULL;
5019                     PL_parser->in_pod = 0;
5020                 }
5021             }
5022             if (PL_rsfp || PL_parser->filtered)
5023                 incline(s);
5024         } while (PL_parser->in_pod);
5025         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5026         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5027         PL_last_lop = PL_last_uni = NULL;
5028         if (CopLINE(PL_curcop) == 1) {
5029             while (s < PL_bufend && isSPACE(*s))
5030                 s++;
5031             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5032                 s++;
5033 #ifdef PERL_MAD
5034             if (PL_madskills)
5035                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5036 #endif
5037             d = NULL;
5038             if (!PL_in_eval) {
5039                 if (*s == '#' && *(s+1) == '!')
5040                     d = s + 2;
5041 #ifdef ALTERNATE_SHEBANG
5042                 else {
5043                     static char const as[] = ALTERNATE_SHEBANG;
5044                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5045                         d = s + (sizeof(as) - 1);
5046                 }
5047 #endif /* ALTERNATE_SHEBANG */
5048             }
5049             if (d) {
5050                 char *ipath;
5051                 char *ipathend;
5052
5053                 while (isSPACE(*d))
5054                     d++;
5055                 ipath = d;
5056                 while (*d && !isSPACE(*d))
5057                     d++;
5058                 ipathend = d;
5059
5060 #ifdef ARG_ZERO_IS_SCRIPT
5061                 if (ipathend > ipath) {
5062                     /*
5063                      * HP-UX (at least) sets argv[0] to the script name,
5064                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5065                      * at least, set argv[0] to the basename of the Perl
5066                      * interpreter. So, having found "#!", we'll set it right.
5067                      */
5068                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5069                                                     SVt_PV)); /* $^X */
5070                     assert(SvPOK(x) || SvGMAGICAL(x));
5071                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5072                         sv_setpvn(x, ipath, ipathend - ipath);
5073                         SvSETMAGIC(x);
5074                     }
5075                     else {
5076                         STRLEN blen;
5077                         STRLEN llen;
5078                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5079                         const char * const lstart = SvPV_const(x,llen);
5080                         if (llen < blen) {
5081                             bstart += blen - llen;
5082                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5083                                 sv_setpvn(x, ipath, ipathend - ipath);
5084                                 SvSETMAGIC(x);
5085                             }
5086                         }
5087                     }
5088                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5089                 }
5090 #endif /* ARG_ZERO_IS_SCRIPT */
5091
5092                 /*
5093                  * Look for options.
5094                  */
5095                 d = instr(s,"perl -");
5096                 if (!d) {
5097                     d = instr(s,"perl");
5098 #if defined(DOSISH)
5099                     /* avoid getting into infinite loops when shebang
5100                      * line contains "Perl" rather than "perl" */
5101                     if (!d) {
5102                         for (d = ipathend-4; d >= ipath; --d) {
5103                             if ((*d == 'p' || *d == 'P')
5104                                 && !ibcmp(d, "perl", 4))
5105                             {
5106                                 break;
5107                             }
5108                         }
5109                         if (d < ipath)
5110                             d = NULL;
5111                     }
5112 #endif
5113                 }
5114 #ifdef ALTERNATE_SHEBANG
5115                 /*
5116                  * If the ALTERNATE_SHEBANG on this system starts with a
5117                  * character that can be part of a Perl expression, then if
5118                  * we see it but not "perl", we're probably looking at the
5119                  * start of Perl code, not a request to hand off to some
5120                  * other interpreter.  Similarly, if "perl" is there, but
5121                  * not in the first 'word' of the line, we assume the line
5122                  * contains the start of the Perl program.
5123                  */
5124                 if (d && *s != '#') {
5125                     const char *c = ipath;
5126                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5127                         c++;
5128                     if (c < d)
5129                         d = NULL;       /* "perl" not in first word; ignore */
5130                     else
5131                         *s = '#';       /* Don't try to parse shebang line */
5132                 }
5133 #endif /* ALTERNATE_SHEBANG */
5134                 if (!d &&
5135                     *s == '#' &&
5136                     ipathend > ipath &&
5137                     !PL_minus_c &&
5138                     !instr(s,"indir") &&
5139                     instr(PL_origargv[0],"perl"))
5140                 {
5141                     dVAR;
5142                     char **newargv;
5143
5144                     *ipathend = '\0';
5145                     s = ipathend + 1;
5146                     while (s < PL_bufend && isSPACE(*s))
5147                         s++;
5148                     if (s < PL_bufend) {
5149                         Newx(newargv,PL_origargc+3,char*);
5150                         newargv[1] = s;
5151                         while (s < PL_bufend && !isSPACE(*s))
5152                             s++;
5153                         *s = '\0';
5154                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5155                     }
5156                     else
5157                         newargv = PL_origargv;
5158                     newargv[0] = ipath;
5159                     PERL_FPU_PRE_EXEC
5160                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5161                     PERL_FPU_POST_EXEC
5162                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5163                 }
5164                 if (d) {
5165                     while (*d && !isSPACE(*d))
5166                         d++;
5167                     while (SPACE_OR_TAB(*d))
5168                         d++;
5169
5170                     if (*d++ == '-') {
5171                         const bool switches_done = PL_doswitches;
5172                         const U32 oldpdb = PL_perldb;
5173                         const bool oldn = PL_minus_n;
5174                         const bool oldp = PL_minus_p;
5175                         const char *d1 = d;
5176
5177                         do {
5178                             bool baduni = FALSE;
5179                             if (*d1 == 'C') {
5180                                 const char *d2 = d1 + 1;
5181                                 if (parse_unicode_opts((const char **)&d2)
5182                                     != PL_unicode)
5183                                     baduni = TRUE;
5184                             }
5185                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5186                                 const char * const m = d1;
5187                                 while (*d1 && !isSPACE(*d1))
5188                                     d1++;
5189                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5190                                       (int)(d1 - m), m);
5191                             }
5192                             d1 = moreswitches(d1);
5193                         } while (d1);
5194                         if (PL_doswitches && !switches_done) {
5195                             int argc = PL_origargc;
5196                             char **argv = PL_origargv;
5197                             do {
5198                                 argc--,argv++;
5199                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5200                             init_argv_symbols(argc,argv);
5201                         }
5202                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5203                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5204                               /* if we have already added "LINE: while (<>) {",
5205                                  we must not do it again */
5206                         {
5207                             sv_setpvs(PL_linestr, "");
5208                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5209                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5210                             PL_last_lop = PL_last_uni = NULL;
5211                             PL_preambled = FALSE;
5212                             if (PERLDB_LINE || PERLDB_SAVESRC)
5213                                 (void)gv_fetchfile(PL_origfilename);
5214                             goto retry;
5215                         }
5216                     }
5217                 }
5218             }
5219         }
5220         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5221             PL_lex_state = LEX_FORMLINE;
5222             start_force(PL_curforce);
5223             NEXTVAL_NEXTTOKE.ival = 0;
5224             force_next(FORMRBRACK);
5225             TOKEN(';');
5226         }
5227         goto retry;
5228     case '\r':
5229 #ifdef PERL_STRICT_CR
5230         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5231         Perl_croak(aTHX_
5232       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5233 #endif
5234     case ' ': case '\t': case '\f': case 013:
5235 #ifdef PERL_MAD
5236         PL_realtokenstart = -1;
5237         if (!PL_thiswhite)
5238             PL_thiswhite = newSVpvs("");
5239         sv_catpvn(PL_thiswhite, s, 1);
5240 #endif
5241         s++;
5242         goto retry;
5243     case '#':
5244     case '\n':
5245 #ifdef PERL_MAD
5246         PL_realtokenstart = -1;
5247         if (PL_madskills)
5248             PL_faketokens = 0;
5249 #endif
5250         if (PL_lex_state != LEX_NORMAL ||
5251              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5252             if (*s == '#' && s == PL_linestart && PL_in_eval
5253              && !PL_rsfp && !PL_parser->filtered) {
5254                 /* handle eval qq[#line 1 "foo"\n ...] */
5255                 CopLINE_dec(PL_curcop);
5256                 incline(s);
5257             }
5258             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5259                 s = SKIPSPACE0(s);
5260                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5261                     incline(s);
5262             }
5263             else {
5264                 const bool in_comment = *s == '#';
5265                 d = s;
5266                 while (d < PL_bufend && *d != '\n')
5267                     d++;
5268                 if (d < PL_bufend)
5269                     d++;
5270                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5271                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5272                                d, PL_bufend);
5273 #ifdef PERL_MAD
5274                 if (PL_madskills)
5275                     PL_thiswhite = newSVpvn(s, d - s);
5276 #endif
5277                 s = d;
5278                 if (in_comment && d == PL_bufend
5279                  && PL_lex_state == LEX_INTERPNORMAL
5280                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5281                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5282                 else incline(s);
5283             }
5284             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5285                 PL_lex_state = LEX_FORMLINE;
5286                 start_force(PL_curforce);
5287                 NEXTVAL_NEXTTOKE.ival = 0;
5288                 force_next(FORMRBRACK);
5289                 TOKEN(';');
5290             }
5291         }
5292         else {
5293 #ifdef PERL_MAD
5294             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5295                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5296                     PL_faketokens = 0;
5297                     s = SKIPSPACE0(s);
5298                     TOKEN(PEG); /* make sure any #! line is accessible */
5299                 }
5300                 s = SKIPSPACE0(s);
5301             }
5302             else {
5303 /*              if (PL_madskills && PL_lex_formbrack) { */
5304                     d = s;
5305                     while (d < PL_bufend && *d != '\n')
5306                         d++;
5307                     if (d < PL_bufend)
5308                         d++;
5309                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5310                       Perl_croak(aTHX_ "panic: input overflow");
5311                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5312                         if (!PL_thiswhite)
5313                             PL_thiswhite = newSVpvs("");
5314                         if (CopLINE(PL_curcop) == 1) {
5315                             sv_setpvs(PL_thiswhite, "");
5316                             PL_faketokens = 0;
5317                         }
5318                         sv_catpvn(PL_thiswhite, s, d - s);
5319                     }
5320                     s = d;
5321 /*              }
5322                 *s = '\0';
5323                 PL_bufend = s; */
5324             }
5325 #else
5326             *s = '\0';
5327             PL_bufend = s;
5328 #endif
5329         }
5330         goto retry;
5331     case '-':
5332         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5333             I32 ftst = 0;
5334             char tmp;
5335
5336             s++;
5337             PL_bufptr = s;
5338             tmp = *s++;
5339
5340             while (s < PL_bufend && SPACE_OR_TAB(*s))
5341                 s++;
5342
5343             if (strnEQ(s,"=>",2)) {
5344                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5345                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5346                 OPERATOR('-');          /* unary minus */
5347             }
5348             PL_last_uni = PL_oldbufptr;
5349             switch (tmp) {
5350             case 'r': ftst = OP_FTEREAD;        break;
5351             case 'w': ftst = OP_FTEWRITE;       break;
5352             case 'x': ftst = OP_FTEEXEC;        break;
5353             case 'o': ftst = OP_FTEOWNED;       break;
5354             case 'R': ftst = OP_FTRREAD;        break;
5355             case 'W': ftst = OP_FTRWRITE;       break;
5356             case 'X': ftst = OP_FTREXEC;        break;
5357             case 'O': ftst = OP_FTROWNED;       break;
5358             case 'e': ftst = OP_FTIS;           break;
5359             case 'z': ftst = OP_FTZERO;         break;
5360             case 's': ftst = OP_FTSIZE;         break;
5361             case 'f': ftst = OP_FTFILE;         break;
5362             case 'd': ftst = OP_FTDIR;          break;
5363             case 'l': ftst = OP_FTLINK;         break;
5364             case 'p': ftst = OP_FTPIPE;         break;
5365             case 'S': ftst = OP_FTSOCK;         break;
5366             case 'u': ftst = OP_FTSUID;         break;
5367             case 'g': ftst = OP_FTSGID;         break;
5368             case 'k': ftst = OP_FTSVTX;         break;
5369             case 'b': ftst = OP_FTBLK;          break;
5370             case 'c': ftst = OP_FTCHR;          break;
5371             case 't': ftst = OP_FTTTY;          break;
5372             case 'T': ftst = OP_FTTEXT;         break;
5373             case 'B': ftst = OP_FTBINARY;       break;
5374             case 'M': case 'A': case 'C':
5375                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5376                 switch (tmp) {
5377                 case 'M': ftst = OP_FTMTIME;    break;
5378                 case 'A': ftst = OP_FTATIME;    break;
5379                 case 'C': ftst = OP_FTCTIME;    break;
5380                 default:                        break;
5381                 }
5382                 break;
5383             default:
5384                 break;
5385             }
5386             if (ftst) {
5387                 PL_last_lop_op = (OPCODE)ftst;
5388                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5389                         "### Saw file test %c\n", (int)tmp);
5390                 } );
5391                 FTST(ftst);
5392             }
5393             else {
5394                 /* Assume it was a minus followed by a one-letter named
5395                  * subroutine call (or a -bareword), then. */
5396                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5397                         "### '-%c' looked like a file test but was not\n",
5398                         (int) tmp);
5399                 } );
5400                 s = --PL_bufptr;
5401             }
5402         }
5403         {
5404             const char tmp = *s++;
5405             if (*s == tmp) {
5406                 s++;
5407                 if (PL_expect == XOPERATOR)
5408                     TERM(POSTDEC);
5409                 else
5410                     OPERATOR(PREDEC);
5411             }
5412             else if (*s == '>') {
5413                 s++;
5414                 s = SKIPSPACE1(s);
5415                 if (isIDFIRST_lazy_if(s,UTF)) {
5416                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5417                     TOKEN(ARROW);
5418                 }
5419                 else if (*s == '$')
5420                     OPERATOR(ARROW);
5421                 else
5422                     TERM(ARROW);
5423             }
5424             if (PL_expect == XOPERATOR) {
5425                 if (*s == '=' && !PL_lex_allbrackets &&
5426                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5427                     s--;
5428                     TOKEN(0);
5429                 }
5430                 Aop(OP_SUBTRACT);
5431             }
5432             else {
5433                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5434                     check_uni();
5435                 OPERATOR('-');          /* unary minus */
5436             }
5437         }
5438
5439     case '+':
5440         {
5441             const char tmp = *s++;
5442             if (*s == tmp) {
5443                 s++;
5444                 if (PL_expect == XOPERATOR)
5445                     TERM(POSTINC);
5446                 else
5447                     OPERATOR(PREINC);
5448             }
5449             if (PL_expect == XOPERATOR) {
5450                 if (*s == '=' && !PL_lex_allbrackets &&
5451                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5452                     s--;
5453                     TOKEN(0);
5454                 }
5455                 Aop(OP_ADD);
5456             }
5457             else {
5458                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5459                     check_uni();
5460                 OPERATOR('+');
5461             }
5462         }
5463
5464     case '*':
5465         if (PL_expect != XOPERATOR) {
5466             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5467             PL_expect = XOPERATOR;
5468             force_ident(PL_tokenbuf, '*');
5469             if (!*PL_tokenbuf)
5470                 PREREF('*');
5471             TERM('*');
5472         }
5473         s++;
5474         if (*s == '*') {
5475             s++;
5476             if (*s == '=' && !PL_lex_allbrackets &&
5477                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5478                 s -= 2;
5479                 TOKEN(0);
5480             }
5481             PWop(OP_POW);
5482         }
5483         if (*s == '=' && !PL_lex_allbrackets &&
5484                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5485             s--;
5486             TOKEN(0);
5487         }
5488         Mop(OP_MULTIPLY);
5489
5490     case '%':
5491         if (PL_expect == XOPERATOR) {
5492             if (s[1] == '=' && !PL_lex_allbrackets &&
5493                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5494                 TOKEN(0);
5495             ++s;
5496             Mop(OP_MODULO);
5497         }
5498         PL_tokenbuf[0] = '%';
5499         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5500                 sizeof PL_tokenbuf - 1, FALSE);
5501         if (!PL_tokenbuf[1]) {
5502             PREREF('%');
5503         }
5504         PL_expect = XOPERATOR;
5505         force_ident_maybe_lex('%');
5506         TERM('%');
5507
5508     case '^':
5509         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5510                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5511             TOKEN(0);
5512         s++;
5513         BOop(OP_BIT_XOR);
5514     case '[':
5515         if (PL_lex_brackets > 100)
5516             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5517         PL_lex_brackstack[PL_lex_brackets++] = 0;
5518         PL_lex_allbrackets++;
5519         {
5520             const char tmp = *s++;
5521             OPERATOR(tmp);
5522         }
5523     case '~':
5524         if (s[1] == '~'
5525             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5526         {
5527             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5528                 TOKEN(0);
5529             s += 2;
5530             Eop(OP_SMARTMATCH);
5531         }
5532         s++;
5533         OPERATOR('~');
5534     case ',':
5535         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5536             TOKEN(0);
5537         s++;
5538         OPERATOR(',');
5539     case ':':
5540         if (s[1] == ':') {
5541             len = 0;
5542             goto just_a_word_zero_gv;
5543         }
5544         s++;
5545         switch (PL_expect) {
5546             OP *attrs;
5547 #ifdef PERL_MAD
5548             I32 stuffstart;
5549 #endif
5550         case XOPERATOR:
5551             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5552                 break;
5553             PL_bufptr = s;      /* update in case we back off */
5554             if (*s == '=') {
5555                 Perl_croak(aTHX_
5556                            "Use of := for an empty attribute list is not allowed");
5557             }
5558             goto grabattrs;
5559         case XATTRBLOCK:
5560             PL_expect = XBLOCK;
5561             goto grabattrs;
5562         case XATTRTERM:
5563             PL_expect = XTERMBLOCK;
5564          grabattrs:
5565 #ifdef PERL_MAD
5566             stuffstart = s - SvPVX(PL_linestr) - 1;
5567 #endif
5568             s = PEEKSPACE(s);
5569             attrs = NULL;
5570             while (isIDFIRST_lazy_if(s,UTF)) {
5571                 I32 tmp;
5572                 SV *sv;
5573                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5574                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5575                     if (tmp < 0) tmp = -tmp;
5576                     switch (tmp) {
5577                     case KEY_or:
5578                     case KEY_and:
5579                     case KEY_for:
5580                     case KEY_foreach:
5581                     case KEY_unless:
5582                     case KEY_if:
5583                     case KEY_while:
5584                     case KEY_until:
5585                         goto got_attrs;
5586                     default:
5587                         break;
5588                     }
5589                 }
5590                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5591                 if (*d == '(') {
5592                     d = scan_str(d,TRUE,TRUE,FALSE);
5593                     if (!d) {
5594                         /* MUST advance bufptr here to avoid bogus
5595                            "at end of line" context messages from yyerror().
5596                          */
5597                         PL_bufptr = s + len;
5598                         yyerror("Unterminated attribute parameter in attribute list");
5599                         if (attrs)
5600                             op_free(attrs);
5601                         sv_free(sv);
5602                         return REPORT(0);       /* EOF indicator */
5603                     }
5604                 }
5605                 if (PL_lex_stuff) {
5606                     sv_catsv(sv, PL_lex_stuff);
5607                     attrs = op_append_elem(OP_LIST, attrs,
5608                                         newSVOP(OP_CONST, 0, sv));
5609                     SvREFCNT_dec(PL_lex_stuff);
5610                     PL_lex_stuff = NULL;
5611                 }
5612                 else {
5613                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5614                         sv_free(sv);
5615                         if (PL_in_my == KEY_our) {
5616                             deprecate(":unique");
5617                         }
5618                         else
5619                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5620                     }
5621
5622                     /* NOTE: any CV attrs applied here need to be part of
5623                        the CVf_BUILTIN_ATTRS define in cv.h! */
5624                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5625                         sv_free(sv);
5626                         CvLVALUE_on(PL_compcv);
5627                     }
5628                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5629                         sv_free(sv);
5630                         deprecate(":locked");
5631                     }
5632                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5633                         sv_free(sv);
5634                         CvMETHOD_on(PL_compcv);
5635                     }
5636                     /* After we've set the flags, it could be argued that
5637                        we don't need to do the attributes.pm-based setting
5638                        process, and shouldn't bother appending recognized
5639                        flags.  To experiment with that, uncomment the
5640                        following "else".  (Note that's already been
5641                        uncommented.  That keeps the above-applied built-in
5642                        attributes from being intercepted (and possibly
5643                        rejected) by a package's attribute routines, but is
5644                        justified by the performance win for the common case
5645                        of applying only built-in attributes.) */
5646                     else
5647                         attrs = op_append_elem(OP_LIST, attrs,
5648                                             newSVOP(OP_CONST, 0,
5649                                                     sv));
5650                 }
5651                 s = PEEKSPACE(d);
5652                 if (*s == ':' && s[1] != ':')
5653                     s = PEEKSPACE(s+1);
5654                 else if (s == d)
5655                     break;      /* require real whitespace or :'s */
5656                 /* XXX losing whitespace on sequential attributes here */
5657             }
5658             {
5659                 const char tmp
5660                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5661                 if (*s != ';' && *s != '}' && *s != tmp
5662                     && (tmp != '=' || *s != ')')) {
5663                     const char q = ((*s == '\'') ? '"' : '\'');
5664                     /* If here for an expression, and parsed no attrs, back
5665                        off. */
5666                     if (tmp == '=' && !attrs) {
5667                         s = PL_bufptr;
5668                         break;
5669                     }
5670                     /* MUST advance bufptr here to avoid bogus "at end of line"
5671                        context messages from yyerror().
5672                     */
5673                     PL_bufptr = s;
5674                     yyerror( (const char *)
5675                              (*s
5676                               ? Perl_form(aTHX_ "Invalid separator character "
5677                                           "%c%c%c in attribute list", q, *s, q)
5678                               : "Unterminated attribute list" ) );
5679                     if (attrs)
5680                         op_free(attrs);
5681                     OPERATOR(':');
5682                 }
5683             }
5684         got_attrs:
5685             if (attrs) {
5686                 start_force(PL_curforce);
5687                 NEXTVAL_NEXTTOKE.opval = attrs;
5688                 CURMAD('_', PL_nextwhite);
5689                 force_next(THING);
5690             }
5691 #ifdef PERL_MAD
5692             if (PL_madskills) {
5693                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5694                                      (s - SvPVX(PL_linestr)) - stuffstart);
5695             }
5696 #endif
5697             TOKEN(COLONATTR);
5698         }
5699         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5700             s--;
5701             TOKEN(0);
5702         }
5703         PL_lex_allbrackets--;
5704         OPERATOR(':');
5705     case '(':
5706         s++;
5707         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5708             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5709         else
5710             PL_expect = XTERM;
5711         s = SKIPSPACE1(s);
5712         PL_lex_allbrackets++;
5713         TOKEN('(');
5714     case ';':
5715         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5716             TOKEN(0);
5717         CLINE;
5718         s++;
5719         OPERATOR(';');
5720     case ')':
5721         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5722             TOKEN(0);
5723         s++;
5724         PL_lex_allbrackets--;
5725         s = SKIPSPACE1(s);
5726         if (*s == '{')
5727             PREBLOCK(')');
5728         TERM(')');
5729     case ']':
5730         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5731             TOKEN(0);
5732         s++;
5733         if (PL_lex_brackets <= 0)
5734             yyerror("Unmatched right square bracket");
5735         else
5736             --PL_lex_brackets;
5737         PL_lex_allbrackets--;
5738         if (PL_lex_state == LEX_INTERPNORMAL) {
5739             if (PL_lex_brackets == 0) {
5740                 if (*s == '-' && s[1] == '>')
5741                     PL_lex_state = LEX_INTERPENDMAYBE;
5742                 else if (*s != '[' && *s != '{')
5743                     PL_lex_state = LEX_INTERPEND;
5744             }
5745         }
5746         TERM(']');
5747     case '{':
5748         s++;
5749       leftbracket:
5750         if (PL_lex_brackets > 100) {
5751             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5752         }
5753         switch (PL_expect) {
5754         case XTERM:
5755             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5756             PL_lex_allbrackets++;
5757             OPERATOR(HASHBRACK);
5758         case XOPERATOR:
5759             while (s < PL_bufend && SPACE_OR_TAB(*s))
5760                 s++;
5761             d = s;
5762             PL_tokenbuf[0] = '\0';
5763             if (d < PL_bufend && *d == '-') {
5764                 PL_tokenbuf[0] = '-';
5765                 d++;
5766                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5767                     d++;
5768             }
5769             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5770                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5771                               FALSE, &len);
5772                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5773                     d++;
5774                 if (*d == '}') {
5775                     const char minus = (PL_tokenbuf[0] == '-');
5776                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5777                     if (minus)
5778                         force_next('-');
5779                 }
5780             }
5781             /* FALL THROUGH */
5782         case XATTRBLOCK:
5783         case XBLOCK:
5784             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5785             PL_lex_allbrackets++;
5786             PL_expect = XSTATE;
5787             break;
5788         case XATTRTERM:
5789         case XTERMBLOCK:
5790             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5791             PL_lex_allbrackets++;
5792             PL_expect = XSTATE;
5793             break;
5794         default: {
5795                 const char *t;
5796                 if (PL_oldoldbufptr == PL_last_lop)
5797                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5798                 else
5799                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5800                 PL_lex_allbrackets++;
5801                 s = SKIPSPACE1(s);
5802                 if (*s == '}') {
5803                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5804                         PL_expect = XTERM;
5805                         /* This hack is to get the ${} in the message. */
5806                         PL_bufptr = s+1;
5807                         yyerror("syntax error");
5808                         break;
5809                     }
5810                     OPERATOR(HASHBRACK);
5811                 }
5812                 /* This hack serves to disambiguate a pair of curlies
5813                  * as being a block or an anon hash.  Normally, expectation
5814                  * determines that, but in cases where we're not in a
5815                  * position to expect anything in particular (like inside
5816                  * eval"") we have to resolve the ambiguity.  This code
5817                  * covers the case where the first term in the curlies is a
5818                  * quoted string.  Most other cases need to be explicitly
5819                  * disambiguated by prepending a "+" before the opening
5820                  * curly in order to force resolution as an anon hash.
5821                  *
5822                  * XXX should probably propagate the outer expectation
5823                  * into eval"" to rely less on this hack, but that could
5824                  * potentially break current behavior of eval"".
5825                  * GSAR 97-07-21
5826                  */
5827                 t = s;
5828                 if (*s == '\'' || *s == '"' || *s == '`') {
5829                     /* common case: get past first string, handling escapes */
5830                     for (t++; t < PL_bufend && *t != *s;)
5831                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5832                             t++;
5833                     t++;
5834                 }
5835                 else if (*s == 'q') {
5836                     if (++t < PL_bufend
5837                         && (!isALNUM(*t)
5838                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5839                                 && !isALNUM(*t))))
5840                     {
5841                         /* skip q//-like construct */
5842                         const char *tmps;
5843                         char open, close, term;
5844                         I32 brackets = 1;
5845
5846                         while (t < PL_bufend && isSPACE(*t))
5847                             t++;
5848                         /* check for q => */
5849                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5850                             OPERATOR(HASHBRACK);
5851                         }
5852                         term = *t;
5853                         open = term;
5854                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5855                             term = tmps[5];
5856                         close = term;
5857                         if (open == close)
5858                             for (t++; t < PL_bufend; t++) {
5859                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5860                                     t++;
5861                                 else if (*t == open)
5862                                     break;
5863                             }
5864                         else {
5865                             for (t++; t < PL_bufend; t++) {
5866                                 if (*t == '\\' && t+1 < PL_bufend)
5867                                     t++;
5868                                 else if (*t == close && --brackets <= 0)
5869                                     break;
5870                                 else if (*t == open)
5871                                     brackets++;
5872                             }
5873                         }
5874                         t++;
5875                     }
5876                     else
5877                         /* skip plain q word */
5878                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5879                              t += UTF8SKIP(t);
5880                 }
5881                 else if (isALNUM_lazy_if(t,UTF)) {
5882                     t += UTF8SKIP(t);
5883                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5884                          t += UTF8SKIP(t);
5885                 }
5886                 while (t < PL_bufend && isSPACE(*t))
5887                     t++;
5888                 /* if comma follows first term, call it an anon hash */
5889                 /* XXX it could be a comma expression with loop modifiers */
5890                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5891                                    || (*t == '=' && t[1] == '>')))
5892                     OPERATOR(HASHBRACK);
5893                 if (PL_expect == XREF)
5894                     PL_expect = XTERM;
5895                 else {
5896                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5897                     PL_expect = XSTATE;
5898                 }
5899             }
5900             break;
5901         }
5902         pl_yylval.ival = CopLINE(PL_curcop);
5903         if (isSPACE(*s) || *s == '#')
5904             PL_copline = NOLINE;   /* invalidate current command line number */
5905         TOKEN(formbrack ? '=' : '{');
5906     case '}':
5907         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5908             TOKEN(0);
5909       rightbracket:
5910         s++;
5911         if (PL_lex_brackets <= 0)
5912             yyerror("Unmatched right curly bracket");
5913         else
5914             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5915         PL_lex_allbrackets--;
5916         if (PL_lex_state == LEX_INTERPNORMAL) {
5917             if (PL_lex_brackets == 0) {
5918                 if (PL_expect & XFAKEBRACK) {
5919                     PL_expect &= XENUMMASK;
5920                     PL_lex_state = LEX_INTERPEND;
5921                     PL_bufptr = s;
5922 #if 0
5923                     if (PL_madskills) {
5924                         if (!PL_thiswhite)
5925                             PL_thiswhite = newSVpvs("");
5926                         sv_catpvs(PL_thiswhite,"}");
5927                     }
5928 #endif
5929                     return yylex();     /* ignore fake brackets */
5930                 }
5931                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5932                  && SvEVALED(PL_lex_repl))
5933                     PL_lex_state = LEX_INTERPEND;
5934                 else if (*s == '-' && s[1] == '>')
5935                     PL_lex_state = LEX_INTERPENDMAYBE;
5936                 else if (*s != '[' && *s != '{')
5937                     PL_lex_state = LEX_INTERPEND;
5938             }
5939         }
5940         if (PL_expect & XFAKEBRACK) {
5941             PL_expect &= XENUMMASK;
5942             PL_bufptr = s;
5943             return yylex();             /* ignore fake brackets */
5944         }
5945         start_force(PL_curforce);
5946         if (PL_madskills) {
5947             curmad('X', newSVpvn(s-1,1));
5948             CURMAD('_', PL_thiswhite);
5949         }
5950         force_next(formbrack ? '.' : '}');
5951         if (formbrack) LEAVE;
5952 #ifdef PERL_MAD
5953         if (!PL_thistoken)
5954             PL_thistoken = newSVpvs("");
5955 #endif
5956         if (formbrack == 2) { /* means . where arguments were expected */
5957             start_force(PL_curforce);
5958             force_next(';');
5959             TOKEN(FORMRBRACK);
5960         }
5961         TOKEN(';');
5962     case '&':
5963         s++;
5964         if (*s++ == '&') {
5965             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5966                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5967                 s -= 2;
5968                 TOKEN(0);
5969             }
5970             AOPERATOR(ANDAND);
5971         }
5972         s--;
5973         if (PL_expect == XOPERATOR) {
5974             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5975                 && isIDFIRST_lazy_if(s,UTF))
5976             {
5977                 CopLINE_dec(PL_curcop);
5978                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5979                 CopLINE_inc(PL_curcop);
5980             }
5981             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5982                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5983                 s--;
5984                 TOKEN(0);
5985             }
5986             BAop(OP_BIT_AND);
5987         }
5988
5989         PL_tokenbuf[0] = '&';
5990         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
5991                        sizeof PL_tokenbuf - 1, TRUE);
5992         if (PL_tokenbuf[1]) {
5993             PL_expect = XOPERATOR;
5994             force_ident_maybe_lex('&');
5995         }
5996         else
5997             PREREF('&');
5998         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5999         TERM('&');
6000
6001     case '|':
6002         s++;
6003         if (*s++ == '|') {
6004             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6005                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6006                 s -= 2;
6007                 TOKEN(0);
6008             }
6009             AOPERATOR(OROR);
6010         }
6011         s--;
6012         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6013                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6014             s--;
6015             TOKEN(0);
6016         }
6017         BOop(OP_BIT_OR);
6018     case '=':
6019         s++;
6020         {
6021             const char tmp = *s++;
6022             if (tmp == '=') {
6023                 if (!PL_lex_allbrackets &&
6024                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6025                     s -= 2;
6026                     TOKEN(0);
6027                 }
6028                 Eop(OP_EQ);
6029             }
6030             if (tmp == '>') {
6031                 if (!PL_lex_allbrackets &&
6032                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6033                     s -= 2;
6034                     TOKEN(0);
6035                 }
6036                 OPERATOR(',');
6037             }
6038             if (tmp == '~')
6039                 PMop(OP_MATCH);
6040             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6041                 && strchr("+-*/%.^&|<",tmp))
6042                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6043                             "Reversed %c= operator",(int)tmp);
6044             s--;
6045             if (PL_expect == XSTATE && isALPHA(tmp) &&
6046                 (s == PL_linestart+1 || s[-2] == '\n') )
6047                 {
6048                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6049                         || PL_lex_state != LEX_NORMAL) {
6050                         d = PL_bufend;
6051                         while (s < d) {
6052                             if (*s++ == '\n') {
6053                                 incline(s);
6054                                 if (strnEQ(s,"=cut",4)) {
6055                                     s = strchr(s,'\n');
6056                                     if (s)
6057                                         s++;
6058                                     else
6059                                         s = d;
6060                                     incline(s);
6061                                     goto retry;
6062                                 }
6063                             }
6064                         }
6065                         goto retry;
6066                     }
6067 #ifdef PERL_MAD
6068                     if (PL_madskills) {
6069                         if (!PL_thiswhite)
6070                             PL_thiswhite = newSVpvs("");
6071                         sv_catpvn(PL_thiswhite, PL_linestart,
6072                                   PL_bufend - PL_linestart);
6073                     }
6074 #endif
6075                     s = PL_bufend;
6076                     PL_parser->in_pod = 1;
6077                     goto retry;
6078                 }
6079         }
6080         if (PL_expect == XBLOCK) {
6081             const char *t = s;
6082 #ifdef PERL_STRICT_CR
6083             while (SPACE_OR_TAB(*t))
6084 #else
6085             while (SPACE_OR_TAB(*t) || *t == '\r')
6086 #endif
6087                 t++;
6088             if (*t == '\n' || *t == '#') {
6089                 formbrack = 1;
6090                 ENTER;
6091                 SAVEI8(PL_parser->form_lex_state);
6092                 SAVEI32(PL_lex_formbrack);
6093                 PL_parser->form_lex_state = PL_lex_state;
6094                 PL_lex_formbrack = PL_lex_brackets + 1;
6095                 goto leftbracket;
6096             }
6097         }
6098         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6099             s--;
6100             TOKEN(0);
6101         }
6102         pl_yylval.ival = 0;
6103         OPERATOR(ASSIGNOP);
6104     case '!':
6105         s++;
6106         {
6107             const char tmp = *s++;
6108             if (tmp == '=') {
6109                 /* was this !=~ where !~ was meant?
6110                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6111
6112                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6113                     const char *t = s+1;
6114
6115                     while (t < PL_bufend && isSPACE(*t))
6116                         ++t;
6117
6118                     if (*t == '/' || *t == '?' ||
6119                         ((*t == 'm' || *t == 's' || *t == 'y')
6120                          && !isALNUM(t[1])) ||
6121                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6122                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6123                                     "!=~ should be !~");
6124                 }
6125                 if (!PL_lex_allbrackets &&
6126                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6127                     s -= 2;
6128                     TOKEN(0);
6129                 }
6130                 Eop(OP_NE);
6131             }
6132             if (tmp == '~')
6133                 PMop(OP_NOT);
6134         }
6135         s--;
6136         OPERATOR('!');
6137     case '<':
6138         if (PL_expect != XOPERATOR) {
6139             if (s[1] != '<' && !strchr(s,'>'))
6140                 check_uni();
6141             if (s[1] == '<')
6142                 s = scan_heredoc(s);
6143             else
6144                 s = scan_inputsymbol(s);
6145             PL_expect = XOPERATOR;
6146             TOKEN(sublex_start());
6147         }
6148         s++;
6149         {
6150             char tmp = *s++;
6151             if (tmp == '<') {
6152                 if (*s == '=' && !PL_lex_allbrackets &&
6153                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6154                     s -= 2;
6155                     TOKEN(0);
6156                 }
6157                 SHop(OP_LEFT_SHIFT);
6158             }
6159             if (tmp == '=') {
6160                 tmp = *s++;
6161                 if (tmp == '>') {
6162                     if (!PL_lex_allbrackets &&
6163                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6164                         s -= 3;
6165                         TOKEN(0);
6166                     }
6167                     Eop(OP_NCMP);
6168                 }
6169                 s--;
6170                 if (!PL_lex_allbrackets &&
6171                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6172                     s -= 2;
6173                     TOKEN(0);
6174                 }
6175                 Rop(OP_LE);
6176             }
6177         }
6178         s--;
6179         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6180             s--;
6181             TOKEN(0);
6182         }
6183         Rop(OP_LT);
6184     case '>':
6185         s++;
6186         {
6187             const char tmp = *s++;
6188             if (tmp == '>') {
6189                 if (*s == '=' && !PL_lex_allbrackets &&
6190                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6191                     s -= 2;
6192                     TOKEN(0);
6193                 }
6194                 SHop(OP_RIGHT_SHIFT);
6195             }
6196             else if (tmp == '=') {
6197                 if (!PL_lex_allbrackets &&
6198                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6199                     s -= 2;
6200                     TOKEN(0);
6201                 }
6202                 Rop(OP_GE);
6203             }
6204         }
6205         s--;
6206         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6207             s--;
6208             TOKEN(0);
6209         }
6210         Rop(OP_GT);
6211
6212     case '$':
6213         CLINE;
6214
6215         if (PL_expect == XOPERATOR) {
6216             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6217                 return deprecate_commaless_var_list();
6218             }
6219         }
6220
6221         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6222             PL_tokenbuf[0] = '@';
6223             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6224                            sizeof PL_tokenbuf - 1, FALSE);
6225             if (PL_expect == XOPERATOR)
6226                 no_op("Array length", s);
6227             if (!PL_tokenbuf[1])
6228                 PREREF(DOLSHARP);
6229             PL_expect = XOPERATOR;
6230             force_ident_maybe_lex('#');
6231             TOKEN(DOLSHARP);
6232         }
6233
6234         PL_tokenbuf[0] = '$';
6235         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6236                        sizeof PL_tokenbuf - 1, FALSE);
6237         if (PL_expect == XOPERATOR)
6238             no_op("Scalar", s);
6239         if (!PL_tokenbuf[1]) {
6240             if (s == PL_bufend)
6241                 yyerror("Final $ should be \\$ or $name");
6242             PREREF('$');
6243         }
6244
6245         d = s;
6246         {
6247             const char tmp = *s;
6248             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6249                 s = SKIPSPACE1(s);
6250
6251             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6252                 && intuit_more(s)) {
6253                 if (*s == '[') {
6254                     PL_tokenbuf[0] = '@';
6255                     if (ckWARN(WARN_SYNTAX)) {
6256                         char *t = s+1;
6257
6258                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6259                             t++;
6260                         if (*t++ == ',') {
6261                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6262                             while (t < PL_bufend && *t != ']')
6263                                 t++;
6264                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6265                                         "Multidimensional syntax %.*s not supported",
6266                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6267                         }
6268                     }
6269                 }
6270                 else if (*s == '{') {
6271                     char *t;
6272                     PL_tokenbuf[0] = '%';
6273                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6274                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6275                         {
6276                             char tmpbuf[sizeof PL_tokenbuf];
6277                             do {
6278                                 t++;
6279                             } while (isSPACE(*t));
6280                             if (isIDFIRST_lazy_if(t,UTF)) {
6281                                 STRLEN len;
6282                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6283                                               &len);
6284                                 while (isSPACE(*t))
6285                                     t++;
6286                                 if (*t == ';'
6287                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6288                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6289                                                 "You need to quote \"%"SVf"\"",
6290                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6291                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6292                             }
6293                         }
6294                 }
6295             }
6296
6297             PL_expect = XOPERATOR;
6298             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6299                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6300                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6301                     PL_expect = XOPERATOR;
6302                 else if (strchr("$@\"'`q", *s))
6303                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6304                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6305                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6306                 else if (isIDFIRST_lazy_if(s,UTF)) {
6307                     char tmpbuf[sizeof PL_tokenbuf];
6308                     int t2;
6309                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6310                     if ((t2 = keyword(tmpbuf, len, 0))) {
6311                         /* binary operators exclude handle interpretations */
6312                         switch (t2) {
6313                         case -KEY_x:
6314                         case -KEY_eq:
6315                         case -KEY_ne:
6316                         case -KEY_gt:
6317                         case -KEY_lt:
6318                         case -KEY_ge:
6319                         case -KEY_le:
6320                         case -KEY_cmp:
6321                             break;
6322                         default:
6323                             PL_expect = XTERM;  /* e.g. print $fh length() */
6324                             break;
6325                         }
6326                     }
6327                     else {
6328                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6329                     }
6330                 }
6331                 else if (isDIGIT(*s))
6332                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6333                 else if (*s == '.' && isDIGIT(s[1]))
6334                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6335                 else if ((*s == '?' || *s == '-' || *s == '+')
6336                          && !isSPACE(s[1]) && s[1] != '=')
6337                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6338                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6339                          && s[1] != '/')
6340                     PL_expect = XTERM;          /* e.g. print $fh /.../
6341                                                    XXX except DORDOR operator
6342                                                 */
6343                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6344                          && s[2] != '=')
6345                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6346             }
6347         }
6348         force_ident_maybe_lex('$');
6349         TOKEN('$');
6350
6351     case '@':
6352         if (PL_expect == XOPERATOR)
6353             no_op("Array", s);
6354         PL_tokenbuf[0] = '@';
6355         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6356         if (!PL_tokenbuf[1]) {
6357             PREREF('@');
6358         }
6359         if (PL_lex_state == LEX_NORMAL)
6360             s = SKIPSPACE1(s);
6361         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6362             if (*s == '{')
6363                 PL_tokenbuf[0] = '%';
6364
6365             /* Warn about @ where they meant $. */
6366             if (*s == '[' || *s == '{') {
6367                 if (ckWARN(WARN_SYNTAX)) {
6368                     const char *t = s + 1;
6369                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6370                         t += UTF ? UTF8SKIP(t) : 1;
6371                     if (*t == '}' || *t == ']') {
6372                         t++;
6373                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6374        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6375                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6376                             "Scalar value %"SVf" better written as $%"SVf,
6377                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6378                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6379                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6380                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6381                     }
6382                 }
6383             }
6384         }
6385         PL_expect = XOPERATOR;
6386         force_ident_maybe_lex('@');
6387         TERM('@');
6388
6389      case '/':                  /* may be division, defined-or, or pattern */
6390         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6391             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6392                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6393                 TOKEN(0);
6394             s += 2;
6395             AOPERATOR(DORDOR);
6396         }
6397      case '?':                  /* may either be conditional or pattern */
6398         if (PL_expect == XOPERATOR) {
6399              char tmp = *s++;
6400              if(tmp == '?') {
6401                 if (!PL_lex_allbrackets &&
6402                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6403                     s--;
6404                     TOKEN(0);
6405                 }
6406                 PL_lex_allbrackets++;
6407                 OPERATOR('?');
6408              }
6409              else {
6410                  tmp = *s++;
6411                  if(tmp == '/') {
6412                      /* A // operator. */
6413                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6414                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6415                                             LEX_FAKEEOF_LOGIC)) {
6416                         s -= 2;
6417                         TOKEN(0);
6418                     }
6419                     AOPERATOR(DORDOR);
6420                  }
6421                  else {
6422                      s--;
6423                      if (*s == '=' && !PL_lex_allbrackets &&
6424                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6425                          s--;
6426                          TOKEN(0);
6427                      }
6428                      Mop(OP_DIVIDE);
6429                  }
6430              }
6431          }
6432          else {
6433              /* Disable warning on "study /blah/" */
6434              if (PL_oldoldbufptr == PL_last_uni
6435               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6436                   || memNE(PL_last_uni, "study", 5)
6437                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6438               ))
6439                  check_uni();
6440              if (*s == '?')
6441                  deprecate("?PATTERN? without explicit operator");
6442              s = scan_pat(s,OP_MATCH);
6443              TERM(sublex_start());
6444          }
6445
6446     case '.':
6447         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6448 #ifdef PERL_STRICT_CR
6449             && s[1] == '\n'
6450 #else
6451             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6452 #endif
6453             && (s == PL_linestart || s[-1] == '\n') )
6454         {
6455             PL_expect = XSTATE;
6456             formbrack = 2; /* dot seen where arguments expected */
6457             goto rightbracket;
6458         }
6459         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6460             s += 3;
6461             OPERATOR(YADAYADA);
6462         }
6463         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6464             char tmp = *s++;
6465             if (*s == tmp) {
6466                 if (!PL_lex_allbrackets &&
6467                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6468                     s--;
6469                     TOKEN(0);
6470                 }
6471                 s++;
6472                 if (*s == tmp) {
6473                     s++;
6474                     pl_yylval.ival = OPf_SPECIAL;
6475                 }
6476                 else
6477                     pl_yylval.ival = 0;
6478                 OPERATOR(DOTDOT);
6479             }
6480             if (*s == '=' && !PL_lex_allbrackets &&
6481                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6482                 s--;
6483                 TOKEN(0);
6484             }
6485             Aop(OP_CONCAT);
6486         }
6487         /* FALL THROUGH */
6488     case '0': case '1': case '2': case '3': case '4':
6489     case '5': case '6': case '7': case '8': case '9':
6490         s = scan_num(s, &pl_yylval);
6491         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6492         if (PL_expect == XOPERATOR)
6493             no_op("Number",s);
6494         TERM(THING);
6495
6496     case '\'':
6497         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6498         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6499         if (PL_expect == XOPERATOR) {
6500             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6501                 return deprecate_commaless_var_list();
6502             }
6503             else
6504                 no_op("String",s);
6505         }
6506         if (!s)
6507             missingterm(NULL);
6508         pl_yylval.ival = OP_CONST;
6509         TERM(sublex_start());
6510
6511     case '"':
6512         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6513         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6514         if (PL_expect == XOPERATOR) {
6515             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6516                 return deprecate_commaless_var_list();
6517             }
6518             else
6519                 no_op("String",s);
6520         }
6521         if (!s)
6522             missingterm(NULL);
6523         pl_yylval.ival = OP_CONST;
6524         /* FIXME. I think that this can be const if char *d is replaced by
6525            more localised variables.  */
6526         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6527             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6528                 pl_yylval.ival = OP_STRINGIFY;
6529                 break;
6530             }
6531         }
6532         TERM(sublex_start());
6533
6534     case '`':
6535         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6536         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6537         if (PL_expect == XOPERATOR)
6538             no_op("Backticks",s);
6539         if (!s)
6540             missingterm(NULL);
6541         readpipe_override();
6542         TERM(sublex_start());
6543
6544     case '\\':
6545         s++;
6546         if (PL_lex_inwhat && isDIGIT(*s))
6547             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6548                            *s, *s);
6549         if (PL_expect == XOPERATOR)
6550             no_op("Backslash",s);
6551         OPERATOR(REFGEN);
6552
6553     case 'v':
6554         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6555             char *start = s + 2;
6556             while (isDIGIT(*start) || *start == '_')
6557                 start++;
6558             if (*start == '.' && isDIGIT(start[1])) {
6559                 s = scan_num(s, &pl_yylval);
6560                 TERM(THING);
6561             }
6562             else if ((*start == ':' && start[1] == ':')
6563                   || (PL_expect == XSTATE && *start == ':'))
6564                 goto keylookup;
6565             else if (PL_expect == XSTATE) {
6566                 d = start;
6567                 while (d < PL_bufend && isSPACE(*d)) d++;
6568                 if (*d == ':') goto keylookup;
6569             }
6570             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6571             if (!isALPHA(*start) && (PL_expect == XTERM
6572                         || PL_expect == XREF || PL_expect == XSTATE
6573                         || PL_expect == XTERMORDORDOR)) {
6574                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6575                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6576                 if (!gv) {
6577                     s = scan_num(s, &pl_yylval);
6578                     TERM(THING);
6579                 }
6580             }
6581         }
6582         goto keylookup;
6583     case 'x':
6584         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6585             s++;
6586             Mop(OP_REPEAT);
6587         }
6588         goto keylookup;
6589
6590     case '_':
6591     case 'a': case 'A':
6592     case 'b': case 'B':
6593     case 'c': case 'C':
6594     case 'd': case 'D':
6595     case 'e': case 'E':
6596     case 'f': case 'F':
6597     case 'g': case 'G':
6598     case 'h': case 'H':
6599     case 'i': case 'I':
6600     case 'j': case 'J':
6601     case 'k': case 'K':
6602     case 'l': case 'L':
6603     case 'm': case 'M':
6604     case 'n': case 'N':
6605     case 'o': case 'O':
6606     case 'p': case 'P':
6607     case 'q': case 'Q':
6608     case 'r': case 'R':
6609     case 's': case 'S':
6610     case 't': case 'T':
6611     case 'u': case 'U':
6612               case 'V':
6613     case 'w': case 'W':
6614               case 'X':
6615     case 'y': case 'Y':
6616     case 'z': case 'Z':
6617
6618       keylookup: {
6619         bool anydelim;
6620         bool lex;
6621         I32 tmp;
6622         SV *sv;
6623         CV *cv;
6624         PADOFFSET off;
6625         OP *rv2cv_op;
6626
6627         lex = FALSE;
6628         orig_keyword = 0;
6629         off = 0;
6630         sv = NULL;
6631         cv = NULL;
6632         gv = NULL;
6633         gvp = NULL;
6634         rv2cv_op = NULL;
6635
6636         PL_bufptr = s;
6637         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6638
6639         /* Some keywords can be followed by any delimiter, including ':' */
6640         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6641
6642         /* x::* is just a word, unless x is "CORE" */
6643         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6644             goto just_a_word;
6645
6646         d = s;
6647         while (d < PL_bufend && isSPACE(*d))
6648                 d++;    /* no comments skipped here, or s### is misparsed */
6649
6650         /* Is this a word before a => operator? */
6651         if (*d == '=' && d[1] == '>') {
6652             CLINE;
6653             pl_yylval.opval
6654                 = (OP*)newSVOP(OP_CONST, 0,
6655                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6656             pl_yylval.opval->op_private = OPpCONST_BARE;
6657             TERM(WORD);
6658         }
6659
6660         /* Check for plugged-in keyword */
6661         {
6662             OP *o;
6663             int result;
6664             char *saved_bufptr = PL_bufptr;
6665             PL_bufptr = s;
6666             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6667             s = PL_bufptr;
6668             if (result == KEYWORD_PLUGIN_DECLINE) {
6669                 /* not a plugged-in keyword */
6670                 PL_bufptr = saved_bufptr;
6671             } else if (result == KEYWORD_PLUGIN_STMT) {
6672                 pl_yylval.opval = o;
6673                 CLINE;
6674                 PL_expect = XSTATE;
6675                 return REPORT(PLUGSTMT);
6676             } else if (result == KEYWORD_PLUGIN_EXPR) {
6677                 pl_yylval.opval = o;
6678                 CLINE;
6679                 PL_expect = XOPERATOR;
6680                 return REPORT(PLUGEXPR);
6681             } else {
6682                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6683                                         PL_tokenbuf);
6684             }
6685         }
6686
6687         /* Check for built-in keyword */
6688         tmp = keyword(PL_tokenbuf, len, 0);
6689
6690         /* Is this a label? */
6691         if (!anydelim && PL_expect == XSTATE
6692               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6693             s = d + 1;
6694             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6695                                             newSVpvn_flags(PL_tokenbuf,
6696                                                         len, UTF ? SVf_UTF8 : 0));
6697             CLINE;
6698             TOKEN(LABEL);
6699         }
6700
6701         /* Check for lexical sub */
6702         if (PL_expect != XOPERATOR) {
6703             char tmpbuf[sizeof PL_tokenbuf + 1];
6704             *tmpbuf = '&';
6705             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6706             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6707             if (off != NOT_IN_PAD) {
6708                 assert(off); /* we assume this is boolean-true below */
6709                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6710                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6711                     HEK * const stashname = HvNAME_HEK(stash);
6712                     sv = newSVhek(stashname);
6713                     sv_catpvs(sv, "::");
6714                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6715                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6716                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6717                                     SVt_PVCV);
6718                     off = 0;
6719                 }
6720                 else {
6721                     rv2cv_op = newOP(OP_PADANY, 0);
6722                     rv2cv_op->op_targ = off;
6723                     rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6724                     cv = (CV *)PAD_SV(off);
6725                 }
6726                 lex = TRUE;
6727                 goto just_a_word;
6728             }
6729             off = 0;
6730         }
6731
6732         if (tmp < 0) {                  /* second-class keyword? */
6733             GV *ogv = NULL;     /* override (winner) */
6734             GV *hgv = NULL;     /* hidden (loser) */
6735             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6736                 CV *cv;
6737                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6738                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6739                     (cv = GvCVu(gv)))
6740                 {
6741                     if (GvIMPORTED_CV(gv))
6742                         ogv = gv;
6743                     else if (! CvMETHOD(cv))
6744                         hgv = gv;
6745                 }
6746                 if (!ogv &&
6747                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6748                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6749                     (gv = *gvp) && isGV_with_GP(gv) &&
6750                     GvCVu(gv) && GvIMPORTED_CV(gv))
6751                 {
6752                     ogv = gv;
6753                 }
6754             }
6755             if (ogv) {
6756                 orig_keyword = tmp;
6757                 tmp = 0;                /* overridden by import or by GLOBAL */
6758             }
6759             else if (gv && !gvp
6760                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6761                      && GvCVu(gv))
6762             {
6763                 tmp = 0;                /* any sub overrides "weak" keyword */
6764             }
6765             else {                      /* no override */
6766                 tmp = -tmp;
6767                 if (tmp == KEY_dump) {
6768                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6769                                    "dump() better written as CORE::dump()");
6770                 }
6771                 gv = NULL;
6772                 gvp = 0;
6773                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6774                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6775                                    "Ambiguous call resolved as CORE::%s(), "
6776                                    "qualify as such or use &",
6777                                    GvENAME(hgv));
6778             }
6779         }
6780
6781       reserved_word:
6782         switch (tmp) {
6783
6784         default:                        /* not a keyword */
6785             /* Trade off - by using this evil construction we can pull the
6786                variable gv into the block labelled keylookup. If not, then
6787                we have to give it function scope so that the goto from the
6788                earlier ':' case doesn't bypass the initialisation.  */
6789             if (0) {
6790             just_a_word_zero_gv:
6791                 sv = NULL;
6792                 cv = NULL;
6793                 gv = NULL;
6794                 gvp = NULL;
6795                 rv2cv_op = NULL;
6796                 orig_keyword = 0;
6797                 lex = 0;
6798                 off = 0;
6799             }
6800           just_a_word: {
6801                 int pkgname = 0;
6802                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6803                 const char penultchar =
6804                     lastchar && PL_bufptr - 2 >= PL_linestart
6805                          ? PL_bufptr[-2]
6806                          : 0;
6807 #ifdef PERL_MAD
6808                 SV *nextPL_nextwhite = 0;
6809 #endif
6810
6811
6812                 /* Get the rest if it looks like a package qualifier */
6813
6814                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6815                     STRLEN morelen;
6816                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6817                                   TRUE, &morelen);
6818                     if (!morelen)
6819                         Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6820                                         SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6821                                             (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6822                                 *s == '\'' ? "'" : "::");
6823                     len += morelen;
6824                     pkgname = 1;
6825                 }
6826
6827                 if (PL_expect == XOPERATOR) {
6828                     if (PL_bufptr == PL_linestart) {
6829                         CopLINE_dec(PL_curcop);
6830                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6831                         CopLINE_inc(PL_curcop);
6832                     }
6833                     else
6834                         no_op("Bareword",s);
6835                 }
6836
6837                 /* Look for a subroutine with this name in current package,
6838                    unless this is a lexical sub, or name is "Foo::",
6839                    in which case Foo is a bareword
6840                    (and a package name). */
6841
6842                 if (len > 2 && !PL_madskills &&
6843                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6844                 {
6845                     if (ckWARN(WARN_BAREWORD)
6846                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6847                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6848                             "Bareword \"%"SVf"\" refers to nonexistent package",
6849                              SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6850                                         (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6851                     len -= 2;
6852                     PL_tokenbuf[len] = '\0';
6853                     gv = NULL;
6854                     gvp = 0;
6855                 }
6856                 else {
6857                     if (!lex && !gv) {
6858                         /* Mustn't actually add anything to a symbol table.
6859                            But also don't want to "initialise" any placeholder
6860                            constants that might already be there into full
6861                            blown PVGVs with attached PVCV.  */
6862                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6863                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6864                                                SVt_PVCV);
6865                     }
6866                     len = 0;
6867                 }
6868
6869                 /* if we saw a global override before, get the right name */
6870
6871                 if (!sv)
6872                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6873                     len ? len : strlen(PL_tokenbuf));
6874                 if (gvp) {
6875                     SV * const tmp_sv = sv;
6876                     sv = newSVpvs("CORE::GLOBAL::");
6877                     sv_catsv(sv, tmp_sv);
6878                     SvREFCNT_dec(tmp_sv);
6879                 }
6880
6881 #ifdef PERL_MAD
6882                 if (PL_madskills && !PL_thistoken) {
6883                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6884                     PL_thistoken = newSVpvn(start,s - start);
6885                     PL_realtokenstart = s - SvPVX(PL_linestr);
6886                 }
6887 #endif
6888
6889                 /* Presume this is going to be a bareword of some sort. */
6890                 CLINE;
6891                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6892                 pl_yylval.opval->op_private = OPpCONST_BARE;
6893
6894                 /* And if "Foo::", then that's what it certainly is. */
6895                 if (len)
6896                     goto safe_bareword;
6897
6898                 if (!off)
6899                 {
6900                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6901                     const_op->op_private = OPpCONST_BARE;
6902                     rv2cv_op = newCVREF(0, const_op);
6903                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6904                 }
6905
6906                 /* See if it's the indirect object for a list operator. */
6907
6908                 if (PL_oldoldbufptr &&
6909                     PL_oldoldbufptr < PL_bufptr &&
6910                     (PL_oldoldbufptr == PL_last_lop
6911                      || PL_oldoldbufptr == PL_last_uni) &&
6912                     /* NO SKIPSPACE BEFORE HERE! */
6913                     (PL_expect == XREF ||
6914                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6915                 {
6916                     bool immediate_paren = *s == '(';
6917
6918                     /* (Now we can afford to cross potential line boundary.) */
6919                     s = SKIPSPACE2(s,nextPL_nextwhite);
6920 #ifdef PERL_MAD
6921                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6922 #endif
6923
6924                     /* Two barewords in a row may indicate method call. */
6925
6926                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6927                         (tmp = intuit_method(s, gv, cv))) {
6928                         op_free(rv2cv_op);
6929                         if (tmp == METHOD && !PL_lex_allbrackets &&
6930                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6931                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6932                         return REPORT(tmp);
6933                     }
6934
6935                     /* If not a declared subroutine, it's an indirect object. */
6936                     /* (But it's an indir obj regardless for sort.) */
6937                     /* Also, if "_" follows a filetest operator, it's a bareword */
6938
6939                     if (
6940                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6941                          (!cv &&
6942                         (PL_last_lop_op != OP_MAPSTART &&
6943                          PL_last_lop_op != OP_GREPSTART))))
6944                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6945                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6946                        )
6947                     {
6948                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6949                         goto bareword;
6950                     }
6951                 }
6952
6953                 PL_expect = XOPERATOR;
6954 #ifdef PERL_MAD
6955                 if (isSPACE(*s))
6956                     s = SKIPSPACE2(s,nextPL_nextwhite);
6957                 PL_nextwhite = nextPL_nextwhite;
6958 #else
6959                 s = skipspace(s);
6960 #endif
6961
6962                 /* Is this a word before a => operator? */
6963                 if (*s == '=' && s[1] == '>' && !pkgname) {
6964                     op_free(rv2cv_op);
6965                     CLINE;
6966                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6967                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6968                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6969                     TERM(WORD);
6970                 }
6971
6972                 /* If followed by a paren, it's certainly a subroutine. */
6973                 if (*s == '(') {
6974                     CLINE;
6975                     if (cv) {
6976                         d = s + 1;
6977                         while (SPACE_OR_TAB(*d))
6978                             d++;
6979                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6980                             s = d + 1;
6981                             goto its_constant;
6982                         }
6983                     }
6984 #ifdef PERL_MAD
6985                     if (PL_madskills) {
6986                         PL_nextwhite = PL_thiswhite;
6987                         PL_thiswhite = 0;
6988                     }
6989                     start_force(PL_curforce);
6990 #endif
6991                     NEXTVAL_NEXTTOKE.opval =
6992                         off ? rv2cv_op : pl_yylval.opval;
6993                     PL_expect = XOPERATOR;
6994 #ifdef PERL_MAD
6995                     if (PL_madskills) {
6996                         PL_nextwhite = nextPL_nextwhite;
6997                         curmad('X', PL_thistoken);
6998                         PL_thistoken = newSVpvs("");
6999                     }
7000 #endif
7001                     if (off)
7002                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7003                     else op_free(rv2cv_op),        force_next(WORD);
7004                     pl_yylval.ival = 0;
7005                     TOKEN('&');
7006                 }
7007
7008                 /* If followed by var or block, call it a method (unless sub) */
7009
7010                 if ((*s == '$' || *s == '{') && !cv) {
7011                     op_free(rv2cv_op);
7012                     PL_last_lop = PL_oldbufptr;
7013                     PL_last_lop_op = OP_METHOD;
7014                     if (!PL_lex_allbrackets &&
7015                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7016                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7017                     PREBLOCK(METHOD);
7018                 }
7019
7020                 /* If followed by a bareword, see if it looks like indir obj. */
7021
7022                 if (!orig_keyword
7023                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7024                         && (tmp = intuit_method(s, gv, cv))) {
7025                     op_free(rv2cv_op);
7026                     if (tmp == METHOD && !PL_lex_allbrackets &&
7027                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7028                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7029                     return REPORT(tmp);
7030                 }
7031
7032                 /* Not a method, so call it a subroutine (if defined) */
7033
7034                 if (cv) {
7035                     if (lastchar == '-' && penultchar != '-') {
7036                         const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7037                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7038                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7039                                 SVfARG(tmpsv), SVfARG(tmpsv));
7040                     }
7041                     /* Check for a constant sub */
7042                     if ((sv = cv_const_sv(cv))) {
7043                   its_constant:
7044                         op_free(rv2cv_op);
7045                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7046                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7047                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
7048                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
7049                         TOKEN(WORD);
7050                     }
7051
7052                     op_free(pl_yylval.opval);
7053                     pl_yylval.opval = rv2cv_op;
7054                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7055                     PL_last_lop = PL_oldbufptr;
7056                     PL_last_lop_op = OP_ENTERSUB;
7057                     /* Is there a prototype? */
7058                     if (
7059 #ifdef PERL_MAD
7060                         cv &&
7061 #endif
7062                         SvPOK(cv))
7063                     {
7064                         STRLEN protolen = CvPROTOLEN(cv);
7065                         const char *proto = CvPROTO(cv);
7066                         bool optional;
7067                         if (!protolen)
7068                             TERM(FUNC0SUB);
7069                         if ((optional = *proto == ';'))
7070                           do
7071                             proto++;
7072                           while (*proto == ';');
7073                         if (
7074                             (
7075                                 (
7076                                     *proto == '$' || *proto == '_'
7077                                  || *proto == '*' || *proto == '+'
7078                                 )
7079                              && proto[1] == '\0'
7080                             )
7081                          || (
7082                              *proto == '\\' && proto[1] && proto[2] == '\0'
7083                             )
7084                         )
7085                             UNIPROTO(UNIOPSUB,optional);
7086                         if (*proto == '\\' && proto[1] == '[') {
7087                             const char *p = proto + 2;
7088                             while(*p && *p != ']')
7089                                 ++p;
7090                             if(*p == ']' && !p[1])
7091                                 UNIPROTO(UNIOPSUB,optional);
7092                         }
7093                         if (*proto == '&' && *s == '{') {
7094                             if (PL_curstash)
7095                                 sv_setpvs(PL_subname, "__ANON__");
7096                             else
7097                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7098                             if (!PL_lex_allbrackets &&
7099                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7100                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7101                             PREBLOCK(LSTOPSUB);
7102                         }
7103                     }
7104 #ifdef PERL_MAD
7105                     {
7106                         if (PL_madskills) {
7107                             PL_nextwhite = PL_thiswhite;
7108                             PL_thiswhite = 0;
7109                         }
7110                         start_force(PL_curforce);
7111                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7112                         PL_expect = XTERM;
7113                         if (PL_madskills) {
7114                             PL_nextwhite = nextPL_nextwhite;
7115                             curmad('X', PL_thistoken);
7116                             PL_thistoken = newSVpvs("");
7117                         }
7118                         force_next(off ? PRIVATEREF : WORD);
7119                         if (!PL_lex_allbrackets &&
7120                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7121                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7122                         TOKEN(NOAMP);
7123                     }
7124                 }
7125
7126                 /* Guess harder when madskills require "best effort". */
7127                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7128                     int probable_sub = 0;
7129                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7130                         probable_sub = 1;
7131                     else if (isALPHA(*s)) {
7132                         char tmpbuf[1024];
7133                         STRLEN tmplen;
7134                         d = s;
7135                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7136                         if (!keyword(tmpbuf, tmplen, 0))
7137                             probable_sub = 1;
7138                         else {
7139                             while (d < PL_bufend && isSPACE(*d))
7140                                 d++;
7141                             if (*d == '=' && d[1] == '>')
7142                                 probable_sub = 1;
7143                         }
7144                     }
7145                     if (probable_sub) {
7146                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7147                                         SVt_PVCV);
7148                         op_free(pl_yylval.opval);
7149                         pl_yylval.opval = rv2cv_op;
7150                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7151                         PL_last_lop = PL_oldbufptr;
7152                         PL_last_lop_op = OP_ENTERSUB;
7153                         PL_nextwhite = PL_thiswhite;
7154                         PL_thiswhite = 0;
7155                         start_force(PL_curforce);
7156                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7157                         PL_expect = XTERM;
7158                         PL_nextwhite = nextPL_nextwhite;
7159                         curmad('X', PL_thistoken);
7160                         PL_thistoken = newSVpvs("");
7161                         force_next(off ? PRIVATEREF : WORD);
7162                         if (!PL_lex_allbrackets &&
7163                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7164                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7165                         TOKEN(NOAMP);
7166                     }
7167 #else
7168                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7169                     PL_expect = XTERM;
7170                     force_next(off ? PRIVATEREF : WORD);
7171                     if (!PL_lex_allbrackets &&
7172                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7173                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7174                     TOKEN(NOAMP);
7175 #endif
7176                 }
7177
7178                 /* Call it a bare word */
7179
7180                 if (PL_hints & HINT_STRICT_SUBS)
7181                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7182                 else {
7183                 bareword:
7184                     /* after "print" and similar functions (corresponding to
7185                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7186                      * a filehandle should be subject to "strict subs".
7187                      * Likewise for the optional indirect-object argument to system
7188                      * or exec, which can't be a bareword */
7189                     if ((PL_last_lop_op == OP_PRINT
7190                             || PL_last_lop_op == OP_PRTF
7191                             || PL_last_lop_op == OP_SAY
7192                             || PL_last_lop_op == OP_SYSTEM
7193                             || PL_last_lop_op == OP_EXEC)
7194                             && (PL_hints & HINT_STRICT_SUBS))
7195                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7196                     if (lastchar != '-') {
7197                         if (ckWARN(WARN_RESERVED)) {
7198                             d = PL_tokenbuf;
7199                             while (isLOWER(*d))
7200                                 d++;
7201                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7202                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7203                                        PL_tokenbuf);
7204                         }
7205                     }
7206                 }
7207                 op_free(rv2cv_op);
7208
7209             safe_bareword:
7210                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7211                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7212                                      "Operator or semicolon missing before %c%"SVf,
7213                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7214                                                     strlen(PL_tokenbuf),
7215                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7216                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7217                                      "Ambiguous use of %c resolved as operator %c",
7218                                      lastchar, lastchar);
7219                 }
7220                 TOKEN(WORD);
7221             }
7222
7223         case KEY___FILE__:
7224             FUN0OP(
7225                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7226             );
7227
7228         case KEY___LINE__:
7229             FUN0OP(
7230                 (OP*)newSVOP(OP_CONST, 0,
7231                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7232             );
7233
7234         case KEY___PACKAGE__:
7235             FUN0OP(
7236                 (OP*)newSVOP(OP_CONST, 0,
7237                                         (PL_curstash
7238                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7239                                          : &PL_sv_undef))
7240             );
7241
7242         case KEY___DATA__:
7243         case KEY___END__: {
7244             GV *gv;
7245             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7246                 const char *pname = "main";
7247                 STRLEN plen = 4;
7248                 U32 putf8 = 0;
7249                 if (PL_tokenbuf[2] == 'D')
7250                 {
7251                     HV * const stash =
7252                         PL_curstash ? PL_curstash : PL_defstash;
7253                     pname = HvNAME_get(stash);
7254                     plen  = HvNAMELEN (stash);
7255                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7256                 }
7257                 gv = gv_fetchpvn_flags(
7258                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7259                         plen+6, GV_ADD|putf8, SVt_PVIO
7260                 );
7261                 GvMULTI_on(gv);
7262                 if (!GvIO(gv))
7263                     GvIOp(gv) = newIO();
7264                 IoIFP(GvIOp(gv)) = PL_rsfp;
7265 #if defined(HAS_FCNTL) && defined(F_SETFD)
7266                 {
7267                     const int fd = PerlIO_fileno(PL_rsfp);
7268                     fcntl(fd,F_SETFD,fd >= 3);
7269                 }
7270 #endif
7271                 /* Mark this internal pseudo-handle as clean */
7272                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7273                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7274                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7275                 else
7276                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7277 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7278                 /* if the script was opened in binmode, we need to revert
7279                  * it to text mode for compatibility; but only iff it has CRs
7280                  * XXX this is a questionable hack at best. */
7281                 if (PL_bufend-PL_bufptr > 2
7282                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7283                 {
7284                     Off_t loc = 0;
7285                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7286                         loc = PerlIO_tell(PL_rsfp);
7287                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7288                     }
7289 #ifdef NETWARE
7290                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7291 #else
7292                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7293 #endif  /* NETWARE */
7294                         if (loc > 0)
7295                             PerlIO_seek(PL_rsfp, loc, 0);
7296                     }
7297                 }
7298 #endif
7299 #ifdef PERLIO_LAYERS
7300                 if (!IN_BYTES) {
7301                     if (UTF)
7302                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7303                     else if (PL_encoding) {
7304                         SV *name;
7305                         dSP;
7306                         ENTER;
7307                         SAVETMPS;
7308                         PUSHMARK(sp);
7309                         EXTEND(SP, 1);
7310                         XPUSHs(PL_encoding);
7311                         PUTBACK;
7312                         call_method("name", G_SCALAR);
7313                         SPAGAIN;
7314                         name = POPs;
7315                         PUTBACK;
7316                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7317                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7318                                                       SVfARG(name)));
7319                         FREETMPS;
7320                         LEAVE;
7321                     }
7322                 }
7323 #endif
7324 #ifdef PERL_MAD
7325                 if (PL_madskills) {
7326                     if (PL_realtokenstart >= 0) {
7327                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7328                         if (!PL_endwhite)
7329                             PL_endwhite = newSVpvs("");
7330                         sv_catsv(PL_endwhite, PL_thiswhite);
7331                         PL_thiswhite = 0;
7332                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7333                         PL_realtokenstart = -1;
7334                     }
7335                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7336                            != NULL) ;
7337                 }
7338 #endif
7339                 PL_rsfp = NULL;
7340             }
7341             goto fake_eof;
7342         }
7343
7344         case KEY___SUB__:
7345             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7346
7347         case KEY_AUTOLOAD:
7348         case KEY_DESTROY:
7349         case KEY_BEGIN:
7350         case KEY_UNITCHECK:
7351         case KEY_CHECK:
7352         case KEY_INIT:
7353         case KEY_END:
7354             if (PL_expect == XSTATE) {
7355                 s = PL_bufptr;
7356                 goto really_sub;
7357             }
7358             goto just_a_word;
7359
7360         case KEY_CORE:
7361             if (*s == ':' && s[1] == ':') {
7362                 STRLEN olen = len;
7363                 d = s;
7364                 s += 2;
7365                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7366                 if ((*s == ':' && s[1] == ':')
7367                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7368                 {
7369                     s = d;
7370                     len = olen;
7371                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7372                     goto just_a_word;
7373                 }
7374                 if (!tmp)
7375                     Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7376                                     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7377                                                 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7378                 if (tmp < 0)
7379                     tmp = -tmp;
7380                 else if (tmp == KEY_require || tmp == KEY_do
7381                       || tmp == KEY_glob)
7382                     /* that's a way to remember we saw "CORE::" */
7383                     orig_keyword = tmp;
7384                 goto reserved_word;
7385             }
7386             goto just_a_word;
7387
7388         case KEY_abs:
7389             UNI(OP_ABS);
7390
7391         case KEY_alarm:
7392             UNI(OP_ALARM);
7393
7394         case KEY_accept:
7395             LOP(OP_ACCEPT,XTERM);
7396
7397         case KEY_and:
7398             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7399                 return REPORT(0);
7400             OPERATOR(ANDOP);
7401
7402         case KEY_atan2:
7403             LOP(OP_ATAN2,XTERM);
7404
7405         case KEY_bind:
7406             LOP(OP_BIND,XTERM);
7407
7408         case KEY_binmode:
7409             LOP(OP_BINMODE,XTERM);
7410
7411         case KEY_bless:
7412             LOP(OP_BLESS,XTERM);
7413
7414         case KEY_break:
7415             FUN0(OP_BREAK);
7416
7417         case KEY_chop:
7418             UNI(OP_CHOP);
7419
7420         case KEY_continue:
7421                     /* We have to disambiguate the two senses of
7422                       "continue". If the next token is a '{' then
7423                       treat it as the start of a continue block;
7424                       otherwise treat it as a control operator.
7425                      */
7426                     s = skipspace(s);
7427                     if (*s == '{')
7428             PREBLOCK(CONTINUE);
7429                     else
7430                         FUN0(OP_CONTINUE);
7431
7432         case KEY_chdir:
7433             /* may use HOME */
7434             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7435             UNI(OP_CHDIR);
7436
7437         case KEY_close:
7438             UNI(OP_CLOSE);
7439
7440         case KEY_closedir:
7441             UNI(OP_CLOSEDIR);
7442
7443         case KEY_cmp:
7444             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7445                 return REPORT(0);
7446             Eop(OP_SCMP);
7447
7448         case KEY_caller:
7449             UNI(OP_CALLER);
7450
7451         case KEY_crypt:
7452 #ifdef FCRYPT
7453             if (!PL_cryptseen) {
7454                 PL_cryptseen = TRUE;
7455                 init_des();
7456             }
7457 #endif
7458             LOP(OP_CRYPT,XTERM);
7459
7460         case KEY_chmod:
7461             LOP(OP_CHMOD,XTERM);
7462
7463         case KEY_chown:
7464             LOP(OP_CHOWN,XTERM);
7465
7466         case KEY_connect:
7467             LOP(OP_CONNECT,XTERM);
7468
7469         case KEY_chr:
7470             UNI(OP_CHR);
7471
7472         case KEY_cos:
7473             UNI(OP_COS);
7474
7475         case KEY_chroot:
7476             UNI(OP_CHROOT);
7477
7478         case KEY_default:
7479             PREBLOCK(DEFAULT);
7480
7481         case KEY_do:
7482             s = SKIPSPACE1(s);
7483             if (*s == '{')
7484                 PRETERMBLOCK(DO);
7485             if (*s != '\'') {
7486                 *PL_tokenbuf = '&';
7487                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7488                               1, &len);
7489                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7490                     d = SKIPSPACE1(d);
7491                     if (*d == '(') {
7492                         force_ident_maybe_lex('&');
7493                         s = d;
7494                     }
7495                 }
7496             }
7497             if (orig_keyword == KEY_do) {
7498                 orig_keyword = 0;
7499                 pl_yylval.ival = 1;
7500             }
7501             else
7502                 pl_yylval.ival = 0;
7503             OPERATOR(DO);
7504
7505         case KEY_die:
7506             PL_hints |= HINT_BLOCK_SCOPE;
7507             LOP(OP_DIE,XTERM);
7508
7509         case KEY_defined:
7510             UNI(OP_DEFINED);
7511
7512         case KEY_delete:
7513             UNI(OP_DELETE);
7514
7515         case KEY_dbmopen:
7516             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7517                               STR_WITH_LEN("NDBM_File::"),
7518                               STR_WITH_LEN("DB_File::"),
7519                               STR_WITH_LEN("GDBM_File::"),
7520                               STR_WITH_LEN("SDBM_File::"),
7521                               STR_WITH_LEN("ODBM_File::"),
7522                               NULL);
7523             LOP(OP_DBMOPEN,XTERM);
7524
7525         case KEY_dbmclose:
7526             UNI(OP_DBMCLOSE);
7527
7528         case KEY_dump:
7529             PL_expect = XOPERATOR;
7530             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7531             LOOPX(OP_DUMP);
7532
7533         case KEY_else:
7534             PREBLOCK(ELSE);
7535
7536         case KEY_elsif:
7537             pl_yylval.ival = CopLINE(PL_curcop);
7538             OPERATOR(ELSIF);
7539
7540         case KEY_eq:
7541             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7542                 return REPORT(0);
7543             Eop(OP_SEQ);
7544
7545         case KEY_exists:
7546             UNI(OP_EXISTS);
7547
7548         case KEY_exit:
7549             if (PL_madskills)
7550                 UNI(OP_INT);
7551             UNI(OP_EXIT);
7552
7553         case KEY_eval:
7554             s = SKIPSPACE1(s);
7555             if (*s == '{') { /* block eval */
7556                 PL_expect = XTERMBLOCK;
7557                 UNIBRACK(OP_ENTERTRY);
7558             }
7559             else { /* string eval */
7560                 PL_expect = XTERM;
7561                 UNIBRACK(OP_ENTEREVAL);
7562             }
7563
7564         case KEY_evalbytes:
7565             PL_expect = XTERM;
7566             UNIBRACK(-OP_ENTEREVAL);
7567
7568         case KEY_eof:
7569             UNI(OP_EOF);
7570
7571         case KEY_exp:
7572             UNI(OP_EXP);
7573
7574         case KEY_each:
7575             UNI(OP_EACH);
7576
7577         case KEY_exec:
7578             LOP(OP_EXEC,XREF);
7579
7580         case KEY_endhostent:
7581             FUN0(OP_EHOSTENT);
7582
7583         case KEY_endnetent:
7584             FUN0(OP_ENETENT);
7585
7586         case KEY_endservent:
7587             FUN0(OP_ESERVENT);
7588
7589         case KEY_endprotoent:
7590             FUN0(OP_EPROTOENT);
7591
7592         case KEY_endpwent:
7593             FUN0(OP_EPWENT);
7594
7595         case KEY_endgrent:
7596             FUN0(OP_EGRENT);
7597
7598         case KEY_for:
7599         case KEY_foreach:
7600             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7601                 return REPORT(0);
7602             pl_yylval.ival = CopLINE(PL_curcop);
7603             s = SKIPSPACE1(s);
7604             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7605                 char *p = s;
7606 #ifdef PERL_MAD
7607                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7608 #endif
7609
7610                 if ((PL_bufend - p) >= 3 &&
7611                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7612                     p += 2;
7613                 else if ((PL_bufend - p) >= 4 &&
7614                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7615                     p += 3;
7616                 p = PEEKSPACE(p);
7617                 if (isIDFIRST_lazy_if(p,UTF)) {
7618                     p = scan_ident(p, PL_bufend,
7619                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7620                     p = PEEKSPACE(p);
7621                 }
7622                 if (*p != '$')
7623                     Perl_croak(aTHX_ "Missing $ on loop variable");
7624 #ifdef PERL_MAD
7625                 s = SvPVX(PL_linestr) + soff;
7626 #endif
7627             }
7628             OPERATOR(FOR);
7629
7630         case KEY_formline:
7631             LOP(OP_FORMLINE,XTERM);
7632
7633         case KEY_fork:
7634             FUN0(OP_FORK);
7635
7636         case KEY_fc:
7637             UNI(OP_FC);
7638
7639         case KEY_fcntl:
7640             LOP(OP_FCNTL,XTERM);
7641
7642         case KEY_fileno:
7643             UNI(OP_FILENO);
7644
7645         case KEY_flock:
7646             LOP(OP_FLOCK,XTERM);
7647
7648         case KEY_gt:
7649             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7650                 return REPORT(0);
7651             Rop(OP_SGT);
7652
7653         case KEY_ge:
7654             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7655                 return REPORT(0);
7656             Rop(OP_SGE);
7657
7658         case KEY_grep:
7659             LOP(OP_GREPSTART, XREF);
7660
7661         case KEY_goto:
7662             PL_expect = XOPERATOR;
7663             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7664             LOOPX(OP_GOTO);
7665
7666         case KEY_gmtime:
7667             UNI(OP_GMTIME);
7668
7669         case KEY_getc:
7670             UNIDOR(OP_GETC);
7671
7672         case KEY_getppid:
7673             FUN0(OP_GETPPID);
7674
7675         case KEY_getpgrp:
7676             UNI(OP_GETPGRP);
7677
7678         case KEY_getpriority:
7679             LOP(OP_GETPRIORITY,XTERM);
7680
7681         case KEY_getprotobyname:
7682             UNI(OP_GPBYNAME);
7683
7684         case KEY_getprotobynumber:
7685             LOP(OP_GPBYNUMBER,XTERM);
7686
7687         case KEY_getprotoent:
7688             FUN0(OP_GPROTOENT);
7689
7690         case KEY_getpwent:
7691             FUN0(OP_GPWENT);
7692
7693         case KEY_getpwnam:
7694             UNI(OP_GPWNAM);
7695
7696         case KEY_getpwuid:
7697             UNI(OP_GPWUID);
7698
7699         case KEY_getpeername:
7700             UNI(OP_GETPEERNAME);
7701
7702         case KEY_gethostbyname:
7703             UNI(OP_GHBYNAME);
7704
7705         case KEY_gethostbyaddr:
7706             LOP(OP_GHBYADDR,XTERM);
7707
7708         case KEY_gethostent:
7709             FUN0(OP_GHOSTENT);
7710
7711         case KEY_getnetbyname:
7712             UNI(OP_GNBYNAME);
7713
7714         case KEY_getnetbyaddr:
7715             LOP(OP_GNBYADDR,XTERM);
7716
7717         case KEY_getnetent:
7718             FUN0(OP_GNETENT);
7719
7720         case KEY_getservbyname:
7721             LOP(OP_GSBYNAME,XTERM);
7722
7723         case KEY_getservbyport:
7724             LOP(OP_GSBYPORT,XTERM);
7725
7726         case KEY_getservent:
7727             FUN0(OP_GSERVENT);
7728
7729         case KEY_getsockname:
7730             UNI(OP_GETSOCKNAME);
7731
7732         case KEY_getsockopt:
7733             LOP(OP_GSOCKOPT,XTERM);
7734
7735         case KEY_getgrent:
7736             FUN0(OP_GGRENT);
7737
7738         case KEY_getgrnam:
7739             UNI(OP_GGRNAM);
7740
7741         case KEY_getgrgid:
7742             UNI(OP_GGRGID);
7743
7744         case KEY_getlogin:
7745             FUN0(OP_GETLOGIN);
7746
7747         case KEY_given:
7748             pl_yylval.ival = CopLINE(PL_curcop);
7749             OPERATOR(GIVEN);
7750
7751         case KEY_glob:
7752             LOP(
7753              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7754              XTERM
7755             );
7756
7757         case KEY_hex:
7758             UNI(OP_HEX);
7759
7760         case KEY_if:
7761             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7762                 return REPORT(0);
7763             pl_yylval.ival = CopLINE(PL_curcop);
7764             OPERATOR(IF);
7765
7766         case KEY_index:
7767             LOP(OP_INDEX,XTERM);
7768
7769         case KEY_int:
7770             UNI(OP_INT);
7771
7772         case KEY_ioctl:
7773             LOP(OP_IOCTL,XTERM);
7774
7775         case KEY_join:
7776             LOP(OP_JOIN,XTERM);
7777
7778         case KEY_keys:
7779             UNI(OP_KEYS);
7780
7781         case KEY_kill:
7782             LOP(OP_KILL,XTERM);
7783
7784         case KEY_last:
7785             PL_expect = XOPERATOR;
7786             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7787             LOOPX(OP_LAST);
7788
7789         case KEY_lc:
7790             UNI(OP_LC);
7791
7792         case KEY_lcfirst:
7793             UNI(OP_LCFIRST);
7794
7795         case KEY_local:
7796             pl_yylval.ival = 0;
7797             OPERATOR(LOCAL);
7798
7799         case KEY_length:
7800             UNI(OP_LENGTH);
7801
7802         case KEY_lt:
7803             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7804                 return REPORT(0);
7805             Rop(OP_SLT);
7806
7807         case KEY_le:
7808             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7809                 return REPORT(0);
7810             Rop(OP_SLE);
7811
7812         case KEY_localtime:
7813             UNI(OP_LOCALTIME);
7814
7815         case KEY_log:
7816             UNI(OP_LOG);
7817
7818         case KEY_link:
7819             LOP(OP_LINK,XTERM);
7820
7821         case KEY_listen:
7822             LOP(OP_LISTEN,XTERM);
7823
7824         case KEY_lock:
7825             UNI(OP_LOCK);
7826
7827         case KEY_lstat:
7828             UNI(OP_LSTAT);
7829
7830         case KEY_m:
7831             s = scan_pat(s,OP_MATCH);
7832             TERM(sublex_start());
7833
7834         case KEY_map:
7835             LOP(OP_MAPSTART, XREF);
7836
7837         case KEY_mkdir:
7838             LOP(OP_MKDIR,XTERM);
7839
7840         case KEY_msgctl:
7841             LOP(OP_MSGCTL,XTERM);
7842
7843         case KEY_msgget:
7844             LOP(OP_MSGGET,XTERM);
7845
7846         case KEY_msgrcv:
7847             LOP(OP_MSGRCV,XTERM);
7848
7849         case KEY_msgsnd:
7850             LOP(OP_MSGSND,XTERM);
7851
7852         case KEY_our:
7853         case KEY_my:
7854         case KEY_state:
7855             PL_in_my = (U16)tmp;
7856             s = SKIPSPACE1(s);
7857             if (isIDFIRST_lazy_if(s,UTF)) {
7858 #ifdef PERL_MAD
7859                 char* start = s;
7860 #endif
7861                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7862                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7863                 {
7864                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7865                         Perl_croak(aTHX_
7866                                   "Experimental \"%s\" subs not enabled",
7867                                    tmp == KEY_my    ? "my"    :
7868                                    tmp == KEY_state ? "state" : "our");
7869                     goto really_sub;
7870                 }
7871                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7872                 if (!PL_in_my_stash) {
7873                     char tmpbuf[1024];
7874                     PL_bufptr = s;
7875                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7876                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7877                 }
7878 #ifdef PERL_MAD
7879                 if (PL_madskills) {     /* just add type to declarator token */
7880                     sv_catsv(PL_thistoken, PL_nextwhite);
7881                     PL_nextwhite = 0;
7882                     sv_catpvn(PL_thistoken, start, s - start);
7883                 }
7884 #endif
7885             }
7886             pl_yylval.ival = 1;
7887             OPERATOR(MY);
7888
7889         case KEY_next:
7890             PL_expect = XOPERATOR;
7891             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7892             LOOPX(OP_NEXT);
7893
7894         case KEY_ne:
7895             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7896                 return REPORT(0);
7897             Eop(OP_SNE);
7898
7899         case KEY_no:
7900             s = tokenize_use(0, s);
7901             TERM(USE);
7902
7903         case KEY_not:
7904             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7905                 FUN1(OP_NOT);
7906             else {
7907                 if (!PL_lex_allbrackets &&
7908                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7909                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7910                 OPERATOR(NOTOP);
7911             }
7912
7913         case KEY_open:
7914             s = SKIPSPACE1(s);
7915             if (isIDFIRST_lazy_if(s,UTF)) {
7916                 const char *t;
7917                 for (d = s; isALNUM_lazy_if(d,UTF);) {
7918                     d += UTF ? UTF8SKIP(d) : 1;
7919                     if (UTF) {
7920                         while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7921                             d += UTF ? UTF8SKIP(d) : 1;
7922                         }
7923                     }
7924                 }
7925                 for (t=d; isSPACE(*t);)
7926                     t++;
7927                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7928                     /* [perl #16184] */
7929                     && !(t[0] == '=' && t[1] == '>')
7930                     && !(t[0] == ':' && t[1] == ':')
7931                     && !keyword(s, d-s, 0)
7932                 ) {
7933                     SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7934                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7935                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7936                            "Precedence problem: open %"SVf" should be open(%"SVf")",
7937                             SVfARG(tmpsv), SVfARG(tmpsv));
7938                 }
7939             }
7940             LOP(OP_OPEN,XTERM);
7941
7942         case KEY_or:
7943             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7944                 return REPORT(0);
7945             pl_yylval.ival = OP_OR;
7946             OPERATOR(OROP);
7947
7948         case KEY_ord:
7949             UNI(OP_ORD);
7950
7951         case KEY_oct:
7952             UNI(OP_OCT);
7953
7954         case KEY_opendir:
7955             LOP(OP_OPEN_DIR,XTERM);
7956
7957         case KEY_print:
7958             checkcomma(s,PL_tokenbuf,"filehandle");
7959             LOP(OP_PRINT,XREF);
7960
7961         case KEY_printf:
7962             checkcomma(s,PL_tokenbuf,"filehandle");
7963             LOP(OP_PRTF,XREF);
7964
7965         case KEY_prototype:
7966             UNI(OP_PROTOTYPE);
7967
7968         case KEY_push:
7969             LOP(OP_PUSH,XTERM);
7970
7971         case KEY_pop:
7972             UNIDOR(OP_POP);
7973
7974         case KEY_pos:
7975             UNIDOR(OP_POS);
7976
7977         case KEY_pack:
7978             LOP(OP_PACK,XTERM);
7979
7980         case KEY_package:
7981             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7982             s = SKIPSPACE1(s);
7983             s = force_strict_version(s);
7984             PL_lex_expect = XBLOCK;
7985             OPERATOR(PACKAGE);
7986
7987         case KEY_pipe:
7988             LOP(OP_PIPE_OP,XTERM);
7989
7990         case KEY_q:
7991             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7992             if (!s)
7993                 missingterm(NULL);
7994             pl_yylval.ival = OP_CONST;
7995             TERM(sublex_start());
7996
7997         case KEY_quotemeta:
7998             UNI(OP_QUOTEMETA);
7999
8000         case KEY_qw: {
8001             OP *words = NULL;
8002             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8003             if (!s)
8004                 missingterm(NULL);
8005             PL_expect = XOPERATOR;
8006             if (SvCUR(PL_lex_stuff)) {
8007                 int warned_comma = !ckWARN(WARN_QW);
8008                 int warned_comment = warned_comma;
8009                 d = SvPV_force(PL_lex_stuff, len);
8010                 while (len) {
8011                     for (; isSPACE(*d) && len; --len, ++d)
8012                         /**/;
8013                     if (len) {
8014                         SV *sv;
8015                         const char *b = d;
8016                         if (!warned_comma || !warned_comment) {
8017                             for (; !isSPACE(*d) && len; --len, ++d) {
8018                                 if (!warned_comma && *d == ',') {
8019                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8020                                         "Possible attempt to separate words with commas");
8021                                     ++warned_comma;
8022                                 }
8023                                 else if (!warned_comment && *d == '#') {
8024                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8025                                         "Possible attempt to put comments in qw() list");
8026                                     ++warned_comment;
8027                                 }
8028                             }
8029                         }
8030                         else {
8031                             for (; !isSPACE(*d) && len; --len, ++d)
8032                                 /**/;
8033                         }
8034                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8035                         words = op_append_elem(OP_LIST, words,
8036                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8037                     }
8038                 }
8039             }
8040             if (!words)
8041                 words = newNULLLIST();
8042             if (PL_lex_stuff) {
8043                 SvREFCNT_dec(PL_lex_stuff);
8044                 PL_lex_stuff = NULL;
8045             }
8046             PL_expect = XOPERATOR;
8047             pl_yylval.opval = sawparens(words);
8048             TOKEN(QWLIST);
8049         }
8050
8051         case KEY_qq:
8052             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8053             if (!s)
8054                 missingterm(NULL);
8055             pl_yylval.ival = OP_STRINGIFY;
8056             if (SvIVX(PL_lex_stuff) == '\'')
8057                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8058             TERM(sublex_start());
8059
8060         case KEY_qr:
8061             s = scan_pat(s,OP_QR);
8062             TERM(sublex_start());
8063
8064         case KEY_qx:
8065             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8066             if (!s)
8067                 missingterm(NULL);
8068             readpipe_override();
8069             TERM(sublex_start());
8070
8071         case KEY_return:
8072             OLDLOP(OP_RETURN);
8073
8074         case KEY_require:
8075             s = SKIPSPACE1(s);
8076             PL_expect = XOPERATOR;
8077             if (isDIGIT(*s)) {
8078                 s = force_version(s, FALSE);
8079             }
8080             else if (*s != 'v' || !isDIGIT(s[1])
8081                     || (s = force_version(s, TRUE), *s == 'v'))
8082             {
8083                 *PL_tokenbuf = '\0';
8084                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8085                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8086                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8087                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8088                 else if (*s == '<')
8089                     yyerror("<> should be quotes");
8090             }
8091             if (orig_keyword == KEY_require) {
8092                 orig_keyword = 0;
8093                 pl_yylval.ival = 1;
8094             }
8095             else
8096                 pl_yylval.ival = 0;
8097             PL_expect = XTERM;
8098             PL_bufptr = s;
8099             PL_last_uni = PL_oldbufptr;
8100             PL_last_lop_op = OP_REQUIRE;
8101             s = skipspace(s);
8102             return REPORT( (int)REQUIRE );
8103
8104         case KEY_reset:
8105             UNI(OP_RESET);
8106
8107         case KEY_redo:
8108             PL_expect = XOPERATOR;
8109             s = force_word(s,WORD,TRUE,FALSE,FALSE);
8110             LOOPX(OP_REDO);
8111
8112         case KEY_rename:
8113             LOP(OP_RENAME,XTERM);
8114
8115         case KEY_rand:
8116             UNI(OP_RAND);
8117
8118         case KEY_rmdir:
8119             UNI(OP_RMDIR);
8120
8121         case KEY_rindex:
8122             LOP(OP_RINDEX,XTERM);
8123
8124         case KEY_read:
8125             LOP(OP_READ,XTERM);
8126
8127         case KEY_readdir:
8128             UNI(OP_READDIR);
8129
8130         case KEY_readline:
8131             UNIDOR(OP_READLINE);
8132
8133         case KEY_readpipe:
8134             UNIDOR(OP_BACKTICK);
8135
8136         case KEY_rewinddir:
8137             UNI(OP_REWINDDIR);
8138
8139         case KEY_recv:
8140             LOP(OP_RECV,XTERM);
8141
8142         case KEY_reverse:
8143             LOP(OP_REVERSE,XTERM);
8144
8145         case KEY_readlink:
8146             UNIDOR(OP_READLINK);
8147
8148         case KEY_ref:
8149             UNI(OP_REF);
8150
8151         case KEY_s:
8152             s = scan_subst(s);
8153             if (pl_yylval.opval)
8154                 TERM(sublex_start());
8155             else
8156                 TOKEN(1);       /* force error */
8157
8158         case KEY_say:
8159             checkcomma(s,PL_tokenbuf,"filehandle");
8160             LOP(OP_SAY,XREF);
8161
8162         case KEY_chomp:
8163             UNI(OP_CHOMP);
8164
8165         case KEY_scalar:
8166             UNI(OP_SCALAR);
8167
8168         case KEY_select:
8169             LOP(OP_SELECT,XTERM);
8170
8171         case KEY_seek:
8172             LOP(OP_SEEK,XTERM);
8173
8174         case KEY_semctl:
8175             LOP(OP_SEMCTL,XTERM);
8176
8177         case KEY_semget:
8178             LOP(OP_SEMGET,XTERM);
8179
8180         case KEY_semop:
8181             LOP(OP_SEMOP,XTERM);
8182
8183         case KEY_send:
8184             LOP(OP_SEND,XTERM);
8185
8186         case KEY_setpgrp:
8187             LOP(OP_SETPGRP,XTERM);
8188
8189         case KEY_setpriority:
8190             LOP(OP_SETPRIORITY,XTERM);
8191
8192         case KEY_sethostent:
8193             UNI(OP_SHOSTENT);
8194
8195         case KEY_setnetent:
8196             UNI(OP_SNETENT);
8197
8198         case KEY_setservent:
8199             UNI(OP_SSERVENT);
8200
8201         case KEY_setprotoent:
8202             UNI(OP_SPROTOENT);
8203
8204         case KEY_setpwent:
8205             FUN0(OP_SPWENT);
8206
8207         case KEY_setgrent:
8208             FUN0(OP_SGRENT);
8209
8210         case KEY_seekdir:
8211             LOP(OP_SEEKDIR,XTERM);
8212
8213         case KEY_setsockopt:
8214             LOP(OP_SSOCKOPT,XTERM);
8215
8216         case KEY_shift:
8217             UNIDOR(OP_SHIFT);
8218
8219         case KEY_shmctl:
8220             LOP(OP_SHMCTL,XTERM);
8221
8222         case KEY_shmget:
8223             LOP(OP_SHMGET,XTERM);
8224
8225         case KEY_shmread:
8226             LOP(OP_SHMREAD,XTERM);
8227
8228         case KEY_shmwrite:
8229             LOP(OP_SHMWRITE,XTERM);
8230
8231         case KEY_shutdown:
8232             LOP(OP_SHUTDOWN,XTERM);
8233
8234         case KEY_sin:
8235             UNI(OP_SIN);
8236
8237         case KEY_sleep:
8238             UNI(OP_SLEEP);
8239
8240         case KEY_socket:
8241             LOP(OP_SOCKET,XTERM);
8242
8243         case KEY_socketpair:
8244             LOP(OP_SOCKPAIR,XTERM);
8245
8246         case KEY_sort:
8247             checkcomma(s,PL_tokenbuf,"subroutine name");
8248             s = SKIPSPACE1(s);
8249             PL_expect = XTERM;
8250             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8251             LOP(OP_SORT,XREF);
8252
8253         case KEY_split:
8254             LOP(OP_SPLIT,XTERM);
8255
8256         case KEY_sprintf:
8257             LOP(OP_SPRINTF,XTERM);
8258
8259         case KEY_splice:
8260             LOP(OP_SPLICE,XTERM);
8261
8262         case KEY_sqrt:
8263             UNI(OP_SQRT);
8264
8265         case KEY_srand:
8266             UNI(OP_SRAND);
8267
8268         case KEY_stat:
8269             UNI(OP_STAT);
8270
8271         case KEY_study:
8272             UNI(OP_STUDY);
8273
8274         case KEY_substr:
8275             LOP(OP_SUBSTR,XTERM);
8276
8277         case KEY_format:
8278         case KEY_sub:
8279           really_sub:
8280             {
8281                 char * const tmpbuf = PL_tokenbuf + 1;
8282                 SSize_t tboffset = 0;
8283                 expectation attrful;
8284                 bool have_name, have_proto;
8285                 const int key = tmp;
8286
8287 #ifdef PERL_MAD
8288                 SV *tmpwhite = 0;
8289
8290                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8291                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8292                 PL_thistoken = 0;
8293
8294                 d = s;
8295                 s = SKIPSPACE2(s,tmpwhite);
8296 #else
8297                 d = s;
8298                 s = skipspace(s);
8299 #endif
8300
8301                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8302                     (*s == ':' && s[1] == ':'))
8303                 {
8304 #ifdef PERL_MAD
8305                     SV *nametoke = NULL;
8306 #endif
8307
8308                     PL_expect = XBLOCK;
8309                     attrful = XATTRBLOCK;
8310                     /* remember buffer pos'n for later force_word */
8311                     tboffset = s - PL_oldbufptr;
8312                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8313                                   &len);
8314 #ifdef PERL_MAD
8315                     if (PL_madskills)
8316                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8317 #endif
8318                     *PL_tokenbuf = '&';
8319                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8320                      || pad_findmy_pvn(
8321                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8322                         ) != NOT_IN_PAD)
8323                         sv_setpvn(PL_subname, tmpbuf, len);
8324                     else {
8325                         sv_setsv(PL_subname,PL_curstname);
8326                         sv_catpvs(PL_subname,"::");
8327                         sv_catpvn(PL_subname,tmpbuf,len);
8328                     }
8329                     if (SvUTF8(PL_linestr))
8330                         SvUTF8_on(PL_subname);
8331                     have_name = TRUE;
8332
8333
8334 #ifdef PERL_MAD
8335                     start_force(0);
8336                     CURMAD('X', nametoke);
8337                     CURMAD('_', tmpwhite);
8338                     force_ident_maybe_lex('&');
8339
8340                     s = SKIPSPACE2(d,tmpwhite);
8341 #else
8342                     s = skipspace(d);
8343 #endif
8344                 }
8345                 else {
8346                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8347                     {
8348                         *d = '\0';
8349                         /* diag_listed_as: Missing name in "%s sub" */
8350                         Perl_croak(aTHX_
8351                                   "Missing name in \"%s\"", PL_bufptr);
8352                     }
8353                     PL_expect = XTERMBLOCK;
8354                     attrful = XATTRTERM;
8355                     sv_setpvs(PL_subname,"?");
8356                     have_name = FALSE;
8357                 }
8358
8359                 if (key == KEY_format) {
8360 #ifdef PERL_MAD
8361                     PL_thistoken = subtoken;
8362                     s = d;
8363 #else
8364                     if (have_name)
8365                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8366                                           FALSE, TRUE, TRUE);
8367 #endif
8368                     PREBLOCK(FORMAT);
8369                 }
8370
8371                 /* Look for a prototype */
8372                 if (*s == '(') {
8373                     char *p;
8374                     bool bad_proto = FALSE;
8375                     bool in_brackets = FALSE;
8376                     char greedy_proto = ' ';
8377                     bool proto_after_greedy_proto = FALSE;
8378                     bool must_be_last = FALSE;
8379                     bool underscore = FALSE;
8380                     bool seen_underscore = FALSE;
8381                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8382                     STRLEN tmplen;
8383
8384                     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8385                     if (!s)
8386                         Perl_croak(aTHX_ "Prototype not terminated");
8387                     /* strip spaces and check for bad characters */
8388                     d = SvPV(PL_lex_stuff, tmplen);
8389                     tmp = 0;
8390                     for (p = d; tmplen; tmplen--, ++p) {
8391                         if (!isSPACE(*p)) {
8392                             d[tmp++] = *p;
8393
8394                             if (warnillegalproto) {
8395                                 if (must_be_last)
8396                                     proto_after_greedy_proto = TRUE;
8397                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8398                                     bad_proto = TRUE;
8399                                 }
8400                                 else {
8401                                     if ( underscore ) {
8402                                         if ( !strchr(";@%", *p) )
8403                                             bad_proto = TRUE;
8404                                         underscore = FALSE;
8405                                     }
8406                                     if ( *p == '[' ) {
8407                                         in_brackets = TRUE;
8408                                     }
8409                                     else if ( *p == ']' ) {
8410                                         in_brackets = FALSE;
8411                                     }
8412                                     else if ( (*p == '@' || *p == '%') &&
8413                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8414                                          !in_brackets ) {
8415                                         must_be_last = TRUE;
8416                                         greedy_proto = *p;
8417                                     }
8418                                     else if ( *p == '_' ) {
8419                                         underscore = seen_underscore = TRUE;
8420                                     }
8421                                 }
8422                             }
8423                         }
8424                     }
8425                     d[tmp] = '\0';
8426                     if (proto_after_greedy_proto)
8427                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8428                                     "Prototype after '%c' for %"SVf" : %s",
8429                                     greedy_proto, SVfARG(PL_subname), d);
8430                     if (bad_proto) {
8431                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8432                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8433                                     "Illegal character %sin prototype for %"SVf" : %s",
8434                                     seen_underscore ? "after '_' " : "",
8435                                     SVfARG(PL_subname),
8436                                     SvUTF8(PL_lex_stuff)
8437                                         ? sv_uni_display(dsv,
8438                                             newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8439                                             tmp,
8440                                             UNI_DISPLAY_ISPRINT)
8441                                         : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8442                                             PERL_PV_ESCAPE_NONASCII));
8443                     }
8444                     SvCUR_set(PL_lex_stuff, tmp);
8445                     have_proto = TRUE;
8446
8447 #ifdef PERL_MAD
8448                     start_force(0);
8449                     CURMAD('q', PL_thisopen);
8450                     CURMAD('_', tmpwhite);
8451                     CURMAD('=', PL_thisstuff);
8452                     CURMAD('Q', PL_thisclose);
8453                     NEXTVAL_NEXTTOKE.opval =
8454                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8455                     PL_lex_stuff = NULL;
8456                     force_next(THING);
8457
8458                     s = SKIPSPACE2(s,tmpwhite);
8459 #else
8460                     s = skipspace(s);
8461 #endif
8462                 }
8463                 else
8464                     have_proto = FALSE;
8465
8466                 if (*s == ':' && s[1] != ':')
8467                     PL_expect = attrful;
8468                 else if (*s != '{' && key == KEY_sub) {
8469                     if (!have_name)
8470                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8471                     else if (*s != ';' && *s != '}')
8472                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8473                 }
8474
8475 #ifdef PERL_MAD
8476                 start_force(0);
8477                 if (tmpwhite) {
8478                     if (PL_madskills)
8479                         curmad('^', newSVpvs(""));
8480                     CURMAD('_', tmpwhite);
8481                 }
8482                 force_next(0);
8483
8484                 PL_thistoken = subtoken;
8485 #else
8486                 if (have_proto) {
8487                     NEXTVAL_NEXTTOKE.opval =
8488                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8489                     PL_lex_stuff = NULL;
8490                     force_next(THING);
8491                 }
8492 #endif
8493                 if (!have_name) {
8494                     if (PL_curstash)
8495                         sv_setpvs(PL_subname, "__ANON__");
8496                     else
8497                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8498                     TOKEN(ANONSUB);
8499                 }
8500 #ifndef PERL_MAD
8501                 force_ident_maybe_lex('&');
8502 #endif
8503                 TOKEN(SUB);
8504             }
8505
8506         case KEY_system:
8507             LOP(OP_SYSTEM,XREF);
8508
8509         case KEY_symlink:
8510             LOP(OP_SYMLINK,XTERM);
8511
8512         case KEY_syscall:
8513             LOP(OP_SYSCALL,XTERM);
8514
8515         case KEY_sysopen:
8516             LOP(OP_SYSOPEN,XTERM);
8517
8518         case KEY_sysseek:
8519             LOP(OP_SYSSEEK,XTERM);
8520
8521         case KEY_sysread:
8522             LOP(OP_SYSREAD,XTERM);
8523
8524         case KEY_syswrite:
8525             LOP(OP_SYSWRITE,XTERM);
8526
8527         case KEY_tr:
8528         case KEY_y:
8529             s = scan_trans(s);
8530             TERM(sublex_start());
8531
8532         case KEY_tell:
8533             UNI(OP_TELL);
8534
8535         case KEY_telldir:
8536             UNI(OP_TELLDIR);
8537
8538         case KEY_tie:
8539             LOP(OP_TIE,XTERM);
8540
8541         case KEY_tied:
8542             UNI(OP_TIED);
8543
8544         case KEY_time:
8545             FUN0(OP_TIME);
8546
8547         case KEY_times:
8548             FUN0(OP_TMS);
8549
8550         case KEY_truncate:
8551             LOP(OP_TRUNCATE,XTERM);
8552
8553         case KEY_uc:
8554             UNI(OP_UC);
8555
8556         case KEY_ucfirst:
8557             UNI(OP_UCFIRST);
8558
8559         case KEY_untie:
8560             UNI(OP_UNTIE);
8561
8562         case KEY_until:
8563             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8564                 return REPORT(0);
8565             pl_yylval.ival = CopLINE(PL_curcop);
8566             OPERATOR(UNTIL);
8567
8568         case KEY_unless:
8569             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8570                 return REPORT(0);
8571             pl_yylval.ival = CopLINE(PL_curcop);
8572             OPERATOR(UNLESS);
8573
8574         case KEY_unlink:
8575             LOP(OP_UNLINK,XTERM);
8576
8577         case KEY_undef:
8578             UNIDOR(OP_UNDEF);
8579
8580         case KEY_unpack:
8581             LOP(OP_UNPACK,XTERM);
8582
8583         case KEY_utime:
8584             LOP(OP_UTIME,XTERM);
8585
8586         case KEY_umask:
8587             UNIDOR(OP_UMASK);
8588
8589         case KEY_unshift:
8590             LOP(OP_UNSHIFT,XTERM);
8591
8592         case KEY_use:
8593             s = tokenize_use(1, s);
8594             OPERATOR(USE);
8595
8596         case KEY_values:
8597             UNI(OP_VALUES);
8598
8599         case KEY_vec:
8600             LOP(OP_VEC,XTERM);
8601
8602         case KEY_when:
8603             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8604                 return REPORT(0);
8605             pl_yylval.ival = CopLINE(PL_curcop);
8606             OPERATOR(WHEN);
8607
8608         case KEY_while:
8609             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8610                 return REPORT(0);
8611             pl_yylval.ival = CopLINE(PL_curcop);
8612             OPERATOR(WHILE);
8613
8614         case KEY_warn:
8615             PL_hints |= HINT_BLOCK_SCOPE;
8616             LOP(OP_WARN,XTERM);
8617
8618         case KEY_wait:
8619             FUN0(OP_WAIT);
8620
8621         case KEY_waitpid:
8622             LOP(OP_WAITPID,XTERM);
8623
8624         case KEY_wantarray:
8625             FUN0(OP_WANTARRAY);
8626
8627         case KEY_write:
8628 #ifdef EBCDIC
8629         {
8630             char ctl_l[2];
8631             ctl_l[0] = toCTRL('L');
8632             ctl_l[1] = '\0';
8633             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8634         }
8635 #else
8636             /* Make sure $^L is defined */
8637             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8638 #endif
8639             UNI(OP_ENTERWRITE);
8640
8641         case KEY_x:
8642             if (PL_expect == XOPERATOR) {
8643                 if (*s == '=' && !PL_lex_allbrackets &&
8644                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8645                     return REPORT(0);
8646                 Mop(OP_REPEAT);
8647             }
8648             check_uni();
8649             goto just_a_word;
8650
8651         case KEY_xor:
8652             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8653                 return REPORT(0);
8654             pl_yylval.ival = OP_XOR;
8655             OPERATOR(OROP);
8656         }
8657     }}
8658 }
8659 #ifdef __SC__
8660 #pragma segment Main
8661 #endif
8662
8663 static int
8664 S_pending_ident(pTHX)
8665 {
8666     dVAR;
8667     PADOFFSET tmp = 0;
8668     const char pit = (char)pl_yylval.ival;
8669     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8670     /* All routes through this function want to know if there is a colon.  */
8671     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8672
8673     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8674           "### Pending identifier '%s'\n", PL_tokenbuf); });
8675
8676     /* if we're in a my(), we can't allow dynamics here.
8677        $foo'bar has already been turned into $foo::bar, so
8678        just check for colons.
8679
8680        if it's a legal name, the OP is a PADANY.
8681     */
8682     if (PL_in_my) {
8683         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8684             if (has_colon)
8685                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8686                                   "variable %s in \"our\"",
8687                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8688             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8689         }
8690         else {
8691             if (has_colon)
8692                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8693                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8694                             UTF ? SVf_UTF8 : 0);
8695
8696             pl_yylval.opval = newOP(OP_PADANY, 0);
8697             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8698                                                         UTF ? SVf_UTF8 : 0);
8699             return PRIVATEREF;
8700         }
8701     }
8702
8703     /*
8704        build the ops for accesses to a my() variable.
8705     */
8706
8707     if (!has_colon) {
8708         if (!PL_in_my)
8709             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8710                                     UTF ? SVf_UTF8 : 0);
8711         if (tmp != NOT_IN_PAD) {
8712             /* might be an "our" variable" */
8713             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8714                 /* build ops for a bareword */
8715                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8716                 HEK * const stashname = HvNAME_HEK(stash);
8717                 SV *  const sym = newSVhek(stashname);
8718                 sv_catpvs(sym, "::");
8719                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8720                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8721                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8722                 if (pit != '&')
8723                   gv_fetchsv(sym,
8724                     (PL_in_eval
8725                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8726                         : GV_ADDMULTI
8727                     ),
8728                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8729                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8730                      : SVt_PVHV));
8731                 return WORD;
8732             }
8733
8734             pl_yylval.opval = newOP(OP_PADANY, 0);
8735             pl_yylval.opval->op_targ = tmp;
8736             return PRIVATEREF;
8737         }
8738     }
8739
8740     /*
8741        Whine if they've said @foo in a doublequoted string,
8742        and @foo isn't a variable we can find in the symbol
8743        table.
8744     */
8745     if (ckWARN(WARN_AMBIGUOUS) &&
8746         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8747         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8748                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8749         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8750                 /* DO NOT warn for @- and @+ */
8751                 && !( PL_tokenbuf[2] == '\0' &&
8752                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8753            )
8754         {
8755             /* Downgraded from fatal to warning 20000522 mjd */
8756             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8757                         "Possible unintended interpolation of %"SVf" in string",
8758                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8759                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8760         }
8761     }
8762
8763     /* build ops for a bareword */
8764     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8765                                    newSVpvn_flags(PL_tokenbuf + 1,
8766                                                       tokenbuf_len - 1,
8767                                                       UTF ? SVf_UTF8 : 0 ));
8768     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8769     if (pit != '&')
8770         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8771                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8772                      | ( UTF ? SVf_UTF8 : 0 ),
8773                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8774                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8775                       : SVt_PVHV));
8776     return WORD;
8777 }
8778
8779 STATIC void
8780 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8781 {
8782     dVAR;
8783
8784     PERL_ARGS_ASSERT_CHECKCOMMA;
8785
8786     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8787         if (ckWARN(WARN_SYNTAX)) {
8788             int level = 1;
8789             const char *w;
8790             for (w = s+2; *w && level; w++) {
8791                 if (*w == '(')
8792                     ++level;
8793                 else if (*w == ')')
8794                     --level;
8795             }
8796             while (isSPACE(*w))
8797                 ++w;
8798             /* the list of chars below is for end of statements or
8799              * block / parens, boolean operators (&&, ||, //) and branch
8800              * constructs (or, and, if, until, unless, while, err, for).
8801              * Not a very solid hack... */
8802             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8803                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8804                             "%s (...) interpreted as function",name);
8805         }
8806     }
8807     while (s < PL_bufend && isSPACE(*s))
8808         s++;
8809     if (*s == '(')
8810         s++;
8811     while (s < PL_bufend && isSPACE(*s))
8812         s++;
8813     if (isIDFIRST_lazy_if(s,UTF)) {
8814         const char * const w = s;
8815         s += UTF ? UTF8SKIP(s) : 1;
8816         while (isALNUM_lazy_if(s,UTF))
8817             s += UTF ? UTF8SKIP(s) : 1;
8818         while (s < PL_bufend && isSPACE(*s))
8819             s++;
8820         if (*s == ',') {
8821             GV* gv;
8822             if (keyword(w, s - w, 0))
8823                 return;
8824
8825             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8826             if (gv && GvCVu(gv))
8827                 return;
8828             Perl_croak(aTHX_ "No comma allowed after %s", what);
8829         }
8830     }
8831 }
8832
8833 /* Either returns sv, or mortalizes sv and returns a new SV*.
8834    Best used as sv=new_constant(..., sv, ...).
8835    If s, pv are NULL, calls subroutine with one argument,
8836    and type is used with error messages only. */
8837
8838 STATIC SV *
8839 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8840                SV *sv, SV *pv, const char *type, STRLEN typelen)
8841 {
8842     dVAR; dSP;
8843     HV * table = GvHV(PL_hintgv);                /* ^H */
8844     SV *res;
8845     SV **cvp;
8846     SV *cv, *typesv;
8847     const char *why1 = "", *why2 = "", *why3 = "";
8848
8849     PERL_ARGS_ASSERT_NEW_CONSTANT;
8850
8851     /* charnames doesn't work well if there have been errors found */
8852     if (PL_error_count > 0 && strEQ(key,"charnames"))
8853         return &PL_sv_undef;
8854
8855     if (!table
8856         || ! (PL_hints & HINT_LOCALIZE_HH)
8857         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8858         || ! SvOK(*cvp))
8859     {
8860         SV *msg;
8861
8862         /* Here haven't found what we're looking for.  If it is charnames,
8863          * perhaps it needs to be loaded.  Try doing that before giving up */
8864         if (strEQ(key,"charnames")) {
8865             Perl_load_module(aTHX_
8866                             0,
8867                             newSVpvs("_charnames"),
8868                              /* version parameter; no need to specify it, as if
8869                               * we get too early a version, will fail anyway,
8870                               * not being able to find '_charnames' */
8871                             NULL,
8872                             newSVpvs(":full"),
8873                             newSVpvs(":short"),
8874                             NULL);
8875             SPAGAIN;
8876             table = GvHV(PL_hintgv);
8877             if (table
8878                 && (PL_hints & HINT_LOCALIZE_HH)
8879                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8880                 && SvOK(*cvp))
8881             {
8882                 goto now_ok;
8883             }
8884         }
8885         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8886             msg = Perl_newSVpvf(aTHX_
8887                             "Constant(%s) unknown", (type ? type: "undef"));
8888         }
8889         else {
8890         why1 = "$^H{";
8891         why2 = key;
8892         why3 = "} is not defined";
8893     report:
8894         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8895                             (type ? type: "undef"), why1, why2, why3);
8896         }
8897         yyerror(SvPVX_const(msg));
8898         SvREFCNT_dec(msg);
8899         return sv;
8900     }
8901 now_ok:
8902     sv_2mortal(sv);                     /* Parent created it permanently */
8903     cv = *cvp;
8904     if (!pv && s)
8905         pv = newSVpvn_flags(s, len, SVs_TEMP);
8906     if (type && pv)
8907         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8908     else
8909         typesv = &PL_sv_undef;
8910
8911     PUSHSTACKi(PERLSI_OVERLOAD);
8912     ENTER ;
8913     SAVETMPS;
8914
8915     PUSHMARK(SP) ;
8916     EXTEND(sp, 3);
8917     if (pv)
8918         PUSHs(pv);
8919     PUSHs(sv);
8920     if (pv)
8921         PUSHs(typesv);
8922     PUTBACK;
8923     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8924
8925     SPAGAIN ;
8926
8927     /* Check the eval first */
8928     if (!PL_in_eval && SvTRUE(ERRSV)) {
8929         sv_catpvs(ERRSV, "Propagated");
8930         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8931         (void)POPs;
8932         res = SvREFCNT_inc_simple(sv);
8933     }
8934     else {
8935         res = POPs;
8936         SvREFCNT_inc_simple_void(res);
8937     }
8938
8939     PUTBACK ;
8940     FREETMPS ;
8941     LEAVE ;
8942     POPSTACK;
8943
8944     if (!SvOK(res)) {
8945         why1 = "Call to &{$^H{";
8946         why2 = key;
8947         why3 = "}} did not return a defined value";
8948         sv = res;
8949         goto report;
8950     }
8951
8952     return res;
8953 }
8954
8955 /* Returns a NUL terminated string, with the length of the string written to
8956    *slp
8957    */
8958 STATIC char *
8959 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8960 {
8961     dVAR;
8962     char *d = dest;
8963     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8964
8965     PERL_ARGS_ASSERT_SCAN_WORD;
8966
8967     for (;;) {
8968         if (d >= e)
8969             Perl_croak(aTHX_ ident_too_long);
8970         if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
8971             *d++ = *s++;
8972         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8973             *d++ = ':';
8974             *d++ = ':';
8975             s++;
8976         }
8977         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8978             *d++ = *s++;
8979             *d++ = *s++;
8980         }
8981         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8982             char *t = s + UTF8SKIP(s);
8983             size_t len;
8984             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8985                 t += UTF8SKIP(t);
8986             len = t - s;
8987             if (d + len > e)
8988                 Perl_croak(aTHX_ ident_too_long);
8989             Copy(s, d, len, char);
8990             d += len;
8991             s = t;
8992         }
8993         else {
8994             *d = '\0';
8995             *slp = d - dest;
8996             return s;
8997         }
8998     }
8999 }
9000
9001 STATIC char *
9002 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9003 {
9004     dVAR;
9005     char *bracket = NULL;
9006     char funny = *s++;
9007     char *d = dest;
9008     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9009
9010     PERL_ARGS_ASSERT_SCAN_IDENT;
9011
9012     if (isSPACE(*s))
9013         s = PEEKSPACE(s);
9014     if (isDIGIT(*s)) {
9015         while (isDIGIT(*s)) {
9016             if (d >= e)
9017                 Perl_croak(aTHX_ ident_too_long);
9018             *d++ = *s++;
9019         }
9020     }
9021     else {
9022         for (;;) {
9023             if (d >= e)
9024                 Perl_croak(aTHX_ ident_too_long);
9025             if (isALNUM(*s))    /* UTF handled below */
9026                 *d++ = *s++;
9027             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9028                 *d++ = ':';
9029                 *d++ = ':';
9030                 s++;
9031             }
9032             else if (*s == ':' && s[1] == ':') {
9033                 *d++ = *s++;
9034                 *d++ = *s++;
9035             }
9036             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9037                 char *t = s + UTF8SKIP(s);
9038                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9039                     t += UTF8SKIP(t);
9040                 if (d + (t - s) > e)
9041                     Perl_croak(aTHX_ ident_too_long);
9042                 Copy(s, d, t - s, char);
9043                 d += t - s;
9044                 s = t;
9045             }
9046             else
9047                 break;
9048         }
9049     }
9050     *d = '\0';
9051     d = dest;
9052     if (*d) {
9053         if (PL_lex_state != LEX_NORMAL)
9054             PL_lex_state = LEX_INTERPENDMAYBE;
9055         return s;
9056     }
9057     if (*s == '$' && s[1] &&
9058         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9059     {
9060         return s;
9061     }
9062     if (*s == '{') {
9063         bracket = s;
9064         s++;
9065     }
9066     if (s < send) {
9067         if (UTF) {
9068             const STRLEN skip = UTF8SKIP(s);
9069             STRLEN i;
9070             d[skip] = '\0';
9071             for ( i = 0; i < skip; i++ )
9072                 d[i] = *s++;
9073         }
9074         else {
9075             *d = *s++;
9076             d[1] = '\0';
9077         }
9078     }
9079     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9080         *d = toCTRL(*s);
9081         s++;
9082     }
9083     else if (ck_uni && !bracket)
9084         check_uni();
9085     if (bracket) {
9086         if (isSPACE(s[-1])) {
9087             while (s < send) {
9088                 const char ch = *s++;
9089                 if (!SPACE_OR_TAB(ch)) {
9090                     *d = ch;
9091                     break;
9092                 }
9093             }
9094         }
9095         if (isIDFIRST_lazy_if(d,UTF)) {
9096             d += UTF8SKIP(d);
9097             if (UTF) {
9098                 char *end = s;
9099                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9100                     end += UTF8SKIP(end);
9101                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9102                         end += UTF8SKIP(end);
9103                 }
9104                 Copy(s, d, end - s, char);
9105                 d += end - s;
9106                 s = end;
9107             }
9108             else {
9109                 while ((isALNUM(*s) || *s == ':') && d < e)
9110                     *d++ = *s++;
9111                 if (d >= e)
9112                     Perl_croak(aTHX_ ident_too_long);
9113             }
9114             *d = '\0';
9115             while (s < send && SPACE_OR_TAB(*s))
9116                 s++;
9117             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9118                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9119                     const char * const brack =
9120                         (const char *)
9121                         ((*s == '[') ? "[...]" : "{...}");
9122    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9123                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9124                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9125                         funny, dest, brack, funny, dest, brack);
9126                 }
9127                 bracket++;
9128                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9129                 PL_lex_allbrackets++;
9130                 return s;
9131             }
9132         }
9133         /* Handle extended ${^Foo} variables
9134          * 1999-02-27 mjd-perl-patch@plover.com */
9135         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9136                  && isALNUM(*s))
9137         {
9138             d++;
9139             while (isALNUM(*s) && d < e) {
9140                 *d++ = *s++;
9141             }
9142             if (d >= e)
9143                 Perl_croak(aTHX_ ident_too_long);
9144             *d = '\0';
9145         }
9146         if (*s == '}') {
9147             s++;
9148             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9149                 PL_lex_state = LEX_INTERPEND;
9150                 PL_expect = XREF;
9151             }
9152             if (PL_lex_state == LEX_NORMAL) {
9153                 if (ckWARN(WARN_AMBIGUOUS) &&
9154                     (keyword(dest, d - dest, 0)
9155                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9156                 {
9157                     SV *tmp = newSVpvn_flags( dest, d - dest,
9158                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9159                     if (funny == '#')
9160                         funny = '@';
9161                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9162                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9163                         funny, tmp, funny, tmp);
9164                 }
9165             }
9166         }
9167         else {
9168             s = bracket;                /* let the parser handle it */
9169             *dest = '\0';
9170         }
9171     }
9172     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9173         PL_lex_state = LEX_INTERPEND;
9174     return s;
9175 }
9176
9177 static bool
9178 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9179
9180     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9181      * the parse starting at 's', based on the subset that are valid in this
9182      * context input to this routine in 'valid_flags'. Advances s.  Returns
9183      * TRUE if the input should be treated as a valid flag, so the next char
9184      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9185      * first call on the current regex.  This routine will set it to any
9186      * charset modifier found.  The caller shouldn't change it.  This way,
9187      * another charset modifier encountered in the parse can be detected as an
9188      * error, as we have decided to allow only one */
9189
9190     const char c = **s;
9191     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9192
9193     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9194         if (isALNUM_lazy_if(*s, UTF)) {
9195             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9196                        UTF ? SVf_UTF8 : 0);
9197             (*s) += charlen;
9198             /* Pretend that it worked, so will continue processing before
9199              * dieing */
9200             return TRUE;
9201         }
9202         return FALSE;
9203     }
9204
9205     switch (c) {
9206
9207         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9208         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9209         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9210         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9211         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9212         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9213         case LOCALE_PAT_MOD:
9214             if (*charset) {
9215                 goto multiple_charsets;
9216             }
9217             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9218             *charset = c;
9219             break;
9220         case UNICODE_PAT_MOD:
9221             if (*charset) {
9222                 goto multiple_charsets;
9223             }
9224             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9225             *charset = c;
9226             break;
9227         case ASCII_RESTRICT_PAT_MOD:
9228             if (! *charset) {
9229                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9230             }
9231             else {
9232
9233                 /* Error if previous modifier wasn't an 'a', but if it was, see
9234                  * if, and accept, a second occurrence (only) */
9235                 if (*charset != 'a'
9236                     || get_regex_charset(*pmfl)
9237                         != REGEX_ASCII_RESTRICTED_CHARSET)
9238                 {
9239                         goto multiple_charsets;
9240                 }
9241                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9242             }
9243             *charset = c;
9244             break;
9245         case DEPENDS_PAT_MOD:
9246             if (*charset) {
9247                 goto multiple_charsets;
9248             }
9249             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9250             *charset = c;
9251             break;
9252     }
9253
9254     (*s)++;
9255     return TRUE;
9256
9257     multiple_charsets:
9258         if (*charset != c) {
9259             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9260         }
9261         else if (c == 'a') {
9262             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9263         }
9264         else {
9265             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9266         }
9267
9268         /* Pretend that it worked, so will continue processing before dieing */
9269         (*s)++;
9270         return TRUE;
9271 }
9272
9273 STATIC char *
9274 S_scan_pat(pTHX_ char *start, I32 type)
9275 {
9276     dVAR;
9277     PMOP *pm;
9278     char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9279     const char * const valid_flags =
9280         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9281     char charset = '\0';    /* character set modifier */
9282 #ifdef PERL_MAD
9283     char *modstart;
9284 #endif
9285
9286     PERL_ARGS_ASSERT_SCAN_PAT;
9287
9288     /* this was only needed for the initial scan_str; set it to false
9289      * so that any (?{}) code blocks etc are parsed normally */
9290     PL_reg_state.re_reparsing = FALSE;
9291     if (!s) {
9292         const char * const delimiter = skipspace(start);
9293         Perl_croak(aTHX_
9294                    (const char *)
9295                    (*delimiter == '?'
9296                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9297                     : "Search pattern not terminated" ));
9298     }
9299
9300     pm = (PMOP*)newPMOP(type, 0);
9301     if (PL_multi_open == '?') {
9302         /* This is the only point in the code that sets PMf_ONCE:  */
9303         pm->op_pmflags |= PMf_ONCE;
9304
9305         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9306            allows us to restrict the list needed by reset to just the ??
9307            matches.  */
9308         assert(type != OP_TRANS);
9309         if (PL_curstash) {
9310             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9311             U32 elements;
9312             if (!mg) {
9313                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9314                                  0);
9315             }
9316             elements = mg->mg_len / sizeof(PMOP**);
9317             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9318             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9319             mg->mg_len = elements * sizeof(PMOP**);
9320             PmopSTASH_set(pm,PL_curstash);
9321         }
9322     }
9323 #ifdef PERL_MAD
9324     modstart = s;
9325 #endif
9326
9327     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9328      * anon CV. False positives like qr/[(?{]/ are harmless */
9329
9330     if (type == OP_QR) {
9331         STRLEN len;
9332         char *e, *p = SvPV(PL_lex_stuff, len);
9333         e = p + len;
9334         for (; p < e; p++) {
9335             if (p[0] == '(' && p[1] == '?'
9336                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9337             {
9338                 pm->op_pmflags |= PMf_HAS_CV;
9339                 break;
9340             }
9341         }
9342         pm->op_pmflags |= PMf_IS_QR;
9343     }
9344
9345     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9346 #ifdef PERL_MAD
9347     if (PL_madskills && modstart != s) {
9348         SV* tmptoken = newSVpvn(modstart, s - modstart);
9349         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9350     }
9351 #endif
9352     /* issue a warning if /c is specified,but /g is not */
9353     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9354     {
9355         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9356                        "Use of /c modifier is meaningless without /g" );
9357     }
9358
9359     PL_lex_op = (OP*)pm;
9360     pl_yylval.ival = OP_MATCH;
9361     return s;
9362 }
9363
9364 STATIC char *
9365 S_scan_subst(pTHX_ char *start)
9366 {
9367     dVAR;
9368     char *s;
9369     PMOP *pm;
9370     I32 first_start;
9371     I32 es = 0;
9372     char charset = '\0';    /* character set modifier */
9373 #ifdef PERL_MAD
9374     char *modstart;
9375 #endif
9376
9377     PERL_ARGS_ASSERT_SCAN_SUBST;
9378
9379     pl_yylval.ival = OP_NULL;
9380
9381     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9382
9383     if (!s)
9384         Perl_croak(aTHX_ "Substitution pattern not terminated");
9385
9386     if (s[-1] == PL_multi_open)
9387         s--;
9388 #ifdef PERL_MAD
9389     if (PL_madskills) {
9390         CURMAD('q', PL_thisopen);
9391         CURMAD('_', PL_thiswhite);
9392         CURMAD('E', PL_thisstuff);
9393         CURMAD('Q', PL_thisclose);
9394         PL_realtokenstart = s - SvPVX(PL_linestr);
9395     }
9396 #endif
9397
9398     first_start = PL_multi_start;
9399     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9400     if (!s) {
9401         if (PL_lex_stuff) {
9402             SvREFCNT_dec(PL_lex_stuff);
9403             PL_lex_stuff = NULL;
9404         }
9405         Perl_croak(aTHX_ "Substitution replacement not terminated");
9406     }
9407     PL_multi_start = first_start;       /* so whole substitution is taken together */
9408
9409     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9410
9411 #ifdef PERL_MAD
9412     if (PL_madskills) {
9413         CURMAD('z', PL_thisopen);
9414         CURMAD('R', PL_thisstuff);
9415         CURMAD('Z', PL_thisclose);
9416     }
9417     modstart = s;
9418 #endif
9419
9420     while (*s) {
9421         if (*s == EXEC_PAT_MOD) {
9422             s++;
9423             es++;
9424         }
9425         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9426         {
9427             break;
9428         }
9429     }
9430
9431 #ifdef PERL_MAD
9432     if (PL_madskills) {
9433         if (modstart != s)
9434             curmad('m', newSVpvn(modstart, s - modstart));
9435         append_madprops(PL_thismad, (OP*)pm, 0);
9436         PL_thismad = 0;
9437     }
9438 #endif
9439     if ((pm->op_pmflags & PMf_CONTINUE)) {
9440         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9441     }
9442
9443     if (es) {
9444         SV * const repl = newSVpvs("");
9445
9446         PL_multi_end = 0;
9447         pm->op_pmflags |= PMf_EVAL;
9448         while (es-- > 0) {
9449             if (es)
9450                 sv_catpvs(repl, "eval ");
9451             else
9452                 sv_catpvs(repl, "do ");
9453         }
9454         sv_catpvs(repl, "{");
9455         sv_catsv(repl, PL_sublex_info.repl);
9456         sv_catpvs(repl, "}");
9457         SvEVALED_on(repl);
9458         SvREFCNT_dec(PL_sublex_info.repl);
9459         PL_sublex_info.repl = repl;
9460     }
9461
9462     PL_lex_op = (OP*)pm;
9463     pl_yylval.ival = OP_SUBST;
9464     return s;
9465 }
9466
9467 STATIC char *
9468 S_scan_trans(pTHX_ char *start)
9469 {
9470     dVAR;
9471     char* s;
9472     OP *o;
9473     U8 squash;
9474     U8 del;
9475     U8 complement;
9476     bool nondestruct = 0;
9477 #ifdef PERL_MAD
9478     char *modstart;
9479 #endif
9480
9481     PERL_ARGS_ASSERT_SCAN_TRANS;
9482
9483     pl_yylval.ival = OP_NULL;
9484
9485     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9486     if (!s)
9487         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9488
9489     if (s[-1] == PL_multi_open)
9490         s--;
9491 #ifdef PERL_MAD
9492     if (PL_madskills) {
9493         CURMAD('q', PL_thisopen);
9494         CURMAD('_', PL_thiswhite);
9495         CURMAD('E', PL_thisstuff);
9496         CURMAD('Q', PL_thisclose);
9497         PL_realtokenstart = s - SvPVX(PL_linestr);
9498     }
9499 #endif
9500
9501     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9502     if (!s) {
9503         if (PL_lex_stuff) {
9504             SvREFCNT_dec(PL_lex_stuff);
9505             PL_lex_stuff = NULL;
9506         }
9507         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9508     }
9509     if (PL_madskills) {
9510         CURMAD('z', PL_thisopen);
9511         CURMAD('R', PL_thisstuff);
9512         CURMAD('Z', PL_thisclose);
9513     }
9514
9515     complement = del = squash = 0;
9516 #ifdef PERL_MAD
9517     modstart = s;
9518 #endif
9519     while (1) {
9520         switch (*s) {
9521         case 'c':
9522             complement = OPpTRANS_COMPLEMENT;
9523             break;
9524         case 'd':
9525             del = OPpTRANS_DELETE;
9526             break;
9527         case 's':
9528             squash = OPpTRANS_SQUASH;
9529             break;
9530         case 'r':
9531             nondestruct = 1;
9532             break;
9533         default:
9534             goto no_more;
9535         }
9536         s++;
9537     }
9538   no_more:
9539
9540     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9541     o->op_private &= ~OPpTRANS_ALL;
9542     o->op_private |= del|squash|complement|
9543       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9544       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9545
9546     PL_lex_op = o;
9547     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9548
9549 #ifdef PERL_MAD
9550     if (PL_madskills) {
9551         if (modstart != s)
9552             curmad('m', newSVpvn(modstart, s - modstart));
9553         append_madprops(PL_thismad, o, 0);
9554         PL_thismad = 0;
9555     }
9556 #endif
9557
9558     return s;
9559 }
9560
9561 /* scan_heredoc
9562    Takes a pointer to the first < in <<FOO.
9563    Returns a pointer to the byte following <<FOO.
9564
9565    This function scans a heredoc, which involves different methods
9566    depending on whether we are in a string eval, quoted construct, etc.
9567    This is because PL_linestr could containing a single line of input, or
9568    a whole string being evalled, or the contents of the current quote-
9569    like operator.
9570
9571    The two basic methods are:
9572     - Steal lines from the input stream
9573     - Scan the heredoc in PL_linestr and remove it therefrom
9574
9575    In a file scope or filtered eval, the first method is used; in a
9576    string eval, the second.
9577
9578    In a quote-like operator, we have to choose between the two,
9579    depending on where we can find a newline.  We peek into outer lex-
9580    ing scopes until we find one with a newline in it.  If we reach the
9581    outermost lexing scope and it is a file, we use the stream method.
9582    Otherwise it is treated as an eval.
9583 */
9584
9585 STATIC char *
9586 S_scan_heredoc(pTHX_ register char *s)
9587 {
9588     dVAR;
9589     I32 op_type = OP_SCALAR;
9590     I32 len;
9591     SV *tmpstr;
9592     char term;
9593     char *d;
9594     char *e;
9595     char *peek;
9596     const bool infile = PL_rsfp || PL_parser->filtered;
9597     LEXSHARED *shared = PL_parser->lex_shared;
9598 #ifdef PERL_MAD
9599     I32 stuffstart = s - SvPVX(PL_linestr);
9600     char *tstart;
9601
9602     PL_realtokenstart = -1;
9603 #endif
9604
9605     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9606
9607     s += 2;
9608     d = PL_tokenbuf + 1;
9609     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9610     *PL_tokenbuf = '\n';
9611     peek = s;
9612     while (SPACE_OR_TAB(*peek))
9613         peek++;
9614     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9615         s = peek;
9616         term = *s++;
9617         s = delimcpy(d, e, s, PL_bufend, term, &len);
9618         if (s == PL_bufend)
9619             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9620         d += len;
9621         s++;
9622     }
9623     else {
9624         if (*s == '\\')
9625             /* <<\FOO is equivalent to <<'FOO' */
9626             s++, term = '\'';
9627         else
9628             term = '"';
9629         if (!isALNUM_lazy_if(s,UTF))
9630             deprecate("bare << to mean <<\"\"");
9631         for (; isALNUM_lazy_if(s,UTF); s++) {
9632             if (d < e)
9633                 *d++ = *s;
9634         }
9635     }
9636     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9637         Perl_croak(aTHX_ "Delimiter for here document is too long");
9638     *d++ = '\n';
9639     *d = '\0';
9640     len = d - PL_tokenbuf;
9641
9642 #ifdef PERL_MAD
9643     if (PL_madskills) {
9644         tstart = PL_tokenbuf + 1;
9645         PL_thisclose = newSVpvn(tstart, len - 1);
9646         tstart = SvPVX(PL_linestr) + stuffstart;
9647         PL_thisopen = newSVpvn(tstart, s - tstart);
9648         stuffstart = s - SvPVX(PL_linestr);
9649     }
9650 #endif
9651 #ifndef PERL_STRICT_CR
9652     d = strchr(s, '\r');
9653     if (d) {
9654         char * const olds = s;
9655         s = d;
9656         while (s < PL_bufend) {
9657             if (*s == '\r') {
9658                 *d++ = '\n';
9659                 if (*++s == '\n')
9660                     s++;
9661             }
9662             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9663                 *d++ = *s++;
9664                 s++;
9665             }
9666             else
9667                 *d++ = *s++;
9668         }
9669         *d = '\0';
9670         PL_bufend = d;
9671         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9672         s = olds;
9673     }
9674 #endif
9675 #ifdef PERL_MAD
9676     if (PL_madskills) {
9677         tstart = SvPVX(PL_linestr) + stuffstart;
9678         if (PL_thisstuff)
9679             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9680         else
9681             PL_thisstuff = newSVpvn(tstart, s - tstart);
9682     }
9683
9684     stuffstart = s - SvPVX(PL_linestr);
9685 #endif
9686
9687     tmpstr = newSV_type(SVt_PVIV);
9688     SvGROW(tmpstr, 80);
9689     if (term == '\'') {
9690         op_type = OP_CONST;
9691         SvIV_set(tmpstr, -1);
9692     }
9693     else if (term == '`') {
9694         op_type = OP_BACKTICK;
9695         SvIV_set(tmpstr, '\\');
9696     }
9697
9698     PL_multi_start = CopLINE(PL_curcop) + 1;
9699     PL_multi_open = PL_multi_close = '<';
9700     /* inside a string eval or quote-like operator */
9701     if (!infile || PL_lex_inwhat) {
9702         SV *linestr;
9703         char *bufend;
9704         char * const olds = s;
9705         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9706         /* These two fields are not set until an inner lexing scope is
9707            entered.  But we need them set here. */
9708         shared->ls_bufptr  = s;
9709         shared->ls_linestr = PL_linestr;
9710         if (PL_lex_inwhat)
9711           /* Look for a newline.  If the current buffer does not have one,
9712              peek into the line buffer of the parent lexing scope, going
9713              up as many levels as necessary to find one with a newline
9714              after bufptr.
9715            */
9716           while (!(s = (char *)memchr(
9717                     (void *)shared->ls_bufptr, '\n',
9718                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9719                 ))) {
9720             shared = shared->ls_prev;
9721             /* shared is only null if we have gone beyond the outermost
9722                lexing scope.  In a file, we will have broken out of the
9723                loop in the previous iteration.  In an eval, the string buf-
9724                fer ends with "\n;", so the while condition below will have
9725                evaluated to false.  So shared can never be null. */
9726             assert(shared);
9727             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9728                most lexing scope.  In a file, shared->ls_linestr at that
9729                level is just one line, so there is no body to steal. */
9730             if (infile && !shared->ls_prev) {
9731                 s = olds;
9732                 goto streaming;
9733             }
9734           }
9735         else {  /* eval */
9736             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9737             assert(s);
9738         }
9739         linestr = shared->ls_linestr;
9740         bufend = SvEND(linestr);
9741         d = s;
9742         while (s < bufend &&
9743           (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9744             if (*s++ == '\n')
9745                 ++shared->herelines;
9746         }
9747         if (s >= bufend) {
9748             goto interminable;
9749         }
9750         sv_setpvn(tmpstr,d+1,s-d);
9751 #ifdef PERL_MAD
9752         if (PL_madskills) {
9753             if (PL_thisstuff)
9754                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9755             else
9756                 PL_thisstuff = newSVpvn(d + 1, s - d);
9757             stuffstart = s - SvPVX(PL_linestr);
9758         }
9759 #endif
9760         s += len - 1;
9761         /* the preceding stmt passes a newline */
9762         shared->herelines++;
9763
9764         /* s now points to the newline after the heredoc terminator.
9765            d points to the newline before the body of the heredoc.
9766          */
9767
9768         /* We are going to modify linestr in place here, so set
9769            aside copies of the string if necessary for re-evals or
9770            (caller $n)[6]. */
9771         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9772            check shared->re_eval_str. */
9773         if (shared->re_eval_start || shared->re_eval_str) {
9774             /* Set aside the rest of the regexp */
9775             if (!shared->re_eval_str)
9776                 shared->re_eval_str =
9777                        newSVpvn(shared->re_eval_start,
9778                                 bufend - shared->re_eval_start);
9779             shared->re_eval_start -= s-d;
9780         }
9781         if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9782          && cx->blk_eval.cur_text == linestr) {
9783             cx->blk_eval.cur_text = newSVsv(linestr);
9784             SvSCREAM_on(cx->blk_eval.cur_text);
9785         }
9786         /* Copy everything from s onwards back to d. */
9787         Move(s,d,bufend-s + 1,char);
9788         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9789         /* Setting PL_bufend only applies when we have not dug deeper
9790            into other scopes, because sublex_done sets PL_bufend to
9791            SvEND(PL_linestr). */
9792         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9793         s = olds;
9794     }
9795     else
9796     {
9797       SV *linestr_save;
9798      streaming:
9799       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9800       term = PL_tokenbuf[1];
9801       len--;
9802       linestr_save = PL_linestr; /* must restore this afterwards */
9803       d = s;                     /* and this */
9804       PL_linestr = newSVpvs("");
9805       PL_bufend = SvPVX(PL_linestr);
9806       while (1) {
9807 #ifdef PERL_MAD
9808         if (PL_madskills) {
9809             tstart = SvPVX(PL_linestr) + stuffstart;
9810             if (PL_thisstuff)
9811                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9812             else
9813                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9814         }
9815 #endif
9816         PL_bufptr = PL_bufend;
9817         CopLINE_set(PL_curcop,
9818                     PL_multi_start + shared->herelines);
9819         if (!lex_next_chunk(LEX_NO_TERM)
9820          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9821             SvREFCNT_dec(linestr_save);
9822             goto interminable;
9823         }
9824         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9825         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9826             lex_grow_linestr(SvCUR(PL_linestr) + 2);
9827             sv_catpvs(PL_linestr, "\n\0");
9828         }
9829         s = PL_bufptr;
9830 #ifdef PERL_MAD
9831         stuffstart = s - SvPVX(PL_linestr);
9832 #endif
9833         shared->herelines++;
9834         PL_last_lop = PL_last_uni = NULL;
9835 #ifndef PERL_STRICT_CR
9836         if (PL_bufend - PL_linestart >= 2) {
9837             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9838                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9839             {
9840                 PL_bufend[-2] = '\n';
9841                 PL_bufend--;
9842                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9843             }
9844             else if (PL_bufend[-1] == '\r')
9845                 PL_bufend[-1] = '\n';
9846         }
9847         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9848             PL_bufend[-1] = '\n';
9849 #endif
9850         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9851             SvREFCNT_dec(PL_linestr);
9852             PL_linestr = linestr_save;
9853             PL_linestart = SvPVX(linestr_save);
9854             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9855             s = d;
9856             break;
9857         }
9858         else {
9859             sv_catsv(tmpstr,PL_linestr);
9860         }
9861       }
9862     }
9863     PL_multi_end = CopLINE(PL_curcop);
9864     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9865         SvPV_shrink_to_cur(tmpstr);
9866     }
9867     if (!IN_BYTES) {
9868         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9869             SvUTF8_on(tmpstr);
9870         else if (PL_encoding)
9871             sv_recode_to_utf8(tmpstr, PL_encoding);
9872     }
9873     PL_lex_stuff = tmpstr;
9874     pl_yylval.ival = op_type;
9875     return s;
9876
9877   interminable:
9878     SvREFCNT_dec(tmpstr);
9879     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9880     missingterm(PL_tokenbuf + 1);
9881 }
9882
9883 /* scan_inputsymbol
9884    takes: current position in input buffer
9885    returns: new position in input buffer
9886    side-effects: pl_yylval and lex_op are set.
9887
9888    This code handles:
9889
9890    <>           read from ARGV
9891    <FH>         read from filehandle
9892    <pkg::FH>    read from package qualified filehandle
9893    <pkg'FH>     read from package qualified filehandle
9894    <$fh>        read from filehandle in $fh
9895    <*.h>        filename glob
9896
9897 */
9898
9899 STATIC char *
9900 S_scan_inputsymbol(pTHX_ char *start)
9901 {
9902     dVAR;
9903     char *s = start;            /* current position in buffer */
9904     char *end;
9905     I32 len;
9906     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9907     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9908
9909     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9910
9911     end = strchr(s, '\n');
9912     if (!end)
9913         end = PL_bufend;
9914     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9915
9916     /* die if we didn't have space for the contents of the <>,
9917        or if it didn't end, or if we see a newline
9918     */
9919
9920     if (len >= (I32)sizeof PL_tokenbuf)
9921         Perl_croak(aTHX_ "Excessively long <> operator");
9922     if (s >= end)
9923         Perl_croak(aTHX_ "Unterminated <> operator");
9924
9925     s++;
9926
9927     /* check for <$fh>
9928        Remember, only scalar variables are interpreted as filehandles by
9929        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9930        treated as a glob() call.
9931        This code makes use of the fact that except for the $ at the front,
9932        a scalar variable and a filehandle look the same.
9933     */
9934     if (*d == '$' && d[1]) d++;
9935
9936     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9937     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9938         d += UTF ? UTF8SKIP(d) : 1;
9939
9940     /* If we've tried to read what we allow filehandles to look like, and
9941        there's still text left, then it must be a glob() and not a getline.
9942        Use scan_str to pull out the stuff between the <> and treat it
9943        as nothing more than a string.
9944     */
9945
9946     if (d - PL_tokenbuf != len) {
9947         pl_yylval.ival = OP_GLOB;
9948         s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9949         if (!s)
9950            Perl_croak(aTHX_ "Glob not terminated");
9951         return s;
9952     }
9953     else {
9954         bool readline_overriden = FALSE;
9955         GV *gv_readline;
9956         GV **gvp;
9957         /* we're in a filehandle read situation */
9958         d = PL_tokenbuf;
9959
9960         /* turn <> into <ARGV> */
9961         if (!len)
9962             Copy("ARGV",d,5,char);
9963
9964         /* Check whether readline() is overriden */
9965         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9966         if ((gv_readline
9967                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9968                 ||
9969                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9970                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9971                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9972             readline_overriden = TRUE;
9973
9974         /* if <$fh>, create the ops to turn the variable into a
9975            filehandle
9976         */
9977         if (*d == '$') {
9978             /* try to find it in the pad for this block, otherwise find
9979                add symbol table ops
9980             */
9981             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9982             if (tmp != NOT_IN_PAD) {
9983                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9984                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9985                     HEK * const stashname = HvNAME_HEK(stash);
9986                     SV * const sym = sv_2mortal(newSVhek(stashname));
9987                     sv_catpvs(sym, "::");
9988                     sv_catpv(sym, d+1);
9989                     d = SvPVX(sym);
9990                     goto intro_sym;
9991                 }
9992                 else {
9993                     OP * const o = newOP(OP_PADSV, 0);
9994                     o->op_targ = tmp;
9995                     PL_lex_op = readline_overriden
9996                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9997                                 op_append_elem(OP_LIST, o,
9998                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9999                         : (OP*)newUNOP(OP_READLINE, 0, o);
10000                 }
10001             }
10002             else {
10003                 GV *gv;
10004                 ++d;
10005 intro_sym:
10006                 gv = gv_fetchpv(d,
10007                                 (PL_in_eval
10008                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10009                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10010                                 SVt_PV);
10011                 PL_lex_op = readline_overriden
10012                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10013                             op_append_elem(OP_LIST,
10014                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10015                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10016                     : (OP*)newUNOP(OP_READLINE, 0,
10017                             newUNOP(OP_RV2SV, 0,
10018                                 newGVOP(OP_GV, 0, gv)));
10019             }
10020             if (!readline_overriden)
10021                 PL_lex_op->op_flags |= OPf_SPECIAL;
10022             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10023             pl_yylval.ival = OP_NULL;
10024         }
10025
10026         /* If it's none of the above, it must be a literal filehandle
10027            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10028         else {
10029             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10030             PL_lex_op = readline_overriden
10031                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10032                         op_append_elem(OP_LIST,
10033                             newGVOP(OP_GV, 0, gv),
10034                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10035                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10036             pl_yylval.ival = OP_NULL;
10037         }
10038     }
10039
10040     return s;
10041 }
10042
10043
10044 /* scan_str
10045    takes: start position in buffer
10046           keep_quoted preserve \ on the embedded delimiter(s)
10047           keep_delims preserve the delimiters around the string
10048           re_reparse  compiling a run-time /(?{})/:
10049                         collapse // to /,  and skip encoding src
10050    returns: position to continue reading from buffer
10051    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10052         updates the read buffer.
10053
10054    This subroutine pulls a string out of the input.  It is called for:
10055         q               single quotes           q(literal text)
10056         '               single quotes           'literal text'
10057         qq              double quotes           qq(interpolate $here please)
10058         "               double quotes           "interpolate $here please"
10059         qx              backticks               qx(/bin/ls -l)
10060         `               backticks               `/bin/ls -l`
10061         qw              quote words             @EXPORT_OK = qw( func() $spam )
10062         m//             regexp match            m/this/
10063         s///            regexp substitute       s/this/that/
10064         tr///           string transliterate    tr/this/that/
10065         y///            string transliterate    y/this/that/
10066         ($*@)           sub prototypes          sub foo ($)
10067         (stuff)         sub attr parameters     sub foo : attr(stuff)
10068         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10069
10070    In most of these cases (all but <>, patterns and transliterate)
10071    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10072    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10073    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10074    calls scan_str().
10075
10076    It skips whitespace before the string starts, and treats the first
10077    character as the delimiter.  If the delimiter is one of ([{< then
10078    the corresponding "close" character )]}> is used as the closing
10079    delimiter.  It allows quoting of delimiters, and if the string has
10080    balanced delimiters ([{<>}]) it allows nesting.
10081
10082    On success, the SV with the resulting string is put into lex_stuff or,
10083    if that is already non-NULL, into lex_repl. The second case occurs only
10084    when parsing the RHS of the special constructs s/// and tr/// (y///).
10085    For convenience, the terminating delimiter character is stuffed into
10086    SvIVX of the SV.
10087 */
10088
10089 STATIC char *
10090 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10091 {
10092     dVAR;
10093     SV *sv;                             /* scalar value: string */
10094     const char *tmps;                   /* temp string, used for delimiter matching */
10095     char *s = start;            /* current position in the buffer */
10096     char term;                  /* terminating character */
10097     char *to;                   /* current position in the sv's data */
10098     I32 brackets = 1;                   /* bracket nesting level */
10099     bool has_utf8 = FALSE;              /* is there any utf8 content? */
10100     I32 termcode;                       /* terminating char. code */
10101     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
10102     STRLEN termlen;                     /* length of terminating string */
10103     int last_off = 0;                   /* last position for nesting bracket */
10104 #ifdef PERL_MAD
10105     int stuffstart;
10106     char *tstart;
10107 #endif
10108
10109     PERL_ARGS_ASSERT_SCAN_STR;
10110
10111     /* skip space before the delimiter */
10112     if (isSPACE(*s)) {
10113         s = PEEKSPACE(s);
10114     }
10115
10116 #ifdef PERL_MAD
10117     if (PL_realtokenstart >= 0) {
10118         stuffstart = PL_realtokenstart;
10119         PL_realtokenstart = -1;
10120     }
10121     else
10122         stuffstart = start - SvPVX(PL_linestr);
10123 #endif
10124     /* mark where we are, in case we need to report errors */
10125     CLINE;
10126
10127     /* after skipping whitespace, the next character is the terminator */
10128     term = *s;
10129     if (!UTF) {
10130         termcode = termstr[0] = term;
10131         termlen = 1;
10132     }
10133     else {
10134         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10135         Copy(s, termstr, termlen, U8);
10136         if (!UTF8_IS_INVARIANT(term))
10137             has_utf8 = TRUE;
10138     }
10139
10140     /* mark where we are */
10141     PL_multi_start = CopLINE(PL_curcop);
10142     PL_multi_open = term;
10143
10144     /* find corresponding closing delimiter */
10145     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10146         termcode = termstr[0] = term = tmps[5];
10147
10148     PL_multi_close = term;
10149
10150     /* create a new SV to hold the contents.  79 is the SV's initial length.
10151        What a random number. */
10152     sv = newSV_type(SVt_PVIV);
10153     SvGROW(sv, 80);
10154     SvIV_set(sv, termcode);
10155     (void)SvPOK_only(sv);               /* validate pointer */
10156
10157     /* move past delimiter and try to read a complete string */
10158     if (keep_delims)
10159         sv_catpvn(sv, s, termlen);
10160     s += termlen;
10161 #ifdef PERL_MAD
10162     tstart = SvPVX(PL_linestr) + stuffstart;
10163     if (!PL_thisopen && !keep_delims) {
10164         PL_thisopen = newSVpvn(tstart, s - tstart);
10165         stuffstart = s - SvPVX(PL_linestr);
10166     }
10167 #endif
10168     for (;;) {
10169         if (PL_encoding && !UTF && !re_reparse) {
10170             bool cont = TRUE;
10171
10172             while (cont) {
10173                 int offset = s - SvPVX_const(PL_linestr);
10174                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10175                                            &offset, (char*)termstr, termlen);
10176                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10177                 char * const svlast = SvEND(sv) - 1;
10178
10179                 for (; s < ns; s++) {
10180                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10181                         COPLINE_INC_WITH_HERELINES;
10182                 }
10183                 if (!found)
10184                     goto read_more_line;
10185                 else {
10186                     /* handle quoted delimiters */
10187                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10188                         const char *t;
10189                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10190                             t--;
10191                         if ((svlast-1 - t) % 2) {
10192                             if (!keep_quoted) {
10193                                 *(svlast-1) = term;
10194                                 *svlast = '\0';
10195                                 SvCUR_set(sv, SvCUR(sv) - 1);
10196                             }
10197                             continue;
10198                         }
10199                     }
10200                     if (PL_multi_open == PL_multi_close) {
10201                         cont = FALSE;
10202                     }
10203                     else {
10204                         const char *t;
10205                         char *w;
10206                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10207                             /* At here, all closes are "was quoted" one,
10208                                so we don't check PL_multi_close. */
10209                             if (*t == '\\') {
10210                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10211                                     t++;
10212                                 else
10213                                     *w++ = *t++;
10214                             }
10215                             else if (*t == PL_multi_open)
10216                                 brackets++;
10217
10218                             *w = *t;
10219                         }
10220                         if (w < t) {
10221                             *w++ = term;
10222                             *w = '\0';
10223                             SvCUR_set(sv, w - SvPVX_const(sv));
10224                         }
10225                         last_off = w - SvPVX(sv);
10226                         if (--brackets <= 0)
10227                             cont = FALSE;
10228                     }
10229                 }
10230             }
10231             if (!keep_delims) {
10232                 SvCUR_set(sv, SvCUR(sv) - 1);
10233                 *SvEND(sv) = '\0';
10234             }
10235             break;
10236         }
10237
10238         /* extend sv if need be */
10239         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10240         /* set 'to' to the next character in the sv's string */
10241         to = SvPVX(sv)+SvCUR(sv);
10242
10243         /* if open delimiter is the close delimiter read unbridle */
10244         if (PL_multi_open == PL_multi_close) {
10245             for (; s < PL_bufend; s++,to++) {
10246                 /* embedded newlines increment the current line number */
10247                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10248                     COPLINE_INC_WITH_HERELINES;
10249                 /* handle quoted delimiters */
10250                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10251                     if (!keep_quoted
10252                         && (s[1] == term
10253                             || (re_reparse && s[1] == '\\'))
10254                     )
10255                         s++;
10256                     /* any other quotes are simply copied straight through */
10257                     else
10258                         *to++ = *s++;
10259                 }
10260                 /* terminate when run out of buffer (the for() condition), or
10261                    have found the terminator */
10262                 else if (*s == term) {
10263                     if (termlen == 1)
10264                         break;
10265                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10266                         break;
10267                 }
10268                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10269                     has_utf8 = TRUE;
10270                 *to = *s;
10271             }
10272         }
10273
10274         /* if the terminator isn't the same as the start character (e.g.,
10275            matched brackets), we have to allow more in the quoting, and
10276            be prepared for nested brackets.
10277         */
10278         else {
10279             /* read until we run out of string, or we find the terminator */
10280             for (; s < PL_bufend; s++,to++) {
10281                 /* embedded newlines increment the line count */
10282                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10283                     COPLINE_INC_WITH_HERELINES;
10284                 /* backslashes can escape the open or closing characters */
10285                 if (*s == '\\' && s+1 < PL_bufend) {
10286                     if (!keep_quoted &&
10287                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10288                         s++;
10289                     else
10290                         *to++ = *s++;
10291                 }
10292                 /* allow nested opens and closes */
10293                 else if (*s == PL_multi_close && --brackets <= 0)
10294                     break;
10295                 else if (*s == PL_multi_open)
10296                     brackets++;
10297                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10298                     has_utf8 = TRUE;
10299                 *to = *s;
10300             }
10301         }
10302         /* terminate the copied string and update the sv's end-of-string */
10303         *to = '\0';
10304         SvCUR_set(sv, to - SvPVX_const(sv));
10305
10306         /*
10307          * this next chunk reads more into the buffer if we're not done yet
10308          */
10309
10310         if (s < PL_bufend)
10311             break;              /* handle case where we are done yet :-) */
10312
10313 #ifndef PERL_STRICT_CR
10314         if (to - SvPVX_const(sv) >= 2) {
10315             if ((to[-2] == '\r' && to[-1] == '\n') ||
10316                 (to[-2] == '\n' && to[-1] == '\r'))
10317             {
10318                 to[-2] = '\n';
10319                 to--;
10320                 SvCUR_set(sv, to - SvPVX_const(sv));
10321             }
10322             else if (to[-1] == '\r')
10323                 to[-1] = '\n';
10324         }
10325         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10326             to[-1] = '\n';
10327 #endif
10328
10329      read_more_line:
10330         /* if we're out of file, or a read fails, bail and reset the current
10331            line marker so we can report where the unterminated string began
10332         */
10333 #ifdef PERL_MAD
10334         if (PL_madskills) {
10335             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10336             if (PL_thisstuff)
10337                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10338             else
10339                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10340         }
10341 #endif
10342         COPLINE_INC_WITH_HERELINES;
10343         PL_bufptr = PL_bufend;
10344         if (!lex_next_chunk(0)) {
10345             sv_free(sv);
10346             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10347             return NULL;
10348         }
10349         s = PL_bufptr;
10350 #ifdef PERL_MAD
10351         stuffstart = 0;
10352 #endif
10353     }
10354
10355     /* at this point, we have successfully read the delimited string */
10356
10357     if (!PL_encoding || UTF || re_reparse) {
10358 #ifdef PERL_MAD
10359         if (PL_madskills) {
10360             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10361             const int len = s - tstart;
10362             if (PL_thisstuff)
10363                 sv_catpvn(PL_thisstuff, tstart, len);
10364             else
10365                 PL_thisstuff = newSVpvn(tstart, len);
10366             if (!PL_thisclose && !keep_delims)
10367                 PL_thisclose = newSVpvn(s,termlen);
10368         }
10369 #endif
10370
10371         if (keep_delims)
10372             sv_catpvn(sv, s, termlen);
10373         s += termlen;
10374     }
10375 #ifdef PERL_MAD
10376     else {
10377         if (PL_madskills) {
10378             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10379             const int len = s - tstart - termlen;
10380             if (PL_thisstuff)
10381                 sv_catpvn(PL_thisstuff, tstart, len);
10382             else
10383                 PL_thisstuff = newSVpvn(tstart, len);
10384             if (!PL_thisclose && !keep_delims)
10385                 PL_thisclose = newSVpvn(s - termlen,termlen);
10386         }
10387     }
10388 #endif
10389     if (has_utf8 || (PL_encoding && !re_reparse))
10390         SvUTF8_on(sv);
10391
10392     PL_multi_end = CopLINE(PL_curcop);
10393
10394     /* if we allocated too much space, give some back */
10395     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10396         SvLEN_set(sv, SvCUR(sv) + 1);
10397         SvPV_renew(sv, SvLEN(sv));
10398     }
10399
10400     /* decide whether this is the first or second quoted string we've read
10401        for this op
10402     */
10403
10404     if (PL_lex_stuff)
10405         PL_sublex_info.repl = sv;
10406     else
10407         PL_lex_stuff = sv;
10408     return s;
10409 }
10410
10411 /*
10412   scan_num
10413   takes: pointer to position in buffer
10414   returns: pointer to new position in buffer
10415   side-effects: builds ops for the constant in pl_yylval.op
10416
10417   Read a number in any of the formats that Perl accepts:
10418
10419   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10420   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10421   0b[01](_?[01])*
10422   0[0-7](_?[0-7])*
10423   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10424
10425   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10426   thing it reads.
10427
10428   If it reads a number without a decimal point or an exponent, it will
10429   try converting the number to an integer and see if it can do so
10430   without loss of precision.
10431 */
10432
10433 char *
10434 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10435 {
10436     dVAR;
10437     const char *s = start;      /* current position in buffer */
10438     char *d;                    /* destination in temp buffer */
10439     char *e;                    /* end of temp buffer */
10440     NV nv;                              /* number read, as a double */
10441     SV *sv = NULL;                      /* place to put the converted number */
10442     bool floatit;                       /* boolean: int or float? */
10443     const char *lastub = NULL;          /* position of last underbar */
10444     static char const number_too_long[] = "Number too long";
10445
10446     PERL_ARGS_ASSERT_SCAN_NUM;
10447
10448     /* We use the first character to decide what type of number this is */
10449
10450     switch (*s) {
10451     default:
10452         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10453
10454     /* if it starts with a 0, it could be an octal number, a decimal in
10455        0.13 disguise, or a hexadecimal number, or a binary number. */
10456     case '0':
10457         {
10458           /* variables:
10459              u          holds the "number so far"
10460              shift      the power of 2 of the base
10461                         (hex == 4, octal == 3, binary == 1)
10462              overflowed was the number more than we can hold?
10463
10464              Shift is used when we add a digit.  It also serves as an "are
10465              we in octal/hex/binary?" indicator to disallow hex characters
10466              when in octal mode.
10467            */
10468             NV n = 0.0;
10469             UV u = 0;
10470             I32 shift;
10471             bool overflowed = FALSE;
10472             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10473             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10474             static const char* const bases[5] =
10475               { "", "binary", "", "octal", "hexadecimal" };
10476             static const char* const Bases[5] =
10477               { "", "Binary", "", "Octal", "Hexadecimal" };
10478             static const char* const maxima[5] =
10479               { "",
10480                 "0b11111111111111111111111111111111",
10481                 "",
10482                 "037777777777",
10483                 "0xffffffff" };
10484             const char *base, *Base, *max;
10485
10486             /* check for hex */
10487             if (s[1] == 'x' || s[1] == 'X') {
10488                 shift = 4;
10489                 s += 2;
10490                 just_zero = FALSE;
10491             } else if (s[1] == 'b' || s[1] == 'B') {
10492                 shift = 1;
10493                 s += 2;
10494                 just_zero = FALSE;
10495             }
10496             /* check for a decimal in disguise */
10497             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10498                 goto decimal;
10499             /* so it must be octal */
10500             else {
10501                 shift = 3;
10502                 s++;
10503             }
10504
10505             if (*s == '_') {
10506                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10507                                "Misplaced _ in number");
10508                lastub = s++;
10509             }
10510
10511             base = bases[shift];
10512             Base = Bases[shift];
10513             max  = maxima[shift];
10514
10515             /* read the rest of the number */
10516             for (;;) {
10517                 /* x is used in the overflow test,
10518                    b is the digit we're adding on. */
10519                 UV x, b;
10520
10521                 switch (*s) {
10522
10523                 /* if we don't mention it, we're done */
10524                 default:
10525                     goto out;
10526
10527                 /* _ are ignored -- but warned about if consecutive */
10528                 case '_':
10529                     if (lastub && s == lastub + 1)
10530                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10531                                        "Misplaced _ in number");
10532                     lastub = s++;
10533                     break;
10534
10535                 /* 8 and 9 are not octal */
10536                 case '8': case '9':
10537                     if (shift == 3)
10538                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10539                     /* FALL THROUGH */
10540
10541                 /* octal digits */
10542                 case '2': case '3': case '4':
10543                 case '5': case '6': case '7':
10544                     if (shift == 1)
10545                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10546                     /* FALL THROUGH */
10547
10548                 case '0': case '1':
10549                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10550                     goto digit;
10551
10552                 /* hex digits */
10553                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10554                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10555                     /* make sure they said 0x */
10556                     if (shift != 4)
10557                         goto out;
10558                     b = (*s++ & 7) + 9;
10559
10560                     /* Prepare to put the digit we have onto the end
10561                        of the number so far.  We check for overflows.
10562                     */
10563
10564                   digit:
10565                     just_zero = FALSE;
10566                     if (!overflowed) {
10567                         x = u << shift; /* make room for the digit */
10568
10569                         if ((x >> shift) != u
10570                             && !(PL_hints & HINT_NEW_BINARY)) {
10571                             overflowed = TRUE;
10572                             n = (NV) u;
10573                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10574                                              "Integer overflow in %s number",
10575                                              base);
10576                         } else
10577                             u = x | b;          /* add the digit to the end */
10578                     }
10579                     if (overflowed) {
10580                         n *= nvshift[shift];
10581                         /* If an NV has not enough bits in its
10582                          * mantissa to represent an UV this summing of
10583                          * small low-order numbers is a waste of time
10584                          * (because the NV cannot preserve the
10585                          * low-order bits anyway): we could just
10586                          * remember when did we overflow and in the
10587                          * end just multiply n by the right
10588                          * amount. */
10589                         n += (NV) b;
10590                     }
10591                     break;
10592                 }
10593             }
10594
10595           /* if we get here, we had success: make a scalar value from
10596              the number.
10597           */
10598           out:
10599
10600             /* final misplaced underbar check */
10601             if (s[-1] == '_') {
10602                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10603             }
10604
10605             if (overflowed) {
10606                 if (n > 4294967295.0)
10607                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10608                                    "%s number > %s non-portable",
10609                                    Base, max);
10610                 sv = newSVnv(n);
10611             }
10612             else {
10613 #if UVSIZE > 4
10614                 if (u > 0xffffffff)
10615                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10616                                    "%s number > %s non-portable",
10617                                    Base, max);
10618 #endif
10619                 sv = newSVuv(u);
10620             }
10621             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10622                 sv = new_constant(start, s - start, "integer",
10623                                   sv, NULL, NULL, 0);
10624             else if (PL_hints & HINT_NEW_BINARY)
10625                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10626         }
10627         break;
10628
10629     /*
10630       handle decimal numbers.
10631       we're also sent here when we read a 0 as the first digit
10632     */
10633     case '1': case '2': case '3': case '4': case '5':
10634     case '6': case '7': case '8': case '9': case '.':
10635       decimal:
10636         d = PL_tokenbuf;
10637         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10638         floatit = FALSE;
10639
10640         /* read next group of digits and _ and copy into d */
10641         while (isDIGIT(*s) || *s == '_') {
10642             /* skip underscores, checking for misplaced ones
10643                if -w is on
10644             */
10645             if (*s == '_') {
10646                 if (lastub && s == lastub + 1)
10647                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10648                                    "Misplaced _ in number");
10649                 lastub = s++;
10650             }
10651             else {
10652                 /* check for end of fixed-length buffer */
10653                 if (d >= e)
10654                     Perl_croak(aTHX_ number_too_long);
10655                 /* if we're ok, copy the character */
10656                 *d++ = *s++;
10657             }
10658         }
10659
10660         /* final misplaced underbar check */
10661         if (lastub && s == lastub + 1) {
10662             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10663         }
10664
10665         /* read a decimal portion if there is one.  avoid
10666            3..5 being interpreted as the number 3. followed
10667            by .5
10668         */
10669         if (*s == '.' && s[1] != '.') {
10670             floatit = TRUE;
10671             *d++ = *s++;
10672
10673             if (*s == '_') {
10674                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10675                                "Misplaced _ in number");
10676                 lastub = s;
10677             }
10678
10679             /* copy, ignoring underbars, until we run out of digits.
10680             */
10681             for (; isDIGIT(*s) || *s == '_'; s++) {
10682                 /* fixed length buffer check */
10683                 if (d >= e)
10684                     Perl_croak(aTHX_ number_too_long);
10685                 if (*s == '_') {
10686                    if (lastub && s == lastub + 1)
10687                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10688                                       "Misplaced _ in number");
10689                    lastub = s;
10690                 }
10691                 else
10692                     *d++ = *s;
10693             }
10694             /* fractional part ending in underbar? */
10695             if (s[-1] == '_') {
10696                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10697                                "Misplaced _ in number");
10698             }
10699             if (*s == '.' && isDIGIT(s[1])) {
10700                 /* oops, it's really a v-string, but without the "v" */
10701                 s = start;
10702                 goto vstring;
10703             }
10704         }
10705
10706         /* read exponent part, if present */
10707         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10708             floatit = TRUE;
10709             s++;
10710
10711             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10712             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10713
10714             /* stray preinitial _ */
10715             if (*s == '_') {
10716                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10717                                "Misplaced _ in number");
10718                 lastub = s++;
10719             }
10720
10721             /* allow positive or negative exponent */
10722             if (*s == '+' || *s == '-')
10723                 *d++ = *s++;
10724
10725             /* stray initial _ */
10726             if (*s == '_') {
10727                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10728                                "Misplaced _ in number");
10729                 lastub = s++;
10730             }
10731
10732             /* read digits of exponent */
10733             while (isDIGIT(*s) || *s == '_') {
10734                 if (isDIGIT(*s)) {
10735                     if (d >= e)
10736                         Perl_croak(aTHX_ number_too_long);
10737                     *d++ = *s++;
10738                 }
10739                 else {
10740                    if (((lastub && s == lastub + 1) ||
10741                         (!isDIGIT(s[1]) && s[1] != '_')))
10742                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10743                                       "Misplaced _ in number");
10744                    lastub = s++;
10745                 }
10746             }
10747         }
10748
10749
10750         /*
10751            We try to do an integer conversion first if no characters
10752            indicating "float" have been found.
10753          */
10754
10755         if (!floatit) {
10756             UV uv;
10757             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10758
10759             if (flags == IS_NUMBER_IN_UV) {
10760               if (uv <= IV_MAX)
10761                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10762               else
10763                 sv = newSVuv(uv);
10764             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10765               if (uv <= (UV) IV_MIN)
10766                 sv = newSViv(-(IV)uv);
10767               else
10768                 floatit = TRUE;
10769             } else
10770               floatit = TRUE;
10771         }
10772         if (floatit) {
10773             /* terminate the string */
10774             *d = '\0';
10775             nv = Atof(PL_tokenbuf);
10776             sv = newSVnv(nv);
10777         }
10778
10779         if ( floatit
10780              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10781             const char *const key = floatit ? "float" : "integer";
10782             const STRLEN keylen = floatit ? 5 : 7;
10783             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10784                                 key, keylen, sv, NULL, NULL, 0);
10785         }
10786         break;
10787
10788     /* if it starts with a v, it could be a v-string */
10789     case 'v':
10790 vstring:
10791                 sv = newSV(5); /* preallocate storage space */
10792                 s = scan_vstring(s, PL_bufend, sv);
10793         break;
10794     }
10795
10796     /* make the op for the constant and return */
10797
10798     if (sv)
10799         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10800     else
10801         lvalp->opval = NULL;
10802
10803     return (char *)s;
10804 }
10805
10806 STATIC char *
10807 S_scan_formline(pTHX_ register char *s)
10808 {
10809     dVAR;
10810     char *eol;
10811     char *t;
10812     SV * const stuff = newSVpvs("");
10813     bool needargs = FALSE;
10814     bool eofmt = FALSE;
10815 #ifdef PERL_MAD
10816     char *tokenstart = s;
10817     SV* savewhite = NULL;
10818
10819     if (PL_madskills) {
10820         savewhite = PL_thiswhite;
10821         PL_thiswhite = 0;
10822     }
10823 #endif
10824
10825     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10826
10827     while (!needargs) {
10828         if (*s == '.') {
10829             t = s+1;
10830 #ifdef PERL_STRICT_CR
10831             while (SPACE_OR_TAB(*t))
10832                 t++;
10833 #else
10834             while (SPACE_OR_TAB(*t) || *t == '\r')
10835                 t++;
10836 #endif
10837             if (*t == '\n' || t == PL_bufend) {
10838                 eofmt = TRUE;
10839                 break;
10840             }
10841         }
10842         eol = (char *) memchr(s,'\n',PL_bufend-s);
10843         if (!eol++)
10844                 eol = PL_bufend;
10845         if (*s != '#') {
10846             for (t = s; t < eol; t++) {
10847                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10848                     needargs = FALSE;
10849                     goto enough;        /* ~~ must be first line in formline */
10850                 }
10851                 if (*t == '@' || *t == '^')
10852                     needargs = TRUE;
10853             }
10854             if (eol > s) {
10855                 sv_catpvn(stuff, s, eol-s);
10856 #ifndef PERL_STRICT_CR
10857                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10858                     char *end = SvPVX(stuff) + SvCUR(stuff);
10859                     end[-2] = '\n';
10860                     end[-1] = '\0';
10861                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10862                 }
10863 #endif
10864             }
10865             else
10866               break;
10867         }
10868         s = (char*)eol;
10869         if ((PL_rsfp || PL_parser->filtered)
10870          && PL_parser->form_lex_state == LEX_NORMAL) {
10871             bool got_some;
10872 #ifdef PERL_MAD
10873             if (PL_madskills) {
10874                 if (PL_thistoken)
10875                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10876                 else
10877                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10878             }
10879 #endif
10880             PL_bufptr = PL_bufend;
10881             COPLINE_INC_WITH_HERELINES;
10882             got_some = lex_next_chunk(0);
10883             CopLINE_dec(PL_curcop);
10884             s = PL_bufptr;
10885 #ifdef PERL_MAD
10886             tokenstart = PL_bufptr;
10887 #endif
10888             if (!got_some)
10889                 break;
10890         }
10891         incline(s);
10892     }
10893   enough:
10894     if (!SvCUR(stuff) || needargs)
10895         PL_lex_state = PL_parser->form_lex_state;
10896     if (SvCUR(stuff)) {
10897         PL_expect = XSTATE;
10898         if (needargs) {
10899             start_force(PL_curforce);
10900             NEXTVAL_NEXTTOKE.ival = 0;
10901             force_next(FORMLBRACK);
10902         }
10903         if (!IN_BYTES) {
10904             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10905                 SvUTF8_on(stuff);
10906             else if (PL_encoding)
10907                 sv_recode_to_utf8(stuff, PL_encoding);
10908         }
10909         start_force(PL_curforce);
10910         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10911         force_next(THING);
10912     }
10913     else {
10914         SvREFCNT_dec(stuff);
10915         if (eofmt)
10916             PL_lex_formbrack = 0;
10917     }
10918 #ifdef PERL_MAD
10919     if (PL_madskills) {
10920         if (PL_thistoken)
10921             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10922         else
10923             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10924         PL_thiswhite = savewhite;
10925     }
10926 #endif
10927     return s;
10928 }
10929
10930 I32
10931 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10932 {
10933     dVAR;
10934     const I32 oldsavestack_ix = PL_savestack_ix;
10935     CV* const outsidecv = PL_compcv;
10936
10937     SAVEI32(PL_subline);
10938     save_item(PL_subname);
10939     SAVESPTR(PL_compcv);
10940
10941     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10942     CvFLAGS(PL_compcv) |= flags;
10943
10944     PL_subline = CopLINE(PL_curcop);
10945     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10946     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10947     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10948     if (outsidecv && CvPADLIST(outsidecv))
10949         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10950
10951     return oldsavestack_ix;
10952 }
10953
10954 #ifdef __SC__
10955 #pragma segment Perl_yylex
10956 #endif
10957 static int
10958 S_yywarn(pTHX_ const char *const s, U32 flags)
10959 {
10960     dVAR;
10961
10962     PERL_ARGS_ASSERT_YYWARN;
10963
10964     PL_in_eval |= EVAL_WARNONLY;
10965     yyerror_pv(s, flags);
10966     PL_in_eval &= ~EVAL_WARNONLY;
10967     return 0;
10968 }
10969
10970 int
10971 Perl_yyerror(pTHX_ const char *const s)
10972 {
10973     PERL_ARGS_ASSERT_YYERROR;
10974     return yyerror_pvn(s, strlen(s), 0);
10975 }
10976
10977 int
10978 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10979 {
10980     PERL_ARGS_ASSERT_YYERROR_PV;
10981     return yyerror_pvn(s, strlen(s), flags);
10982 }
10983
10984 int
10985 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10986 {
10987     dVAR;
10988     const char *context = NULL;
10989     int contlen = -1;
10990     SV *msg;
10991     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10992     int yychar  = PL_parser->yychar;
10993     U32 is_utf8 = flags & SVf_UTF8;
10994
10995     PERL_ARGS_ASSERT_YYERROR_PVN;
10996
10997     if (!yychar || (yychar == ';' && !PL_rsfp))
10998         sv_catpvs(where_sv, "at EOF");
10999     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11000       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11001       PL_oldbufptr != PL_bufptr) {
11002         /*
11003                 Only for NetWare:
11004                 The code below is removed for NetWare because it abends/crashes on NetWare
11005                 when the script has error such as not having the closing quotes like:
11006                     if ($var eq "value)
11007                 Checking of white spaces is anyway done in NetWare code.
11008         */
11009 #ifndef NETWARE
11010         while (isSPACE(*PL_oldoldbufptr))
11011             PL_oldoldbufptr++;
11012 #endif
11013         context = PL_oldoldbufptr;
11014         contlen = PL_bufptr - PL_oldoldbufptr;
11015     }
11016     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11017       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11018         /*
11019                 Only for NetWare:
11020                 The code below is removed for NetWare because it abends/crashes on NetWare
11021                 when the script has error such as not having the closing quotes like:
11022                     if ($var eq "value)
11023                 Checking of white spaces is anyway done in NetWare code.
11024         */
11025 #ifndef NETWARE
11026         while (isSPACE(*PL_oldbufptr))
11027             PL_oldbufptr++;
11028 #endif
11029         context = PL_oldbufptr;
11030         contlen = PL_bufptr - PL_oldbufptr;
11031     }
11032     else if (yychar > 255)
11033         sv_catpvs(where_sv, "next token ???");
11034     else if (yychar == -2) { /* YYEMPTY */
11035         if (PL_lex_state == LEX_NORMAL ||
11036            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11037             sv_catpvs(where_sv, "at end of line");
11038         else if (PL_lex_inpat)
11039             sv_catpvs(where_sv, "within pattern");
11040         else
11041             sv_catpvs(where_sv, "within string");
11042     }
11043     else {
11044         sv_catpvs(where_sv, "next char ");
11045         if (yychar < 32)
11046             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11047         else if (isPRINT_LC(yychar)) {
11048             const char string = yychar;
11049             sv_catpvn(where_sv, &string, 1);
11050         }
11051         else
11052             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11053     }
11054     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
11055     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11056         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11057     if (context)
11058         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11059                             SVfARG(newSVpvn_flags(context, contlen,
11060                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11061     else
11062         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11063     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11064         Perl_sv_catpvf(aTHX_ msg,
11065         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11066                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11067         PL_multi_end = 0;
11068     }
11069     if (PL_in_eval & EVAL_WARNONLY) {
11070         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11071     }
11072     else
11073         qerror(msg);
11074     if (PL_error_count >= 10) {
11075         if (PL_in_eval && SvCUR(ERRSV))
11076             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11077                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
11078         else
11079             Perl_croak(aTHX_ "%s has too many errors.\n",
11080             OutCopFILE(PL_curcop));
11081     }
11082     PL_in_my = 0;
11083     PL_in_my_stash = NULL;
11084     return 0;
11085 }
11086 #ifdef __SC__
11087 #pragma segment Main
11088 #endif
11089
11090 STATIC char*
11091 S_swallow_bom(pTHX_ U8 *s)
11092 {
11093     dVAR;
11094     const STRLEN slen = SvCUR(PL_linestr);
11095
11096     PERL_ARGS_ASSERT_SWALLOW_BOM;
11097
11098     switch (s[0]) {
11099     case 0xFF:
11100         if (s[1] == 0xFE) {
11101             /* UTF-16 little-endian? (or UTF-32LE?) */
11102             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11103                 /* diag_listed_as: Unsupported script encoding %s */
11104                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11105 #ifndef PERL_NO_UTF16_FILTER
11106             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11107             s += 2;
11108             if (PL_bufend > (char*)s) {
11109                 s = add_utf16_textfilter(s, TRUE);
11110             }
11111 #else
11112             /* diag_listed_as: Unsupported script encoding %s */
11113             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11114 #endif
11115         }
11116         break;
11117     case 0xFE:
11118         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11119 #ifndef PERL_NO_UTF16_FILTER
11120             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11121             s += 2;
11122             if (PL_bufend > (char *)s) {
11123                 s = add_utf16_textfilter(s, FALSE);
11124             }
11125 #else
11126             /* diag_listed_as: Unsupported script encoding %s */
11127             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11128 #endif
11129         }
11130         break;
11131     case 0xEF:
11132         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11133             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11134             s += 3;                      /* UTF-8 */
11135         }
11136         break;
11137     case 0:
11138         if (slen > 3) {
11139              if (s[1] == 0) {
11140                   if (s[2] == 0xFE && s[3] == 0xFF) {
11141                        /* UTF-32 big-endian */
11142                        /* diag_listed_as: Unsupported script encoding %s */
11143                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11144                   }
11145              }
11146              else if (s[2] == 0 && s[3] != 0) {
11147                   /* Leading bytes
11148                    * 00 xx 00 xx
11149                    * are a good indicator of UTF-16BE. */
11150 #ifndef PERL_NO_UTF16_FILTER
11151                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11152                   s = add_utf16_textfilter(s, FALSE);
11153 #else
11154                   /* diag_listed_as: Unsupported script encoding %s */
11155                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11156 #endif
11157              }
11158         }
11159 #ifdef EBCDIC
11160     case 0xDD:
11161         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11162             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11163             s += 4;                      /* UTF-8 */
11164         }
11165         break;
11166 #endif
11167
11168     default:
11169          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11170                   /* Leading bytes
11171                    * xx 00 xx 00
11172                    * are a good indicator of UTF-16LE. */
11173 #ifndef PERL_NO_UTF16_FILTER
11174               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11175               s = add_utf16_textfilter(s, TRUE);
11176 #else
11177               /* diag_listed_as: Unsupported script encoding %s */
11178               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11179 #endif
11180          }
11181     }
11182     return (char*)s;
11183 }
11184
11185
11186 #ifndef PERL_NO_UTF16_FILTER
11187 static I32
11188 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11189 {
11190     dVAR;
11191     SV *const filter = FILTER_DATA(idx);
11192     /* We re-use this each time round, throwing the contents away before we
11193        return.  */
11194     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11195     SV *const utf8_buffer = filter;
11196     IV status = IoPAGE(filter);
11197     const bool reverse = cBOOL(IoLINES(filter));
11198     I32 retval;
11199
11200     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11201
11202     /* As we're automatically added, at the lowest level, and hence only called
11203        from this file, we can be sure that we're not called in block mode. Hence
11204        don't bother writing code to deal with block mode.  */
11205     if (maxlen) {
11206         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11207     }
11208     if (status < 0) {
11209         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11210     }
11211     DEBUG_P(PerlIO_printf(Perl_debug_log,
11212                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11213                           FPTR2DPTR(void *, S_utf16_textfilter),
11214                           reverse ? 'l' : 'b', idx, maxlen, status,
11215                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11216
11217     while (1) {
11218         STRLEN chars;
11219         STRLEN have;
11220         I32 newlen;
11221         U8 *end;
11222         /* First, look in our buffer of existing UTF-8 data:  */
11223         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11224
11225         if (nl) {
11226             ++nl;
11227         } else if (status == 0) {
11228             /* EOF */
11229             IoPAGE(filter) = 0;
11230             nl = SvEND(utf8_buffer);
11231         }
11232         if (nl) {
11233             STRLEN got = nl - SvPVX(utf8_buffer);
11234             /* Did we have anything to append?  */
11235             retval = got != 0;
11236             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11237             /* Everything else in this code works just fine if SVp_POK isn't
11238                set.  This, however, needs it, and we need it to work, else
11239                we loop infinitely because the buffer is never consumed.  */
11240             sv_chop(utf8_buffer, nl);
11241             break;
11242         }
11243
11244         /* OK, not a complete line there, so need to read some more UTF-16.
11245            Read an extra octect if the buffer currently has an odd number. */
11246         while (1) {
11247             if (status <= 0)
11248                 break;
11249             if (SvCUR(utf16_buffer) >= 2) {
11250                 /* Location of the high octet of the last complete code point.
11251                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11252                    *coupled* with all the benefits of partial reads and
11253                    endianness.  */
11254                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11255                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11256
11257                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11258                     break;
11259                 }
11260
11261                 /* We have the first half of a surrogate. Read more.  */
11262                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11263             }
11264
11265             status = FILTER_READ(idx + 1, utf16_buffer,
11266                                  160 + (SvCUR(utf16_buffer) & 1));
11267             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11268             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11269             if (status < 0) {
11270                 /* Error */
11271                 IoPAGE(filter) = status;
11272                 return status;
11273             }
11274         }
11275
11276         chars = SvCUR(utf16_buffer) >> 1;
11277         have = SvCUR(utf8_buffer);
11278         SvGROW(utf8_buffer, have + chars * 3 + 1);
11279
11280         if (reverse) {
11281             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11282                                          (U8*)SvPVX_const(utf8_buffer) + have,
11283                                          chars * 2, &newlen);
11284         } else {
11285             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11286                                 (U8*)SvPVX_const(utf8_buffer) + have,
11287                                 chars * 2, &newlen);
11288         }
11289         SvCUR_set(utf8_buffer, have + newlen);
11290         *end = '\0';
11291
11292         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11293            it's private to us, and utf16_to_utf8{,reversed} take a
11294            (pointer,length) pair, rather than a NUL-terminated string.  */
11295         if(SvCUR(utf16_buffer) & 1) {
11296             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11297             SvCUR_set(utf16_buffer, 1);
11298         } else {
11299             SvCUR_set(utf16_buffer, 0);
11300         }
11301     }
11302     DEBUG_P(PerlIO_printf(Perl_debug_log,
11303                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11304                           status,
11305                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11306     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11307     return retval;
11308 }
11309
11310 static U8 *
11311 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11312 {
11313     SV *filter = filter_add(S_utf16_textfilter, NULL);
11314
11315     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11316
11317     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11318     sv_setpvs(filter, "");
11319     IoLINES(filter) = reversed;
11320     IoPAGE(filter) = 1; /* Not EOF */
11321
11322     /* Sadly, we have to return a valid pointer, come what may, so we have to
11323        ignore any error return from this.  */
11324     SvCUR_set(PL_linestr, 0);
11325     if (FILTER_READ(0, PL_linestr, 0)) {
11326         SvUTF8_on(PL_linestr);
11327     } else {
11328         SvUTF8_on(PL_linestr);
11329     }
11330     PL_bufend = SvEND(PL_linestr);
11331     return (U8*)SvPVX(PL_linestr);
11332 }
11333 #endif
11334
11335 /*
11336 Returns a pointer to the next character after the parsed
11337 vstring, as well as updating the passed in sv.
11338
11339 Function must be called like
11340
11341         sv = newSV(5);
11342         s = scan_vstring(s,e,sv);
11343
11344 where s and e are the start and end of the string.
11345 The sv should already be large enough to store the vstring
11346 passed in, for performance reasons.
11347
11348 */
11349
11350 char *
11351 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11352 {
11353     dVAR;
11354     const char *pos = s;
11355     const char *start = s;
11356
11357     PERL_ARGS_ASSERT_SCAN_VSTRING;
11358
11359     if (*pos == 'v') pos++;  /* get past 'v' */
11360     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11361         pos++;
11362     if ( *pos != '.') {
11363         /* this may not be a v-string if followed by => */
11364         const char *next = pos;
11365         while (next < e && isSPACE(*next))
11366             ++next;
11367         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11368             /* return string not v-string */
11369             sv_setpvn(sv,(char *)s,pos-s);
11370             return (char *)pos;
11371         }
11372     }
11373
11374     if (!isALPHA(*pos)) {
11375         U8 tmpbuf[UTF8_MAXBYTES+1];
11376
11377         if (*s == 'v')
11378             s++;  /* get past 'v' */
11379
11380         sv_setpvs(sv, "");
11381
11382         for (;;) {
11383             /* this is atoi() that tolerates underscores */
11384             U8 *tmpend;
11385             UV rev = 0;
11386             const char *end = pos;
11387             UV mult = 1;
11388             while (--end >= s) {
11389                 if (*end != '_') {
11390                     const UV orev = rev;
11391                     rev += (*end - '0') * mult;
11392                     mult *= 10;
11393                     if (orev > rev)
11394                         /* diag_listed_as: Integer overflow in %s number */
11395                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11396                                          "Integer overflow in decimal number");
11397                 }
11398             }
11399 #ifdef EBCDIC
11400             if (rev > 0x7FFFFFFF)
11401                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11402 #endif
11403             /* Append native character for the rev point */
11404             tmpend = uvchr_to_utf8(tmpbuf, rev);
11405             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11406             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11407                  SvUTF8_on(sv);
11408             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11409                  s = ++pos;
11410             else {
11411                  s = pos;
11412                  break;
11413             }
11414             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11415                  pos++;
11416         }
11417         SvPOK_on(sv);
11418         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11419         SvRMAGICAL_on(sv);
11420     }
11421     return (char *)s;
11422 }
11423
11424 int
11425 Perl_keyword_plugin_standard(pTHX_
11426         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11427 {
11428     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11429     PERL_UNUSED_CONTEXT;
11430     PERL_UNUSED_ARG(keyword_ptr);
11431     PERL_UNUSED_ARG(keyword_len);
11432     PERL_UNUSED_ARG(op_ptr);
11433     return KEYWORD_PLUGIN_DECLINE;
11434 }
11435
11436 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11437 static void
11438 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11439 {
11440     SAVEI32(PL_lex_brackets);
11441     if (PL_lex_brackets > 100)
11442         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11443     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11444     SAVEI32(PL_lex_allbrackets);
11445     PL_lex_allbrackets = 0;
11446     SAVEI8(PL_lex_fakeeof);
11447     PL_lex_fakeeof = (U8)fakeeof;
11448     if(yyparse(gramtype) && !PL_parser->error_count)
11449         qerror(Perl_mess(aTHX_ "Parse error"));
11450 }
11451
11452 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11453 static OP *
11454 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11455 {
11456     OP *o;
11457     ENTER;
11458     SAVEVPTR(PL_eval_root);
11459     PL_eval_root = NULL;
11460     parse_recdescent(gramtype, fakeeof);
11461     o = PL_eval_root;
11462     LEAVE;
11463     return o;
11464 }
11465
11466 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11467 static OP *
11468 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11469 {
11470     OP *exprop;
11471     if (flags & ~PARSE_OPTIONAL)
11472         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11473     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11474     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11475         if (!PL_parser->error_count)
11476             qerror(Perl_mess(aTHX_ "Parse error"));
11477         exprop = newOP(OP_NULL, 0);
11478     }
11479     return exprop;
11480 }
11481
11482 /*
11483 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11484
11485 Parse a Perl arithmetic expression.  This may contain operators of precedence
11486 down to the bit shift operators.  The expression must be followed (and thus
11487 terminated) either by a comparison or lower-precedence operator or by
11488 something that would normally terminate an expression such as semicolon.
11489 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11490 otherwise it is mandatory.  It is up to the caller to ensure that the
11491 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11492 the source of the code to be parsed and the lexical context for the
11493 expression.
11494
11495 The op tree representing the expression is returned.  If an optional
11496 expression is absent, a null pointer is returned, otherwise the pointer
11497 will be non-null.
11498
11499 If an error occurs in parsing or compilation, in most cases a valid op
11500 tree is returned anyway.  The error is reflected in the parser state,
11501 normally resulting in a single exception at the top level of parsing
11502 which covers all the compilation errors that occurred.  Some compilation
11503 errors, however, will throw an exception immediately.
11504
11505 =cut
11506 */
11507
11508 OP *
11509 Perl_parse_arithexpr(pTHX_ U32 flags)
11510 {
11511     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11512 }
11513
11514 /*
11515 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11516
11517 Parse a Perl term expression.  This may contain operators of precedence
11518 down to the assignment operators.  The expression must be followed (and thus
11519 terminated) either by a comma or lower-precedence operator or by
11520 something that would normally terminate an expression such as semicolon.
11521 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11522 otherwise it is mandatory.  It is up to the caller to ensure that the
11523 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11524 the source of the code to be parsed and the lexical context for the
11525 expression.
11526
11527 The op tree representing the expression is returned.  If an optional
11528 expression is absent, a null pointer is returned, otherwise the pointer
11529 will be non-null.
11530
11531 If an error occurs in parsing or compilation, in most cases a valid op
11532 tree is returned anyway.  The error is reflected in the parser state,
11533 normally resulting in a single exception at the top level of parsing
11534 which covers all the compilation errors that occurred.  Some compilation
11535 errors, however, will throw an exception immediately.
11536
11537 =cut
11538 */
11539
11540 OP *
11541 Perl_parse_termexpr(pTHX_ U32 flags)
11542 {
11543     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11544 }
11545
11546 /*
11547 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11548
11549 Parse a Perl list expression.  This may contain operators of precedence
11550 down to the comma operator.  The expression must be followed (and thus
11551 terminated) either by a low-precedence logic operator such as C<or> or by
11552 something that would normally terminate an expression such as semicolon.
11553 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11554 otherwise it is mandatory.  It is up to the caller to ensure that the
11555 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11556 the source of the code to be parsed and the lexical context for the
11557 expression.
11558
11559 The op tree representing the expression is returned.  If an optional
11560 expression is absent, a null pointer is returned, otherwise the pointer
11561 will be non-null.
11562
11563 If an error occurs in parsing or compilation, in most cases a valid op
11564 tree is returned anyway.  The error is reflected in the parser state,
11565 normally resulting in a single exception at the top level of parsing
11566 which covers all the compilation errors that occurred.  Some compilation
11567 errors, however, will throw an exception immediately.
11568
11569 =cut
11570 */
11571
11572 OP *
11573 Perl_parse_listexpr(pTHX_ U32 flags)
11574 {
11575     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11576 }
11577
11578 /*
11579 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11580
11581 Parse a single complete Perl expression.  This allows the full
11582 expression grammar, including the lowest-precedence operators such
11583 as C<or>.  The expression must be followed (and thus terminated) by a
11584 token that an expression would normally be terminated by: end-of-file,
11585 closing bracketing punctuation, semicolon, or one of the keywords that
11586 signals a postfix expression-statement modifier.  If I<flags> includes
11587 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11588 mandatory.  It is up to the caller to ensure that the dynamic parser
11589 state (L</PL_parser> et al) is correctly set to reflect the source of
11590 the code to be parsed and the lexical context for the expression.
11591
11592 The op tree representing the expression is returned.  If an optional
11593 expression is absent, a null pointer is returned, otherwise the pointer
11594 will be non-null.
11595
11596 If an error occurs in parsing or compilation, in most cases a valid op
11597 tree is returned anyway.  The error is reflected in the parser state,
11598 normally resulting in a single exception at the top level of parsing
11599 which covers all the compilation errors that occurred.  Some compilation
11600 errors, however, will throw an exception immediately.
11601
11602 =cut
11603 */
11604
11605 OP *
11606 Perl_parse_fullexpr(pTHX_ U32 flags)
11607 {
11608     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11609 }
11610
11611 /*
11612 =for apidoc Amx|OP *|parse_block|U32 flags
11613
11614 Parse a single complete Perl code block.  This consists of an opening
11615 brace, a sequence of statements, and a closing brace.  The block
11616 constitutes a lexical scope, so C<my> variables and various compile-time
11617 effects can be contained within it.  It is up to the caller to ensure
11618 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11619 reflect the source of the code to be parsed and the lexical context for
11620 the statement.
11621
11622 The op tree representing the code block is returned.  This is always a
11623 real op, never a null pointer.  It will normally be a C<lineseq> list,
11624 including C<nextstate> or equivalent ops.  No ops to construct any kind
11625 of runtime scope are included by virtue of it being a block.
11626
11627 If an error occurs in parsing or compilation, in most cases a valid op
11628 tree (most likely null) is returned anyway.  The error is reflected in
11629 the parser state, normally resulting in a single exception at the top
11630 level of parsing which covers all the compilation errors that occurred.
11631 Some compilation errors, however, will throw an exception immediately.
11632
11633 The I<flags> parameter is reserved for future use, and must always
11634 be zero.
11635
11636 =cut
11637 */
11638
11639 OP *
11640 Perl_parse_block(pTHX_ U32 flags)
11641 {
11642     if (flags)
11643         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11644     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11645 }
11646
11647 /*
11648 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11649
11650 Parse a single unadorned Perl statement.  This may be a normal imperative
11651 statement or a declaration that has compile-time effect.  It does not
11652 include any label or other affixture.  It is up to the caller to ensure
11653 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11654 reflect the source of the code to be parsed and the lexical context for
11655 the statement.
11656
11657 The op tree representing the statement is returned.  This may be a
11658 null pointer if the statement is null, for example if it was actually
11659 a subroutine definition (which has compile-time side effects).  If not
11660 null, it will be ops directly implementing the statement, suitable to
11661 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11662 equivalent op (except for those embedded in a scope contained entirely
11663 within the statement).
11664
11665 If an error occurs in parsing or compilation, in most cases a valid op
11666 tree (most likely null) is returned anyway.  The error is reflected in
11667 the parser state, normally resulting in a single exception at the top
11668 level of parsing which covers all the compilation errors that occurred.
11669 Some compilation errors, however, will throw an exception immediately.
11670
11671 The I<flags> parameter is reserved for future use, and must always
11672 be zero.
11673
11674 =cut
11675 */
11676
11677 OP *
11678 Perl_parse_barestmt(pTHX_ U32 flags)
11679 {
11680     if (flags)
11681         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11682     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11683 }
11684
11685 /*
11686 =for apidoc Amx|SV *|parse_label|U32 flags
11687
11688 Parse a single label, possibly optional, of the type that may prefix a
11689 Perl statement.  It is up to the caller to ensure that the dynamic parser
11690 state (L</PL_parser> et al) is correctly set to reflect the source of
11691 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11692 label is optional, otherwise it is mandatory.
11693
11694 The name of the label is returned in the form of a fresh scalar.  If an
11695 optional label is absent, a null pointer is returned.
11696
11697 If an error occurs in parsing, which can only occur if the label is
11698 mandatory, a valid label is returned anyway.  The error is reflected in
11699 the parser state, normally resulting in a single exception at the top
11700 level of parsing which covers all the compilation errors that occurred.
11701
11702 =cut
11703 */
11704
11705 SV *
11706 Perl_parse_label(pTHX_ U32 flags)
11707 {
11708     if (flags & ~PARSE_OPTIONAL)
11709         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11710     if (PL_lex_state == LEX_KNOWNEXT) {
11711         PL_parser->yychar = yylex();
11712         if (PL_parser->yychar == LABEL) {
11713             SV *lsv;
11714             PL_parser->yychar = YYEMPTY;
11715             lsv = newSV_type(SVt_PV);
11716             sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11717             return lsv;
11718         } else {
11719             yyunlex();
11720             goto no_label;
11721         }
11722     } else {
11723         char *s, *t;
11724         STRLEN wlen, bufptr_pos;
11725         lex_read_space(0);
11726         t = s = PL_bufptr;
11727         if (!isIDFIRST_lazy_if(s, UTF))
11728             goto no_label;
11729         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11730         if (word_takes_any_delimeter(s, wlen))
11731             goto no_label;
11732         bufptr_pos = s - SvPVX(PL_linestr);
11733         PL_bufptr = t;
11734         lex_read_space(LEX_KEEP_PREVIOUS);
11735         t = PL_bufptr;
11736         s = SvPVX(PL_linestr) + bufptr_pos;
11737         if (t[0] == ':' && t[1] != ':') {
11738             PL_oldoldbufptr = PL_oldbufptr;
11739             PL_oldbufptr = s;
11740             PL_bufptr = t+1;
11741             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11742         } else {
11743             PL_bufptr = s;
11744             no_label:
11745             if (flags & PARSE_OPTIONAL) {
11746                 return NULL;
11747             } else {
11748                 qerror(Perl_mess(aTHX_ "Parse error"));
11749                 return newSVpvs("x");
11750             }
11751         }
11752     }
11753 }
11754
11755 /*
11756 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11757
11758 Parse a single complete Perl statement.  This may be a normal imperative
11759 statement or a declaration that has compile-time effect, and may include
11760 optional labels.  It is up to the caller to ensure that the dynamic
11761 parser state (L</PL_parser> et al) is correctly set to reflect the source
11762 of the code to be parsed and the lexical context for the statement.
11763
11764 The op tree representing the statement is returned.  This may be a
11765 null pointer if the statement is null, for example if it was actually
11766 a subroutine definition (which has compile-time side effects).  If not
11767 null, it will be the result of a L</newSTATEOP> call, normally including
11768 a C<nextstate> or equivalent op.
11769
11770 If an error occurs in parsing or compilation, in most cases a valid op
11771 tree (most likely null) is returned anyway.  The error is reflected in
11772 the parser state, normally resulting in a single exception at the top
11773 level of parsing which covers all the compilation errors that occurred.
11774 Some compilation errors, however, will throw an exception immediately.
11775
11776 The I<flags> parameter is reserved for future use, and must always
11777 be zero.
11778
11779 =cut
11780 */
11781
11782 OP *
11783 Perl_parse_fullstmt(pTHX_ U32 flags)
11784 {
11785     if (flags)
11786         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11787     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11788 }
11789
11790 /*
11791 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11792
11793 Parse a sequence of zero or more Perl statements.  These may be normal
11794 imperative statements, including optional labels, or declarations
11795 that have compile-time effect, or any mixture thereof.  The statement
11796 sequence ends when a closing brace or end-of-file is encountered in a
11797 place where a new statement could have validly started.  It is up to
11798 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11799 is correctly set to reflect the source of the code to be parsed and the
11800 lexical context for the statements.
11801
11802 The op tree representing the statement sequence is returned.  This may
11803 be a null pointer if the statements were all null, for example if there
11804 were no statements or if there were only subroutine definitions (which
11805 have compile-time side effects).  If not null, it will be a C<lineseq>
11806 list, normally including C<nextstate> or equivalent ops.
11807
11808 If an error occurs in parsing or compilation, in most cases a valid op
11809 tree is returned anyway.  The error is reflected in the parser state,
11810 normally resulting in a single exception at the top level of parsing
11811 which covers all the compilation errors that occurred.  Some compilation
11812 errors, however, will throw an exception immediately.
11813
11814 The I<flags> parameter is reserved for future use, and must always
11815 be zero.
11816
11817 =cut
11818 */
11819
11820 OP *
11821 Perl_parse_stmtseq(pTHX_ U32 flags)
11822 {
11823     OP *stmtseqop;
11824     I32 c;
11825     if (flags)
11826         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11827     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11828     c = lex_peek_unichar(0);
11829     if (c != -1 && c != /*{*/'}')
11830         qerror(Perl_mess(aTHX_ "Parse error"));
11831     return stmtseqop;
11832 }
11833
11834 /*
11835  * Local variables:
11836  * c-indentation-style: bsd
11837  * c-basic-offset: 4
11838  * indent-tabs-mode: nil
11839  * End:
11840  *
11841  * ex: set ts=8 sts=4 sw=4 et:
11842  */