toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char ident_too_long[] = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN0OP       : zero-argument function, with its op created in this file
 225  * FUN1         : not used, except for not, which isn't a UNIOP
 226  * BOop         : bitwise or or xor
 227  * BAop         : bitwise and
 228  * SHop         : shift operator
 229  * PWop         : power operator
 230  * PMop         : pattern-matching operator
 231  * Aop          : addition-level operator
 232  * Mop          : multiplication-level operator
 233  * Eop          : equality-testing operator
 234  * Rop          : relational operator <= != gt
 235  *
 236  * Also see LOP and lop() below.
 237  */
 238
 239 #ifdef DEBUGGING /* Serve -DT. */
 240 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 241 #else
 242 #   define REPORT(retval) (retval)
 243 #endif
 244
 245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 253 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 254 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 255 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI3(f,x,have_x) { \
 273         pl_yylval.ival = f; \
 274         if (have_x) PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI3(f,XTERM,1)
 284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 285 #define UNIPROTO(f,optional) { \
 286         if (optional) PL_last_uni = PL_oldbufptr; \
 287         OPERATOR(f); \
 288         }
 289
 290 #define UNIBRACK(f) UNI3(f,0,0)
 291
 292 /* grandfather return to old style */
 293 #define OLDLOP(f) \
 294         do { \
 295             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 296                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 297             pl_yylval.ival = (f); \
 298             PL_expect = XTERM; \
 299             PL_bufptr = s; \
 300             return (int)LSTOP; \
 301         } while(0)
 302
 303 #define COPLINE_INC_WITH_HERELINES                  \
 304     STMT_START {                                     \
 305         CopLINE_inc(PL_curcop);                       \
 306         if (PL_parser->lex_shared->herelines)          \
 307             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
 308             PL_parser->lex_shared->herelines = 0;                    \
 309     } STMT_END
 310
 311
 312 #ifdef DEBUGGING
 313
 314 /* how to interpret the pl_yylval associated with the token */
 315 enum token_type {
 316     TOKENTYPE_NONE,
 317     TOKENTYPE_IVAL,
 318     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 319     TOKENTYPE_PVAL,
 320     TOKENTYPE_OPVAL
 321 };
 322
 323 static struct debug_tokens {
 324     const int token;
 325     enum token_type type;
 326     const char *name;
 327 } const debug_tokens[] =
 328 {
 329     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 330     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 331     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 332     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 333     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 334     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 335     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 336     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 337     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 338     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 339     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 340     { DO,               TOKENTYPE_NONE,         "DO" },
 341     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 342     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 343     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 344     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 345     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 346     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 347     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 348     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 349     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 350     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 351     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 352     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 353     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 354     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 355     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 356     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 357     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 358     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 359     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 360     { IF,               TOKENTYPE_IVAL,         "IF" },
 361     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 362     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 363     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 364     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 365     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 366     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 367     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 368     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 369     { MY,               TOKENTYPE_IVAL,         "MY" },
 370     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 371     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 372     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 373     { OROR,             TOKENTYPE_NONE,         "OROR" },
 374     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 375     { PEG,              TOKENTYPE_NONE,         "PEG" },
 376     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 377     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 378     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 379     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 380     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 381     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 382     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 383     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 384     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 385     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 388     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 389     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 390     { SUB,              TOKENTYPE_NONE,         "SUB" },
 391     { THING,            TOKENTYPE_OPVAL,        "THING" },
 392     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 393     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 394     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 395     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 396     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 397     { USE,              TOKENTYPE_IVAL,         "USE" },
 398     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 399     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 400     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 401     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 402     { 0,                TOKENTYPE_NONE,         NULL }
 403 };
 404
 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 406
 407 STATIC int
 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 409 {
 410     dVAR;
 411
 412     PERL_ARGS_ASSERT_TOKEREPORT;
 413
 414     if (DEBUG_T_TEST) {
 415         const char *name = NULL;
 416         enum token_type type = TOKENTYPE_NONE;
 417         const struct debug_tokens *p;
 418         SV* const report = newSVpvs("<== ");
 419
 420         for (p = debug_tokens; p->token; p++) {
 421             if (p->token == (int)rv) {
 422                 name = p->name;
 423                 type = p->type;
 424                 break;
 425             }
 426         }
 427         if (name)
 428             Perl_sv_catpv(aTHX_ report, name);
 429         else if ((char)rv > ' ' && (char)rv <= '~')
 430             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 431         else if (!rv)
 432             sv_catpvs(report, "EOF");
 433         else
 434             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 435         switch (type) {
 436         case TOKENTYPE_NONE:
 437             break;
 438         case TOKENTYPE_IVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 440             break;
 441         case TOKENTYPE_OPNUM:
 442             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 443                                     PL_op_name[lvalp->ival]);
 444             break;
 445         case TOKENTYPE_PVAL:
 446             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 447             break;
 448         case TOKENTYPE_OPVAL:
 449             if (lvalp->opval) {
 450                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 451                                     PL_op_name[lvalp->opval->op_type]);
 452                 if (lvalp->opval->op_type == OP_CONST) {
 453                     Perl_sv_catpvf(aTHX_ report, " %s",
 454                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 455                 }
 456
 457             }
 458             else
 459                 sv_catpvs(report, "(opval=null)");
 460             break;
 461         }
 462         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 463     };
 464     return (int)rv;
 465 }
 466
 467
 468 /* print the buffer with suitable escapes */
 469
 470 STATIC void
 471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 472 {
 473     SV* const tmp = newSVpvs("");
 474
 475     PERL_ARGS_ASSERT_PRINTBUF;
 476
 477     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 478     SvREFCNT_dec(tmp);
 479 }
 480
 481 #endif
 482
 483 static int
 484 S_deprecate_commaless_var_list(pTHX) {
 485     PL_expect = XTERM;
 486     deprecate("comma-less variable list");
 487     return REPORT(','); /* grandfather non-comma-format format */
 488 }
 489
 490 /*
 491  * S_ao
 492  *
 493  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 494  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 495  */
 496
 497 STATIC int
 498 S_ao(pTHX_ int toketype)
 499 {
 500     dVAR;
 501     if (*PL_bufptr == '=') {
 502         PL_bufptr++;
 503         if (toketype == ANDAND)
 504             pl_yylval.ival = OP_ANDASSIGN;
 505         else if (toketype == OROR)
 506             pl_yylval.ival = OP_ORASSIGN;
 507         else if (toketype == DORDOR)
 508             pl_yylval.ival = OP_DORASSIGN;
 509         toketype = ASSIGNOP;
 510     }
 511     return toketype;
 512 }
 513
 514 /*
 515  * S_no_op
 516  * When Perl expects an operator and finds something else, no_op
 517  * prints the warning.  It always prints "<something> found where
 518  * operator expected.  It prints "Missing semicolon on previous line?"
 519  * if the surprise occurs at the start of the line.  "do you need to
 520  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 521  * where the compiler doesn't know if foo is a method call or a function.
 522  * It prints "Missing operator before end of line" if there's nothing
 523  * after the missing operator, or "... before <...>" if there is something
 524  * after the missing operator.
 525  */
 526
 527 STATIC void
 528 S_no_op(pTHX_ const char *const what, char *s)
 529 {
 530     dVAR;
 531     char * const oldbp = PL_bufptr;
 532     const bool is_first = (PL_oldbufptr == PL_linestart);
 533
 534     PERL_ARGS_ASSERT_NO_OP;
 535
 536     if (!s)
 537         s = oldbp;
 538     else
 539         PL_bufptr = s;
 540     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 541     if (ckWARN_d(WARN_SYNTAX)) {
 542         if (is_first)
 543             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                     "\t(Missing semicolon on previous line?)\n");
 545         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 546             const char *t;
 547             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 548                                                             t += UTF ? UTF8SKIP(t) : 1)
 549                 NOOP;
 550             if (t < PL_bufptr && isSPACE(*t))
 551                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 552                         "\t(Do you need to predeclare %"SVf"?)\n",
 553                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 554                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 555         }
 556         else {
 557             assert(s >= oldbp);
 558             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 559                     "\t(Missing operator before %"SVf"?)\n",
 560                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 561                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 562         }
 563     }
 564     PL_bufptr = oldbp;
 565 }
 566
 567 /*
 568  * S_missingterm
 569  * Complain about missing quote/regexp/heredoc terminator.
 570  * If it's called with NULL then it cauterizes the line buffer.
 571  * If we're in a delimited string and the delimiter is a control
 572  * character, it's reformatted into a two-char sequence like ^C.
 573  * This is fatal.
 574  */
 575
 576 STATIC void
 577 S_missingterm(pTHX_ char *s)
 578 {
 579     dVAR;
 580     char tmpbuf[3];
 581     char q;
 582     if (s) {
 583         char * const nl = strrchr(s,'\n');
 584         if (nl)
 585             *nl = '\0';
 586     }
 587     else if (isCNTRL(PL_multi_close)) {
 588         *tmpbuf = '^';
 589         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 590         tmpbuf[2] = '\0';
 591         s = tmpbuf;
 592     }
 593     else {
 594         *tmpbuf = (char)PL_multi_close;
 595         tmpbuf[1] = '\0';
 596         s = tmpbuf;
 597     }
 598     q = strchr(s,'"') ? '\'' : '"';
 599     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 600 }
 601
 602 #include "feature.h"
 603
 604 /*
 605  * Check whether the named feature is enabled.
 606  */
 607 bool
 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 609 {
 610     dVAR;
 611     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 612
 613     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 614
 615     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 616
 617     if (namelen > MAX_FEATURE_LEN)
 618         return FALSE;
 619     memcpy(&he_name[8], name, namelen);
 620
 621     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 622                                      REFCOUNTED_HE_EXISTS));
 623 }
 624
 625 /*
 626  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 627  * utf16-to-utf8-reversed.
 628  */
 629
 630 #ifdef PERL_CR_FILTER
 631 static void
 632 strip_return(SV *sv)
 633 {
 634     const char *s = SvPVX_const(sv);
 635     const char * const e = s + SvCUR(sv);
 636
 637     PERL_ARGS_ASSERT_STRIP_RETURN;
 638
 639     /* outer loop optimized to do nothing if there are no CR-LFs */
 640     while (s < e) {
 641         if (*s++ == '\r' && *s == '\n') {
 642             /* hit a CR-LF, need to copy the rest */
 643             char *d = s - 1;
 644             *d++ = *s++;
 645             while (s < e) {
 646                 if (*s == '\r' && s[1] == '\n')
 647                     s++;
 648                 *d++ = *s++;
 649             }
 650             SvCUR(sv) -= s - d;
 651             return;
 652         }
 653     }
 654 }
 655
 656 STATIC I32
 657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 658 {
 659     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 660     if (count > 0 && !maxlen)
 661         strip_return(sv);
 662     return count;
 663 }
 664 #endif
 665
 666 /*
 667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 668
 669 Creates and initialises a new lexer/parser state object, supplying
 670 a context in which to lex and parse from a new source of Perl code.
 671 A pointer to the new state object is placed in L</PL_parser>.  An entry
 672 is made on the save stack so that upon unwinding the new state object
 673 will be destroyed and the former value of L</PL_parser> will be restored.
 674 Nothing else need be done to clean up the parsing context.
 675
 676 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 677 non-null, provides a string (in SV form) containing code to be parsed.
 678 A copy of the string is made, so subsequent modification of I<line>
 679 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 680 from which code will be read to be parsed.  If both are non-null, the
 681 code in I<line> comes first and must consist of complete lines of input,
 682 and I<rsfp> supplies the remainder of the source.
 683
 684 The I<flags> parameter is reserved for future use.  Currently it is only
 685 used by perl internally, so extensions should always pass zero.
 686
 687 =cut
 688 */
 689
 690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 691    can share filters with the current parser.
 692    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 693    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 694    destruction. This is used to handle the case of defaulting to reading the
 695    script from the standard input because no filename was given on the command
 696    line (without getting confused by situation where STDIN has been closed, so
 697    the script handle is opened on fd 0)  */
 698
 699 void
 700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 701 {
 702     dVAR;
 703     const char *s = NULL;
 704     yy_parser *parser, *oparser;
 705     if (flags && flags & ~LEX_START_FLAGS)
 706         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 707
 708     /* create and initialise a parser */
 709
 710     Newxz(parser, 1, yy_parser);
 711     parser->old_parser = oparser = PL_parser;
 712     PL_parser = parser;
 713
 714     parser->stack = NULL;
 715     parser->ps = NULL;
 716     parser->stack_size = 0;
 717
 718     /* on scope exit, free this parser and restore any outer one */
 719     SAVEPARSER(parser);
 720     parser->saved_curcop = PL_curcop;
 721
 722     /* initialise lexer state */
 723
 724 #ifdef PERL_MAD
 725     parser->curforce = -1;
 726 #else
 727     parser->nexttoke = 0;
 728 #endif
 729     parser->error_count = oparser ? oparser->error_count : 0;
 730     parser->copline = NOLINE;
 731     parser->lex_state = LEX_NORMAL;
 732     parser->expect = XSTATE;
 733     parser->rsfp = rsfp;
 734     parser->rsfp_filters =
 735       !(flags & LEX_START_SAME_FILTER) || !oparser
 736         ? NULL
 737         : MUTABLE_AV(SvREFCNT_inc(
 738             oparser->rsfp_filters
 739              ? oparser->rsfp_filters
 740              : (oparser->rsfp_filters = newAV())
 741           ));
 742
 743     Newx(parser->lex_brackstack, 120, char);
 744     Newx(parser->lex_casestack, 12, char);
 745     *parser->lex_casestack = '\0';
 746     Newxz(parser->lex_shared, 1, LEXSHARED);
 747
 748     if (line) {
 749         STRLEN len;
 750         s = SvPV_const(line, len);
 751         parser->linestr = flags & LEX_START_COPIED
 752                             ? SvREFCNT_inc_simple_NN(line)
 753                             : newSVpvn_flags(s, len, SvUTF8(line));
 754         sv_catpvs(parser->linestr, "\n;");
 755     } else {
 756         parser->linestr = newSVpvs("\n;");
 757     }
 758     parser->oldoldbufptr =
 759         parser->oldbufptr =
 760         parser->bufptr =
 761         parser->linestart = SvPVX(parser->linestr);
 762     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 763     parser->last_lop = parser->last_uni = NULL;
 764     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 765                                  |LEX_DONT_CLOSE_RSFP);
 766
 767     parser->in_pod = parser->filtered = 0;
 768 }
 769
 770
 771 /* delete a parser object */
 772
 773 void
 774 Perl_parser_free(pTHX_  const yy_parser *parser)
 775 {
 776     PERL_ARGS_ASSERT_PARSER_FREE;
 777
 778     PL_curcop = parser->saved_curcop;
 779     SvREFCNT_dec(parser->linestr);
 780
 781     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 782         PerlIO_clearerr(parser->rsfp);
 783     else if (parser->rsfp && (!parser->old_parser ||
 784                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 785         PerlIO_close(parser->rsfp);
 786     SvREFCNT_dec(parser->rsfp_filters);
 787
 788     Safefree(parser->lex_brackstack);
 789     Safefree(parser->lex_casestack);
 790     Safefree(parser->lex_shared);
 791     PL_parser = parser->old_parser;
 792     Safefree(parser);
 793 }
 794
 795
 796 /*
 797 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 798
 799 Buffer scalar containing the chunk currently under consideration of the
 800 text currently being lexed.  This is always a plain string scalar (for
 801 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 802 normal scalar means; instead refer to the buffer directly by the pointer
 803 variables described below.
 804
 805 The lexer maintains various C<char*> pointers to things in the
 806 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 807 reallocated, all of these pointers must be updated.  Don't attempt to
 808 do this manually, but rather use L</lex_grow_linestr> if you need to
 809 reallocate the buffer.
 810
 811 The content of the text chunk in the buffer is commonly exactly one
 812 complete line of input, up to and including a newline terminator,
 813 but there are situations where it is otherwise.  The octets of the
 814 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 815 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 816 flag on this scalar, which may disagree with it.
 817
 818 For direct examination of the buffer, the variable
 819 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 820 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 821 of these pointers is usually preferable to examination of the scalar
 822 through normal scalar means.
 823
 824 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 825
 826 Direct pointer to the end of the chunk of text currently being lexed, the
 827 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 828 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 829 always located at the end of the buffer, and does not count as part of
 830 the buffer's contents.
 831
 832 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 833
 834 Points to the current position of lexing inside the lexer buffer.
 835 Characters around this point may be freely examined, within
 836 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 837 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 838 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 839
 840 Lexing code (whether in the Perl core or not) moves this pointer past
 841 the characters that it consumes.  It is also expected to perform some
 842 bookkeeping whenever a newline character is consumed.  This movement
 843 can be more conveniently performed by the function L</lex_read_to>,
 844 which handles newlines appropriately.
 845
 846 Interpretation of the buffer's octets can be abstracted out by
 847 using the slightly higher-level functions L</lex_peek_unichar> and
 848 L</lex_read_unichar>.
 849
 850 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 851
 852 Points to the start of the current line inside the lexer buffer.
 853 This is useful for indicating at which column an error occurred, and
 854 not much else.  This must be updated by any lexing code that consumes
 855 a newline; the function L</lex_read_to> handles this detail.
 856
 857 =cut
 858 */
 859
 860 /*
 861 =for apidoc Amx|bool|lex_bufutf8
 862
 863 Indicates whether the octets in the lexer buffer
 864 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 865 of Unicode characters.  If not, they should be interpreted as Latin-1
 866 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 867
 868 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 869 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 870 encoding.
 871
 872 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 873 is significant, but not the whole story regarding the input character
 874 encoding.  Normally, when a file is being read, the scalar contains octets
 875 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 876 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 877 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 878 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 879 is in effect.  This logic may change in the future; use this function
 880 instead of implementing the logic yourself.
 881
 882 =cut
 883 */
 884
 885 bool
 886 Perl_lex_bufutf8(pTHX)
 887 {
 888     return UTF;
 889 }
 890
 891 /*
 892 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 893
 894 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 895 at least I<len> octets (including terminating NUL).  Returns a
 896 pointer to the reallocated buffer.  This is necessary before making
 897 any direct modification of the buffer that would increase its length.
 898 L</lex_stuff_pvn> provides a more convenient way to insert text into
 899 the buffer.
 900
 901 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 902 this function updates all of the lexer's variables that point directly
 903 into the buffer.
 904
 905 =cut
 906 */
 907
 908 char *
 909 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 910 {
 911     SV *linestr;
 912     char *buf;
 913     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 914     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 915     linestr = PL_parser->linestr;
 916     buf = SvPVX(linestr);
 917     if (len <= SvLEN(linestr))
 918         return buf;
 919     bufend_pos = PL_parser->bufend - buf;
 920     bufptr_pos = PL_parser->bufptr - buf;
 921     oldbufptr_pos = PL_parser->oldbufptr - buf;
 922     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 923     linestart_pos = PL_parser->linestart - buf;
 924     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 925     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 926     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 927                             PL_parser->lex_shared->re_eval_start - buf : 0;
 928
 929     buf = sv_grow(linestr, len);
 930
 931     PL_parser->bufend = buf + bufend_pos;
 932     PL_parser->bufptr = buf + bufptr_pos;
 933     PL_parser->oldbufptr = buf + oldbufptr_pos;
 934     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 935     PL_parser->linestart = buf + linestart_pos;
 936     if (PL_parser->last_uni)
 937         PL_parser->last_uni = buf + last_uni_pos;
 938     if (PL_parser->last_lop)
 939         PL_parser->last_lop = buf + last_lop_pos;
 940     if (PL_parser->lex_shared->re_eval_start)
 941         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 942     return buf;
 943 }
 944
 945 /*
 946 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 947
 948 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 949 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 950 reallocating the buffer if necessary.  This means that lexing code that
 951 runs later will see the characters as if they had appeared in the input.
 952 It is not recommended to do this as part of normal parsing, and most
 953 uses of this facility run the risk of the inserted characters being
 954 interpreted in an unintended manner.
 955
 956 The string to be inserted is represented by I<len> octets starting
 957 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 958 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 959 The characters are recoded for the lexer buffer, according to how the
 960 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 961 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 962 function is more convenient.
 963
 964 =cut
 965 */
 966
 967 void
 968 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 969 {
 970     dVAR;
 971     char *bufptr;
 972     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 973     if (flags & ~(LEX_STUFF_UTF8))
 974         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 975     if (UTF) {
 976         if (flags & LEX_STUFF_UTF8) {
 977             goto plain_copy;
 978         } else {
 979             STRLEN highhalf = 0;
 980             const char *p, *e = pv+len;
 981             for (p = pv; p != e; p++)
 982                 highhalf += !!(((U8)*p) & 0x80);
 983             if (!highhalf)
 984                 goto plain_copy;
 985             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 986             bufptr = PL_parser->bufptr;
 987             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 988             SvCUR_set(PL_parser->linestr,
 989                 SvCUR(PL_parser->linestr) + len+highhalf);
 990             PL_parser->bufend += len+highhalf;
 991             for (p = pv; p != e; p++) {
 992                 U8 c = (U8)*p;
 993                 if (c & 0x80) {
 994                     *bufptr++ = (char)(0xc0 | (c >> 6));
 995                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 996                 } else {
 997                     *bufptr++ = (char)c;
 998                 }
 999             }
1000         }
1001     } else {
1002         if (flags & LEX_STUFF_UTF8) {
1003             STRLEN highhalf = 0;
1004             const char *p, *e = pv+len;
1005             for (p = pv; p != e; p++) {
1006                 U8 c = (U8)*p;
1007                 if (c >= 0xc4) {
1008                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1009                                 "non-Latin-1 character into Latin-1 input");
1010                 } else if (c >= 0xc2 && p+1 != e &&
1011                             (((U8)p[1]) & 0xc0) == 0x80) {
1012                     p++;
1013                     highhalf++;
1014                 } else if (c >= 0x80) {
1015                     /* malformed UTF-8 */
1016                     ENTER;
1017                     SAVESPTR(PL_warnhook);
1018                     PL_warnhook = PERL_WARNHOOK_FATAL;
1019                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1020                     LEAVE;
1021                 }
1022             }
1023             if (!highhalf)
1024                 goto plain_copy;
1025             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1026             bufptr = PL_parser->bufptr;
1027             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1028             SvCUR_set(PL_parser->linestr,
1029                 SvCUR(PL_parser->linestr) + len-highhalf);
1030             PL_parser->bufend += len-highhalf;
1031             for (p = pv; p != e; p++) {
1032                 U8 c = (U8)*p;
1033                 if (c & 0x80) {
1034                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1035                     p++;
1036                 } else {
1037                     *bufptr++ = (char)c;
1038                 }
1039             }
1040         } else {
1041             plain_copy:
1042             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043             bufptr = PL_parser->bufptr;
1044             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046             PL_parser->bufend += len;
1047             Copy(pv, bufptr, len, char);
1048         }
1049     }
1050 }
1051
1052 /*
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1054
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary.  This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1062
1063 The string to be inserted is represented by octets starting at I<pv>
1064 and continuing to the first nul.  These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in I<flags>.  The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1076 {
1077     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078     lex_stuff_pvn(pv, strlen(pv), flags);
1079 }
1080
1081 /*
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1083
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary.  This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1091
1092 The string to be inserted is the string value of I<sv>.  The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1097
1098 =cut
1099 */
1100
1101 void
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 {
1104     char *pv;
1105     STRLEN len;
1106     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1107     if (flags)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1109     pv = SvPV(sv, len);
1110     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 }
1112
1113 /*
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1115
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1120
1121 This is not the normal way to consume lexed text.  For that, use
1122 L</lex_read_to>.
1123
1124 =cut
1125 */
1126
1127 void
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1129 {
1130     char *buf, *bufend;
1131     STRLEN unstuff_len;
1132     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133     buf = PL_parser->bufptr;
1134     if (ptr < buf)
1135         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1136     if (ptr == buf)
1137         return;
1138     bufend = PL_parser->bufend;
1139     if (ptr > bufend)
1140         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141     unstuff_len = ptr - buf;
1142     Move(ptr, buf, bufend+1-ptr, char);
1143     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144     PL_parser->bufend = bufend - unstuff_len;
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1149
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1154
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_read_to(pTHX_ char *ptr)
1164 {
1165     char *s;
1166     PERL_ARGS_ASSERT_LEX_READ_TO;
1167     s = PL_parser->bufptr;
1168     if (ptr < s || ptr > PL_parser->bufend)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170     for (; s != ptr; s++)
1171         if (*s == '\n') {
1172             COPLINE_INC_WITH_HERELINES;
1173             PL_parser->linestart = s+1;
1174         }
1175     PL_parser->bufptr = ptr;
1176 }
1177
1178 /*
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1180
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to I<ptr>.  The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately.  I<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1186
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it.  However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1194
1195 =cut
1196 */
1197
1198 void
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1200 {
1201     char *buf;
1202     STRLEN discard_len;
1203     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204     buf = SvPVX(PL_parser->linestr);
1205     if (ptr < buf)
1206         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1207     if (ptr == buf)
1208         return;
1209     if (ptr > PL_parser->bufptr)
1210         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211     discard_len = ptr - buf;
1212     if (PL_parser->oldbufptr < ptr)
1213         PL_parser->oldbufptr = ptr;
1214     if (PL_parser->oldoldbufptr < ptr)
1215         PL_parser->oldoldbufptr = ptr;
1216     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217         PL_parser->last_uni = NULL;
1218     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219         PL_parser->last_lop = NULL;
1220     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222     PL_parser->bufend -= discard_len;
1223     PL_parser->bufptr -= discard_len;
1224     PL_parser->oldbufptr -= discard_len;
1225     PL_parser->oldoldbufptr -= discard_len;
1226     if (PL_parser->last_uni)
1227         PL_parser->last_uni -= discard_len;
1228     if (PL_parser->last_lop)
1229         PL_parser->last_lop -= discard_len;
1230 }
1231
1232 /*
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1234
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more.  It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1240
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245 will not be discarded.  If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1247
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1250
1251 =cut
1252 */
1253
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM  0x40000000
1256
1257 bool
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1259 {
1260     SV *linestr;
1261     char *buf;
1262     STRLEN old_bufend_pos, new_bufend_pos;
1263     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265     bool got_some_for_debugger = 0;
1266     bool got_some;
1267     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269     linestr = PL_parser->linestr;
1270     buf = SvPVX(linestr);
1271     if (!(flags & LEX_KEEP_PREVIOUS) &&
1272             PL_parser->bufptr == PL_parser->bufend) {
1273         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1274         linestart_pos = 0;
1275         if (PL_parser->last_uni != PL_parser->bufend)
1276             PL_parser->last_uni = NULL;
1277         if (PL_parser->last_lop != PL_parser->bufend)
1278             PL_parser->last_lop = NULL;
1279         last_uni_pos = last_lop_pos = 0;
1280         *buf = 0;
1281         SvCUR(linestr) = 0;
1282     } else {
1283         old_bufend_pos = PL_parser->bufend - buf;
1284         bufptr_pos = PL_parser->bufptr - buf;
1285         oldbufptr_pos = PL_parser->oldbufptr - buf;
1286         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1287         linestart_pos = PL_parser->linestart - buf;
1288         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1289         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1290     }
1291     if (flags & LEX_FAKE_EOF) {
1292         goto eof;
1293     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1294         got_some = 0;
1295     } else if (filter_gets(linestr, old_bufend_pos)) {
1296         got_some = 1;
1297         got_some_for_debugger = 1;
1298     } else if (flags & LEX_NO_TERM) {
1299         got_some = 0;
1300     } else {
1301         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1302             sv_setpvs(linestr, "");
1303         eof:
1304         /* End of real input.  Close filehandle (unless it was STDIN),
1305          * then add implicit termination.
1306          */
1307         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1308             PerlIO_clearerr(PL_parser->rsfp);
1309         else if (PL_parser->rsfp)
1310             (void)PerlIO_close(PL_parser->rsfp);
1311         PL_parser->rsfp = NULL;
1312         PL_parser->in_pod = PL_parser->filtered = 0;
1313 #ifdef PERL_MAD
1314         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1315             PL_faketokens = 1;
1316 #endif
1317         if (!PL_in_eval && PL_minus_p) {
1318             sv_catpvs(linestr,
1319                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1320             PL_minus_n = PL_minus_p = 0;
1321         } else if (!PL_in_eval && PL_minus_n) {
1322             sv_catpvs(linestr, /*{*/";}");
1323             PL_minus_n = 0;
1324         } else
1325             sv_catpvs(linestr, ";");
1326         got_some = 1;
1327     }
1328     buf = SvPVX(linestr);
1329     new_bufend_pos = SvCUR(linestr);
1330     PL_parser->bufend = buf + new_bufend_pos;
1331     PL_parser->bufptr = buf + bufptr_pos;
1332     PL_parser->oldbufptr = buf + oldbufptr_pos;
1333     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1334     PL_parser->linestart = buf + linestart_pos;
1335     if (PL_parser->last_uni)
1336         PL_parser->last_uni = buf + last_uni_pos;
1337     if (PL_parser->last_lop)
1338         PL_parser->last_lop = buf + last_lop_pos;
1339     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1340             PL_curstash != PL_debstash) {
1341         /* debugger active and we're not compiling the debugger code,
1342          * so store the line into the debugger's array of lines
1343          */
1344         update_debugger_info(NULL, buf+old_bufend_pos,
1345             new_bufend_pos-old_bufend_pos);
1346     }
1347     return got_some;
1348 }
1349
1350 /*
1351 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1352
1353 Looks ahead one (Unicode) character in the text currently being lexed.
1354 Returns the codepoint (unsigned integer value) of the next character,
1355 or -1 if lexing has reached the end of the input text.  To consume the
1356 peeked character, use L</lex_read_unichar>.
1357
1358 If the next character is in (or extends into) the next chunk of input
1359 text, the next chunk will be read in.  Normally the current chunk will be
1360 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1361 then the current chunk will not be discarded.
1362
1363 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1364 is encountered, an exception is generated.
1365
1366 =cut
1367 */
1368
1369 I32
1370 Perl_lex_peek_unichar(pTHX_ U32 flags)
1371 {
1372     dVAR;
1373     char *s, *bufend;
1374     if (flags & ~(LEX_KEEP_PREVIOUS))
1375         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1376     s = PL_parser->bufptr;
1377     bufend = PL_parser->bufend;
1378     if (UTF) {
1379         U8 head;
1380         I32 unichar;
1381         STRLEN len, retlen;
1382         if (s == bufend) {
1383             if (!lex_next_chunk(flags))
1384                 return -1;
1385             s = PL_parser->bufptr;
1386             bufend = PL_parser->bufend;
1387         }
1388         head = (U8)*s;
1389         if (!(head & 0x80))
1390             return head;
1391         if (head & 0x40) {
1392             len = PL_utf8skip[head];
1393             while ((STRLEN)(bufend-s) < len) {
1394                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1395                     break;
1396                 s = PL_parser->bufptr;
1397                 bufend = PL_parser->bufend;
1398             }
1399         }
1400         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1401         if (retlen == (STRLEN)-1) {
1402             /* malformed UTF-8 */
1403             ENTER;
1404             SAVESPTR(PL_warnhook);
1405             PL_warnhook = PERL_WARNHOOK_FATAL;
1406             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1407             LEAVE;
1408         }
1409         return unichar;
1410     } else {
1411         if (s == bufend) {
1412             if (!lex_next_chunk(flags))
1413                 return -1;
1414             s = PL_parser->bufptr;
1415         }
1416         return (U8)*s;
1417     }
1418 }
1419
1420 /*
1421 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1422
1423 Reads the next (Unicode) character in the text currently being lexed.
1424 Returns the codepoint (unsigned integer value) of the character read,
1425 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1426 if lexing has reached the end of the input text.  To non-destructively
1427 examine the next character, use L</lex_peek_unichar> instead.
1428
1429 If the next character is in (or extends into) the next chunk of input
1430 text, the next chunk will be read in.  Normally the current chunk will be
1431 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1432 then the current chunk will not be discarded.
1433
1434 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1435 is encountered, an exception is generated.
1436
1437 =cut
1438 */
1439
1440 I32
1441 Perl_lex_read_unichar(pTHX_ U32 flags)
1442 {
1443     I32 c;
1444     if (flags & ~(LEX_KEEP_PREVIOUS))
1445         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1446     c = lex_peek_unichar(flags);
1447     if (c != -1) {
1448         if (c == '\n')
1449             COPLINE_INC_WITH_HERELINES;
1450         if (UTF)
1451             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1452         else
1453             ++(PL_parser->bufptr);
1454     }
1455     return c;
1456 }
1457
1458 /*
1459 =for apidoc Amx|void|lex_read_space|U32 flags
1460
1461 Reads optional spaces, in Perl style, in the text currently being
1462 lexed.  The spaces may include ordinary whitespace characters and
1463 Perl-style comments.  C<#line> directives are processed if encountered.
1464 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1465 at a non-space character (or the end of the input text).
1466
1467 If spaces extend into the next chunk of input text, the next chunk will
1468 be read in.  Normally the current chunk will be discarded at the same
1469 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1470 chunk will not be discarded.
1471
1472 =cut
1473 */
1474
1475 #define LEX_NO_NEXT_CHUNK 0x80000000
1476
1477 void
1478 Perl_lex_read_space(pTHX_ U32 flags)
1479 {
1480     char *s, *bufend;
1481     bool need_incline = 0;
1482     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1483         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1484 #ifdef PERL_MAD
1485     if (PL_skipwhite) {
1486         sv_free(PL_skipwhite);
1487         PL_skipwhite = NULL;
1488     }
1489     if (PL_madskills)
1490         PL_skipwhite = newSVpvs("");
1491 #endif /* PERL_MAD */
1492     s = PL_parser->bufptr;
1493     bufend = PL_parser->bufend;
1494     while (1) {
1495         char c = *s;
1496         if (c == '#') {
1497             do {
1498                 c = *++s;
1499             } while (!(c == '\n' || (c == 0 && s == bufend)));
1500         } else if (c == '\n') {
1501             s++;
1502             PL_parser->linestart = s;
1503             if (s == bufend)
1504                 need_incline = 1;
1505             else
1506                 incline(s);
1507         } else if (isSPACE(c)) {
1508             s++;
1509         } else if (c == 0 && s == bufend) {
1510             bool got_more;
1511 #ifdef PERL_MAD
1512             if (PL_madskills)
1513                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1514 #endif /* PERL_MAD */
1515             if (flags & LEX_NO_NEXT_CHUNK)
1516                 break;
1517             PL_parser->bufptr = s;
1518             COPLINE_INC_WITH_HERELINES;
1519             got_more = lex_next_chunk(flags);
1520             CopLINE_dec(PL_curcop);
1521             s = PL_parser->bufptr;
1522             bufend = PL_parser->bufend;
1523             if (!got_more)
1524                 break;
1525             if (need_incline && PL_parser->rsfp) {
1526                 incline(s);
1527                 need_incline = 0;
1528             }
1529         } else {
1530             break;
1531         }
1532     }
1533 #ifdef PERL_MAD
1534     if (PL_madskills)
1535         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1536 #endif /* PERL_MAD */
1537     PL_parser->bufptr = s;
1538 }
1539
1540 /*
1541  * S_incline
1542  * This subroutine has nothing to do with tilting, whether at windmills
1543  * or pinball tables.  Its name is short for "increment line".  It
1544  * increments the current line number in CopLINE(PL_curcop) and checks
1545  * to see whether the line starts with a comment of the form
1546  *    # line 500 "foo.pm"
1547  * If so, it sets the current line number and file to the values in the comment.
1548  */
1549
1550 STATIC void
1551 S_incline(pTHX_ const char *s)
1552 {
1553     dVAR;
1554     const char *t;
1555     const char *n;
1556     const char *e;
1557     line_t line_num;
1558
1559     PERL_ARGS_ASSERT_INCLINE;
1560
1561     COPLINE_INC_WITH_HERELINES;
1562     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1563      && s+1 == PL_bufend && *s == ';') {
1564         /* fake newline in string eval */
1565         CopLINE_dec(PL_curcop);
1566         return;
1567     }
1568     if (*s++ != '#')
1569         return;
1570     while (SPACE_OR_TAB(*s))
1571         s++;
1572     if (strnEQ(s, "line", 4))
1573         s += 4;
1574     else
1575         return;
1576     if (SPACE_OR_TAB(*s))
1577         s++;
1578     else
1579         return;
1580     while (SPACE_OR_TAB(*s))
1581         s++;
1582     if (!isDIGIT(*s))
1583         return;
1584
1585     n = s;
1586     while (isDIGIT(*s))
1587         s++;
1588     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1589         return;
1590     while (SPACE_OR_TAB(*s))
1591         s++;
1592     if (*s == '"' && (t = strchr(s+1, '"'))) {
1593         s++;
1594         e = t + 1;
1595     }
1596     else {
1597         t = s;
1598         while (!isSPACE(*t))
1599             t++;
1600         e = t;
1601     }
1602     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1603         e++;
1604     if (*e != '\n' && *e != '\0')
1605         return;         /* false alarm */
1606
1607     line_num = atoi(n)-1;
1608
1609     if (t - s > 0) {
1610         const STRLEN len = t - s;
1611         SV *const temp_sv = CopFILESV(PL_curcop);
1612         const char *cf;
1613         STRLEN tmplen;
1614
1615         if (temp_sv) {
1616             cf = SvPVX(temp_sv);
1617             tmplen = SvCUR(temp_sv);
1618         } else {
1619             cf = NULL;
1620             tmplen = 0;
1621         }
1622
1623         if (!PL_rsfp && !PL_parser->filtered) {
1624             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1625              * to *{"::_<newfilename"} */
1626             /* However, the long form of evals is only turned on by the
1627                debugger - usually they're "(eval %lu)" */
1628             char smallbuf[128];
1629             char *tmpbuf;
1630             GV **gvp;
1631             STRLEN tmplen2 = len;
1632             if (tmplen + 2 <= sizeof smallbuf)
1633                 tmpbuf = smallbuf;
1634             else
1635                 Newx(tmpbuf, tmplen + 2, char);
1636             tmpbuf[0] = '_';
1637             tmpbuf[1] = '<';
1638             memcpy(tmpbuf + 2, cf, tmplen);
1639             tmplen += 2;
1640             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1641             if (gvp) {
1642                 char *tmpbuf2;
1643                 GV *gv2;
1644
1645                 if (tmplen2 + 2 <= sizeof smallbuf)
1646                     tmpbuf2 = smallbuf;
1647                 else
1648                     Newx(tmpbuf2, tmplen2 + 2, char);
1649
1650                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1651                     /* Either they malloc'd it, or we malloc'd it,
1652                        so no prefix is present in ours.  */
1653                     tmpbuf2[0] = '_';
1654                     tmpbuf2[1] = '<';
1655                 }
1656
1657                 memcpy(tmpbuf2 + 2, s, tmplen2);
1658                 tmplen2 += 2;
1659
1660                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1661                 if (!isGV(gv2)) {
1662                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1663                     /* adjust ${"::_<newfilename"} to store the new file name */
1664                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1665                     /* The line number may differ. If that is the case,
1666                        alias the saved lines that are in the array.
1667                        Otherwise alias the whole array. */
1668                     if (CopLINE(PL_curcop) == line_num) {
1669                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1670                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1671                     }
1672                     else if (GvAV(*gvp)) {
1673                         AV * const av = GvAV(*gvp);
1674                         const I32 start = CopLINE(PL_curcop)+1;
1675                         I32 items = AvFILLp(av) - start;
1676                         if (items > 0) {
1677                             AV * const av2 = GvAVn(gv2);
1678                             SV **svp = AvARRAY(av) + start;
1679                             I32 l = (I32)line_num+1;
1680                             while (items--)
1681                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1682                         }
1683                     }
1684                 }
1685
1686                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1687             }
1688             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1689         }
1690         CopFILE_free(PL_curcop);
1691         CopFILE_setn(PL_curcop, s, len);
1692     }
1693     CopLINE_set(PL_curcop, line_num);
1694 }
1695
1696 #ifdef PERL_MAD
1697 /* skip space before PL_thistoken */
1698
1699 STATIC char *
1700 S_skipspace0(pTHX_ register char *s)
1701 {
1702     PERL_ARGS_ASSERT_SKIPSPACE0;
1703
1704     s = skipspace(s);
1705     if (!PL_madskills)
1706         return s;
1707     if (PL_skipwhite) {
1708         if (!PL_thiswhite)
1709             PL_thiswhite = newSVpvs("");
1710         sv_catsv(PL_thiswhite, PL_skipwhite);
1711         sv_free(PL_skipwhite);
1712         PL_skipwhite = 0;
1713     }
1714     PL_realtokenstart = s - SvPVX(PL_linestr);
1715     return s;
1716 }
1717
1718 /* skip space after PL_thistoken */
1719
1720 STATIC char *
1721 S_skipspace1(pTHX_ register char *s)
1722 {
1723     const char *start = s;
1724     I32 startoff = start - SvPVX(PL_linestr);
1725
1726     PERL_ARGS_ASSERT_SKIPSPACE1;
1727
1728     s = skipspace(s);
1729     if (!PL_madskills)
1730         return s;
1731     start = SvPVX(PL_linestr) + startoff;
1732     if (!PL_thistoken && PL_realtokenstart >= 0) {
1733         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1734         PL_thistoken = newSVpvn(tstart, start - tstart);
1735     }
1736     PL_realtokenstart = -1;
1737     if (PL_skipwhite) {
1738         if (!PL_nextwhite)
1739             PL_nextwhite = newSVpvs("");
1740         sv_catsv(PL_nextwhite, PL_skipwhite);
1741         sv_free(PL_skipwhite);
1742         PL_skipwhite = 0;
1743     }
1744     return s;
1745 }
1746
1747 STATIC char *
1748 S_skipspace2(pTHX_ register char *s, SV **svp)
1749 {
1750     char *start;
1751     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1752     const I32 startoff = s - SvPVX(PL_linestr);
1753
1754     PERL_ARGS_ASSERT_SKIPSPACE2;
1755
1756     s = skipspace(s);
1757     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1758     if (!PL_madskills || !svp)
1759         return s;
1760     start = SvPVX(PL_linestr) + startoff;
1761     if (!PL_thistoken && PL_realtokenstart >= 0) {
1762         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1763         PL_thistoken = newSVpvn(tstart, start - tstart);
1764         PL_realtokenstart = -1;
1765     }
1766     if (PL_skipwhite) {
1767         if (!*svp)
1768             *svp = newSVpvs("");
1769         sv_setsv(*svp, PL_skipwhite);
1770         sv_free(PL_skipwhite);
1771         PL_skipwhite = 0;
1772     }
1773
1774     return s;
1775 }
1776 #endif
1777
1778 STATIC void
1779 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1780 {
1781     AV *av = CopFILEAVx(PL_curcop);
1782     if (av) {
1783         SV * const sv = newSV_type(SVt_PVMG);
1784         if (orig_sv)
1785             sv_setsv(sv, orig_sv);
1786         else
1787             sv_setpvn(sv, buf, len);
1788         (void)SvIOK_on(sv);
1789         SvIV_set(sv, 0);
1790         av_store(av, (I32)CopLINE(PL_curcop), sv);
1791     }
1792 }
1793
1794 /*
1795  * S_skipspace
1796  * Called to gobble the appropriate amount and type of whitespace.
1797  * Skips comments as well.
1798  */
1799
1800 STATIC char *
1801 S_skipspace(pTHX_ register char *s)
1802 {
1803 #ifdef PERL_MAD
1804     char *start = s;
1805 #endif /* PERL_MAD */
1806     PERL_ARGS_ASSERT_SKIPSPACE;
1807 #ifdef PERL_MAD
1808     if (PL_skipwhite) {
1809         sv_free(PL_skipwhite);
1810         PL_skipwhite = NULL;
1811     }
1812 #endif /* PERL_MAD */
1813     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1814         while (s < PL_bufend && SPACE_OR_TAB(*s))
1815             s++;
1816     } else {
1817         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1818         PL_bufptr = s;
1819         lex_read_space(LEX_KEEP_PREVIOUS |
1820                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1821                     LEX_NO_NEXT_CHUNK : 0));
1822         s = PL_bufptr;
1823         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1824         if (PL_linestart > PL_bufptr)
1825             PL_bufptr = PL_linestart;
1826         return s;
1827     }
1828 #ifdef PERL_MAD
1829     if (PL_madskills)
1830         PL_skipwhite = newSVpvn(start, s-start);
1831 #endif /* PERL_MAD */
1832     return s;
1833 }
1834
1835 /*
1836  * S_check_uni
1837  * Check the unary operators to ensure there's no ambiguity in how they're
1838  * used.  An ambiguous piece of code would be:
1839  *     rand + 5
1840  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1841  * the +5 is its argument.
1842  */
1843
1844 STATIC void
1845 S_check_uni(pTHX)
1846 {
1847     dVAR;
1848     const char *s;
1849     const char *t;
1850
1851     if (PL_oldoldbufptr != PL_last_uni)
1852         return;
1853     while (isSPACE(*PL_last_uni))
1854         PL_last_uni++;
1855     s = PL_last_uni;
1856     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1857         s++;
1858     if ((t = strchr(s, '(')) && t < PL_bufptr)
1859         return;
1860
1861     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1862                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1863                      (int)(s - PL_last_uni), PL_last_uni);
1864 }
1865
1866 /*
1867  * LOP : macro to build a list operator.  Its behaviour has been replaced
1868  * with a subroutine, S_lop() for which LOP is just another name.
1869  */
1870
1871 #define LOP(f,x) return lop(f,x,s)
1872
1873 /*
1874  * S_lop
1875  * Build a list operator (or something that might be one).  The rules:
1876  *  - if we have a next token, then it's a list operator [why?]
1877  *  - if the next thing is an opening paren, then it's a function
1878  *  - else it's a list operator
1879  */
1880
1881 STATIC I32
1882 S_lop(pTHX_ I32 f, int x, char *s)
1883 {
1884     dVAR;
1885
1886     PERL_ARGS_ASSERT_LOP;
1887
1888     pl_yylval.ival = f;
1889     CLINE;
1890     PL_expect = x;
1891     PL_bufptr = s;
1892     PL_last_lop = PL_oldbufptr;
1893     PL_last_lop_op = (OPCODE)f;
1894 #ifdef PERL_MAD
1895     if (PL_lasttoke)
1896         goto lstop;
1897 #else
1898     if (PL_nexttoke)
1899         goto lstop;
1900 #endif
1901     if (*s == '(')
1902         return REPORT(FUNC);
1903     s = PEEKSPACE(s);
1904     if (*s == '(')
1905         return REPORT(FUNC);
1906     else {
1907         lstop:
1908         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1909             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1910         return REPORT(LSTOP);
1911     }
1912 }
1913
1914 #ifdef PERL_MAD
1915  /*
1916  * S_start_force
1917  * Sets up for an eventual force_next().  start_force(0) basically does
1918  * an unshift, while start_force(-1) does a push.  yylex removes items
1919  * on the "pop" end.
1920  */
1921
1922 STATIC void
1923 S_start_force(pTHX_ int where)
1924 {
1925     int i;
1926
1927     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1928         where = PL_lasttoke;
1929     assert(PL_curforce < 0 || PL_curforce == where);
1930     if (PL_curforce != where) {
1931         for (i = PL_lasttoke; i > where; --i) {
1932             PL_nexttoke[i] = PL_nexttoke[i-1];
1933         }
1934         PL_lasttoke++;
1935     }
1936     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1937         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1938     PL_curforce = where;
1939     if (PL_nextwhite) {
1940         if (PL_madskills)
1941             curmad('^', newSVpvs(""));
1942         CURMAD('_', PL_nextwhite);
1943     }
1944 }
1945
1946 STATIC void
1947 S_curmad(pTHX_ char slot, SV *sv)
1948 {
1949     MADPROP **where;
1950
1951     if (!sv)
1952         return;
1953     if (PL_curforce < 0)
1954         where = &PL_thismad;
1955     else
1956         where = &PL_nexttoke[PL_curforce].next_mad;
1957
1958     if (PL_faketokens)
1959         sv_setpvs(sv, "");
1960     else {
1961         if (!IN_BYTES) {
1962             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1963                 SvUTF8_on(sv);
1964             else if (PL_encoding) {
1965                 sv_recode_to_utf8(sv, PL_encoding);
1966             }
1967         }
1968     }
1969
1970     /* keep a slot open for the head of the list? */
1971     if (slot != '_' && *where && (*where)->mad_key == '^') {
1972         (*where)->mad_key = slot;
1973         sv_free(MUTABLE_SV(((*where)->mad_val)));
1974         (*where)->mad_val = (void*)sv;
1975     }
1976     else
1977         addmad(newMADsv(slot, sv), where, 0);
1978 }
1979 #else
1980 #  define start_force(where)    NOOP
1981 #  define curmad(slot, sv)      NOOP
1982 #endif
1983
1984 /*
1985  * S_force_next
1986  * When the lexer realizes it knows the next token (for instance,
1987  * it is reordering tokens for the parser) then it can call S_force_next
1988  * to know what token to return the next time the lexer is called.  Caller
1989  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1990  * and possibly PL_expect to ensure the lexer handles the token correctly.
1991  */
1992
1993 STATIC void
1994 S_force_next(pTHX_ I32 type)
1995 {
1996     dVAR;
1997 #ifdef DEBUGGING
1998     if (DEBUG_T_TEST) {
1999         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2000         tokereport(type, &NEXTVAL_NEXTTOKE);
2001     }
2002 #endif
2003     /* Don’t let opslab_force_free snatch it */
2004     if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2005         assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2006         NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2007     }
2008 #ifdef PERL_MAD
2009     if (PL_curforce < 0)
2010         start_force(PL_lasttoke);
2011     PL_nexttoke[PL_curforce].next_type = type;
2012     if (PL_lex_state != LEX_KNOWNEXT)
2013         PL_lex_defer = PL_lex_state;
2014     PL_lex_state = LEX_KNOWNEXT;
2015     PL_lex_expect = PL_expect;
2016     PL_curforce = -1;
2017 #else
2018     PL_nexttype[PL_nexttoke] = type;
2019     PL_nexttoke++;
2020     if (PL_lex_state != LEX_KNOWNEXT) {
2021         PL_lex_defer = PL_lex_state;
2022         PL_lex_expect = PL_expect;
2023         PL_lex_state = LEX_KNOWNEXT;
2024     }
2025 #endif
2026 }
2027
2028 void
2029 Perl_yyunlex(pTHX)
2030 {
2031     int yyc = PL_parser->yychar;
2032     if (yyc != YYEMPTY) {
2033         if (yyc) {
2034             start_force(-1);
2035             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2036             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2037                 PL_lex_allbrackets--;
2038                 PL_lex_brackets--;
2039                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2040             } else if (yyc == '('/*)*/) {
2041                 PL_lex_allbrackets--;
2042                 yyc |= (2<<24);
2043             }
2044             force_next(yyc);
2045         }
2046         PL_parser->yychar = YYEMPTY;
2047     }
2048 }
2049
2050 STATIC SV *
2051 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2052 {
2053     dVAR;
2054     SV * const sv = newSVpvn_utf8(start, len,
2055                                   !IN_BYTES
2056                                   && UTF
2057                                   && !is_ascii_string((const U8*)start, len)
2058                                   && is_utf8_string((const U8*)start, len));
2059     return sv;
2060 }
2061
2062 /*
2063  * S_force_word
2064  * When the lexer knows the next thing is a word (for instance, it has
2065  * just seen -> and it knows that the next char is a word char, then
2066  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2067  * lookahead.
2068  *
2069  * Arguments:
2070  *   char *start : buffer position (must be within PL_linestr)
2071  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2072  *   int check_keyword : if true, Perl checks to make sure the word isn't
2073  *       a keyword (do this if the word is a label, e.g. goto FOO)
2074  *   int allow_pack : if true, : characters will also be allowed (require,
2075  *       use, etc. do this)
2076  *   int allow_initial_tick : used by the "sub" lexer only.
2077  */
2078
2079 STATIC char *
2080 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2081 {
2082     dVAR;
2083     char *s;
2084     STRLEN len;
2085
2086     PERL_ARGS_ASSERT_FORCE_WORD;
2087
2088     start = SKIPSPACE1(start);
2089     s = start;
2090     if (isIDFIRST_lazy_if(s,UTF) ||
2091         (allow_pack && *s == ':') ||
2092         (allow_initial_tick && *s == '\'') )
2093     {
2094         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2095         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2096             return start;
2097         start_force(PL_curforce);
2098         if (PL_madskills)
2099             curmad('X', newSVpvn(start,s-start));
2100         if (token == METHOD) {
2101             s = SKIPSPACE1(s);
2102             if (*s == '(')
2103                 PL_expect = XTERM;
2104             else {
2105                 PL_expect = XOPERATOR;
2106             }
2107         }
2108         if (PL_madskills)
2109             curmad('g', newSVpvs( "forced" ));
2110         NEXTVAL_NEXTTOKE.opval
2111             = (OP*)newSVOP(OP_CONST,0,
2112                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2113         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2114         force_next(token);
2115     }
2116     return s;
2117 }
2118
2119 /*
2120  * S_force_ident
2121  * Called when the lexer wants $foo *foo &foo etc, but the program
2122  * text only contains the "foo" portion.  The first argument is a pointer
2123  * to the "foo", and the second argument is the type symbol to prefix.
2124  * Forces the next token to be a "WORD".
2125  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2126  */
2127
2128 STATIC void
2129 S_force_ident(pTHX_ register const char *s, int kind)
2130 {
2131     dVAR;
2132
2133     PERL_ARGS_ASSERT_FORCE_IDENT;
2134
2135     if (*s) {
2136         const STRLEN len = strlen(s);
2137         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2138                                                                 UTF ? SVf_UTF8 : 0));
2139         start_force(PL_curforce);
2140         NEXTVAL_NEXTTOKE.opval = o;
2141         force_next(WORD);
2142         if (kind) {
2143             o->op_private = OPpCONST_ENTERED;
2144             /* XXX see note in pp_entereval() for why we forgo typo
2145                warnings if the symbol must be introduced in an eval.
2146                GSAR 96-10-12 */
2147             gv_fetchpvn_flags(s, len,
2148                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2149                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2150                               kind == '$' ? SVt_PV :
2151                               kind == '@' ? SVt_PVAV :
2152                               kind == '%' ? SVt_PVHV :
2153                               SVt_PVGV
2154                               );
2155         }
2156     }
2157 }
2158
2159 static void
2160 S_force_ident_maybe_lex(pTHX_ char pit)
2161 {
2162     start_force(PL_curforce);
2163     NEXTVAL_NEXTTOKE.ival = pit;
2164     force_next('p');
2165 }
2166
2167 NV
2168 Perl_str_to_version(pTHX_ SV *sv)
2169 {
2170     NV retval = 0.0;
2171     NV nshift = 1.0;
2172     STRLEN len;
2173     const char *start = SvPV_const(sv,len);
2174     const char * const end = start + len;
2175     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2176
2177     PERL_ARGS_ASSERT_STR_TO_VERSION;
2178
2179     while (start < end) {
2180         STRLEN skip;
2181         UV n;
2182         if (utf)
2183             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2184         else {
2185             n = *(U8*)start;
2186             skip = 1;
2187         }
2188         retval += ((NV)n)/nshift;
2189         start += skip;
2190         nshift *= 1000;
2191     }
2192     return retval;
2193 }
2194
2195 /*
2196  * S_force_version
2197  * Forces the next token to be a version number.
2198  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2199  * and if "guessing" is TRUE, then no new token is created (and the caller
2200  * must use an alternative parsing method).
2201  */
2202
2203 STATIC char *
2204 S_force_version(pTHX_ char *s, int guessing)
2205 {
2206     dVAR;
2207     OP *version = NULL;
2208     char *d;
2209 #ifdef PERL_MAD
2210     I32 startoff = s - SvPVX(PL_linestr);
2211 #endif
2212
2213     PERL_ARGS_ASSERT_FORCE_VERSION;
2214
2215     s = SKIPSPACE1(s);
2216
2217     d = s;
2218     if (*d == 'v')
2219         d++;
2220     if (isDIGIT(*d)) {
2221         while (isDIGIT(*d) || *d == '_' || *d == '.')
2222             d++;
2223 #ifdef PERL_MAD
2224         if (PL_madskills) {
2225             start_force(PL_curforce);
2226             curmad('X', newSVpvn(s,d-s));
2227         }
2228 #endif
2229         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2230             SV *ver;
2231 #ifdef USE_LOCALE_NUMERIC
2232             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2233             setlocale(LC_NUMERIC, "C");
2234 #endif
2235             s = scan_num(s, &pl_yylval);
2236 #ifdef USE_LOCALE_NUMERIC
2237             setlocale(LC_NUMERIC, loc);
2238             Safefree(loc);
2239 #endif
2240             version = pl_yylval.opval;
2241             ver = cSVOPx(version)->op_sv;
2242             if (SvPOK(ver) && !SvNIOK(ver)) {
2243                 SvUPGRADE(ver, SVt_PVNV);
2244                 SvNV_set(ver, str_to_version(ver));
2245                 SvNOK_on(ver);          /* hint that it is a version */
2246             }
2247         }
2248         else if (guessing) {
2249 #ifdef PERL_MAD
2250             if (PL_madskills) {
2251                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2252                 PL_nextwhite = 0;
2253                 s = SvPVX(PL_linestr) + startoff;
2254             }
2255 #endif
2256             return s;
2257         }
2258     }
2259
2260 #ifdef PERL_MAD
2261     if (PL_madskills && !version) {
2262         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2263         PL_nextwhite = 0;
2264         s = SvPVX(PL_linestr) + startoff;
2265     }
2266 #endif
2267     /* NOTE: The parser sees the package name and the VERSION swapped */
2268     start_force(PL_curforce);
2269     NEXTVAL_NEXTTOKE.opval = version;
2270     force_next(WORD);
2271
2272     return s;
2273 }
2274
2275 /*
2276  * S_force_strict_version
2277  * Forces the next token to be a version number using strict syntax rules.
2278  */
2279
2280 STATIC char *
2281 S_force_strict_version(pTHX_ char *s)
2282 {
2283     dVAR;
2284     OP *version = NULL;
2285 #ifdef PERL_MAD
2286     I32 startoff = s - SvPVX(PL_linestr);
2287 #endif
2288     const char *errstr = NULL;
2289
2290     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2291
2292     while (isSPACE(*s)) /* leading whitespace */
2293         s++;
2294
2295     if (is_STRICT_VERSION(s,&errstr)) {
2296         SV *ver = newSV(0);
2297         s = (char *)scan_version(s, ver, 0);
2298         version = newSVOP(OP_CONST, 0, ver);
2299     }
2300     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2301             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2302     {
2303         PL_bufptr = s;
2304         if (errstr)
2305             yyerror(errstr); /* version required */
2306         return s;
2307     }
2308
2309 #ifdef PERL_MAD
2310     if (PL_madskills && !version) {
2311         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2312         PL_nextwhite = 0;
2313         s = SvPVX(PL_linestr) + startoff;
2314     }
2315 #endif
2316     /* NOTE: The parser sees the package name and the VERSION swapped */
2317     start_force(PL_curforce);
2318     NEXTVAL_NEXTTOKE.opval = version;
2319     force_next(WORD);
2320
2321     return s;
2322 }
2323
2324 /*
2325  * S_tokeq
2326  * Tokenize a quoted string passed in as an SV.  It finds the next
2327  * chunk, up to end of string or a backslash.  It may make a new
2328  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2329  * turns \\ into \.
2330  */
2331
2332 STATIC SV *
2333 S_tokeq(pTHX_ SV *sv)
2334 {
2335     dVAR;
2336     char *s;
2337     char *send;
2338     char *d;
2339     STRLEN len = 0;
2340     SV *pv = sv;
2341
2342     PERL_ARGS_ASSERT_TOKEQ;
2343
2344     if (!SvLEN(sv))
2345         goto finish;
2346
2347     s = SvPV_force(sv, len);
2348     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2349         goto finish;
2350     send = s + len;
2351     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2352     while (s < send && !(*s == '\\' && s[1] == '\\'))
2353         s++;
2354     if (s == send)
2355         goto finish;
2356     d = s;
2357     if ( PL_hints & HINT_NEW_STRING ) {
2358         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2359     }
2360     while (s < send) {
2361         if (*s == '\\') {
2362             if (s + 1 < send && (s[1] == '\\'))
2363                 s++;            /* all that, just for this */
2364         }
2365         *d++ = *s++;
2366     }
2367     *d = '\0';
2368     SvCUR_set(sv, d - SvPVX_const(sv));
2369   finish:
2370     if ( PL_hints & HINT_NEW_STRING )
2371        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2372     return sv;
2373 }
2374
2375 /*
2376  * Now come three functions related to double-quote context,
2377  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2378  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2379  * interact with PL_lex_state, and create fake ( ... ) argument lists
2380  * to handle functions and concatenation.
2381  * For example,
2382  *   "foo\lbar"
2383  * is tokenised as
2384  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2385  */
2386
2387 /*
2388  * S_sublex_start
2389  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2390  *
2391  * Pattern matching will set PL_lex_op to the pattern-matching op to
2392  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2393  *
2394  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2395  *
2396  * Everything else becomes a FUNC.
2397  *
2398  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2399  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2400  * call to S_sublex_push().
2401  */
2402
2403 STATIC I32
2404 S_sublex_start(pTHX)
2405 {
2406     dVAR;
2407     const I32 op_type = pl_yylval.ival;
2408
2409     if (op_type == OP_NULL) {
2410         pl_yylval.opval = PL_lex_op;
2411         PL_lex_op = NULL;
2412         return THING;
2413     }
2414     if (op_type == OP_CONST || op_type == OP_READLINE) {
2415         SV *sv = tokeq(PL_lex_stuff);
2416
2417         if (SvTYPE(sv) == SVt_PVIV) {
2418             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2419             STRLEN len;
2420             const char * const p = SvPV_const(sv, len);
2421             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2422             SvREFCNT_dec(sv);
2423             sv = nsv;
2424         }
2425         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2426         PL_lex_stuff = NULL;
2427         /* Allow <FH> // "foo" */
2428         if (op_type == OP_READLINE)
2429             PL_expect = XTERMORDORDOR;
2430         return THING;
2431     }
2432     else if (op_type == OP_BACKTICK && PL_lex_op) {
2433         /* readpipe() vas overriden */
2434         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2435         pl_yylval.opval = PL_lex_op;
2436         PL_lex_op = NULL;
2437         PL_lex_stuff = NULL;
2438         return THING;
2439     }
2440
2441     PL_sublex_info.super_state = PL_lex_state;
2442     PL_sublex_info.sub_inwhat = (U16)op_type;
2443     PL_sublex_info.sub_op = PL_lex_op;
2444     PL_lex_state = LEX_INTERPPUSH;
2445
2446     PL_expect = XTERM;
2447     if (PL_lex_op) {
2448         pl_yylval.opval = PL_lex_op;
2449         PL_lex_op = NULL;
2450         return PMFUNC;
2451     }
2452     else
2453         return FUNC;
2454 }
2455
2456 /*
2457  * S_sublex_push
2458  * Create a new scope to save the lexing state.  The scope will be
2459  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2460  * to the uc, lc, etc. found before.
2461  * Sets PL_lex_state to LEX_INTERPCONCAT.
2462  */
2463
2464 STATIC I32
2465 S_sublex_push(pTHX)
2466 {
2467     dVAR;
2468     LEXSHARED *shared;
2469     ENTER;
2470
2471     PL_lex_state = PL_sublex_info.super_state;
2472     SAVEBOOL(PL_lex_dojoin);
2473     SAVEI32(PL_lex_brackets);
2474     SAVEI32(PL_lex_allbrackets);
2475     SAVEI32(PL_lex_formbrack);
2476     SAVEI8(PL_lex_fakeeof);
2477     SAVEI32(PL_lex_casemods);
2478     SAVEI32(PL_lex_starts);
2479     SAVEI8(PL_lex_state);
2480     SAVESPTR(PL_lex_repl);
2481     SAVEVPTR(PL_lex_inpat);
2482     SAVEI16(PL_lex_inwhat);
2483     SAVECOPLINE(PL_curcop);
2484     SAVEPPTR(PL_bufptr);
2485     SAVEPPTR(PL_bufend);
2486     SAVEPPTR(PL_oldbufptr);
2487     SAVEPPTR(PL_oldoldbufptr);
2488     SAVEPPTR(PL_last_lop);
2489     SAVEPPTR(PL_last_uni);
2490     SAVEPPTR(PL_linestart);
2491     SAVESPTR(PL_linestr);
2492     SAVEGENERICPV(PL_lex_brackstack);
2493     SAVEGENERICPV(PL_lex_casestack);
2494     SAVEGENERICPV(PL_parser->lex_shared);
2495
2496     /* The here-doc parser needs to be able to peek into outer lexing
2497        scopes to find the body of the here-doc.  So we put PL_linestr and
2498        PL_bufptr into lex_shared, to ‘share’ those values.
2499      */
2500     PL_parser->lex_shared->ls_linestr = PL_linestr;
2501     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2502
2503     PL_linestr = PL_lex_stuff;
2504     PL_lex_repl = PL_sublex_info.repl;
2505     PL_lex_stuff = NULL;
2506     PL_sublex_info.repl = NULL;
2507
2508     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2509         = SvPVX(PL_linestr);
2510     PL_bufend += SvCUR(PL_linestr);
2511     PL_last_lop = PL_last_uni = NULL;
2512     SAVEFREESV(PL_linestr);
2513     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2514
2515     PL_lex_dojoin = FALSE;
2516     PL_lex_brackets = PL_lex_formbrack = 0;
2517     PL_lex_allbrackets = 0;
2518     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2519     Newx(PL_lex_brackstack, 120, char);
2520     Newx(PL_lex_casestack, 12, char);
2521     PL_lex_casemods = 0;
2522     *PL_lex_casestack = '\0';
2523     PL_lex_starts = 0;
2524     PL_lex_state = LEX_INTERPCONCAT;
2525     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2526
2527     Newxz(shared, 1, LEXSHARED);
2528     shared->ls_prev = PL_parser->lex_shared;
2529     PL_parser->lex_shared = shared;
2530
2531     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2532     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2533     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2534         PL_lex_inpat = PL_sublex_info.sub_op;
2535     else
2536         PL_lex_inpat = NULL;
2537
2538     return '(';
2539 }
2540
2541 /*
2542  * S_sublex_done
2543  * Restores lexer state after a S_sublex_push.
2544  */
2545
2546 STATIC I32
2547 S_sublex_done(pTHX)
2548 {
2549     dVAR;
2550     if (!PL_lex_starts++) {
2551         SV * const sv = newSVpvs("");
2552         if (SvUTF8(PL_linestr))
2553             SvUTF8_on(sv);
2554         PL_expect = XOPERATOR;
2555         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2556         return THING;
2557     }
2558
2559     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2560         PL_lex_state = LEX_INTERPCASEMOD;
2561         return yylex();
2562     }
2563
2564     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2565     assert(PL_lex_inwhat != OP_TRANSR);
2566     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2567         PL_linestr = PL_lex_repl;
2568         PL_lex_inpat = 0;
2569         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2570         PL_bufend += SvCUR(PL_linestr);
2571         PL_last_lop = PL_last_uni = NULL;
2572         PL_lex_dojoin = FALSE;
2573         PL_lex_brackets = 0;
2574         PL_lex_allbrackets = 0;
2575         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2576         PL_lex_casemods = 0;
2577         *PL_lex_casestack = '\0';
2578         PL_lex_starts = 0;
2579         if (SvEVALED(PL_lex_repl)) {
2580             PL_lex_state = LEX_INTERPNORMAL;
2581             PL_lex_starts++;
2582             /*  we don't clear PL_lex_repl here, so that we can check later
2583                 whether this is an evalled subst; that means we rely on the
2584                 logic to ensure sublex_done() is called again only via the
2585                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2586         }
2587         else {
2588             PL_lex_state = LEX_INTERPCONCAT;
2589             PL_lex_repl = NULL;
2590         }
2591         return ',';
2592     }
2593     else {
2594 #ifdef PERL_MAD
2595         if (PL_madskills) {
2596             if (PL_thiswhite) {
2597                 if (!PL_endwhite)
2598                     PL_endwhite = newSVpvs("");
2599                 sv_catsv(PL_endwhite, PL_thiswhite);
2600                 PL_thiswhite = 0;
2601             }
2602             if (PL_thistoken)
2603                 sv_setpvs(PL_thistoken,"");
2604             else
2605                 PL_realtokenstart = -1;
2606         }
2607 #endif
2608         LEAVE;
2609         PL_bufend = SvPVX(PL_linestr);
2610         PL_bufend += SvCUR(PL_linestr);
2611         PL_expect = XOPERATOR;
2612         PL_sublex_info.sub_inwhat = 0;
2613         return ')';
2614     }
2615 }
2616
2617 /*
2618   scan_const
2619
2620   Extracts the next constant part of a pattern, double-quoted string,
2621   or transliteration.  This is terrifying code.
2622
2623   For example, in parsing the double-quoted string "ab\x63$d", it would
2624   stop at the '$' and return an OP_CONST containing 'abc'.
2625
2626   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2627   processing a pattern (PL_lex_inpat is true), a transliteration
2628   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2629
2630   Returns a pointer to the character scanned up to. If this is
2631   advanced from the start pointer supplied (i.e. if anything was
2632   successfully parsed), will leave an OP_CONST for the substring scanned
2633   in pl_yylval. Caller must intuit reason for not parsing further
2634   by looking at the next characters herself.
2635
2636   In patterns:
2637     expand:
2638       \N{ABC}  => \N{U+41.42.43}
2639
2640     pass through:
2641         all other \-char, including \N and \N{ apart from \N{ABC}
2642
2643     stops on:
2644         @ and $ where it appears to be a var, but not for $ as tail anchor
2645         \l \L \u \U \Q \E
2646         (?{  or  (??{
2647
2648
2649   In transliterations:
2650     characters are VERY literal, except for - not at the start or end
2651     of the string, which indicates a range. If the range is in bytes,
2652     scan_const expands the range to the full set of intermediate
2653     characters. If the range is in utf8, the hyphen is replaced with
2654     a certain range mark which will be handled by pmtrans() in op.c.
2655
2656   In double-quoted strings:
2657     backslashes:
2658       double-quoted style: \r and \n
2659       constants: \x31, etc.
2660       deprecated backrefs: \1 (in substitution replacements)
2661       case and quoting: \U \Q \E
2662     stops on @ and $
2663
2664   scan_const does *not* construct ops to handle interpolated strings.
2665   It stops processing as soon as it finds an embedded $ or @ variable
2666   and leaves it to the caller to work out what's going on.
2667
2668   embedded arrays (whether in pattern or not) could be:
2669       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2670
2671   $ in double-quoted strings must be the symbol of an embedded scalar.
2672
2673   $ in pattern could be $foo or could be tail anchor.  Assumption:
2674   it's a tail anchor if $ is the last thing in the string, or if it's
2675   followed by one of "()| \r\n\t"
2676
2677   \1 (backreferences) are turned into $1 in substitutions
2678
2679   The structure of the code is
2680       while (there's a character to process) {
2681           handle transliteration ranges
2682           skip regexp comments /(?#comment)/ and codes /(?{code})/
2683           skip #-initiated comments in //x patterns
2684           check for embedded arrays
2685           check for embedded scalars
2686           if (backslash) {
2687               deprecate \1 in substitution replacements
2688               handle string-changing backslashes \l \U \Q \E, etc.
2689               switch (what was escaped) {
2690                   handle \- in a transliteration (becomes a literal -)
2691                   if a pattern and not \N{, go treat as regular character
2692                   handle \132 (octal characters)
2693                   handle \x15 and \x{1234} (hex characters)
2694                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2695                   handle \cV (control characters)
2696                   handle printf-style backslashes (\f, \r, \n, etc)
2697               } (end switch)
2698               continue
2699           } (end if backslash)
2700           handle regular character
2701     } (end while character to read)
2702
2703 */
2704
2705 STATIC char *
2706 S_scan_const(pTHX_ char *start)
2707 {
2708     dVAR;
2709     char *send = PL_bufend;             /* end of the constant */
2710     SV *sv = newSV(send - start);               /* sv for the constant.  See
2711                                                    note below on sizing. */
2712     char *s = start;                    /* start of the constant */
2713     char *d = SvPVX(sv);                /* destination for copies */
2714     bool dorange = FALSE;                       /* are we in a translit range? */
2715     bool didrange = FALSE;                      /* did we just finish a range? */
2716     bool in_charclass = FALSE;                  /* within /[...]/ */
2717     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2718     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2719                                                    to be UTF8?  But, this can
2720                                                    show as true when the source
2721                                                    isn't utf8, as for example
2722                                                    when it is entirely composed
2723                                                    of hex constants */
2724
2725     /* Note on sizing:  The scanned constant is placed into sv, which is
2726      * initialized by newSV() assuming one byte of output for every byte of
2727      * input.  This routine expects newSV() to allocate an extra byte for a
2728      * trailing NUL, which this routine will append if it gets to the end of
2729      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2730      * CAPITAL LETTER A}), or more output than input if the constant ends up
2731      * recoded to utf8, but each time a construct is found that might increase
2732      * the needed size, SvGROW() is called.  Its size parameter each time is
2733      * based on the best guess estimate at the time, namely the length used so
2734      * far, plus the length the current construct will occupy, plus room for
2735      * the trailing NUL, plus one byte for every input byte still unscanned */
2736
2737     UV uv;
2738 #ifdef EBCDIC
2739     UV literal_endpoint = 0;
2740     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2741 #endif
2742
2743     PERL_ARGS_ASSERT_SCAN_CONST;
2744
2745     assert(PL_lex_inwhat != OP_TRANSR);
2746     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2747         /* If we are doing a trans and we know we want UTF8 set expectation */
2748         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2749         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2750     }
2751
2752
2753     while (s < send || dorange) {
2754
2755         /* get transliterations out of the way (they're most literal) */
2756         if (PL_lex_inwhat == OP_TRANS) {
2757             /* expand a range A-Z to the full set of characters.  AIE! */
2758             if (dorange) {
2759                 I32 i;                          /* current expanded character */
2760                 I32 min;                        /* first character in range */
2761                 I32 max;                        /* last character in range */
2762
2763 #ifdef EBCDIC
2764                 UV uvmax = 0;
2765 #endif
2766
2767                 if (has_utf8
2768 #ifdef EBCDIC
2769                     && !native_range
2770 #endif
2771                     ) {
2772                     char * const c = (char*)utf8_hop((U8*)d, -1);
2773                     char *e = d++;
2774                     while (e-- > c)
2775                         *(e + 1) = *e;
2776                     *c = (char)UTF_TO_NATIVE(0xff);
2777                     /* mark the range as done, and continue */
2778                     dorange = FALSE;
2779                     didrange = TRUE;
2780                     continue;
2781                 }
2782
2783                 i = d - SvPVX_const(sv);                /* remember current offset */
2784 #ifdef EBCDIC
2785                 SvGROW(sv,
2786                        SvLEN(sv) + (has_utf8 ?
2787                                     (512 - UTF_CONTINUATION_MARK +
2788                                      UNISKIP(0x100))
2789                                     : 256));
2790                 /* How many two-byte within 0..255: 128 in UTF-8,
2791                  * 96 in UTF-8-mod. */
2792 #else
2793                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2794 #endif
2795                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2796 #ifdef EBCDIC
2797                 if (has_utf8) {
2798                     int j;
2799                     for (j = 0; j <= 1; j++) {
2800                         char * const c = (char*)utf8_hop((U8*)d, -1);
2801                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2802                         if (j)
2803                             min = (U8)uv;
2804                         else if (uv < 256)
2805                             max = (U8)uv;
2806                         else {
2807                             max = (U8)0xff; /* only to \xff */
2808                             uvmax = uv; /* \x{100} to uvmax */
2809                         }
2810                         d = c; /* eat endpoint chars */
2811                      }
2812                 }
2813                else {
2814 #endif
2815                    d -= 2;              /* eat the first char and the - */
2816                    min = (U8)*d;        /* first char in range */
2817                    max = (U8)d[1];      /* last char in range  */
2818 #ifdef EBCDIC
2819                }
2820 #endif
2821
2822                 if (min > max) {
2823                     SvREFCNT_dec(sv);
2824                     Perl_croak(aTHX_
2825                                "Invalid range \"%c-%c\" in transliteration operator",
2826                                (char)min, (char)max);
2827                 }
2828
2829 #ifdef EBCDIC
2830                 if (literal_endpoint == 2 &&
2831                     ((isLOWER(min) && isLOWER(max)) ||
2832                      (isUPPER(min) && isUPPER(max)))) {
2833                     if (isLOWER(min)) {
2834                         for (i = min; i <= max; i++)
2835                             if (isLOWER(i))
2836                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2837                     } else {
2838                         for (i = min; i <= max; i++)
2839                             if (isUPPER(i))
2840                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2841                     }
2842                 }
2843                 else
2844 #endif
2845                     for (i = min; i <= max; i++)
2846 #ifdef EBCDIC
2847                         if (has_utf8) {
2848                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2849                             if (UNI_IS_INVARIANT(ch))
2850                                 *d++ = (U8)i;
2851                             else {
2852                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2853                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2854                             }
2855                         }
2856                         else
2857 #endif
2858                             *d++ = (char)i;
2859
2860 #ifdef EBCDIC
2861                 if (uvmax) {
2862                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2863                     if (uvmax > 0x101)
2864                         *d++ = (char)UTF_TO_NATIVE(0xff);
2865                     if (uvmax > 0x100)
2866                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2867                 }
2868 #endif
2869
2870                 /* mark the range as done, and continue */
2871                 dorange = FALSE;
2872                 didrange = TRUE;
2873 #ifdef EBCDIC
2874                 literal_endpoint = 0;
2875 #endif
2876                 continue;
2877             }
2878
2879             /* range begins (ignore - as first or last char) */
2880             else if (*s == '-' && s+1 < send  && s != start) {
2881                 if (didrange) {
2882                     SvREFCNT_dec(sv);
2883                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2884                 }
2885                 if (has_utf8
2886 #ifdef EBCDIC
2887                     && !native_range
2888 #endif
2889                     ) {
2890                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2891                     s++;
2892                     continue;
2893                 }
2894                 dorange = TRUE;
2895                 s++;
2896             }
2897             else {
2898                 didrange = FALSE;
2899 #ifdef EBCDIC
2900                 literal_endpoint = 0;
2901                 native_range = TRUE;
2902 #endif
2903             }
2904         }
2905
2906         /* if we get here, we're not doing a transliteration */
2907
2908         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2909             char *s1 = s-1;
2910             int esc = 0;
2911             while (s1 >= start && *s1-- == '\\')
2912                 esc = !esc;
2913             if (!esc)
2914                 in_charclass = TRUE;
2915         }
2916
2917         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2918             char *s1 = s-1;
2919             int esc = 0;
2920             while (s1 >= start && *s1-- == '\\')
2921                 esc = !esc;
2922             if (!esc)
2923                 in_charclass = FALSE;
2924         }
2925
2926         /* skip for regexp comments /(?#comment)/, except for the last
2927          * char, which will be done separately.
2928          * Stop on (?{..}) and friends */
2929
2930         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2931             if (s[2] == '#') {
2932                 while (s+1 < send && *s != ')')
2933                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2934             }
2935             else if (!PL_lex_casemods && !in_charclass &&
2936                      (    s[2] == '{' /* This should match regcomp.c */
2937                       || (s[2] == '?' && s[3] == '{')))
2938             {
2939                 break;
2940             }
2941         }
2942
2943         /* likewise skip #-initiated comments in //x patterns */
2944         else if (*s == '#' && PL_lex_inpat &&
2945           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2946             while (s+1 < send && *s != '\n')
2947                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2948         }
2949
2950         /* no further processing of single-quoted regex */
2951         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2952             goto default_action;
2953
2954         /* check for embedded arrays
2955            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2956            */
2957         else if (*s == '@' && s[1]) {
2958             if (isALNUM_lazy_if(s+1,UTF))
2959                 break;
2960             if (strchr(":'{$", s[1]))
2961                 break;
2962             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2963                 break; /* in regexp, neither @+ nor @- are interpolated */
2964         }
2965
2966         /* check for embedded scalars.  only stop if we're sure it's a
2967            variable.
2968         */
2969         else if (*s == '$') {
2970             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2971                 break;
2972             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2973                 if (s[1] == '\\') {
2974                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2975                                    "Possible unintended interpolation of $\\ in regex");
2976                 }
2977                 break;          /* in regexp, $ might be tail anchor */
2978             }
2979         }
2980
2981         /* End of else if chain - OP_TRANS rejoin rest */
2982
2983         /* backslashes */
2984         if (*s == '\\' && s+1 < send) {
2985             char* e;    /* Can be used for ending '}', etc. */
2986
2987             s++;
2988
2989             /* warn on \1 - \9 in substitution replacements, but note that \11
2990              * is an octal; and \19 is \1 followed by '9' */
2991             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2992                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2993             {
2994                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2995                 *--s = '$';
2996                 break;
2997             }
2998
2999             /* string-change backslash escapes */
3000             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3001                 --s;
3002                 break;
3003             }
3004             /* In a pattern, process \N, but skip any other backslash escapes.
3005              * This is because we don't want to translate an escape sequence
3006              * into a meta symbol and have the regex compiler use the meta
3007              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3008              * in spite of this, we do have to process \N here while the proper
3009              * charnames handler is in scope.  See bugs #56444 and #62056.
3010              * There is a complication because \N in a pattern may also stand
3011              * for 'match a non-nl', and not mean a charname, in which case its
3012              * processing should be deferred to the regex compiler.  To be a
3013              * charname it must be followed immediately by a '{', and not look
3014              * like \N followed by a curly quantifier, i.e., not something like
3015              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3016              * quantifier */
3017             else if (PL_lex_inpat
3018                     && (*s != 'N'
3019                         || s[1] != '{'
3020                         || regcurly(s + 1)))
3021             {
3022                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3023                 goto default_action;
3024             }
3025
3026             switch (*s) {
3027
3028             /* quoted - in transliterations */
3029             case '-':
3030                 if (PL_lex_inwhat == OP_TRANS) {
3031                     *d++ = *s++;
3032                     continue;
3033                 }
3034                 /* FALL THROUGH */
3035             default:
3036                 {
3037                     if ((isALNUMC(*s)))
3038                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3039                                        "Unrecognized escape \\%c passed through",
3040                                        *s);
3041                     /* default action is to copy the quoted character */
3042                     goto default_action;
3043                 }
3044
3045             /* eg. \132 indicates the octal constant 0132 */
3046             case '0': case '1': case '2': case '3':
3047             case '4': case '5': case '6': case '7':
3048                 {
3049                     I32 flags = 0;
3050                     STRLEN len = 3;
3051                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3052                     s += len;
3053                 }
3054                 goto NUM_ESCAPE_INSERT;
3055
3056             /* eg. \o{24} indicates the octal constant \024 */
3057             case 'o':
3058                 {
3059                     STRLEN len;
3060                     const char* error;
3061
3062                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3063                     s += len;
3064                     if (! valid) {
3065                         yyerror(error);
3066                         continue;
3067                     }
3068                     goto NUM_ESCAPE_INSERT;
3069                 }
3070
3071             /* eg. \x24 indicates the hex constant 0x24 */
3072             case 'x':
3073                 {
3074                     STRLEN len;
3075                     const char* error;
3076
3077                     bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3078                     s += len;
3079                     if (! valid) {
3080                         yyerror(error);
3081                         continue;
3082                     }
3083                 }
3084
3085               NUM_ESCAPE_INSERT:
3086                 /* Insert oct or hex escaped character.  There will always be
3087                  * enough room in sv since such escapes will be longer than any
3088                  * UTF-8 sequence they can end up as, except if they force us
3089                  * to recode the rest of the string into utf8 */
3090
3091                 /* Here uv is the ordinal of the next character being added in
3092                  * unicode (converted from native). */
3093                 if (!UNI_IS_INVARIANT(uv)) {
3094                     if (!has_utf8 && uv > 255) {
3095                         /* Might need to recode whatever we have accumulated so
3096                          * far if it contains any chars variant in utf8 or
3097                          * utf-ebcdic. */
3098
3099                         SvCUR_set(sv, d - SvPVX_const(sv));
3100                         SvPOK_on(sv);
3101                         *d = '\0';
3102                         /* See Note on sizing above.  */
3103                         sv_utf8_upgrade_flags_grow(sv,
3104                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3105                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3106                         d = SvPVX(sv) + SvCUR(sv);
3107                         has_utf8 = TRUE;
3108                     }
3109
3110                     if (has_utf8) {
3111                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3112                         if (PL_lex_inwhat == OP_TRANS &&
3113                             PL_sublex_info.sub_op) {
3114                             PL_sublex_info.sub_op->op_private |=
3115                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3116                                              : OPpTRANS_TO_UTF);
3117                         }
3118 #ifdef EBCDIC
3119                         if (uv > 255 && !dorange)
3120                             native_range = FALSE;
3121 #endif
3122                     }
3123                     else {
3124                         *d++ = (char)uv;
3125                     }
3126                 }
3127                 else {
3128                     *d++ = (char) uv;
3129                 }
3130                 continue;
3131
3132             case 'N':
3133                 /* In a non-pattern \N must be a named character, like \N{LATIN
3134                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3135                  * mean to match a non-newline.  For non-patterns, named
3136                  * characters are converted to their string equivalents. In
3137                  * patterns, named characters are not converted to their
3138                  * ultimate forms for the same reasons that other escapes
3139                  * aren't.  Instead, they are converted to the \N{U+...} form
3140                  * to get the value from the charnames that is in effect right
3141                  * now, while preserving the fact that it was a named character
3142                  * so that the regex compiler knows this */
3143
3144                 /* This section of code doesn't generally use the
3145                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3146                  * a close examination of this macro and determined it is a
3147                  * no-op except on utfebcdic variant characters.  Every
3148                  * character generated by this that would normally need to be
3149                  * enclosed by this macro is invariant, so the macro is not
3150                  * needed, and would complicate use of copy().  XXX There are
3151                  * other parts of this file where the macro is used
3152                  * inconsistently, but are saved by it being a no-op */
3153
3154                 /* The structure of this section of code (besides checking for
3155                  * errors and upgrading to utf8) is:
3156                  *  Further disambiguate between the two meanings of \N, and if
3157                  *      not a charname, go process it elsewhere
3158                  *  If of form \N{U+...}, pass it through if a pattern;
3159                  *      otherwise convert to utf8
3160                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3161                  *  pattern; otherwise convert to utf8 */
3162
3163                 /* Here, s points to the 'N'; the test below is guaranteed to
3164                  * succeed if we are being called on a pattern as we already
3165                  * know from a test above that the next character is a '{'.
3166                  * On a non-pattern \N must mean 'named sequence, which
3167                  * requires braces */
3168                 s++;
3169                 if (*s != '{') {
3170                     yyerror("Missing braces on \\N{}");
3171                     continue;
3172                 }
3173                 s++;
3174
3175                 /* If there is no matching '}', it is an error. */
3176                 if (! (e = strchr(s, '}'))) {
3177                     if (! PL_lex_inpat) {
3178                         yyerror("Missing right brace on \\N{}");
3179                     } else {
3180                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3181                     }
3182                     continue;
3183                 }
3184
3185                 /* Here it looks like a named character */
3186
3187                 if (PL_lex_inpat) {
3188
3189                     /* XXX This block is temporary code.  \N{} implies that the
3190                      * pattern is to have Unicode semantics, and therefore
3191                      * currently has to be encoded in utf8.  By putting it in
3192                      * utf8 now, we save a whole pass in the regular expression
3193                      * compiler.  Once that code is changed so Unicode
3194                      * semantics doesn't necessarily have to be in utf8, this
3195                      * block should be removed.  However, the code that parses
3196                      * the output of this would have to be changed to not
3197                      * necessarily expect utf8 */
3198                     if (!has_utf8) {
3199                         SvCUR_set(sv, d - SvPVX_const(sv));
3200                         SvPOK_on(sv);
3201                         *d = '\0';
3202                         /* See Note on sizing above.  */
3203                         sv_utf8_upgrade_flags_grow(sv,
3204                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3205                                         /* 5 = '\N{' + cur char + NUL */
3206                                         (STRLEN)(send - s) + 5);
3207                         d = SvPVX(sv) + SvCUR(sv);
3208                         has_utf8 = TRUE;
3209                     }
3210                 }
3211
3212                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3213                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3214                                 | PERL_SCAN_DISALLOW_PREFIX;
3215                     STRLEN len;
3216
3217                     /* For \N{U+...}, the '...' is a unicode value even on
3218                      * EBCDIC machines */
3219                     s += 2;         /* Skip to next char after the 'U+' */
3220                     len = e - s;
3221                     uv = grok_hex(s, &len, &flags, NULL);
3222                     if (len == 0 || len != (STRLEN)(e - s)) {
3223                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3224                         s = e + 1;
3225                         continue;
3226                     }
3227
3228                     if (PL_lex_inpat) {
3229
3230                         /* On non-EBCDIC platforms, pass through to the regex
3231                          * compiler unchanged.  The reason we evaluated the
3232                          * number above is to make sure there wasn't a syntax
3233                          * error.  But on EBCDIC we convert to native so
3234                          * downstream code can continue to assume it's native
3235                          */
3236                         s -= 5;     /* Include the '\N{U+' */
3237 #ifdef EBCDIC
3238                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3239                                                                and the \0 */
3240                                     "\\N{U+%X}",
3241                                     (unsigned int) UNI_TO_NATIVE(uv));
3242 #else
3243                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3244                         d += e - s + 1;
3245 #endif
3246                     }
3247                     else {  /* Not a pattern: convert the hex to string */
3248
3249                          /* If destination is not in utf8, unconditionally
3250                           * recode it to be so.  This is because \N{} implies
3251                           * Unicode semantics, and scalars have to be in utf8
3252                           * to guarantee those semantics */
3253                         if (! has_utf8) {
3254                             SvCUR_set(sv, d - SvPVX_const(sv));
3255                             SvPOK_on(sv);
3256                             *d = '\0';
3257                             /* See Note on sizing above.  */
3258                             sv_utf8_upgrade_flags_grow(
3259                                         sv,
3260                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3261                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3262                             d = SvPVX(sv) + SvCUR(sv);
3263                             has_utf8 = TRUE;
3264                         }
3265
3266                         /* Add the string to the output */
3267                         if (UNI_IS_INVARIANT(uv)) {
3268                             *d++ = (char) uv;
3269                         }
3270                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3271                     }
3272                 }
3273                 else { /* Here is \N{NAME} but not \N{U+...}. */
3274
3275                     SV *res;            /* result from charnames */
3276                     const char *str;    /* the string in 'res' */
3277                     STRLEN len;         /* its length */
3278
3279                     /* Get the value for NAME */
3280                     res = newSVpvn(s, e - s);
3281                     res = new_constant( NULL, 0, "charnames",
3282                                         /* includes all of: \N{...} */
3283                                         res, NULL, s - 3, e - s + 4 );
3284
3285                     /* Most likely res will be in utf8 already since the
3286                      * standard charnames uses pack U, but a custom translator
3287                      * can leave it otherwise, so make sure.  XXX This can be
3288                      * revisited to not have charnames use utf8 for characters
3289                      * that don't need it when regexes don't have to be in utf8
3290                      * for Unicode semantics.  If doing so, remember EBCDIC */
3291                     sv_utf8_upgrade(res);
3292                     str = SvPV_const(res, len);
3293
3294                     /* Don't accept malformed input */
3295                     if (! is_utf8_string((U8 *) str, len)) {
3296                         yyerror("Malformed UTF-8 returned by \\N");
3297                     }
3298                     else if (PL_lex_inpat) {
3299
3300                         if (! len) { /* The name resolved to an empty string */
3301                             Copy("\\N{}", d, 4, char);
3302                             d += 4;
3303                         }
3304                         else {
3305                             /* In order to not lose information for the regex
3306                             * compiler, pass the result in the specially made
3307                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3308                             * the code points in hex of each character
3309                             * returned by charnames */
3310
3311                             const char *str_end = str + len;
3312                             STRLEN char_length;     /* cur char's byte length */
3313                             STRLEN output_length;   /* and the number of bytes
3314                                                        after this is translated
3315                                                        into hex digits */
3316                             const STRLEN off = d - SvPVX_const(sv);
3317
3318                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3319                              * max('U+', '.'); and 1 for NUL */
3320                             char hex_string[2 * UTF8_MAXBYTES + 5];
3321
3322                             /* Get the first character of the result. */
3323                             U32 uv = utf8n_to_uvuni((U8 *) str,
3324                                                     len,
3325                                                     &char_length,
3326                                                     UTF8_ALLOW_ANYUV);
3327
3328                             /* The call to is_utf8_string() above hopefully
3329                              * guarantees that there won't be an error.  But
3330                              * it's easy here to make sure.  The function just
3331                              * above warns and returns 0 if invalid utf8, but
3332                              * it can also return 0 if the input is validly a
3333                              * NUL. Disambiguate */
3334                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3335                                 uv = UNICODE_REPLACEMENT;
3336                             }
3337
3338                             /* Convert first code point to hex, including the
3339                              * boiler plate before it.  For all these, we
3340                              * convert to native format so that downstream code
3341                              * can continue to assume the input is native */
3342                             output_length =
3343                                 my_snprintf(hex_string, sizeof(hex_string),
3344                                             "\\N{U+%X",
3345                                             (unsigned int) UNI_TO_NATIVE(uv));
3346
3347                             /* Make sure there is enough space to hold it */
3348                             d = off + SvGROW(sv, off
3349                                                  + output_length
3350                                                  + (STRLEN)(send - e)
3351                                                  + 2);  /* '}' + NUL */
3352                             /* And output it */
3353                             Copy(hex_string, d, output_length, char);
3354                             d += output_length;
3355
3356                             /* For each subsequent character, append dot and
3357                              * its ordinal in hex */
3358                             while ((str += char_length) < str_end) {
3359                                 const STRLEN off = d - SvPVX_const(sv);
3360                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3361                                                         str_end - str,
3362                                                         &char_length,
3363                                                         UTF8_ALLOW_ANYUV);
3364                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3365                                     uv = UNICODE_REPLACEMENT;
3366                                 }
3367
3368                                 output_length =
3369                                     my_snprintf(hex_string, sizeof(hex_string),
3370                                             ".%X",
3371                                             (unsigned int) UNI_TO_NATIVE(uv));
3372
3373                                 d = off + SvGROW(sv, off
3374                                                      + output_length
3375                                                      + (STRLEN)(send - e)
3376                                                      + 2);      /* '}' +  NUL */
3377                                 Copy(hex_string, d, output_length, char);
3378                                 d += output_length;
3379                             }
3380
3381                             *d++ = '}'; /* Done.  Add the trailing brace */
3382                         }
3383                     }
3384                     else { /* Here, not in a pattern.  Convert the name to a
3385                             * string. */
3386
3387                          /* If destination is not in utf8, unconditionally
3388                           * recode it to be so.  This is because \N{} implies
3389                           * Unicode semantics, and scalars have to be in utf8
3390                           * to guarantee those semantics */
3391                         if (! has_utf8) {
3392                             SvCUR_set(sv, d - SvPVX_const(sv));
3393                             SvPOK_on(sv);
3394                             *d = '\0';
3395                             /* See Note on sizing above.  */
3396                             sv_utf8_upgrade_flags_grow(sv,
3397                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3398                                                 len + (STRLEN)(send - s) + 1);
3399                             d = SvPVX(sv) + SvCUR(sv);
3400                             has_utf8 = TRUE;
3401                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3402
3403                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3404                              * set correctly here). */
3405                             const STRLEN off = d - SvPVX_const(sv);
3406                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3407                         }
3408                         Copy(str, d, len, char);
3409                         d += len;
3410                     }
3411                     SvREFCNT_dec(res);
3412
3413                     /* Deprecate non-approved name syntax */
3414                     if (ckWARN_d(WARN_DEPRECATED)) {
3415                         bool problematic = FALSE;
3416                         char* i = s;
3417
3418                         /* For non-ut8 input, look to see that the first
3419                          * character is an alpha, then loop through the rest
3420                          * checking that each is a continuation */
3421                         if (! this_utf8) {
3422                             if (! isALPHAU(*i)) problematic = TRUE;
3423                             else for (i = s + 1; i < e; i++) {
3424                                 if (isCHARNAME_CONT(*i)) continue;
3425                                 problematic = TRUE;
3426                                 break;
3427                             }
3428                         }
3429                         else {
3430                             /* Similarly for utf8.  For invariants can check
3431                              * directly.  We accept anything above the latin1
3432                              * range because it is immaterial to Perl if it is
3433                              * correct or not, and is expensive to check.  But
3434                              * it is fairly easy in the latin1 range to convert
3435                              * the variants into a single character and check
3436                              * those */
3437                             if (UTF8_IS_INVARIANT(*i)) {
3438                                 if (! isALPHAU(*i)) problematic = TRUE;
3439                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3440                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3441                                                                             *(i+1)))))
3442                                 {
3443                                     problematic = TRUE;
3444                                 }
3445                             }
3446                             if (! problematic) for (i = s + UTF8SKIP(s);
3447                                                     i < e;
3448                                                     i+= UTF8SKIP(i))
3449                             {
3450                                 if (UTF8_IS_INVARIANT(*i)) {
3451                                     if (isCHARNAME_CONT(*i)) continue;
3452                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3453                                     continue;
3454                                 } else if (isCHARNAME_CONT(
3455                                             UNI_TO_NATIVE(
3456                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3457                                 {
3458                                     continue;
3459                                 }
3460                                 problematic = TRUE;
3461                                 break;
3462                             }
3463                         }
3464                         if (problematic) {
3465                             /* The e-i passed to the final %.*s makes sure that
3466                              * should the trailing NUL be missing that this
3467                              * print won't run off the end of the string */
3468                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3469                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3470                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3471                         }
3472                     }
3473                 } /* End \N{NAME} */
3474 #ifdef EBCDIC
3475                 if (!dorange)
3476                     native_range = FALSE; /* \N{} is defined to be Unicode */
3477 #endif
3478                 s = e + 1;  /* Point to just after the '}' */
3479                 continue;
3480
3481             /* \c is a control character */
3482             case 'c':
3483                 s++;
3484                 if (s < send) {
3485                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3486                 }
3487                 else {
3488                     yyerror("Missing control char name in \\c");
3489                 }
3490                 continue;
3491
3492             /* printf-style backslashes, formfeeds, newlines, etc */
3493             case 'b':
3494                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3495                 break;
3496             case 'n':
3497                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3498                 break;
3499             case 'r':
3500                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3501                 break;
3502             case 'f':
3503                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3504                 break;
3505             case 't':
3506                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3507                 break;
3508             case 'e':
3509                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3510                 break;
3511             case 'a':
3512                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3513                 break;
3514             } /* end switch */
3515
3516             s++;
3517             continue;
3518         } /* end if (backslash) */
3519 #ifdef EBCDIC
3520         else
3521             literal_endpoint++;
3522 #endif
3523
3524     default_action:
3525         /* If we started with encoded form, or already know we want it,
3526            then encode the next character */
3527         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3528             STRLEN len  = 1;
3529
3530
3531             /* One might think that it is wasted effort in the case of the
3532              * source being utf8 (this_utf8 == TRUE) to take the next character
3533              * in the source, convert it to an unsigned value, and then convert
3534              * it back again.  But the source has not been validated here.  The
3535              * routine that does the conversion checks for errors like
3536              * malformed utf8 */
3537
3538             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3539             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3540             if (!has_utf8) {
3541                 SvCUR_set(sv, d - SvPVX_const(sv));
3542                 SvPOK_on(sv);
3543                 *d = '\0';
3544                 /* See Note on sizing above.  */
3545                 sv_utf8_upgrade_flags_grow(sv,
3546                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3547                                         need + (STRLEN)(send - s) + 1);
3548                 d = SvPVX(sv) + SvCUR(sv);
3549                 has_utf8 = TRUE;
3550             } else if (need > len) {
3551                 /* encoded value larger than old, may need extra space (NOTE:
3552                  * SvCUR() is not set correctly here).   See Note on sizing
3553                  * above.  */
3554                 const STRLEN off = d - SvPVX_const(sv);
3555                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3556             }
3557             s += len;
3558
3559             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3560 #ifdef EBCDIC
3561             if (uv > 255 && !dorange)
3562                 native_range = FALSE;
3563 #endif
3564         }
3565         else {
3566             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3567         }
3568     } /* while loop to process each character */
3569
3570     /* terminate the string and set up the sv */
3571     *d = '\0';
3572     SvCUR_set(sv, d - SvPVX_const(sv));
3573     if (SvCUR(sv) >= SvLEN(sv))
3574         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3575                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3576
3577     SvPOK_on(sv);
3578     if (PL_encoding && !has_utf8) {
3579         sv_recode_to_utf8(sv, PL_encoding);
3580         if (SvUTF8(sv))
3581             has_utf8 = TRUE;
3582     }
3583     if (has_utf8) {
3584         SvUTF8_on(sv);
3585         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3586             PL_sublex_info.sub_op->op_private |=
3587                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3588         }
3589     }
3590
3591     /* shrink the sv if we allocated more than we used */
3592     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3593         SvPV_shrink_to_cur(sv);
3594     }
3595
3596     /* return the substring (via pl_yylval) only if we parsed anything */
3597     if (s > PL_bufptr) {
3598         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3599             const char *const key = PL_lex_inpat ? "qr" : "q";
3600             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3601             const char *type;
3602             STRLEN typelen;
3603
3604             if (PL_lex_inwhat == OP_TRANS) {
3605                 type = "tr";
3606                 typelen = 2;
3607             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3608                 type = "s";
3609                 typelen = 1;
3610             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3611                 type = "q";
3612                 typelen = 1;
3613             } else  {
3614                 type = "qq";
3615                 typelen = 2;
3616             }
3617
3618             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3619                                 type, typelen);
3620         }
3621         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3622     } else
3623         SvREFCNT_dec(sv);
3624     return s;
3625 }
3626
3627 /* S_intuit_more
3628  * Returns TRUE if there's more to the expression (e.g., a subscript),
3629  * FALSE otherwise.
3630  *
3631  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3632  *
3633  * ->[ and ->{ return TRUE
3634  * { and [ outside a pattern are always subscripts, so return TRUE
3635  * if we're outside a pattern and it's not { or [, then return FALSE
3636  * if we're in a pattern and the first char is a {
3637  *   {4,5} (any digits around the comma) returns FALSE
3638  * if we're in a pattern and the first char is a [
3639  *   [] returns FALSE
3640  *   [SOMETHING] has a funky algorithm to decide whether it's a
3641  *      character class or not.  It has to deal with things like
3642  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3643  * anything else returns TRUE
3644  */
3645
3646 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3647
3648 STATIC int
3649 S_intuit_more(pTHX_ register char *s)
3650 {
3651     dVAR;
3652
3653     PERL_ARGS_ASSERT_INTUIT_MORE;
3654
3655     if (PL_lex_brackets)
3656         return TRUE;
3657     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3658         return TRUE;
3659     if (*s != '{' && *s != '[')
3660         return FALSE;
3661     if (!PL_lex_inpat)
3662         return TRUE;
3663
3664     /* In a pattern, so maybe we have {n,m}. */
3665     if (*s == '{') {
3666         if (regcurly(s)) {
3667             return FALSE;
3668         }
3669         return TRUE;
3670     }
3671
3672     /* On the other hand, maybe we have a character class */
3673
3674     s++;
3675     if (*s == ']' || *s == '^')
3676         return FALSE;
3677     else {
3678         /* this is terrifying, and it works */
3679         int weight = 2;         /* let's weigh the evidence */
3680         char seen[256];
3681         unsigned char un_char = 255, last_un_char;
3682         const char * const send = strchr(s,']');
3683         char tmpbuf[sizeof PL_tokenbuf * 4];
3684
3685         if (!send)              /* has to be an expression */
3686             return TRUE;
3687
3688         Zero(seen,256,char);
3689         if (*s == '$')
3690             weight -= 3;
3691         else if (isDIGIT(*s)) {
3692             if (s[1] != ']') {
3693                 if (isDIGIT(s[1]) && s[2] == ']')
3694                     weight -= 10;
3695             }
3696             else
3697                 weight -= 100;
3698         }
3699         for (; s < send; s++) {
3700             last_un_char = un_char;
3701             un_char = (unsigned char)*s;
3702             switch (*s) {
3703             case '@':
3704             case '&':
3705             case '$':
3706                 weight -= seen[un_char] * 10;
3707                 if (isALNUM_lazy_if(s+1,UTF)) {
3708                     int len;
3709                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3710                     len = (int)strlen(tmpbuf);
3711                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3712                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3713                         weight -= 100;
3714                     else
3715                         weight -= 10;
3716                 }
3717                 else if (*s == '$' && s[1] &&
3718                   strchr("[#!%*<>()-=",s[1])) {
3719                     if (/*{*/ strchr("])} =",s[2]))
3720                         weight -= 10;
3721                     else
3722                         weight -= 1;
3723                 }
3724                 break;
3725             case '\\':
3726                 un_char = 254;
3727                 if (s[1]) {
3728                     if (strchr("wds]",s[1]))
3729                         weight += 100;
3730                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3731                         weight += 1;
3732                     else if (strchr("rnftbxcav",s[1]))
3733                         weight += 40;
3734                     else if (isDIGIT(s[1])) {
3735                         weight += 40;
3736                         while (s[1] && isDIGIT(s[1]))
3737                             s++;
3738                     }
3739                 }
3740                 else
3741                     weight += 100;
3742                 break;
3743             case '-':
3744                 if (s[1] == '\\')
3745                     weight += 50;
3746                 if (strchr("aA01! ",last_un_char))
3747                     weight += 30;
3748                 if (strchr("zZ79~",s[1]))
3749                     weight += 30;
3750                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3751                     weight -= 5;        /* cope with negative subscript */
3752                 break;
3753             default:
3754                 if (!isALNUM(last_un_char)
3755                     && !(last_un_char == '$' || last_un_char == '@'
3756                          || last_un_char == '&')
3757                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3758                     char *d = tmpbuf;
3759                     while (isALPHA(*s))
3760                         *d++ = *s++;
3761                     *d = '\0';
3762                     if (keyword(tmpbuf, d - tmpbuf, 0))
3763                         weight -= 150;
3764                 }
3765                 if (un_char == last_un_char + 1)
3766                     weight += 5;
3767                 weight -= seen[un_char];
3768                 break;
3769             }
3770             seen[un_char]++;
3771         }
3772         if (weight >= 0)        /* probably a character class */
3773             return FALSE;
3774     }
3775
3776     return TRUE;
3777 }
3778
3779 /*
3780  * S_intuit_method
3781  *
3782  * Does all the checking to disambiguate
3783  *   foo bar
3784  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3785  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3786  *
3787  * First argument is the stuff after the first token, e.g. "bar".
3788  *
3789  * Not a method if foo is a filehandle.
3790  * Not a method if foo is a subroutine prototyped to take a filehandle.
3791  * Not a method if it's really "Foo $bar"
3792  * Method if it's "foo $bar"
3793  * Not a method if it's really "print foo $bar"
3794  * Method if it's really "foo package::" (interpreted as package->foo)
3795  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3796  * Not a method if bar is a filehandle or package, but is quoted with
3797  *   =>
3798  */
3799
3800 STATIC int
3801 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3802 {
3803     dVAR;
3804     char *s = start + (*start == '$');
3805     char tmpbuf[sizeof PL_tokenbuf];
3806     STRLEN len;
3807     GV* indirgv;
3808 #ifdef PERL_MAD
3809     int soff;
3810 #endif
3811
3812     PERL_ARGS_ASSERT_INTUIT_METHOD;
3813
3814     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3815             return 0;
3816     if (cv && SvPOK(cv)) {
3817                 const char *proto = CvPROTO(cv);
3818                 if (proto) {
3819                     if (*proto == ';')
3820                         proto++;
3821                     if (*proto == '*')
3822                         return 0;
3823                 }
3824     }
3825     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3826     /* start is the beginning of the possible filehandle/object,
3827      * and s is the end of it
3828      * tmpbuf is a copy of it
3829      */
3830
3831     if (*start == '$') {
3832         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3833                 isUPPER(*PL_tokenbuf))
3834             return 0;
3835 #ifdef PERL_MAD
3836         len = start - SvPVX(PL_linestr);
3837 #endif
3838         s = PEEKSPACE(s);
3839 #ifdef PERL_MAD
3840         start = SvPVX(PL_linestr) + len;
3841 #endif
3842         PL_bufptr = start;
3843         PL_expect = XREF;
3844         return *s == '(' ? FUNCMETH : METHOD;
3845     }
3846     if (!keyword(tmpbuf, len, 0)) {
3847         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3848             len -= 2;
3849             tmpbuf[len] = '\0';
3850 #ifdef PERL_MAD
3851             soff = s - SvPVX(PL_linestr);
3852 #endif
3853             goto bare_package;
3854         }
3855         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3856         if (indirgv && GvCVu(indirgv))
3857             return 0;
3858         /* filehandle or package name makes it a method */
3859         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3860 #ifdef PERL_MAD
3861             soff = s - SvPVX(PL_linestr);
3862 #endif
3863             s = PEEKSPACE(s);
3864             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3865                 return 0;       /* no assumptions -- "=>" quotes bareword */
3866       bare_package:
3867             start_force(PL_curforce);
3868             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3869                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3870             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3871             if (PL_madskills)
3872                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3873                                                             ( UTF ? SVf_UTF8 : 0 )));
3874             PL_expect = XTERM;
3875             force_next(WORD);
3876             PL_bufptr = s;
3877 #ifdef PERL_MAD
3878             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3879 #endif
3880             return *s == '(' ? FUNCMETH : METHOD;
3881         }
3882     }
3883     return 0;
3884 }
3885
3886 /* Encoded script support. filter_add() effectively inserts a
3887  * 'pre-processing' function into the current source input stream.
3888  * Note that the filter function only applies to the current source file
3889  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3890  *
3891  * The datasv parameter (which may be NULL) can be used to pass
3892  * private data to this instance of the filter. The filter function
3893  * can recover the SV using the FILTER_DATA macro and use it to
3894  * store private buffers and state information.
3895  *
3896  * The supplied datasv parameter is upgraded to a PVIO type
3897  * and the IoDIRP/IoANY field is used to store the function pointer,
3898  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3899  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3900  * private use must be set using malloc'd pointers.
3901  */
3902
3903 SV *
3904 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3905 {
3906     dVAR;
3907     if (!funcp)
3908         return NULL;
3909
3910     if (!PL_parser)
3911         return NULL;
3912
3913     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3914         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3915
3916     if (!PL_rsfp_filters)
3917         PL_rsfp_filters = newAV();
3918     if (!datasv)
3919         datasv = newSV(0);
3920     SvUPGRADE(datasv, SVt_PVIO);
3921     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3922     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3923     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3924                           FPTR2DPTR(void *, IoANY(datasv)),
3925                           SvPV_nolen(datasv)));
3926     av_unshift(PL_rsfp_filters, 1);
3927     av_store(PL_rsfp_filters, 0, datasv) ;
3928     if (
3929         !PL_parser->filtered
3930      && PL_parser->lex_flags & LEX_EVALBYTES
3931      && PL_bufptr < PL_bufend
3932     ) {
3933         const char *s = PL_bufptr;
3934         while (s < PL_bufend) {
3935             if (*s == '\n') {
3936                 SV *linestr = PL_parser->linestr;
3937                 char *buf = SvPVX(linestr);
3938                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3939                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3940                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3941                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3942                 STRLEN const last_uni_pos =
3943                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3944                 STRLEN const last_lop_pos =
3945                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3946                 av_push(PL_rsfp_filters, linestr);
3947                 PL_parser->linestr =
3948                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3949                 buf = SvPVX(PL_parser->linestr);
3950                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3951                 PL_parser->bufptr = buf + bufptr_pos;
3952                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3953                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3954                 PL_parser->linestart = buf + linestart_pos;
3955                 if (PL_parser->last_uni)
3956                     PL_parser->last_uni = buf + last_uni_pos;
3957                 if (PL_parser->last_lop)
3958                     PL_parser->last_lop = buf + last_lop_pos;
3959                 SvLEN(linestr) = SvCUR(linestr);
3960                 SvCUR(linestr) = s-SvPVX(linestr);
3961                 PL_parser->filtered = 1;
3962                 break;
3963             }
3964             s++;
3965         }
3966     }
3967     return(datasv);
3968 }
3969
3970
3971 /* Delete most recently added instance of this filter function. */
3972 void
3973 Perl_filter_del(pTHX_ filter_t funcp)
3974 {
3975     dVAR;
3976     SV *datasv;
3977
3978     PERL_ARGS_ASSERT_FILTER_DEL;
3979
3980 #ifdef DEBUGGING
3981     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3982                           FPTR2DPTR(void*, funcp)));
3983 #endif
3984     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3985         return;
3986     /* if filter is on top of stack (usual case) just pop it off */
3987     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3988     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3989         sv_free(av_pop(PL_rsfp_filters));
3990
3991         return;
3992     }
3993     /* we need to search for the correct entry and clear it     */
3994     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3995 }
3996
3997
3998 /* Invoke the idxth filter function for the current rsfp.        */
3999 /* maxlen 0 = read one text line */
4000 I32
4001 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4002 {
4003     dVAR;
4004     filter_t funcp;
4005     SV *datasv = NULL;
4006     /* This API is bad. It should have been using unsigned int for maxlen.
4007        Not sure if we want to change the API, but if not we should sanity
4008        check the value here.  */
4009     unsigned int correct_length
4010         = maxlen < 0 ?
4011 #ifdef PERL_MICRO
4012         0x7FFFFFFF
4013 #else
4014         INT_MAX
4015 #endif
4016         : maxlen;
4017
4018     PERL_ARGS_ASSERT_FILTER_READ;
4019
4020     if (!PL_parser || !PL_rsfp_filters)
4021         return -1;
4022     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4023         /* Provide a default input filter to make life easy.    */
4024         /* Note that we append to the line. This is handy.      */
4025         DEBUG_P(PerlIO_printf(Perl_debug_log,
4026                               "filter_read %d: from rsfp\n", idx));
4027         if (correct_length) {
4028             /* Want a block */
4029             int len ;
4030             const int old_len = SvCUR(buf_sv);
4031
4032             /* ensure buf_sv is large enough */
4033             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4034             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4035                                    correct_length)) <= 0) {
4036                 if (PerlIO_error(PL_rsfp))
4037                     return -1;          /* error */
4038                 else
4039                     return 0 ;          /* end of file */
4040             }
4041             SvCUR_set(buf_sv, old_len + len) ;
4042             SvPVX(buf_sv)[old_len + len] = '\0';
4043         } else {
4044             /* Want a line */
4045             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4046                 if (PerlIO_error(PL_rsfp))
4047                     return -1;          /* error */
4048                 else
4049                     return 0 ;          /* end of file */
4050             }
4051         }
4052         return SvCUR(buf_sv);
4053     }
4054     /* Skip this filter slot if filter has been deleted */
4055     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4056         DEBUG_P(PerlIO_printf(Perl_debug_log,
4057                               "filter_read %d: skipped (filter deleted)\n",
4058                               idx));
4059         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4060     }
4061     if (SvTYPE(datasv) != SVt_PVIO) {
4062         if (correct_length) {
4063             /* Want a block */
4064             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4065             if (!remainder) return 0; /* eof */
4066             if (correct_length > remainder) correct_length = remainder;
4067             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4068             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4069         } else {
4070             /* Want a line */
4071             const char *s = SvEND(datasv);
4072             const char *send = SvPVX(datasv) + SvLEN(datasv);
4073             while (s < send) {
4074                 if (*s == '\n') {
4075                     s++;
4076                     break;
4077                 }
4078                 s++;
4079             }
4080             if (s == send) return 0; /* eof */
4081             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4082             SvCUR_set(datasv, s-SvPVX(datasv));
4083         }
4084         return SvCUR(buf_sv);
4085     }
4086     /* Get function pointer hidden within datasv        */
4087     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4088     DEBUG_P(PerlIO_printf(Perl_debug_log,
4089                           "filter_read %d: via function %p (%s)\n",
4090                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4091     /* Call function. The function is expected to       */
4092     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4093     /* Return: <0:error, =0:eof, >0:not eof             */
4094     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4095 }
4096
4097 STATIC char *
4098 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4099 {
4100     dVAR;
4101
4102     PERL_ARGS_ASSERT_FILTER_GETS;
4103
4104 #ifdef PERL_CR_FILTER
4105     if (!PL_rsfp_filters) {
4106         filter_add(S_cr_textfilter,NULL);
4107     }
4108 #endif
4109     if (PL_rsfp_filters) {
4110         if (!append)
4111             SvCUR_set(sv, 0);   /* start with empty line        */
4112         if (FILTER_READ(0, sv, 0) > 0)
4113             return ( SvPVX(sv) ) ;
4114         else
4115             return NULL ;
4116     }
4117     else
4118         return (sv_gets(sv, PL_rsfp, append));
4119 }
4120
4121 STATIC HV *
4122 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4123 {
4124     dVAR;
4125     GV *gv;
4126
4127     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4128
4129     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4130         return PL_curstash;
4131
4132     if (len > 2 &&
4133         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4134         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4135     {
4136         return GvHV(gv);                        /* Foo:: */
4137     }
4138
4139     /* use constant CLASS => 'MyClass' */
4140     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4141     if (gv && GvCV(gv)) {
4142         SV * const sv = cv_const_sv(GvCV(gv));
4143         if (sv)
4144             pkgname = SvPV_const(sv, len);
4145     }
4146
4147     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4148 }
4149
4150 /*
4151  * S_readpipe_override
4152  * Check whether readpipe() is overridden, and generates the appropriate
4153  * optree, provided sublex_start() is called afterwards.
4154  */
4155 STATIC void
4156 S_readpipe_override(pTHX)
4157 {
4158     GV **gvp;
4159     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4160     pl_yylval.ival = OP_BACKTICK;
4161     if ((gv_readpipe
4162                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4163             ||
4164             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4165              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4166              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4167     {
4168         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4169             op_append_elem(OP_LIST,
4170                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4171                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4172     }
4173 }
4174
4175 #ifdef PERL_MAD
4176  /*
4177  * Perl_madlex
4178  * The intent of this yylex wrapper is to minimize the changes to the
4179  * tokener when we aren't interested in collecting madprops.  It remains
4180  * to be seen how successful this strategy will be...
4181  */
4182
4183 int
4184 Perl_madlex(pTHX)
4185 {
4186     int optype;
4187     char *s = PL_bufptr;
4188
4189     /* make sure PL_thiswhite is initialized */
4190     PL_thiswhite = 0;
4191     PL_thismad = 0;
4192
4193     /* previous token ate up our whitespace? */
4194     if (!PL_lasttoke && PL_nextwhite) {
4195         PL_thiswhite = PL_nextwhite;
4196         PL_nextwhite = 0;
4197     }
4198
4199     /* isolate the token, and figure out where it is without whitespace */
4200     PL_realtokenstart = -1;
4201     PL_thistoken = 0;
4202     optype = yylex();
4203     s = PL_bufptr;
4204     assert(PL_curforce < 0);
4205
4206     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4207         if (!PL_thistoken) {
4208             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4209                 PL_thistoken = newSVpvs("");
4210             else {
4211                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4212                 PL_thistoken = newSVpvn(tstart, s - tstart);
4213             }
4214         }
4215         if (PL_thismad) /* install head */
4216             CURMAD('X', PL_thistoken);
4217     }
4218
4219     /* last whitespace of a sublex? */
4220     if (optype == ')' && PL_endwhite) {
4221         CURMAD('X', PL_endwhite);
4222     }
4223
4224     if (!PL_thismad) {
4225
4226         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4227         if (!PL_thiswhite && !PL_endwhite && !optype) {
4228             sv_free(PL_thistoken);
4229             PL_thistoken = 0;
4230             return 0;
4231         }
4232
4233         /* put off final whitespace till peg */
4234         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4235             PL_nextwhite = PL_thiswhite;
4236             PL_thiswhite = 0;
4237         }
4238         else if (PL_thisopen) {
4239             CURMAD('q', PL_thisopen);
4240             if (PL_thistoken)
4241                 sv_free(PL_thistoken);
4242             PL_thistoken = 0;
4243         }
4244         else {
4245             /* Store actual token text as madprop X */
4246             CURMAD('X', PL_thistoken);
4247         }
4248
4249         if (PL_thiswhite) {
4250             /* add preceding whitespace as madprop _ */
4251             CURMAD('_', PL_thiswhite);
4252         }
4253
4254         if (PL_thisstuff) {
4255             /* add quoted material as madprop = */
4256             CURMAD('=', PL_thisstuff);
4257         }
4258
4259         if (PL_thisclose) {
4260             /* add terminating quote as madprop Q */
4261             CURMAD('Q', PL_thisclose);
4262         }
4263     }
4264
4265     /* special processing based on optype */
4266
4267     switch (optype) {
4268
4269     /* opval doesn't need a TOKEN since it can already store mp */
4270     case WORD:
4271     case METHOD:
4272     case FUNCMETH:
4273     case THING:
4274     case PMFUNC:
4275     case PRIVATEREF:
4276     case FUNC0SUB:
4277     case UNIOPSUB:
4278     case LSTOPSUB:
4279     case LABEL:
4280         if (pl_yylval.opval)
4281             append_madprops(PL_thismad, pl_yylval.opval, 0);
4282         PL_thismad = 0;
4283         return optype;
4284
4285     /* fake EOF */
4286     case 0:
4287         optype = PEG;
4288         if (PL_endwhite) {
4289             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4290             PL_endwhite = 0;
4291         }
4292         break;
4293
4294     case ']':
4295     case '}':
4296         if (PL_faketokens)
4297             break;
4298         /* remember any fake bracket that lexer is about to discard */
4299         if (PL_lex_brackets == 1 &&
4300             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4301         {
4302             s = PL_bufptr;
4303             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4304                 s++;
4305             if (*s == '}') {
4306                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4307                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4308                 PL_thiswhite = 0;
4309                 PL_bufptr = s - 1;
4310                 break;  /* don't bother looking for trailing comment */
4311             }
4312             else
4313                 s = PL_bufptr;
4314         }
4315         if (optype == ']')
4316             break;
4317         /* FALLTHROUGH */
4318
4319     /* attach a trailing comment to its statement instead of next token */
4320     case ';':
4321         if (PL_faketokens)
4322             break;
4323         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4324             s = PL_bufptr;
4325             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4326                 s++;
4327             if (*s == '\n' || *s == '#') {
4328                 while (s < PL_bufend && *s != '\n')
4329                     s++;
4330                 if (s < PL_bufend)
4331                     s++;
4332                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4333                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4334                 PL_thiswhite = 0;
4335                 PL_bufptr = s;
4336             }
4337         }
4338         break;
4339
4340     /* ival */
4341     default:
4342         break;
4343
4344     }
4345
4346     /* Create new token struct.  Note: opvals return early above. */
4347     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4348     PL_thismad = 0;
4349     return optype;
4350 }
4351 #endif
4352
4353 STATIC char *
4354 S_tokenize_use(pTHX_ int is_use, char *s) {
4355     dVAR;
4356
4357     PERL_ARGS_ASSERT_TOKENIZE_USE;
4358
4359     if (PL_expect != XSTATE)
4360         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4361                     is_use ? "use" : "no"));
4362     PL_expect = XTERM;
4363     s = SKIPSPACE1(s);
4364     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4365         s = force_version(s, TRUE);
4366         if (*s == ';' || *s == '}'
4367                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4368             start_force(PL_curforce);
4369             NEXTVAL_NEXTTOKE.opval = NULL;
4370             force_next(WORD);
4371         }
4372         else if (*s == 'v') {
4373             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4374             s = force_version(s, FALSE);
4375         }
4376     }
4377     else {
4378         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4379         s = force_version(s, FALSE);
4380     }
4381     pl_yylval.ival = is_use;
4382     return s;
4383 }
4384 #ifdef DEBUGGING
4385     static const char* const exp_name[] =
4386         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4387           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4388         };
4389 #endif
4390
4391 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4392 STATIC bool
4393 S_word_takes_any_delimeter(char *p, STRLEN len)
4394 {
4395     return (len == 1 && strchr("msyq", p[0])) ||
4396            (len == 2 && (
4397             (p[0] == 't' && p[1] == 'r') ||
4398             (p[0] == 'q' && strchr("qwxr", p[1]))));
4399 }
4400
4401 /*
4402   yylex
4403
4404   Works out what to call the token just pulled out of the input
4405   stream.  The yacc parser takes care of taking the ops we return and
4406   stitching them into a tree.
4407
4408   Returns:
4409     The type of the next token
4410
4411   Structure:
4412       Switch based on the current state:
4413           - if we already built the token before, use it
4414           - if we have a case modifier in a string, deal with that
4415           - handle other cases of interpolation inside a string
4416           - scan the next line if we are inside a format
4417       In the normal state switch on the next character:
4418           - default:
4419             if alphabetic, go to key lookup
4420             unrecoginized character - croak
4421           - 0/4/26: handle end-of-line or EOF
4422           - cases for whitespace
4423           - \n and #: handle comments and line numbers
4424           - various operators, brackets and sigils
4425           - numbers
4426           - quotes
4427           - 'v': vstrings (or go to key lookup)
4428           - 'x' repetition operator (or go to key lookup)
4429           - other ASCII alphanumerics (key lookup begins here):
4430               word before => ?
4431               keyword plugin
4432               scan built-in keyword (but do nothing with it yet)
4433               check for statement label
4434               check for lexical subs
4435                   goto just_a_word if there is one
4436               see whether built-in keyword is overridden
4437               switch on keyword number:
4438                   - default: just_a_word:
4439                       not a built-in keyword; handle bareword lookup
4440                       disambiguate between method and sub call
4441                       fall back to bareword
4442                   - cases for built-in keywords
4443 */
4444
4445
4446 #ifdef __SC__
4447 #pragma segment Perl_yylex
4448 #endif
4449 int
4450 Perl_yylex(pTHX)
4451 {
4452     dVAR;
4453     char *s = PL_bufptr;
4454     char *d;
4455     STRLEN len;
4456     bool bof = FALSE;
4457     U8 formbrack = 0;
4458     U32 fake_eof = 0;
4459
4460     /* orig_keyword, gvp, and gv are initialized here because
4461      * jump to the label just_a_word_zero can bypass their
4462      * initialization later. */
4463     I32 orig_keyword = 0;
4464     GV *gv = NULL;
4465     GV **gvp = NULL;
4466
4467     DEBUG_T( {
4468         SV* tmp = newSVpvs("");
4469         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4470             (IV)CopLINE(PL_curcop),
4471             lex_state_names[PL_lex_state],
4472             exp_name[PL_expect],
4473             pv_display(tmp, s, strlen(s), 0, 60));
4474         SvREFCNT_dec(tmp);
4475     } );
4476
4477     switch (PL_lex_state) {
4478 #ifdef COMMENTARY
4479     case LEX_NORMAL:            /* Some compilers will produce faster */
4480     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4481         break;
4482 #endif
4483
4484     /* when we've already built the next token, just pull it out of the queue */
4485     case LEX_KNOWNEXT:
4486 #ifdef PERL_MAD
4487         PL_lasttoke--;
4488         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4489         if (PL_madskills) {
4490             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4491             PL_nexttoke[PL_lasttoke].next_mad = 0;
4492             if (PL_thismad && PL_thismad->mad_key == '_') {
4493                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4494                 PL_thismad->mad_val = 0;
4495                 mad_free(PL_thismad);
4496                 PL_thismad = 0;
4497             }
4498         }
4499         if (!PL_lasttoke) {
4500             PL_lex_state = PL_lex_defer;
4501             PL_expect = PL_lex_expect;
4502             PL_lex_defer = LEX_NORMAL;
4503             if (!PL_nexttoke[PL_lasttoke].next_type)
4504                 return yylex();
4505         }
4506 #else
4507         PL_nexttoke--;
4508         pl_yylval = PL_nextval[PL_nexttoke];
4509         if (!PL_nexttoke) {
4510             PL_lex_state = PL_lex_defer;
4511             PL_expect = PL_lex_expect;
4512             PL_lex_defer = LEX_NORMAL;
4513         }
4514 #endif
4515         {
4516             I32 next_type;
4517 #ifdef PERL_MAD
4518             next_type = PL_nexttoke[PL_lasttoke].next_type;
4519 #else
4520             next_type = PL_nexttype[PL_nexttoke];
4521 #endif
4522             if (next_type & (7<<24)) {
4523                 if (next_type & (1<<24)) {
4524                     if (PL_lex_brackets > 100)
4525                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4526                     PL_lex_brackstack[PL_lex_brackets++] =
4527                         (char) ((next_type >> 16) & 0xff);
4528                 }
4529                 if (next_type & (2<<24))
4530                     PL_lex_allbrackets++;
4531                 if (next_type & (4<<24))
4532                     PL_lex_allbrackets--;
4533                 next_type &= 0xffff;
4534             }
4535             if (S_is_opval_token(next_type) && pl_yylval.opval)
4536                 pl_yylval.opval->op_savefree = 0; /* release */
4537             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4538         }
4539
4540     /* interpolated case modifiers like \L \U, including \Q and \E.
4541        when we get here, PL_bufptr is at the \
4542     */
4543     case LEX_INTERPCASEMOD:
4544 #ifdef DEBUGGING
4545         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4546             Perl_croak(aTHX_
4547                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4548                        PL_bufptr, PL_bufend, *PL_bufptr);
4549 #endif
4550         /* handle \E or end of string */
4551         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4552             /* if at a \E */
4553             if (PL_lex_casemods) {
4554                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4555                 PL_lex_casestack[PL_lex_casemods] = '\0';
4556
4557                 if (PL_bufptr != PL_bufend
4558                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4559                         || oldmod == 'F')) {
4560                     PL_bufptr += 2;
4561                     PL_lex_state = LEX_INTERPCONCAT;
4562 #ifdef PERL_MAD
4563                     if (PL_madskills)
4564                         PL_thistoken = newSVpvs("\\E");
4565 #endif
4566                 }
4567                 PL_lex_allbrackets--;
4568                 return REPORT(')');
4569             }
4570             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4571                /* Got an unpaired \E */
4572                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4573                         "Useless use of \\E");
4574             }
4575 #ifdef PERL_MAD
4576             while (PL_bufptr != PL_bufend &&
4577               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4578                 if (!PL_thiswhite)
4579                     PL_thiswhite = newSVpvs("");
4580                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4581                 PL_bufptr += 2;
4582             }
4583 #else
4584             if (PL_bufptr != PL_bufend)
4585                 PL_bufptr += 2;
4586 #endif
4587             PL_lex_state = LEX_INTERPCONCAT;
4588             return yylex();
4589         }
4590         else {
4591             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4592               "### Saw case modifier\n"); });
4593             s = PL_bufptr + 1;
4594             if (s[1] == '\\' && s[2] == 'E') {
4595 #ifdef PERL_MAD
4596                 if (!PL_thiswhite)
4597                     PL_thiswhite = newSVpvs("");
4598                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4599 #endif
4600                 PL_bufptr = s + 3;
4601                 PL_lex_state = LEX_INTERPCONCAT;
4602                 return yylex();
4603             }
4604             else {
4605                 I32 tmp;
4606                 if (!PL_madskills) /* when just compiling don't need correct */
4607                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4608                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4609                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4610                     (strchr(PL_lex_casestack, 'L')
4611                         || strchr(PL_lex_casestack, 'U')
4612                         || strchr(PL_lex_casestack, 'F'))) {
4613                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4614                     PL_lex_allbrackets--;
4615                     return REPORT(')');
4616                 }
4617                 if (PL_lex_casemods > 10)
4618                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4619                 PL_lex_casestack[PL_lex_casemods++] = *s;
4620                 PL_lex_casestack[PL_lex_casemods] = '\0';
4621                 PL_lex_state = LEX_INTERPCONCAT;
4622                 start_force(PL_curforce);
4623                 NEXTVAL_NEXTTOKE.ival = 0;
4624                 force_next((2<<24)|'(');
4625                 start_force(PL_curforce);
4626                 if (*s == 'l')
4627                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4628                 else if (*s == 'u')
4629                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4630                 else if (*s == 'L')
4631                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4632                 else if (*s == 'U')
4633                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4634                 else if (*s == 'Q')
4635                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4636                 else if (*s == 'F')
4637                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4638                 else
4639                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4640                 if (PL_madskills) {
4641                     SV* const tmpsv = newSVpvs("\\ ");
4642                     /* replace the space with the character we want to escape
4643                      */
4644                     SvPVX(tmpsv)[1] = *s;
4645                     curmad('_', tmpsv);
4646                 }
4647                 PL_bufptr = s + 1;
4648             }
4649             force_next(FUNC);
4650             if (PL_lex_starts) {
4651                 s = PL_bufptr;
4652                 PL_lex_starts = 0;
4653 #ifdef PERL_MAD
4654                 if (PL_madskills) {
4655                     if (PL_thistoken)
4656                         sv_free(PL_thistoken);
4657                     PL_thistoken = newSVpvs("");
4658                 }
4659 #endif
4660                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4661                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4662                     OPERATOR(',');
4663                 else
4664                     Aop(OP_CONCAT);
4665             }
4666             else
4667                 return yylex();
4668         }
4669
4670     case LEX_INTERPPUSH:
4671         return REPORT(sublex_push());
4672
4673     case LEX_INTERPSTART:
4674         if (PL_bufptr == PL_bufend)
4675             return REPORT(sublex_done());
4676         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4677               "### Interpolated variable\n"); });
4678         PL_expect = XTERM;
4679         PL_lex_dojoin = (*PL_bufptr == '@');
4680         PL_lex_state = LEX_INTERPNORMAL;
4681         if (PL_lex_dojoin) {
4682             start_force(PL_curforce);
4683             NEXTVAL_NEXTTOKE.ival = 0;
4684             force_next(',');
4685             start_force(PL_curforce);
4686             force_ident("\"", '$');
4687             start_force(PL_curforce);
4688             NEXTVAL_NEXTTOKE.ival = 0;
4689             force_next('$');
4690             start_force(PL_curforce);
4691             NEXTVAL_NEXTTOKE.ival = 0;
4692             force_next((2<<24)|'(');
4693             start_force(PL_curforce);
4694             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4695             force_next(FUNC);
4696         }
4697         /* Convert (?{...}) and friends to 'do {...}' */
4698         if (PL_lex_inpat && *PL_bufptr == '(') {
4699             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4700             PL_bufptr += 2;
4701             if (*PL_bufptr != '{')
4702                 PL_bufptr++;
4703             start_force(PL_curforce);
4704             /* XXX probably need a CURMAD(something) here */
4705             PL_expect = XTERMBLOCK;
4706             force_next(DO);
4707         }
4708
4709         if (PL_lex_starts++) {
4710             s = PL_bufptr;
4711 #ifdef PERL_MAD
4712             if (PL_madskills) {
4713                 if (PL_thistoken)
4714                     sv_free(PL_thistoken);
4715                 PL_thistoken = newSVpvs("");
4716             }
4717 #endif
4718             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4719             if (!PL_lex_casemods && PL_lex_inpat)
4720                 OPERATOR(',');
4721             else
4722                 Aop(OP_CONCAT);
4723         }
4724         return yylex();
4725
4726     case LEX_INTERPENDMAYBE:
4727         if (intuit_more(PL_bufptr)) {
4728             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4729             break;
4730         }
4731         /* FALL THROUGH */
4732
4733     case LEX_INTERPEND:
4734         if (PL_lex_dojoin) {
4735             PL_lex_dojoin = FALSE;
4736             PL_lex_state = LEX_INTERPCONCAT;
4737 #ifdef PERL_MAD
4738             if (PL_madskills) {
4739                 if (PL_thistoken)
4740                     sv_free(PL_thistoken);
4741                 PL_thistoken = newSVpvs("");
4742             }
4743 #endif
4744             PL_lex_allbrackets--;
4745             return REPORT(')');
4746         }
4747         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4748             && SvEVALED(PL_lex_repl))
4749         {
4750             if (PL_bufptr != PL_bufend)
4751                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4752             PL_lex_repl = NULL;
4753         }
4754         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4755            re_eval_str.  If the here-doc body’s length equals the previous
4756            value of re_eval_start, re_eval_start will now be null.  So
4757            check re_eval_str as well. */
4758         if (PL_parser->lex_shared->re_eval_start
4759          || PL_parser->lex_shared->re_eval_str) {
4760             SV *sv;
4761             if (*PL_bufptr != ')')
4762                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4763             PL_bufptr++;
4764             /* having compiled a (?{..}) expression, return the original
4765              * text too, as a const */
4766             if (PL_parser->lex_shared->re_eval_str) {
4767                 sv = PL_parser->lex_shared->re_eval_str;
4768                 PL_parser->lex_shared->re_eval_str = NULL;
4769                 SvCUR_set(sv,
4770                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4771                 SvPV_shrink_to_cur(sv);
4772             }
4773             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4774                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4775             start_force(PL_curforce);
4776             /* XXX probably need a CURMAD(something) here */
4777             NEXTVAL_NEXTTOKE.opval =
4778                     (OP*)newSVOP(OP_CONST, 0,
4779                                  sv);
4780             force_next(THING);
4781             PL_parser->lex_shared->re_eval_start = NULL;
4782             PL_expect = XTERM;
4783             return REPORT(',');
4784         }
4785
4786         /* FALLTHROUGH */
4787     case LEX_INTERPCONCAT:
4788 #ifdef DEBUGGING
4789         if (PL_lex_brackets)
4790             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4791                        (long) PL_lex_brackets);
4792 #endif
4793         if (PL_bufptr == PL_bufend)
4794             return REPORT(sublex_done());
4795
4796         /* m'foo' still needs to be parsed for possible (?{...}) */
4797         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4798             SV *sv = newSVsv(PL_linestr);
4799             sv = tokeq(sv);
4800             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4801             s = PL_bufend;
4802         }
4803         else {
4804             s = scan_const(PL_bufptr);
4805             if (*s == '\\')
4806                 PL_lex_state = LEX_INTERPCASEMOD;
4807             else
4808                 PL_lex_state = LEX_INTERPSTART;
4809         }
4810
4811         if (s != PL_bufptr) {
4812             start_force(PL_curforce);
4813             if (PL_madskills) {
4814                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4815             }
4816             NEXTVAL_NEXTTOKE = pl_yylval;
4817             PL_expect = XTERM;
4818             force_next(THING);
4819             if (PL_lex_starts++) {
4820 #ifdef PERL_MAD
4821                 if (PL_madskills) {
4822                     if (PL_thistoken)
4823                         sv_free(PL_thistoken);
4824                     PL_thistoken = newSVpvs("");
4825                 }
4826 #endif
4827                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4828                 if (!PL_lex_casemods && PL_lex_inpat)
4829                     OPERATOR(',');
4830                 else
4831                     Aop(OP_CONCAT);
4832             }
4833             else {
4834                 PL_bufptr = s;
4835                 return yylex();
4836             }
4837         }
4838
4839         return yylex();
4840     case LEX_FORMLINE:
4841         s = scan_formline(PL_bufptr);
4842         if (!PL_lex_formbrack)
4843         {
4844             formbrack = 1;
4845             goto rightbracket;
4846         }
4847         PL_bufptr = s;
4848         return yylex();
4849     }
4850
4851     s = PL_bufptr;
4852     PL_oldoldbufptr = PL_oldbufptr;
4853     PL_oldbufptr = s;
4854
4855   retry:
4856 #ifdef PERL_MAD
4857     if (PL_thistoken) {
4858         sv_free(PL_thistoken);
4859         PL_thistoken = 0;
4860     }
4861     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4862 #endif
4863     switch (*s) {
4864     default:
4865         if (isIDFIRST_lazy_if(s,UTF))
4866             goto keylookup;
4867         {
4868         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4869         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4870                                                     UTF8SKIP(s),
4871                                                     SVs_TEMP | SVf_UTF8),
4872                                             10, UNI_DISPLAY_ISPRINT))
4873                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4874         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4875         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4876             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4877         } else {
4878             d = PL_linestart;
4879         }
4880         *s = '\0';
4881         sv_setpv(dsv, d);
4882         if (UTF)
4883             SvUTF8_on(dsv);
4884         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4885     }
4886     case 4:
4887     case 26:
4888         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4889     case 0:
4890 #ifdef PERL_MAD
4891         if (PL_madskills)
4892             PL_faketokens = 0;
4893 #endif
4894         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4895             PL_last_uni = 0;
4896             PL_last_lop = 0;
4897             if (PL_lex_brackets &&
4898                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4899                 yyerror((const char *)
4900                         (PL_lex_formbrack
4901                          ? "Format not terminated"
4902                          : "Missing right curly or square bracket"));
4903             }
4904             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4905                         "### Tokener got EOF\n");
4906             } );
4907             TOKEN(0);
4908         }
4909         if (s++ < PL_bufend)
4910             goto retry;                 /* ignore stray nulls */
4911         PL_last_uni = 0;
4912         PL_last_lop = 0;
4913         if (!PL_in_eval && !PL_preambled) {
4914             PL_preambled = TRUE;
4915 #ifdef PERL_MAD
4916             if (PL_madskills)
4917                 PL_faketokens = 1;
4918 #endif
4919             if (PL_perldb) {
4920                 /* Generate a string of Perl code to load the debugger.
4921                  * If PERL5DB is set, it will return the contents of that,
4922                  * otherwise a compile-time require of perl5db.pl.  */
4923
4924                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4925
4926                 if (pdb) {
4927                     sv_setpv(PL_linestr, pdb);
4928                     sv_catpvs(PL_linestr,";");
4929                 } else {
4930                     SETERRNO(0,SS_NORMAL);
4931                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4932                 }
4933             } else
4934                 sv_setpvs(PL_linestr,"");
4935             if (PL_preambleav) {
4936                 SV **svp = AvARRAY(PL_preambleav);
4937                 SV **const end = svp + AvFILLp(PL_preambleav);
4938                 while(svp <= end) {
4939                     sv_catsv(PL_linestr, *svp);
4940                     ++svp;
4941                     sv_catpvs(PL_linestr, ";");
4942                 }
4943                 sv_free(MUTABLE_SV(PL_preambleav));
4944                 PL_preambleav = NULL;
4945             }
4946             if (PL_minus_E)
4947                 sv_catpvs(PL_linestr,
4948                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4949             if (PL_minus_n || PL_minus_p) {
4950                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4951                 if (PL_minus_l)
4952                     sv_catpvs(PL_linestr,"chomp;");
4953                 if (PL_minus_a) {
4954                     if (PL_minus_F) {
4955                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4956                              || *PL_splitstr == '"')
4957                               && strchr(PL_splitstr + 1, *PL_splitstr))
4958                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4959                         else {
4960                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4961                                bytes can be used as quoting characters.  :-) */
4962                             const char *splits = PL_splitstr;
4963                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4964                             do {
4965                                 /* Need to \ \s  */
4966                                 if (*splits == '\\')
4967                                     sv_catpvn(PL_linestr, splits, 1);
4968                                 sv_catpvn(PL_linestr, splits, 1);
4969                             } while (*splits++);
4970                             /* This loop will embed the trailing NUL of
4971                                PL_linestr as the last thing it does before
4972                                terminating.  */
4973                             sv_catpvs(PL_linestr, ");");
4974                         }
4975                     }
4976                     else
4977                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4978                 }
4979             }
4980             sv_catpvs(PL_linestr, "\n");
4981             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4982             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4983             PL_last_lop = PL_last_uni = NULL;
4984             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4985                 update_debugger_info(PL_linestr, NULL, 0);
4986             goto retry;
4987         }
4988         do {
4989             fake_eof = 0;
4990             bof = PL_rsfp ? TRUE : FALSE;
4991             if (0) {
4992               fake_eof:
4993                 fake_eof = LEX_FAKE_EOF;
4994             }
4995             PL_bufptr = PL_bufend;
4996             COPLINE_INC_WITH_HERELINES;
4997             if (!lex_next_chunk(fake_eof)) {
4998                 CopLINE_dec(PL_curcop);
4999                 s = PL_bufptr;
5000                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5001             }
5002             CopLINE_dec(PL_curcop);
5003 #ifdef PERL_MAD
5004             if (!PL_rsfp)
5005                 PL_realtokenstart = -1;
5006 #endif
5007             s = PL_bufptr;
5008             /* If it looks like the start of a BOM or raw UTF-16,
5009              * check if it in fact is. */
5010             if (bof && PL_rsfp &&
5011                      (*s == 0 ||
5012                       *(U8*)s == 0xEF ||
5013                       *(U8*)s >= 0xFE ||
5014                       s[1] == 0)) {
5015                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5016                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5017 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5018                 /* offset may include swallowed CR */
5019                 if (!bof)
5020                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5021 #endif
5022                 if (bof) {
5023                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5024                     s = swallow_bom((U8*)s);
5025                 }
5026             }
5027             if (PL_parser->in_pod) {
5028                 /* Incest with pod. */
5029 #ifdef PERL_MAD
5030                 if (PL_madskills)
5031                     sv_catsv(PL_thiswhite, PL_linestr);
5032 #endif
5033                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5034                     sv_setpvs(PL_linestr, "");
5035                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5036                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5037                     PL_last_lop = PL_last_uni = NULL;
5038                     PL_parser->in_pod = 0;
5039                 }
5040             }
5041             if (PL_rsfp || PL_parser->filtered)
5042                 incline(s);
5043         } while (PL_parser->in_pod);
5044         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5045         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5046         PL_last_lop = PL_last_uni = NULL;
5047         if (CopLINE(PL_curcop) == 1) {
5048             while (s < PL_bufend && isSPACE(*s))
5049                 s++;
5050             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5051                 s++;
5052 #ifdef PERL_MAD
5053             if (PL_madskills)
5054                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5055 #endif
5056             d = NULL;
5057             if (!PL_in_eval) {
5058                 if (*s == '#' && *(s+1) == '!')
5059                     d = s + 2;
5060 #ifdef ALTERNATE_SHEBANG
5061                 else {
5062                     static char const as[] = ALTERNATE_SHEBANG;
5063                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5064                         d = s + (sizeof(as) - 1);
5065                 }
5066 #endif /* ALTERNATE_SHEBANG */
5067             }
5068             if (d) {
5069                 char *ipath;
5070                 char *ipathend;
5071
5072                 while (isSPACE(*d))
5073                     d++;
5074                 ipath = d;
5075                 while (*d && !isSPACE(*d))
5076                     d++;
5077                 ipathend = d;
5078
5079 #ifdef ARG_ZERO_IS_SCRIPT
5080                 if (ipathend > ipath) {
5081                     /*
5082                      * HP-UX (at least) sets argv[0] to the script name,
5083                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5084                      * at least, set argv[0] to the basename of the Perl
5085                      * interpreter. So, having found "#!", we'll set it right.
5086                      */
5087                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5088                                                     SVt_PV)); /* $^X */
5089                     assert(SvPOK(x) || SvGMAGICAL(x));
5090                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5091                         sv_setpvn(x, ipath, ipathend - ipath);
5092                         SvSETMAGIC(x);
5093                     }
5094                     else {
5095                         STRLEN blen;
5096                         STRLEN llen;
5097                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5098                         const char * const lstart = SvPV_const(x,llen);
5099                         if (llen < blen) {
5100                             bstart += blen - llen;
5101                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5102                                 sv_setpvn(x, ipath, ipathend - ipath);
5103                                 SvSETMAGIC(x);
5104                             }
5105                         }
5106                     }
5107                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5108                 }
5109 #endif /* ARG_ZERO_IS_SCRIPT */
5110
5111                 /*
5112                  * Look for options.
5113                  */
5114                 d = instr(s,"perl -");
5115                 if (!d) {
5116                     d = instr(s,"perl");
5117 #if defined(DOSISH)
5118                     /* avoid getting into infinite loops when shebang
5119                      * line contains "Perl" rather than "perl" */
5120                     if (!d) {
5121                         for (d = ipathend-4; d >= ipath; --d) {
5122                             if ((*d == 'p' || *d == 'P')
5123                                 && !ibcmp(d, "perl", 4))
5124                             {
5125                                 break;
5126                             }
5127                         }
5128                         if (d < ipath)
5129                             d = NULL;
5130                     }
5131 #endif
5132                 }
5133 #ifdef ALTERNATE_SHEBANG
5134                 /*
5135                  * If the ALTERNATE_SHEBANG on this system starts with a
5136                  * character that can be part of a Perl expression, then if
5137                  * we see it but not "perl", we're probably looking at the
5138                  * start of Perl code, not a request to hand off to some
5139                  * other interpreter.  Similarly, if "perl" is there, but
5140                  * not in the first 'word' of the line, we assume the line
5141                  * contains the start of the Perl program.
5142                  */
5143                 if (d && *s != '#') {
5144                     const char *c = ipath;
5145                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5146                         c++;
5147                     if (c < d)
5148                         d = NULL;       /* "perl" not in first word; ignore */
5149                     else
5150                         *s = '#';       /* Don't try to parse shebang line */
5151                 }
5152 #endif /* ALTERNATE_SHEBANG */
5153                 if (!d &&
5154                     *s == '#' &&
5155                     ipathend > ipath &&
5156                     !PL_minus_c &&
5157                     !instr(s,"indir") &&
5158                     instr(PL_origargv[0],"perl"))
5159                 {
5160                     dVAR;
5161                     char **newargv;
5162
5163                     *ipathend = '\0';
5164                     s = ipathend + 1;
5165                     while (s < PL_bufend && isSPACE(*s))
5166                         s++;
5167                     if (s < PL_bufend) {
5168                         Newx(newargv,PL_origargc+3,char*);
5169                         newargv[1] = s;
5170                         while (s < PL_bufend && !isSPACE(*s))
5171                             s++;
5172                         *s = '\0';
5173                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5174                     }
5175                     else
5176                         newargv = PL_origargv;
5177                     newargv[0] = ipath;
5178                     PERL_FPU_PRE_EXEC
5179                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5180                     PERL_FPU_POST_EXEC
5181                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5182                 }
5183                 if (d) {
5184                     while (*d && !isSPACE(*d))
5185                         d++;
5186                     while (SPACE_OR_TAB(*d))
5187                         d++;
5188
5189                     if (*d++ == '-') {
5190                         const bool switches_done = PL_doswitches;
5191                         const U32 oldpdb = PL_perldb;
5192                         const bool oldn = PL_minus_n;
5193                         const bool oldp = PL_minus_p;
5194                         const char *d1 = d;
5195
5196                         do {
5197                             bool baduni = FALSE;
5198                             if (*d1 == 'C') {
5199                                 const char *d2 = d1 + 1;
5200                                 if (parse_unicode_opts((const char **)&d2)
5201                                     != PL_unicode)
5202                                     baduni = TRUE;
5203                             }
5204                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5205                                 const char * const m = d1;
5206                                 while (*d1 && !isSPACE(*d1))
5207                                     d1++;
5208                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5209                                       (int)(d1 - m), m);
5210                             }
5211                             d1 = moreswitches(d1);
5212                         } while (d1);
5213                         if (PL_doswitches && !switches_done) {
5214                             int argc = PL_origargc;
5215                             char **argv = PL_origargv;
5216                             do {
5217                                 argc--,argv++;
5218                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5219                             init_argv_symbols(argc,argv);
5220                         }
5221                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5222                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5223                               /* if we have already added "LINE: while (<>) {",
5224                                  we must not do it again */
5225                         {
5226                             sv_setpvs(PL_linestr, "");
5227                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5228                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5229                             PL_last_lop = PL_last_uni = NULL;
5230                             PL_preambled = FALSE;
5231                             if (PERLDB_LINE || PERLDB_SAVESRC)
5232                                 (void)gv_fetchfile(PL_origfilename);
5233                             goto retry;
5234                         }
5235                     }
5236                 }
5237             }
5238         }
5239         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5240             PL_lex_state = LEX_FORMLINE;
5241             start_force(PL_curforce);
5242             NEXTVAL_NEXTTOKE.ival = 0;
5243             force_next(FORMRBRACK);
5244             TOKEN(';');
5245         }
5246         goto retry;
5247     case '\r':
5248 #ifdef PERL_STRICT_CR
5249         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5250         Perl_croak(aTHX_
5251       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5252 #endif
5253     case ' ': case '\t': case '\f': case 013:
5254 #ifdef PERL_MAD
5255         PL_realtokenstart = -1;
5256         if (!PL_thiswhite)
5257             PL_thiswhite = newSVpvs("");
5258         sv_catpvn(PL_thiswhite, s, 1);
5259 #endif
5260         s++;
5261         goto retry;
5262     case '#':
5263     case '\n':
5264 #ifdef PERL_MAD
5265         PL_realtokenstart = -1;
5266         if (PL_madskills)
5267             PL_faketokens = 0;
5268 #endif
5269         if (PL_lex_state != LEX_NORMAL ||
5270              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5271             if (*s == '#' && s == PL_linestart && PL_in_eval
5272              && !PL_rsfp && !PL_parser->filtered) {
5273                 /* handle eval qq[#line 1 "foo"\n ...] */
5274                 CopLINE_dec(PL_curcop);
5275                 incline(s);
5276             }
5277             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5278                 s = SKIPSPACE0(s);
5279                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5280                     incline(s);
5281             }
5282             else {
5283                 const bool in_comment = *s == '#';
5284                 d = s;
5285                 while (d < PL_bufend && *d != '\n')
5286                     d++;
5287                 if (d < PL_bufend)
5288                     d++;
5289                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5290                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5291                                d, PL_bufend);
5292 #ifdef PERL_MAD
5293                 if (PL_madskills)
5294                     PL_thiswhite = newSVpvn(s, d - s);
5295 #endif
5296                 s = d;
5297                 if (in_comment && d == PL_bufend
5298                  && PL_lex_state == LEX_INTERPNORMAL
5299                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5300                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5301                 else incline(s);
5302             }
5303             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5304                 PL_lex_state = LEX_FORMLINE;
5305                 start_force(PL_curforce);
5306                 NEXTVAL_NEXTTOKE.ival = 0;
5307                 force_next(FORMRBRACK);
5308                 TOKEN(';');
5309             }
5310         }
5311         else {
5312 #ifdef PERL_MAD
5313             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5314                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5315                     PL_faketokens = 0;
5316                     s = SKIPSPACE0(s);
5317                     TOKEN(PEG); /* make sure any #! line is accessible */
5318                 }
5319                 s = SKIPSPACE0(s);
5320             }
5321             else {
5322 /*              if (PL_madskills && PL_lex_formbrack) { */
5323                     d = s;
5324                     while (d < PL_bufend && *d != '\n')
5325                         d++;
5326                     if (d < PL_bufend)
5327                         d++;
5328                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5329                       Perl_croak(aTHX_ "panic: input overflow");
5330                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5331                         if (!PL_thiswhite)
5332                             PL_thiswhite = newSVpvs("");
5333                         if (CopLINE(PL_curcop) == 1) {
5334                             sv_setpvs(PL_thiswhite, "");
5335                             PL_faketokens = 0;
5336                         }
5337                         sv_catpvn(PL_thiswhite, s, d - s);
5338                     }
5339                     s = d;
5340 /*              }
5341                 *s = '\0';
5342                 PL_bufend = s; */
5343             }
5344 #else
5345             *s = '\0';
5346             PL_bufend = s;
5347 #endif
5348         }
5349         goto retry;
5350     case '-':
5351         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5352             I32 ftst = 0;
5353             char tmp;
5354
5355             s++;
5356             PL_bufptr = s;
5357             tmp = *s++;
5358
5359             while (s < PL_bufend && SPACE_OR_TAB(*s))
5360                 s++;
5361
5362             if (strnEQ(s,"=>",2)) {
5363                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5364                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5365                 OPERATOR('-');          /* unary minus */
5366             }
5367             PL_last_uni = PL_oldbufptr;
5368             switch (tmp) {
5369             case 'r': ftst = OP_FTEREAD;        break;
5370             case 'w': ftst = OP_FTEWRITE;       break;
5371             case 'x': ftst = OP_FTEEXEC;        break;
5372             case 'o': ftst = OP_FTEOWNED;       break;
5373             case 'R': ftst = OP_FTRREAD;        break;
5374             case 'W': ftst = OP_FTRWRITE;       break;
5375             case 'X': ftst = OP_FTREXEC;        break;
5376             case 'O': ftst = OP_FTROWNED;       break;
5377             case 'e': ftst = OP_FTIS;           break;
5378             case 'z': ftst = OP_FTZERO;         break;
5379             case 's': ftst = OP_FTSIZE;         break;
5380             case 'f': ftst = OP_FTFILE;         break;
5381             case 'd': ftst = OP_FTDIR;          break;
5382             case 'l': ftst = OP_FTLINK;         break;
5383             case 'p': ftst = OP_FTPIPE;         break;
5384             case 'S': ftst = OP_FTSOCK;         break;
5385             case 'u': ftst = OP_FTSUID;         break;
5386             case 'g': ftst = OP_FTSGID;         break;
5387             case 'k': ftst = OP_FTSVTX;         break;
5388             case 'b': ftst = OP_FTBLK;          break;
5389             case 'c': ftst = OP_FTCHR;          break;
5390             case 't': ftst = OP_FTTTY;          break;
5391             case 'T': ftst = OP_FTTEXT;         break;
5392             case 'B': ftst = OP_FTBINARY;       break;
5393             case 'M': case 'A': case 'C':
5394                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5395                 switch (tmp) {
5396                 case 'M': ftst = OP_FTMTIME;    break;
5397                 case 'A': ftst = OP_FTATIME;    break;
5398                 case 'C': ftst = OP_FTCTIME;    break;
5399                 default:                        break;
5400                 }
5401                 break;
5402             default:
5403                 break;
5404             }
5405             if (ftst) {
5406                 PL_last_lop_op = (OPCODE)ftst;
5407                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5408                         "### Saw file test %c\n", (int)tmp);
5409                 } );
5410                 FTST(ftst);
5411             }
5412             else {
5413                 /* Assume it was a minus followed by a one-letter named
5414                  * subroutine call (or a -bareword), then. */
5415                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5416                         "### '-%c' looked like a file test but was not\n",
5417                         (int) tmp);
5418                 } );
5419                 s = --PL_bufptr;
5420             }
5421         }
5422         {
5423             const char tmp = *s++;
5424             if (*s == tmp) {
5425                 s++;
5426                 if (PL_expect == XOPERATOR)
5427                     TERM(POSTDEC);
5428                 else
5429                     OPERATOR(PREDEC);
5430             }
5431             else if (*s == '>') {
5432                 s++;
5433                 s = SKIPSPACE1(s);
5434                 if (isIDFIRST_lazy_if(s,UTF)) {
5435                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5436                     TOKEN(ARROW);
5437                 }
5438                 else if (*s == '$')
5439                     OPERATOR(ARROW);
5440                 else
5441                     TERM(ARROW);
5442             }
5443             if (PL_expect == XOPERATOR) {
5444                 if (*s == '=' && !PL_lex_allbrackets &&
5445                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5446                     s--;
5447                     TOKEN(0);
5448                 }
5449                 Aop(OP_SUBTRACT);
5450             }
5451             else {
5452                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5453                     check_uni();
5454                 OPERATOR('-');          /* unary minus */
5455             }
5456         }
5457
5458     case '+':
5459         {
5460             const char tmp = *s++;
5461             if (*s == tmp) {
5462                 s++;
5463                 if (PL_expect == XOPERATOR)
5464                     TERM(POSTINC);
5465                 else
5466                     OPERATOR(PREINC);
5467             }
5468             if (PL_expect == XOPERATOR) {
5469                 if (*s == '=' && !PL_lex_allbrackets &&
5470                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5471                     s--;
5472                     TOKEN(0);
5473                 }
5474                 Aop(OP_ADD);
5475             }
5476             else {
5477                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5478                     check_uni();
5479                 OPERATOR('+');
5480             }
5481         }
5482
5483     case '*':
5484         if (PL_expect != XOPERATOR) {
5485             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5486             PL_expect = XOPERATOR;
5487             force_ident(PL_tokenbuf, '*');
5488             if (!*PL_tokenbuf)
5489                 PREREF('*');
5490             TERM('*');
5491         }
5492         s++;
5493         if (*s == '*') {
5494             s++;
5495             if (*s == '=' && !PL_lex_allbrackets &&
5496                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5497                 s -= 2;
5498                 TOKEN(0);
5499             }
5500             PWop(OP_POW);
5501         }
5502         if (*s == '=' && !PL_lex_allbrackets &&
5503                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5504             s--;
5505             TOKEN(0);
5506         }
5507         Mop(OP_MULTIPLY);
5508
5509     case '%':
5510         if (PL_expect == XOPERATOR) {
5511             if (s[1] == '=' && !PL_lex_allbrackets &&
5512                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5513                 TOKEN(0);
5514             ++s;
5515             Mop(OP_MODULO);
5516         }
5517         PL_tokenbuf[0] = '%';
5518         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5519                 sizeof PL_tokenbuf - 1, FALSE);
5520         if (!PL_tokenbuf[1]) {
5521             PREREF('%');
5522         }
5523         PL_expect = XOPERATOR;
5524         force_ident_maybe_lex('%');
5525         TERM('%');
5526
5527     case '^':
5528         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5529                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5530             TOKEN(0);
5531         s++;
5532         BOop(OP_BIT_XOR);
5533     case '[':
5534         if (PL_lex_brackets > 100)
5535             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5536         PL_lex_brackstack[PL_lex_brackets++] = 0;
5537         PL_lex_allbrackets++;
5538         {
5539             const char tmp = *s++;
5540             OPERATOR(tmp);
5541         }
5542     case '~':
5543         if (s[1] == '~'
5544             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5545         {
5546             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5547                 TOKEN(0);
5548             s += 2;
5549             Eop(OP_SMARTMATCH);
5550         }
5551         s++;
5552         OPERATOR('~');
5553     case ',':
5554         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5555             TOKEN(0);
5556         s++;
5557         OPERATOR(',');
5558     case ':':
5559         if (s[1] == ':') {
5560             len = 0;
5561             goto just_a_word_zero_gv;
5562         }
5563         s++;
5564         switch (PL_expect) {
5565             OP *attrs;
5566 #ifdef PERL_MAD
5567             I32 stuffstart;
5568 #endif
5569         case XOPERATOR:
5570             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5571                 break;
5572             PL_bufptr = s;      /* update in case we back off */
5573             if (*s == '=') {
5574                 Perl_croak(aTHX_
5575                            "Use of := for an empty attribute list is not allowed");
5576             }
5577             goto grabattrs;
5578         case XATTRBLOCK:
5579             PL_expect = XBLOCK;
5580             goto grabattrs;
5581         case XATTRTERM:
5582             PL_expect = XTERMBLOCK;
5583          grabattrs:
5584 #ifdef PERL_MAD
5585             stuffstart = s - SvPVX(PL_linestr) - 1;
5586 #endif
5587             s = PEEKSPACE(s);
5588             attrs = NULL;
5589             while (isIDFIRST_lazy_if(s,UTF)) {
5590                 I32 tmp;
5591                 SV *sv;
5592                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5593                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5594                     if (tmp < 0) tmp = -tmp;
5595                     switch (tmp) {
5596                     case KEY_or:
5597                     case KEY_and:
5598                     case KEY_for:
5599                     case KEY_foreach:
5600                     case KEY_unless:
5601                     case KEY_if:
5602                     case KEY_while:
5603                     case KEY_until:
5604                         goto got_attrs;
5605                     default:
5606                         break;
5607                     }
5608                 }
5609                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5610                 if (*d == '(') {
5611                     d = scan_str(d,TRUE,TRUE,FALSE);
5612                     if (!d) {
5613                         /* MUST advance bufptr here to avoid bogus
5614                            "at end of line" context messages from yyerror().
5615                          */
5616                         PL_bufptr = s + len;
5617                         yyerror("Unterminated attribute parameter in attribute list");
5618                         if (attrs)
5619                             op_free(attrs);
5620                         sv_free(sv);
5621                         return REPORT(0);       /* EOF indicator */
5622                     }
5623                 }
5624                 if (PL_lex_stuff) {
5625                     sv_catsv(sv, PL_lex_stuff);
5626                     attrs = op_append_elem(OP_LIST, attrs,
5627                                         newSVOP(OP_CONST, 0, sv));
5628                     SvREFCNT_dec(PL_lex_stuff);
5629                     PL_lex_stuff = NULL;
5630                 }
5631                 else {
5632                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5633                         sv_free(sv);
5634                         if (PL_in_my == KEY_our) {
5635                             deprecate(":unique");
5636                         }
5637                         else
5638                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5639                     }
5640
5641                     /* NOTE: any CV attrs applied here need to be part of
5642                        the CVf_BUILTIN_ATTRS define in cv.h! */
5643                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5644                         sv_free(sv);
5645                         CvLVALUE_on(PL_compcv);
5646                     }
5647                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5648                         sv_free(sv);
5649                         deprecate(":locked");
5650                     }
5651                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5652                         sv_free(sv);
5653                         CvMETHOD_on(PL_compcv);
5654                     }
5655                     /* After we've set the flags, it could be argued that
5656                        we don't need to do the attributes.pm-based setting
5657                        process, and shouldn't bother appending recognized
5658                        flags.  To experiment with that, uncomment the
5659                        following "else".  (Note that's already been
5660                        uncommented.  That keeps the above-applied built-in
5661                        attributes from being intercepted (and possibly
5662                        rejected) by a package's attribute routines, but is
5663                        justified by the performance win for the common case
5664                        of applying only built-in attributes.) */
5665                     else
5666                         attrs = op_append_elem(OP_LIST, attrs,
5667                                             newSVOP(OP_CONST, 0,
5668                                                     sv));
5669                 }
5670                 s = PEEKSPACE(d);
5671                 if (*s == ':' && s[1] != ':')
5672                     s = PEEKSPACE(s+1);
5673                 else if (s == d)
5674                     break;      /* require real whitespace or :'s */
5675                 /* XXX losing whitespace on sequential attributes here */
5676             }
5677             {
5678                 const char tmp
5679                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5680                 if (*s != ';' && *s != '}' && *s != tmp
5681                     && (tmp != '=' || *s != ')')) {
5682                     const char q = ((*s == '\'') ? '"' : '\'');
5683                     /* If here for an expression, and parsed no attrs, back
5684                        off. */
5685                     if (tmp == '=' && !attrs) {
5686                         s = PL_bufptr;
5687                         break;
5688                     }
5689                     /* MUST advance bufptr here to avoid bogus "at end of line"
5690                        context messages from yyerror().
5691                     */
5692                     PL_bufptr = s;
5693                     yyerror( (const char *)
5694                              (*s
5695                               ? Perl_form(aTHX_ "Invalid separator character "
5696                                           "%c%c%c in attribute list", q, *s, q)
5697                               : "Unterminated attribute list" ) );
5698                     if (attrs)
5699                         op_free(attrs);
5700                     OPERATOR(':');
5701                 }
5702             }
5703         got_attrs:
5704             if (attrs) {
5705                 start_force(PL_curforce);
5706                 NEXTVAL_NEXTTOKE.opval = attrs;
5707                 CURMAD('_', PL_nextwhite);
5708                 force_next(THING);
5709             }
5710 #ifdef PERL_MAD
5711             if (PL_madskills) {
5712                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5713                                      (s - SvPVX(PL_linestr)) - stuffstart);
5714             }
5715 #endif
5716             TOKEN(COLONATTR);
5717         }
5718         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5719             s--;
5720             TOKEN(0);
5721         }
5722         PL_lex_allbrackets--;
5723         OPERATOR(':');
5724     case '(':
5725         s++;
5726         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5727             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5728         else
5729             PL_expect = XTERM;
5730         s = SKIPSPACE1(s);
5731         PL_lex_allbrackets++;
5732         TOKEN('(');
5733     case ';':
5734         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5735             TOKEN(0);
5736         CLINE;
5737         s++;
5738         OPERATOR(';');
5739     case ')':
5740         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5741             TOKEN(0);
5742         s++;
5743         PL_lex_allbrackets--;
5744         s = SKIPSPACE1(s);
5745         if (*s == '{')
5746             PREBLOCK(')');
5747         TERM(')');
5748     case ']':
5749         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5750             TOKEN(0);
5751         s++;
5752         if (PL_lex_brackets <= 0)
5753             yyerror("Unmatched right square bracket");
5754         else
5755             --PL_lex_brackets;
5756         PL_lex_allbrackets--;
5757         if (PL_lex_state == LEX_INTERPNORMAL) {
5758             if (PL_lex_brackets == 0) {
5759                 if (*s == '-' && s[1] == '>')
5760                     PL_lex_state = LEX_INTERPENDMAYBE;
5761                 else if (*s != '[' && *s != '{')
5762                     PL_lex_state = LEX_INTERPEND;
5763             }
5764         }
5765         TERM(']');
5766     case '{':
5767         s++;
5768       leftbracket:
5769         if (PL_lex_brackets > 100) {
5770             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5771         }
5772         switch (PL_expect) {
5773         case XTERM:
5774             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5775             PL_lex_allbrackets++;
5776             OPERATOR(HASHBRACK);
5777         case XOPERATOR:
5778             while (s < PL_bufend && SPACE_OR_TAB(*s))
5779                 s++;
5780             d = s;
5781             PL_tokenbuf[0] = '\0';
5782             if (d < PL_bufend && *d == '-') {
5783                 PL_tokenbuf[0] = '-';
5784                 d++;
5785                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5786                     d++;
5787             }
5788             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5789                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5790                               FALSE, &len);
5791                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5792                     d++;
5793                 if (*d == '}') {
5794                     const char minus = (PL_tokenbuf[0] == '-');
5795                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5796                     if (minus)
5797                         force_next('-');
5798                 }
5799             }
5800             /* FALL THROUGH */
5801         case XATTRBLOCK:
5802         case XBLOCK:
5803             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5804             PL_lex_allbrackets++;
5805             PL_expect = XSTATE;
5806             break;
5807         case XATTRTERM:
5808         case XTERMBLOCK:
5809             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5810             PL_lex_allbrackets++;
5811             PL_expect = XSTATE;
5812             break;
5813         default: {
5814                 const char *t;
5815                 if (PL_oldoldbufptr == PL_last_lop)
5816                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5817                 else
5818                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5819                 PL_lex_allbrackets++;
5820                 s = SKIPSPACE1(s);
5821                 if (*s == '}') {
5822                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5823                         PL_expect = XTERM;
5824                         /* This hack is to get the ${} in the message. */
5825                         PL_bufptr = s+1;
5826                         yyerror("syntax error");
5827                         break;
5828                     }
5829                     OPERATOR(HASHBRACK);
5830                 }
5831                 /* This hack serves to disambiguate a pair of curlies
5832                  * as being a block or an anon hash.  Normally, expectation
5833                  * determines that, but in cases where we're not in a
5834                  * position to expect anything in particular (like inside
5835                  * eval"") we have to resolve the ambiguity.  This code
5836                  * covers the case where the first term in the curlies is a
5837                  * quoted string.  Most other cases need to be explicitly
5838                  * disambiguated by prepending a "+" before the opening
5839                  * curly in order to force resolution as an anon hash.
5840                  *
5841                  * XXX should probably propagate the outer expectation
5842                  * into eval"" to rely less on this hack, but that could
5843                  * potentially break current behavior of eval"".
5844                  * GSAR 97-07-21
5845                  */
5846                 t = s;
5847                 if (*s == '\'' || *s == '"' || *s == '`') {
5848                     /* common case: get past first string, handling escapes */
5849                     for (t++; t < PL_bufend && *t != *s;)
5850                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5851                             t++;
5852                     t++;
5853                 }
5854                 else if (*s == 'q') {
5855                     if (++t < PL_bufend
5856                         && (!isALNUM(*t)
5857                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5858                                 && !isALNUM(*t))))
5859                     {
5860                         /* skip q//-like construct */
5861                         const char *tmps;
5862                         char open, close, term;
5863                         I32 brackets = 1;
5864
5865                         while (t < PL_bufend && isSPACE(*t))
5866                             t++;
5867                         /* check for q => */
5868                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5869                             OPERATOR(HASHBRACK);
5870                         }
5871                         term = *t;
5872                         open = term;
5873                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5874                             term = tmps[5];
5875                         close = term;
5876                         if (open == close)
5877                             for (t++; t < PL_bufend; t++) {
5878                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5879                                     t++;
5880                                 else if (*t == open)
5881                                     break;
5882                             }
5883                         else {
5884                             for (t++; t < PL_bufend; t++) {
5885                                 if (*t == '\\' && t+1 < PL_bufend)
5886                                     t++;
5887                                 else if (*t == close && --brackets <= 0)
5888                                     break;
5889                                 else if (*t == open)
5890                                     brackets++;
5891                             }
5892                         }
5893                         t++;
5894                     }
5895                     else
5896                         /* skip plain q word */
5897                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5898                              t += UTF8SKIP(t);
5899                 }
5900                 else if (isALNUM_lazy_if(t,UTF)) {
5901                     t += UTF8SKIP(t);
5902                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5903                          t += UTF8SKIP(t);
5904                 }
5905                 while (t < PL_bufend && isSPACE(*t))
5906                     t++;
5907                 /* if comma follows first term, call it an anon hash */
5908                 /* XXX it could be a comma expression with loop modifiers */
5909                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5910                                    || (*t == '=' && t[1] == '>')))
5911                     OPERATOR(HASHBRACK);
5912                 if (PL_expect == XREF)
5913                     PL_expect = XTERM;
5914                 else {
5915                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5916                     PL_expect = XSTATE;
5917                 }
5918             }
5919             break;
5920         }
5921         pl_yylval.ival = CopLINE(PL_curcop);
5922         if (isSPACE(*s) || *s == '#')
5923             PL_copline = NOLINE;   /* invalidate current command line number */
5924         TOKEN(formbrack ? '=' : '{');
5925     case '}':
5926         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5927             TOKEN(0);
5928       rightbracket:
5929         s++;
5930         if (PL_lex_brackets <= 0)
5931             yyerror("Unmatched right curly bracket");
5932         else
5933             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5934         PL_lex_allbrackets--;
5935         if (PL_lex_state == LEX_INTERPNORMAL) {
5936             if (PL_lex_brackets == 0) {
5937                 if (PL_expect & XFAKEBRACK) {
5938                     PL_expect &= XENUMMASK;
5939                     PL_lex_state = LEX_INTERPEND;
5940                     PL_bufptr = s;
5941 #if 0
5942                     if (PL_madskills) {
5943                         if (!PL_thiswhite)
5944                             PL_thiswhite = newSVpvs("");
5945                         sv_catpvs(PL_thiswhite,"}");
5946                     }
5947 #endif
5948                     return yylex();     /* ignore fake brackets */
5949                 }
5950                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5951                  && SvEVALED(PL_lex_repl))
5952                     PL_lex_state = LEX_INTERPEND;
5953                 else if (*s == '-' && s[1] == '>')
5954                     PL_lex_state = LEX_INTERPENDMAYBE;
5955                 else if (*s != '[' && *s != '{')
5956                     PL_lex_state = LEX_INTERPEND;
5957             }
5958         }
5959         if (PL_expect & XFAKEBRACK) {
5960             PL_expect &= XENUMMASK;
5961             PL_bufptr = s;
5962             return yylex();             /* ignore fake brackets */
5963         }
5964         start_force(PL_curforce);
5965         if (PL_madskills) {
5966             curmad('X', newSVpvn(s-1,1));
5967             CURMAD('_', PL_thiswhite);
5968         }
5969         force_next(formbrack ? '.' : '}');
5970         if (formbrack) LEAVE;
5971 #ifdef PERL_MAD
5972         if (!PL_thistoken)
5973             PL_thistoken = newSVpvs("");
5974 #endif
5975         if (formbrack == 2) { /* means . where arguments were expected */
5976             start_force(PL_curforce);
5977             force_next(';');
5978             TOKEN(FORMRBRACK);
5979         }
5980         TOKEN(';');
5981     case '&':
5982         s++;
5983         if (*s++ == '&') {
5984             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5985                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5986                 s -= 2;
5987                 TOKEN(0);
5988             }
5989             AOPERATOR(ANDAND);
5990         }
5991         s--;
5992         if (PL_expect == XOPERATOR) {
5993             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5994                 && isIDFIRST_lazy_if(s,UTF))
5995             {
5996                 CopLINE_dec(PL_curcop);
5997                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5998                 CopLINE_inc(PL_curcop);
5999             }
6000             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6001                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6002                 s--;
6003                 TOKEN(0);
6004             }
6005             BAop(OP_BIT_AND);
6006         }
6007
6008         PL_tokenbuf[0] = '&';
6009         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6010                        sizeof PL_tokenbuf - 1, TRUE);
6011         if (PL_tokenbuf[1]) {
6012             PL_expect = XOPERATOR;
6013             force_ident_maybe_lex('&');
6014         }
6015         else
6016             PREREF('&');
6017         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6018         TERM('&');
6019
6020     case '|':
6021         s++;
6022         if (*s++ == '|') {
6023             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6024                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6025                 s -= 2;
6026                 TOKEN(0);
6027             }
6028             AOPERATOR(OROR);
6029         }
6030         s--;
6031         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6032                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6033             s--;
6034             TOKEN(0);
6035         }
6036         BOop(OP_BIT_OR);
6037     case '=':
6038         s++;
6039         {
6040             const char tmp = *s++;
6041             if (tmp == '=') {
6042                 if (!PL_lex_allbrackets &&
6043                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6044                     s -= 2;
6045                     TOKEN(0);
6046                 }
6047                 Eop(OP_EQ);
6048             }
6049             if (tmp == '>') {
6050                 if (!PL_lex_allbrackets &&
6051                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6052                     s -= 2;
6053                     TOKEN(0);
6054                 }
6055                 OPERATOR(',');
6056             }
6057             if (tmp == '~')
6058                 PMop(OP_MATCH);
6059             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6060                 && strchr("+-*/%.^&|<",tmp))
6061                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6062                             "Reversed %c= operator",(int)tmp);
6063             s--;
6064             if (PL_expect == XSTATE && isALPHA(tmp) &&
6065                 (s == PL_linestart+1 || s[-2] == '\n') )
6066                 {
6067                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6068                         || PL_lex_state != LEX_NORMAL) {
6069                         d = PL_bufend;
6070                         while (s < d) {
6071                             if (*s++ == '\n') {
6072                                 incline(s);
6073                                 if (strnEQ(s,"=cut",4)) {
6074                                     s = strchr(s,'\n');
6075                                     if (s)
6076                                         s++;
6077                                     else
6078                                         s = d;
6079                                     incline(s);
6080                                     goto retry;
6081                                 }
6082                             }
6083                         }
6084                         goto retry;
6085                     }
6086 #ifdef PERL_MAD
6087                     if (PL_madskills) {
6088                         if (!PL_thiswhite)
6089                             PL_thiswhite = newSVpvs("");
6090                         sv_catpvn(PL_thiswhite, PL_linestart,
6091                                   PL_bufend - PL_linestart);
6092                     }
6093 #endif
6094                     s = PL_bufend;
6095                     PL_parser->in_pod = 1;
6096                     goto retry;
6097                 }
6098         }
6099         if (PL_expect == XBLOCK) {
6100             const char *t = s;
6101 #ifdef PERL_STRICT_CR
6102             while (SPACE_OR_TAB(*t))
6103 #else
6104             while (SPACE_OR_TAB(*t) || *t == '\r')
6105 #endif
6106                 t++;
6107             if (*t == '\n' || *t == '#') {
6108                 formbrack = 1;
6109                 ENTER;
6110                 SAVEI8(PL_parser->form_lex_state);
6111                 SAVEI32(PL_lex_formbrack);
6112                 PL_parser->form_lex_state = PL_lex_state;
6113                 PL_lex_formbrack = PL_lex_brackets + 1;
6114                 goto leftbracket;
6115             }
6116         }
6117         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6118             s--;
6119             TOKEN(0);
6120         }
6121         pl_yylval.ival = 0;
6122         OPERATOR(ASSIGNOP);
6123     case '!':
6124         s++;
6125         {
6126             const char tmp = *s++;
6127             if (tmp == '=') {
6128                 /* was this !=~ where !~ was meant?
6129                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6130
6131                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6132                     const char *t = s+1;
6133
6134                     while (t < PL_bufend && isSPACE(*t))
6135                         ++t;
6136
6137                     if (*t == '/' || *t == '?' ||
6138                         ((*t == 'm' || *t == 's' || *t == 'y')
6139                          && !isALNUM(t[1])) ||
6140                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6141                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6142                                     "!=~ should be !~");
6143                 }
6144                 if (!PL_lex_allbrackets &&
6145                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6146                     s -= 2;
6147                     TOKEN(0);
6148                 }
6149                 Eop(OP_NE);
6150             }
6151             if (tmp == '~')
6152                 PMop(OP_NOT);
6153         }
6154         s--;
6155         OPERATOR('!');
6156     case '<':
6157         if (PL_expect != XOPERATOR) {
6158             if (s[1] != '<' && !strchr(s,'>'))
6159                 check_uni();
6160             if (s[1] == '<')
6161                 s = scan_heredoc(s);
6162             else
6163                 s = scan_inputsymbol(s);
6164             PL_expect = XOPERATOR;
6165             TOKEN(sublex_start());
6166         }
6167         s++;
6168         {
6169             char tmp = *s++;
6170             if (tmp == '<') {
6171                 if (*s == '=' && !PL_lex_allbrackets &&
6172                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6173                     s -= 2;
6174                     TOKEN(0);
6175                 }
6176                 SHop(OP_LEFT_SHIFT);
6177             }
6178             if (tmp == '=') {
6179                 tmp = *s++;
6180                 if (tmp == '>') {
6181                     if (!PL_lex_allbrackets &&
6182                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6183                         s -= 3;
6184                         TOKEN(0);
6185                     }
6186                     Eop(OP_NCMP);
6187                 }
6188                 s--;
6189                 if (!PL_lex_allbrackets &&
6190                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6191                     s -= 2;
6192                     TOKEN(0);
6193                 }
6194                 Rop(OP_LE);
6195             }
6196         }
6197         s--;
6198         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6199             s--;
6200             TOKEN(0);
6201         }
6202         Rop(OP_LT);
6203     case '>':
6204         s++;
6205         {
6206             const char tmp = *s++;
6207             if (tmp == '>') {
6208                 if (*s == '=' && !PL_lex_allbrackets &&
6209                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6210                     s -= 2;
6211                     TOKEN(0);
6212                 }
6213                 SHop(OP_RIGHT_SHIFT);
6214             }
6215             else if (tmp == '=') {
6216                 if (!PL_lex_allbrackets &&
6217                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6218                     s -= 2;
6219                     TOKEN(0);
6220                 }
6221                 Rop(OP_GE);
6222             }
6223         }
6224         s--;
6225         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6226             s--;
6227             TOKEN(0);
6228         }
6229         Rop(OP_GT);
6230
6231     case '$':
6232         CLINE;
6233
6234         if (PL_expect == XOPERATOR) {
6235             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6236                 return deprecate_commaless_var_list();
6237             }
6238         }
6239
6240         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6241             PL_tokenbuf[0] = '@';
6242             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6243                            sizeof PL_tokenbuf - 1, FALSE);
6244             if (PL_expect == XOPERATOR)
6245                 no_op("Array length", s);
6246             if (!PL_tokenbuf[1])
6247                 PREREF(DOLSHARP);
6248             PL_expect = XOPERATOR;
6249             force_ident_maybe_lex('#');
6250             TOKEN(DOLSHARP);
6251         }
6252
6253         PL_tokenbuf[0] = '$';
6254         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6255                        sizeof PL_tokenbuf - 1, FALSE);
6256         if (PL_expect == XOPERATOR)
6257             no_op("Scalar", s);
6258         if (!PL_tokenbuf[1]) {
6259             if (s == PL_bufend)
6260                 yyerror("Final $ should be \\$ or $name");
6261             PREREF('$');
6262         }
6263
6264         d = s;
6265         {
6266             const char tmp = *s;
6267             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6268                 s = SKIPSPACE1(s);
6269
6270             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6271                 && intuit_more(s)) {
6272                 if (*s == '[') {
6273                     PL_tokenbuf[0] = '@';
6274                     if (ckWARN(WARN_SYNTAX)) {
6275                         char *t = s+1;
6276
6277                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6278                             t++;
6279                         if (*t++ == ',') {
6280                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6281                             while (t < PL_bufend && *t != ']')
6282                                 t++;
6283                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6284                                         "Multidimensional syntax %.*s not supported",
6285                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6286                         }
6287                     }
6288                 }
6289                 else if (*s == '{') {
6290                     char *t;
6291                     PL_tokenbuf[0] = '%';
6292                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6293                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6294                         {
6295                             char tmpbuf[sizeof PL_tokenbuf];
6296                             do {
6297                                 t++;
6298                             } while (isSPACE(*t));
6299                             if (isIDFIRST_lazy_if(t,UTF)) {
6300                                 STRLEN len;
6301                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6302                                               &len);
6303                                 while (isSPACE(*t))
6304                                     t++;
6305                                 if (*t == ';'
6306                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6307                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6308                                                 "You need to quote \"%"SVf"\"",
6309                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6310                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6311                             }
6312                         }
6313                 }
6314             }
6315
6316             PL_expect = XOPERATOR;
6317             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6318                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6319                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6320                     PL_expect = XOPERATOR;
6321                 else if (strchr("$@\"'`q", *s))
6322                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6323                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6324                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6325                 else if (isIDFIRST_lazy_if(s,UTF)) {
6326                     char tmpbuf[sizeof PL_tokenbuf];
6327                     int t2;
6328                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6329                     if ((t2 = keyword(tmpbuf, len, 0))) {
6330                         /* binary operators exclude handle interpretations */
6331                         switch (t2) {
6332                         case -KEY_x:
6333                         case -KEY_eq:
6334                         case -KEY_ne:
6335                         case -KEY_gt:
6336                         case -KEY_lt:
6337                         case -KEY_ge:
6338                         case -KEY_le:
6339                         case -KEY_cmp:
6340                             break;
6341                         default:
6342                             PL_expect = XTERM;  /* e.g. print $fh length() */
6343                             break;
6344                         }
6345                     }
6346                     else {
6347                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6348                     }
6349                 }
6350                 else if (isDIGIT(*s))
6351                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6352                 else if (*s == '.' && isDIGIT(s[1]))
6353                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6354                 else if ((*s == '?' || *s == '-' || *s == '+')
6355                          && !isSPACE(s[1]) && s[1] != '=')
6356                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6357                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6358                          && s[1] != '/')
6359                     PL_expect = XTERM;          /* e.g. print $fh /.../
6360                                                    XXX except DORDOR operator
6361                                                 */
6362                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6363                          && s[2] != '=')
6364                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6365             }
6366         }
6367         force_ident_maybe_lex('$');
6368         TOKEN('$');
6369
6370     case '@':
6371         if (PL_expect == XOPERATOR)
6372             no_op("Array", s);
6373         PL_tokenbuf[0] = '@';
6374         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6375         if (!PL_tokenbuf[1]) {
6376             PREREF('@');
6377         }
6378         if (PL_lex_state == LEX_NORMAL)
6379             s = SKIPSPACE1(s);
6380         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6381             if (*s == '{')
6382                 PL_tokenbuf[0] = '%';
6383
6384             /* Warn about @ where they meant $. */
6385             if (*s == '[' || *s == '{') {
6386                 if (ckWARN(WARN_SYNTAX)) {
6387                     const char *t = s + 1;
6388                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6389                         t += UTF ? UTF8SKIP(t) : 1;
6390                     if (*t == '}' || *t == ']') {
6391                         t++;
6392                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6393        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6394                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6395                             "Scalar value %"SVf" better written as $%"SVf,
6396                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6397                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6398                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6399                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6400                     }
6401                 }
6402             }
6403         }
6404         PL_expect = XOPERATOR;
6405         force_ident_maybe_lex('@');
6406         TERM('@');
6407
6408      case '/':                  /* may be division, defined-or, or pattern */
6409         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6410             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6411                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6412                 TOKEN(0);
6413             s += 2;
6414             AOPERATOR(DORDOR);
6415         }
6416      case '?':                  /* may either be conditional or pattern */
6417         if (PL_expect == XOPERATOR) {
6418              char tmp = *s++;
6419              if(tmp == '?') {
6420                 if (!PL_lex_allbrackets &&
6421                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6422                     s--;
6423                     TOKEN(0);
6424                 }
6425                 PL_lex_allbrackets++;
6426                 OPERATOR('?');
6427              }
6428              else {
6429                  tmp = *s++;
6430                  if(tmp == '/') {
6431                      /* A // operator. */
6432                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6433                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6434                                             LEX_FAKEEOF_LOGIC)) {
6435                         s -= 2;
6436                         TOKEN(0);
6437                     }
6438                     AOPERATOR(DORDOR);
6439                  }
6440                  else {
6441                      s--;
6442                      if (*s == '=' && !PL_lex_allbrackets &&
6443                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6444                          s--;
6445                          TOKEN(0);
6446                      }
6447                      Mop(OP_DIVIDE);
6448                  }
6449              }
6450          }
6451          else {
6452              /* Disable warning on "study /blah/" */
6453              if (PL_oldoldbufptr == PL_last_uni
6454               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6455                   || memNE(PL_last_uni, "study", 5)
6456                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6457               ))
6458                  check_uni();
6459              if (*s == '?')
6460                  deprecate("?PATTERN? without explicit operator");
6461              s = scan_pat(s,OP_MATCH);
6462              TERM(sublex_start());
6463          }
6464
6465     case '.':
6466         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6467 #ifdef PERL_STRICT_CR
6468             && s[1] == '\n'
6469 #else
6470             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6471 #endif
6472             && (s == PL_linestart || s[-1] == '\n') )
6473         {
6474             PL_expect = XSTATE;
6475             formbrack = 2; /* dot seen where arguments expected */
6476             goto rightbracket;
6477         }
6478         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6479             s += 3;
6480             OPERATOR(YADAYADA);
6481         }
6482         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6483             char tmp = *s++;
6484             if (*s == tmp) {
6485                 if (!PL_lex_allbrackets &&
6486                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6487                     s--;
6488                     TOKEN(0);
6489                 }
6490                 s++;
6491                 if (*s == tmp) {
6492                     s++;
6493                     pl_yylval.ival = OPf_SPECIAL;
6494                 }
6495                 else
6496                     pl_yylval.ival = 0;
6497                 OPERATOR(DOTDOT);
6498             }
6499             if (*s == '=' && !PL_lex_allbrackets &&
6500                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6501                 s--;
6502                 TOKEN(0);
6503             }
6504             Aop(OP_CONCAT);
6505         }
6506         /* FALL THROUGH */
6507     case '0': case '1': case '2': case '3': case '4':
6508     case '5': case '6': case '7': case '8': case '9':
6509         s = scan_num(s, &pl_yylval);
6510         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6511         if (PL_expect == XOPERATOR)
6512             no_op("Number",s);
6513         TERM(THING);
6514
6515     case '\'':
6516         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6517         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6518         if (PL_expect == XOPERATOR) {
6519             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6520                 return deprecate_commaless_var_list();
6521             }
6522             else
6523                 no_op("String",s);
6524         }
6525         if (!s)
6526             missingterm(NULL);
6527         pl_yylval.ival = OP_CONST;
6528         TERM(sublex_start());
6529
6530     case '"':
6531         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6532         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6533         if (PL_expect == XOPERATOR) {
6534             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6535                 return deprecate_commaless_var_list();
6536             }
6537             else
6538                 no_op("String",s);
6539         }
6540         if (!s)
6541             missingterm(NULL);
6542         pl_yylval.ival = OP_CONST;
6543         /* FIXME. I think that this can be const if char *d is replaced by
6544            more localised variables.  */
6545         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6546             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6547                 pl_yylval.ival = OP_STRINGIFY;
6548                 break;
6549             }
6550         }
6551         TERM(sublex_start());
6552
6553     case '`':
6554         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6555         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6556         if (PL_expect == XOPERATOR)
6557             no_op("Backticks",s);
6558         if (!s)
6559             missingterm(NULL);
6560         readpipe_override();
6561         TERM(sublex_start());
6562
6563     case '\\':
6564         s++;
6565         if (PL_lex_inwhat && isDIGIT(*s))
6566             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6567                            *s, *s);
6568         if (PL_expect == XOPERATOR)
6569             no_op("Backslash",s);
6570         OPERATOR(REFGEN);
6571
6572     case 'v':
6573         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6574             char *start = s + 2;
6575             while (isDIGIT(*start) || *start == '_')
6576                 start++;
6577             if (*start == '.' && isDIGIT(start[1])) {
6578                 s = scan_num(s, &pl_yylval);
6579                 TERM(THING);
6580             }
6581             else if ((*start == ':' && start[1] == ':')
6582                   || (PL_expect == XSTATE && *start == ':'))
6583                 goto keylookup;
6584             else if (PL_expect == XSTATE) {
6585                 d = start;
6586                 while (d < PL_bufend && isSPACE(*d)) d++;
6587                 if (*d == ':') goto keylookup;
6588             }
6589             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6590             if (!isALPHA(*start) && (PL_expect == XTERM
6591                         || PL_expect == XREF || PL_expect == XSTATE
6592                         || PL_expect == XTERMORDORDOR)) {
6593                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6594                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6595                 if (!gv) {
6596                     s = scan_num(s, &pl_yylval);
6597                     TERM(THING);
6598                 }
6599             }
6600         }
6601         goto keylookup;
6602     case 'x':
6603         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6604             s++;
6605             Mop(OP_REPEAT);
6606         }
6607         goto keylookup;
6608
6609     case '_':
6610     case 'a': case 'A':
6611     case 'b': case 'B':
6612     case 'c': case 'C':
6613     case 'd': case 'D':
6614     case 'e': case 'E':
6615     case 'f': case 'F':
6616     case 'g': case 'G':
6617     case 'h': case 'H':
6618     case 'i': case 'I':
6619     case 'j': case 'J':
6620     case 'k': case 'K':
6621     case 'l': case 'L':
6622     case 'm': case 'M':
6623     case 'n': case 'N':
6624     case 'o': case 'O':
6625     case 'p': case 'P':
6626     case 'q': case 'Q':
6627     case 'r': case 'R':
6628     case 's': case 'S':
6629     case 't': case 'T':
6630     case 'u': case 'U':
6631               case 'V':
6632     case 'w': case 'W':
6633               case 'X':
6634     case 'y': case 'Y':
6635     case 'z': case 'Z':
6636
6637       keylookup: {
6638         bool anydelim;
6639         bool lex;
6640         I32 tmp;
6641         SV *sv;
6642         CV *cv;
6643         PADOFFSET off;
6644         OP *rv2cv_op;
6645
6646         lex = FALSE;
6647         orig_keyword = 0;
6648         off = 0;
6649         sv = NULL;
6650         cv = NULL;
6651         gv = NULL;
6652         gvp = NULL;
6653         rv2cv_op = NULL;
6654
6655         PL_bufptr = s;
6656         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6657
6658         /* Some keywords can be followed by any delimiter, including ':' */
6659         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6660
6661         /* x::* is just a word, unless x is "CORE" */
6662         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6663             goto just_a_word;
6664
6665         d = s;
6666         while (d < PL_bufend && isSPACE(*d))
6667                 d++;    /* no comments skipped here, or s### is misparsed */
6668
6669         /* Is this a word before a => operator? */
6670         if (*d == '=' && d[1] == '>') {
6671             CLINE;
6672             pl_yylval.opval
6673                 = (OP*)newSVOP(OP_CONST, 0,
6674                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6675             pl_yylval.opval->op_private = OPpCONST_BARE;
6676             TERM(WORD);
6677         }
6678
6679         /* Check for plugged-in keyword */
6680         {
6681             OP *o;
6682             int result;
6683             char *saved_bufptr = PL_bufptr;
6684             PL_bufptr = s;
6685             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6686             s = PL_bufptr;
6687             if (result == KEYWORD_PLUGIN_DECLINE) {
6688                 /* not a plugged-in keyword */
6689                 PL_bufptr = saved_bufptr;
6690             } else if (result == KEYWORD_PLUGIN_STMT) {
6691                 pl_yylval.opval = o;
6692                 CLINE;
6693                 PL_expect = XSTATE;
6694                 return REPORT(PLUGSTMT);
6695             } else if (result == KEYWORD_PLUGIN_EXPR) {
6696                 pl_yylval.opval = o;
6697                 CLINE;
6698                 PL_expect = XOPERATOR;
6699                 return REPORT(PLUGEXPR);
6700             } else {
6701                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6702                                         PL_tokenbuf);
6703             }
6704         }
6705
6706         /* Check for built-in keyword */
6707         tmp = keyword(PL_tokenbuf, len, 0);
6708
6709         /* Is this a label? */
6710         if (!anydelim && PL_expect == XSTATE
6711               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6712             s = d + 1;
6713             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6714                                             newSVpvn_flags(PL_tokenbuf,
6715                                                         len, UTF ? SVf_UTF8 : 0));
6716             CLINE;
6717             TOKEN(LABEL);
6718         }
6719
6720         /* Check for lexical sub */
6721         if (PL_expect != XOPERATOR) {
6722             char tmpbuf[sizeof PL_tokenbuf + 1];
6723             *tmpbuf = '&';
6724             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6725             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6726             if (off != NOT_IN_PAD) {
6727                 assert(off); /* we assume this is boolean-true below */
6728                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6729                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6730                     HEK * const stashname = HvNAME_HEK(stash);
6731                     sv = newSVhek(stashname);
6732                     sv_catpvs(sv, "::");
6733                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6734                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6735                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6736                                     SVt_PVCV);
6737                     off = 0;
6738                 }
6739                 else {
6740                     rv2cv_op = newOP(OP_PADANY, 0);
6741                     rv2cv_op->op_targ = off;
6742                     rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6743                     cv = (CV *)PAD_SV(off);
6744                 }
6745                 lex = TRUE;
6746                 goto just_a_word;
6747             }
6748             off = 0;
6749         }
6750
6751         if (tmp < 0) {                  /* second-class keyword? */
6752             GV *ogv = NULL;     /* override (winner) */
6753             GV *hgv = NULL;     /* hidden (loser) */
6754             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6755                 CV *cv;
6756                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6757                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6758                     (cv = GvCVu(gv)))
6759                 {
6760                     if (GvIMPORTED_CV(gv))
6761                         ogv = gv;
6762                     else if (! CvMETHOD(cv))
6763                         hgv = gv;
6764                 }
6765                 if (!ogv &&
6766                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6767                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6768                     (gv = *gvp) && isGV_with_GP(gv) &&
6769                     GvCVu(gv) && GvIMPORTED_CV(gv))
6770                 {
6771                     ogv = gv;
6772                 }
6773             }
6774             if (ogv) {
6775                 orig_keyword = tmp;
6776                 tmp = 0;                /* overridden by import or by GLOBAL */
6777             }
6778             else if (gv && !gvp
6779                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6780                      && GvCVu(gv))
6781             {
6782                 tmp = 0;                /* any sub overrides "weak" keyword */
6783             }
6784             else {                      /* no override */
6785                 tmp = -tmp;
6786                 if (tmp == KEY_dump) {
6787                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6788                                    "dump() better written as CORE::dump()");
6789                 }
6790                 gv = NULL;
6791                 gvp = 0;
6792                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6793                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6794                                    "Ambiguous call resolved as CORE::%s(), "
6795                                    "qualify as such or use &",
6796                                    GvENAME(hgv));
6797             }
6798         }
6799
6800       reserved_word:
6801         switch (tmp) {
6802
6803         default:                        /* not a keyword */
6804             /* Trade off - by using this evil construction we can pull the
6805                variable gv into the block labelled keylookup. If not, then
6806                we have to give it function scope so that the goto from the
6807                earlier ':' case doesn't bypass the initialisation.  */
6808             if (0) {
6809             just_a_word_zero_gv:
6810                 sv = NULL;
6811                 cv = NULL;
6812                 gv = NULL;
6813                 gvp = NULL;
6814                 rv2cv_op = NULL;
6815                 orig_keyword = 0;
6816                 lex = 0;
6817                 off = 0;
6818             }
6819           just_a_word: {
6820                 int pkgname = 0;
6821                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6822                 const char penultchar =
6823                     lastchar && PL_bufptr - 2 >= PL_linestart
6824                          ? PL_bufptr[-2]
6825                          : 0;
6826 #ifdef PERL_MAD
6827                 SV *nextPL_nextwhite = 0;
6828 #endif
6829
6830
6831                 /* Get the rest if it looks like a package qualifier */
6832
6833                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6834                     STRLEN morelen;
6835                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6836                                   TRUE, &morelen);
6837                     if (!morelen)
6838                         Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6839                                         SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6840                                             (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6841                                 *s == '\'' ? "'" : "::");
6842                     len += morelen;
6843                     pkgname = 1;
6844                 }
6845
6846                 if (PL_expect == XOPERATOR) {
6847                     if (PL_bufptr == PL_linestart) {
6848                         CopLINE_dec(PL_curcop);
6849                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6850                         CopLINE_inc(PL_curcop);
6851                     }
6852                     else
6853                         no_op("Bareword",s);
6854                 }
6855
6856                 /* Look for a subroutine with this name in current package,
6857                    unless this is a lexical sub, or name is "Foo::",
6858                    in which case Foo is a bareword
6859                    (and a package name). */
6860
6861                 if (len > 2 && !PL_madskills &&
6862                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6863                 {
6864                     if (ckWARN(WARN_BAREWORD)
6865                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6866                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6867                             "Bareword \"%"SVf"\" refers to nonexistent package",
6868                              SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6869                                         (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6870                     len -= 2;
6871                     PL_tokenbuf[len] = '\0';
6872                     gv = NULL;
6873                     gvp = 0;
6874                 }
6875                 else {
6876                     if (!lex && !gv) {
6877                         /* Mustn't actually add anything to a symbol table.
6878                            But also don't want to "initialise" any placeholder
6879                            constants that might already be there into full
6880                            blown PVGVs with attached PVCV.  */
6881                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6882                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6883                                                SVt_PVCV);
6884                     }
6885                     len = 0;
6886                 }
6887
6888                 /* if we saw a global override before, get the right name */
6889
6890                 if (!sv)
6891                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6892                     len ? len : strlen(PL_tokenbuf));
6893                 if (gvp) {
6894                     SV * const tmp_sv = sv;
6895                     sv = newSVpvs("CORE::GLOBAL::");
6896                     sv_catsv(sv, tmp_sv);
6897                     SvREFCNT_dec(tmp_sv);
6898                 }
6899
6900 #ifdef PERL_MAD
6901                 if (PL_madskills && !PL_thistoken) {
6902                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6903                     PL_thistoken = newSVpvn(start,s - start);
6904                     PL_realtokenstart = s - SvPVX(PL_linestr);
6905                 }
6906 #endif
6907
6908                 /* Presume this is going to be a bareword of some sort. */
6909                 CLINE;
6910                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6911                 pl_yylval.opval->op_private = OPpCONST_BARE;
6912
6913                 /* And if "Foo::", then that's what it certainly is. */
6914                 if (len)
6915                     goto safe_bareword;
6916
6917                 if (!off)
6918                 {
6919                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6920                     const_op->op_private = OPpCONST_BARE;
6921                     rv2cv_op = newCVREF(0, const_op);
6922                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6923                 }
6924
6925                 /* See if it's the indirect object for a list operator. */
6926
6927                 if (PL_oldoldbufptr &&
6928                     PL_oldoldbufptr < PL_bufptr &&
6929                     (PL_oldoldbufptr == PL_last_lop
6930                      || PL_oldoldbufptr == PL_last_uni) &&
6931                     /* NO SKIPSPACE BEFORE HERE! */
6932                     (PL_expect == XREF ||
6933                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6934                 {
6935                     bool immediate_paren = *s == '(';
6936
6937                     /* (Now we can afford to cross potential line boundary.) */
6938                     s = SKIPSPACE2(s,nextPL_nextwhite);
6939 #ifdef PERL_MAD
6940                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6941 #endif
6942
6943                     /* Two barewords in a row may indicate method call. */
6944
6945                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6946                         (tmp = intuit_method(s, gv, cv))) {
6947                         op_free(rv2cv_op);
6948                         if (tmp == METHOD && !PL_lex_allbrackets &&
6949                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6950                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6951                         return REPORT(tmp);
6952                     }
6953
6954                     /* If not a declared subroutine, it's an indirect object. */
6955                     /* (But it's an indir obj regardless for sort.) */
6956                     /* Also, if "_" follows a filetest operator, it's a bareword */
6957
6958                     if (
6959                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6960                          (!cv &&
6961                         (PL_last_lop_op != OP_MAPSTART &&
6962                          PL_last_lop_op != OP_GREPSTART))))
6963                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6964                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6965                        )
6966                     {
6967                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6968                         goto bareword;
6969                     }
6970                 }
6971
6972                 PL_expect = XOPERATOR;
6973 #ifdef PERL_MAD
6974                 if (isSPACE(*s))
6975                     s = SKIPSPACE2(s,nextPL_nextwhite);
6976                 PL_nextwhite = nextPL_nextwhite;
6977 #else
6978                 s = skipspace(s);
6979 #endif
6980
6981                 /* Is this a word before a => operator? */
6982                 if (*s == '=' && s[1] == '>' && !pkgname) {
6983                     op_free(rv2cv_op);
6984                     CLINE;
6985                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6986                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6987                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6988                     TERM(WORD);
6989                 }
6990
6991                 /* If followed by a paren, it's certainly a subroutine. */
6992                 if (*s == '(') {
6993                     CLINE;
6994                     if (cv) {
6995                         d = s + 1;
6996                         while (SPACE_OR_TAB(*d))
6997                             d++;
6998                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6999                             s = d + 1;
7000                             goto its_constant;
7001                         }
7002                     }
7003 #ifdef PERL_MAD
7004                     if (PL_madskills) {
7005                         PL_nextwhite = PL_thiswhite;
7006                         PL_thiswhite = 0;
7007                     }
7008                     start_force(PL_curforce);
7009 #endif
7010                     NEXTVAL_NEXTTOKE.opval =
7011                         off ? rv2cv_op : pl_yylval.opval;
7012                     PL_expect = XOPERATOR;
7013 #ifdef PERL_MAD
7014                     if (PL_madskills) {
7015                         PL_nextwhite = nextPL_nextwhite;
7016                         curmad('X', PL_thistoken);
7017                         PL_thistoken = newSVpvs("");
7018                     }
7019 #endif
7020                     if (off)
7021                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7022                     else op_free(rv2cv_op),        force_next(WORD);
7023                     pl_yylval.ival = 0;
7024                     TOKEN('&');
7025                 }
7026
7027                 /* If followed by var or block, call it a method (unless sub) */
7028
7029                 if ((*s == '$' || *s == '{') && !cv) {
7030                     op_free(rv2cv_op);
7031                     PL_last_lop = PL_oldbufptr;
7032                     PL_last_lop_op = OP_METHOD;
7033                     if (!PL_lex_allbrackets &&
7034                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7035                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7036                     PREBLOCK(METHOD);
7037                 }
7038
7039                 /* If followed by a bareword, see if it looks like indir obj. */
7040
7041                 if (!orig_keyword
7042                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7043                         && (tmp = intuit_method(s, gv, cv))) {
7044                     op_free(rv2cv_op);
7045                     if (tmp == METHOD && !PL_lex_allbrackets &&
7046                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7047                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7048                     return REPORT(tmp);
7049                 }
7050
7051                 /* Not a method, so call it a subroutine (if defined) */
7052
7053                 if (cv) {
7054                     if (lastchar == '-' && penultchar != '-') {
7055                         const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7056                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7057                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7058                                 SVfARG(tmpsv), SVfARG(tmpsv));
7059                     }
7060                     /* Check for a constant sub */
7061                     if ((sv = cv_const_sv(cv))) {
7062                   its_constant:
7063                         op_free(rv2cv_op);
7064                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7065                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7066                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
7067                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
7068                         TOKEN(WORD);
7069                     }
7070
7071                     op_free(pl_yylval.opval);
7072                     pl_yylval.opval = rv2cv_op;
7073                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7074                     PL_last_lop = PL_oldbufptr;
7075                     PL_last_lop_op = OP_ENTERSUB;
7076                     /* Is there a prototype? */
7077                     if (
7078 #ifdef PERL_MAD
7079                         cv &&
7080 #endif
7081                         SvPOK(cv))
7082                     {
7083                         STRLEN protolen = CvPROTOLEN(cv);
7084                         const char *proto = CvPROTO(cv);
7085                         bool optional;
7086                         if (!protolen)
7087                             TERM(FUNC0SUB);
7088                         if ((optional = *proto == ';'))
7089                           do
7090                             proto++;
7091                           while (*proto == ';');
7092                         if (
7093                             (
7094                                 (
7095                                     *proto == '$' || *proto == '_'
7096                                  || *proto == '*' || *proto == '+'
7097                                 )
7098                              && proto[1] == '\0'
7099                             )
7100                          || (
7101                              *proto == '\\' && proto[1] && proto[2] == '\0'
7102                             )
7103                         )
7104                             UNIPROTO(UNIOPSUB,optional);
7105                         if (*proto == '\\' && proto[1] == '[') {
7106                             const char *p = proto + 2;
7107                             while(*p && *p != ']')
7108                                 ++p;
7109                             if(*p == ']' && !p[1])
7110                                 UNIPROTO(UNIOPSUB,optional);
7111                         }
7112                         if (*proto == '&' && *s == '{') {
7113                             if (PL_curstash)
7114                                 sv_setpvs(PL_subname, "__ANON__");
7115                             else
7116                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7117                             if (!PL_lex_allbrackets &&
7118                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7119                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7120                             PREBLOCK(LSTOPSUB);
7121                         }
7122                     }
7123 #ifdef PERL_MAD
7124                     {
7125                         if (PL_madskills) {
7126                             PL_nextwhite = PL_thiswhite;
7127                             PL_thiswhite = 0;
7128                         }
7129                         start_force(PL_curforce);
7130                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7131                         PL_expect = XTERM;
7132                         if (PL_madskills) {
7133                             PL_nextwhite = nextPL_nextwhite;
7134                             curmad('X', PL_thistoken);
7135                             PL_thistoken = newSVpvs("");
7136                         }
7137                         force_next(off ? PRIVATEREF : WORD);
7138                         if (!PL_lex_allbrackets &&
7139                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7140                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7141                         TOKEN(NOAMP);
7142                     }
7143                 }
7144
7145                 /* Guess harder when madskills require "best effort". */
7146                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7147                     int probable_sub = 0;
7148                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7149                         probable_sub = 1;
7150                     else if (isALPHA(*s)) {
7151                         char tmpbuf[1024];
7152                         STRLEN tmplen;
7153                         d = s;
7154                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7155                         if (!keyword(tmpbuf, tmplen, 0))
7156                             probable_sub = 1;
7157                         else {
7158                             while (d < PL_bufend && isSPACE(*d))
7159                                 d++;
7160                             if (*d == '=' && d[1] == '>')
7161                                 probable_sub = 1;
7162                         }
7163                     }
7164                     if (probable_sub) {
7165                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7166                                         SVt_PVCV);
7167                         op_free(pl_yylval.opval);
7168                         pl_yylval.opval = rv2cv_op;
7169                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7170                         PL_last_lop = PL_oldbufptr;
7171                         PL_last_lop_op = OP_ENTERSUB;
7172                         PL_nextwhite = PL_thiswhite;
7173                         PL_thiswhite = 0;
7174                         start_force(PL_curforce);
7175                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7176                         PL_expect = XTERM;
7177                         PL_nextwhite = nextPL_nextwhite;
7178                         curmad('X', PL_thistoken);
7179                         PL_thistoken = newSVpvs("");
7180                         force_next(off ? PRIVATEREF : WORD);
7181                         if (!PL_lex_allbrackets &&
7182                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7183                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7184                         TOKEN(NOAMP);
7185                     }
7186 #else
7187                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7188                     PL_expect = XTERM;
7189                     force_next(off ? PRIVATEREF : WORD);
7190                     if (!PL_lex_allbrackets &&
7191                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7192                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7193                     TOKEN(NOAMP);
7194 #endif
7195                 }
7196
7197                 /* Call it a bare word */
7198
7199                 if (PL_hints & HINT_STRICT_SUBS)
7200                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7201                 else {
7202                 bareword:
7203                     /* after "print" and similar functions (corresponding to
7204                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7205                      * a filehandle should be subject to "strict subs".
7206                      * Likewise for the optional indirect-object argument to system
7207                      * or exec, which can't be a bareword */
7208                     if ((PL_last_lop_op == OP_PRINT
7209                             || PL_last_lop_op == OP_PRTF
7210                             || PL_last_lop_op == OP_SAY
7211                             || PL_last_lop_op == OP_SYSTEM
7212                             || PL_last_lop_op == OP_EXEC)
7213                             && (PL_hints & HINT_STRICT_SUBS))
7214                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7215                     if (lastchar != '-') {
7216                         if (ckWARN(WARN_RESERVED)) {
7217                             d = PL_tokenbuf;
7218                             while (isLOWER(*d))
7219                                 d++;
7220                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7221                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7222                                        PL_tokenbuf);
7223                         }
7224                     }
7225                 }
7226                 op_free(rv2cv_op);
7227
7228             safe_bareword:
7229                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7230                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7231                                      "Operator or semicolon missing before %c%"SVf,
7232                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7233                                                     strlen(PL_tokenbuf),
7234                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7235                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7236                                      "Ambiguous use of %c resolved as operator %c",
7237                                      lastchar, lastchar);
7238                 }
7239                 TOKEN(WORD);
7240             }
7241
7242         case KEY___FILE__:
7243             FUN0OP(
7244                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7245             );
7246
7247         case KEY___LINE__:
7248             FUN0OP(
7249                 (OP*)newSVOP(OP_CONST, 0,
7250                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7251             );
7252
7253         case KEY___PACKAGE__:
7254             FUN0OP(
7255                 (OP*)newSVOP(OP_CONST, 0,
7256                                         (PL_curstash
7257                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7258                                          : &PL_sv_undef))
7259             );
7260
7261         case KEY___DATA__:
7262         case KEY___END__: {
7263             GV *gv;
7264             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7265                 const char *pname = "main";
7266                 STRLEN plen = 4;
7267                 U32 putf8 = 0;
7268                 if (PL_tokenbuf[2] == 'D')
7269                 {
7270                     HV * const stash =
7271                         PL_curstash ? PL_curstash : PL_defstash;
7272                     pname = HvNAME_get(stash);
7273                     plen  = HvNAMELEN (stash);
7274                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7275                 }
7276                 gv = gv_fetchpvn_flags(
7277                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7278                         plen+6, GV_ADD|putf8, SVt_PVIO
7279                 );
7280                 GvMULTI_on(gv);
7281                 if (!GvIO(gv))
7282                     GvIOp(gv) = newIO();
7283                 IoIFP(GvIOp(gv)) = PL_rsfp;
7284 #if defined(HAS_FCNTL) && defined(F_SETFD)
7285                 {
7286                     const int fd = PerlIO_fileno(PL_rsfp);
7287                     fcntl(fd,F_SETFD,fd >= 3);
7288                 }
7289 #endif
7290                 /* Mark this internal pseudo-handle as clean */
7291                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7292                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7293                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7294                 else
7295                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7296 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7297                 /* if the script was opened in binmode, we need to revert
7298                  * it to text mode for compatibility; but only iff it has CRs
7299                  * XXX this is a questionable hack at best. */
7300                 if (PL_bufend-PL_bufptr > 2
7301                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7302                 {
7303                     Off_t loc = 0;
7304                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7305                         loc = PerlIO_tell(PL_rsfp);
7306                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7307                     }
7308 #ifdef NETWARE
7309                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7310 #else
7311                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7312 #endif  /* NETWARE */
7313                         if (loc > 0)
7314                             PerlIO_seek(PL_rsfp, loc, 0);
7315                     }
7316                 }
7317 #endif
7318 #ifdef PERLIO_LAYERS
7319                 if (!IN_BYTES) {
7320                     if (UTF)
7321                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7322                     else if (PL_encoding) {
7323                         SV *name;
7324                         dSP;
7325                         ENTER;
7326                         SAVETMPS;
7327                         PUSHMARK(sp);
7328                         EXTEND(SP, 1);
7329                         XPUSHs(PL_encoding);
7330                         PUTBACK;
7331                         call_method("name", G_SCALAR);
7332                         SPAGAIN;
7333                         name = POPs;
7334                         PUTBACK;
7335                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7336                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7337                                                       SVfARG(name)));
7338                         FREETMPS;
7339                         LEAVE;
7340                     }
7341                 }
7342 #endif
7343 #ifdef PERL_MAD
7344                 if (PL_madskills) {
7345                     if (PL_realtokenstart >= 0) {
7346                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7347                         if (!PL_endwhite)
7348                             PL_endwhite = newSVpvs("");
7349                         sv_catsv(PL_endwhite, PL_thiswhite);
7350                         PL_thiswhite = 0;
7351                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7352                         PL_realtokenstart = -1;
7353                     }
7354                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7355                            != NULL) ;
7356                 }
7357 #endif
7358                 PL_rsfp = NULL;
7359             }
7360             goto fake_eof;
7361         }
7362
7363         case KEY___SUB__:
7364             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7365
7366         case KEY_AUTOLOAD:
7367         case KEY_DESTROY:
7368         case KEY_BEGIN:
7369         case KEY_UNITCHECK:
7370         case KEY_CHECK:
7371         case KEY_INIT:
7372         case KEY_END:
7373             if (PL_expect == XSTATE) {
7374                 s = PL_bufptr;
7375                 goto really_sub;
7376             }
7377             goto just_a_word;
7378
7379         case KEY_CORE:
7380             if (*s == ':' && s[1] == ':') {
7381                 STRLEN olen = len;
7382                 d = s;
7383                 s += 2;
7384                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7385                 if ((*s == ':' && s[1] == ':')
7386                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7387                 {
7388                     s = d;
7389                     len = olen;
7390                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7391                     goto just_a_word;
7392                 }
7393                 if (!tmp)
7394                     Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7395                                     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7396                                                 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7397                 if (tmp < 0)
7398                     tmp = -tmp;
7399                 else if (tmp == KEY_require || tmp == KEY_do
7400                       || tmp == KEY_glob)
7401                     /* that's a way to remember we saw "CORE::" */
7402                     orig_keyword = tmp;
7403                 goto reserved_word;
7404             }
7405             goto just_a_word;
7406
7407         case KEY_abs:
7408             UNI(OP_ABS);
7409
7410         case KEY_alarm:
7411             UNI(OP_ALARM);
7412
7413         case KEY_accept:
7414             LOP(OP_ACCEPT,XTERM);
7415
7416         case KEY_and:
7417             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7418                 return REPORT(0);
7419             OPERATOR(ANDOP);
7420
7421         case KEY_atan2:
7422             LOP(OP_ATAN2,XTERM);
7423
7424         case KEY_bind:
7425             LOP(OP_BIND,XTERM);
7426
7427         case KEY_binmode:
7428             LOP(OP_BINMODE,XTERM);
7429
7430         case KEY_bless:
7431             LOP(OP_BLESS,XTERM);
7432
7433         case KEY_break:
7434             FUN0(OP_BREAK);
7435
7436         case KEY_chop:
7437             UNI(OP_CHOP);
7438
7439         case KEY_continue:
7440                     /* We have to disambiguate the two senses of
7441                       "continue". If the next token is a '{' then
7442                       treat it as the start of a continue block;
7443                       otherwise treat it as a control operator.
7444                      */
7445                     s = skipspace(s);
7446                     if (*s == '{')
7447             PREBLOCK(CONTINUE);
7448                     else
7449                         FUN0(OP_CONTINUE);
7450
7451         case KEY_chdir:
7452             /* may use HOME */
7453             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7454             UNI(OP_CHDIR);
7455
7456         case KEY_close:
7457             UNI(OP_CLOSE);
7458
7459         case KEY_closedir:
7460             UNI(OP_CLOSEDIR);
7461
7462         case KEY_cmp:
7463             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7464                 return REPORT(0);
7465             Eop(OP_SCMP);
7466
7467         case KEY_caller:
7468             UNI(OP_CALLER);
7469
7470         case KEY_crypt:
7471 #ifdef FCRYPT
7472             if (!PL_cryptseen) {
7473                 PL_cryptseen = TRUE;
7474                 init_des();
7475             }
7476 #endif
7477             LOP(OP_CRYPT,XTERM);
7478
7479         case KEY_chmod:
7480             LOP(OP_CHMOD,XTERM);
7481
7482         case KEY_chown:
7483             LOP(OP_CHOWN,XTERM);
7484
7485         case KEY_connect:
7486             LOP(OP_CONNECT,XTERM);
7487
7488         case KEY_chr:
7489             UNI(OP_CHR);
7490
7491         case KEY_cos:
7492             UNI(OP_COS);
7493
7494         case KEY_chroot:
7495             UNI(OP_CHROOT);
7496
7497         case KEY_default:
7498             PREBLOCK(DEFAULT);
7499
7500         case KEY_do:
7501             s = SKIPSPACE1(s);
7502             if (*s == '{')
7503                 PRETERMBLOCK(DO);
7504             if (*s != '\'') {
7505                 *PL_tokenbuf = '&';
7506                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7507                               1, &len);
7508                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7509                     d = SKIPSPACE1(d);
7510                     if (*d == '(') {
7511                         force_ident_maybe_lex('&');
7512                         s = d;
7513                     }
7514                 }
7515             }
7516             if (orig_keyword == KEY_do) {
7517                 orig_keyword = 0;
7518                 pl_yylval.ival = 1;
7519             }
7520             else
7521                 pl_yylval.ival = 0;
7522             OPERATOR(DO);
7523
7524         case KEY_die:
7525             PL_hints |= HINT_BLOCK_SCOPE;
7526             LOP(OP_DIE,XTERM);
7527
7528         case KEY_defined:
7529             UNI(OP_DEFINED);
7530
7531         case KEY_delete:
7532             UNI(OP_DELETE);
7533
7534         case KEY_dbmopen:
7535             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7536                               STR_WITH_LEN("NDBM_File::"),
7537                               STR_WITH_LEN("DB_File::"),
7538                               STR_WITH_LEN("GDBM_File::"),
7539                               STR_WITH_LEN("SDBM_File::"),
7540                               STR_WITH_LEN("ODBM_File::"),
7541                               NULL);
7542             LOP(OP_DBMOPEN,XTERM);
7543
7544         case KEY_dbmclose:
7545             UNI(OP_DBMCLOSE);
7546
7547         case KEY_dump:
7548             PL_expect = XOPERATOR;
7549             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7550             LOOPX(OP_DUMP);
7551
7552         case KEY_else:
7553             PREBLOCK(ELSE);
7554
7555         case KEY_elsif:
7556             pl_yylval.ival = CopLINE(PL_curcop);
7557             OPERATOR(ELSIF);
7558
7559         case KEY_eq:
7560             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7561                 return REPORT(0);
7562             Eop(OP_SEQ);
7563
7564         case KEY_exists:
7565             UNI(OP_EXISTS);
7566
7567         case KEY_exit:
7568             if (PL_madskills)
7569                 UNI(OP_INT);
7570             UNI(OP_EXIT);
7571
7572         case KEY_eval:
7573             s = SKIPSPACE1(s);
7574             if (*s == '{') { /* block eval */
7575                 PL_expect = XTERMBLOCK;
7576                 UNIBRACK(OP_ENTERTRY);
7577             }
7578             else { /* string eval */
7579                 PL_expect = XTERM;
7580                 UNIBRACK(OP_ENTEREVAL);
7581             }
7582
7583         case KEY_evalbytes:
7584             PL_expect = XTERM;
7585             UNIBRACK(-OP_ENTEREVAL);
7586
7587         case KEY_eof:
7588             UNI(OP_EOF);
7589
7590         case KEY_exp:
7591             UNI(OP_EXP);
7592
7593         case KEY_each:
7594             UNI(OP_EACH);
7595
7596         case KEY_exec:
7597             LOP(OP_EXEC,XREF);
7598
7599         case KEY_endhostent:
7600             FUN0(OP_EHOSTENT);
7601
7602         case KEY_endnetent:
7603             FUN0(OP_ENETENT);
7604
7605         case KEY_endservent:
7606             FUN0(OP_ESERVENT);
7607
7608         case KEY_endprotoent:
7609             FUN0(OP_EPROTOENT);
7610
7611         case KEY_endpwent:
7612             FUN0(OP_EPWENT);
7613
7614         case KEY_endgrent:
7615             FUN0(OP_EGRENT);
7616
7617         case KEY_for:
7618         case KEY_foreach:
7619             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7620                 return REPORT(0);
7621             pl_yylval.ival = CopLINE(PL_curcop);
7622             s = SKIPSPACE1(s);
7623             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7624                 char *p = s;
7625 #ifdef PERL_MAD
7626                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7627 #endif
7628
7629                 if ((PL_bufend - p) >= 3 &&
7630                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7631                     p += 2;
7632                 else if ((PL_bufend - p) >= 4 &&
7633                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7634                     p += 3;
7635                 p = PEEKSPACE(p);
7636                 if (isIDFIRST_lazy_if(p,UTF)) {
7637                     p = scan_ident(p, PL_bufend,
7638                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7639                     p = PEEKSPACE(p);
7640                 }
7641                 if (*p != '$')
7642                     Perl_croak(aTHX_ "Missing $ on loop variable");
7643 #ifdef PERL_MAD
7644                 s = SvPVX(PL_linestr) + soff;
7645 #endif
7646             }
7647             OPERATOR(FOR);
7648
7649         case KEY_formline:
7650             LOP(OP_FORMLINE,XTERM);
7651
7652         case KEY_fork:
7653             FUN0(OP_FORK);
7654
7655         case KEY_fc:
7656             UNI(OP_FC);
7657
7658         case KEY_fcntl:
7659             LOP(OP_FCNTL,XTERM);
7660
7661         case KEY_fileno:
7662             UNI(OP_FILENO);
7663
7664         case KEY_flock:
7665             LOP(OP_FLOCK,XTERM);
7666
7667         case KEY_gt:
7668             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7669                 return REPORT(0);
7670             Rop(OP_SGT);
7671
7672         case KEY_ge:
7673             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7674                 return REPORT(0);
7675             Rop(OP_SGE);
7676
7677         case KEY_grep:
7678             LOP(OP_GREPSTART, XREF);
7679
7680         case KEY_goto:
7681             PL_expect = XOPERATOR;
7682             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7683             LOOPX(OP_GOTO);
7684
7685         case KEY_gmtime:
7686             UNI(OP_GMTIME);
7687
7688         case KEY_getc:
7689             UNIDOR(OP_GETC);
7690
7691         case KEY_getppid:
7692             FUN0(OP_GETPPID);
7693
7694         case KEY_getpgrp:
7695             UNI(OP_GETPGRP);
7696
7697         case KEY_getpriority:
7698             LOP(OP_GETPRIORITY,XTERM);
7699
7700         case KEY_getprotobyname:
7701             UNI(OP_GPBYNAME);
7702
7703         case KEY_getprotobynumber:
7704             LOP(OP_GPBYNUMBER,XTERM);
7705
7706         case KEY_getprotoent:
7707             FUN0(OP_GPROTOENT);
7708
7709         case KEY_getpwent:
7710             FUN0(OP_GPWENT);
7711
7712         case KEY_getpwnam:
7713             UNI(OP_GPWNAM);
7714
7715         case KEY_getpwuid:
7716             UNI(OP_GPWUID);
7717
7718         case KEY_getpeername:
7719             UNI(OP_GETPEERNAME);
7720
7721         case KEY_gethostbyname:
7722             UNI(OP_GHBYNAME);
7723
7724         case KEY_gethostbyaddr:
7725             LOP(OP_GHBYADDR,XTERM);
7726
7727         case KEY_gethostent:
7728             FUN0(OP_GHOSTENT);
7729
7730         case KEY_getnetbyname:
7731             UNI(OP_GNBYNAME);
7732
7733         case KEY_getnetbyaddr:
7734             LOP(OP_GNBYADDR,XTERM);
7735
7736         case KEY_getnetent:
7737             FUN0(OP_GNETENT);
7738
7739         case KEY_getservbyname:
7740             LOP(OP_GSBYNAME,XTERM);
7741
7742         case KEY_getservbyport:
7743             LOP(OP_GSBYPORT,XTERM);
7744
7745         case KEY_getservent:
7746             FUN0(OP_GSERVENT);
7747
7748         case KEY_getsockname:
7749             UNI(OP_GETSOCKNAME);
7750
7751         case KEY_getsockopt:
7752             LOP(OP_GSOCKOPT,XTERM);
7753
7754         case KEY_getgrent:
7755             FUN0(OP_GGRENT);
7756
7757         case KEY_getgrnam:
7758             UNI(OP_GGRNAM);
7759
7760         case KEY_getgrgid:
7761             UNI(OP_GGRGID);
7762
7763         case KEY_getlogin:
7764             FUN0(OP_GETLOGIN);
7765
7766         case KEY_given:
7767             pl_yylval.ival = CopLINE(PL_curcop);
7768             OPERATOR(GIVEN);
7769
7770         case KEY_glob:
7771             LOP(
7772              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7773              XTERM
7774             );
7775
7776         case KEY_hex:
7777             UNI(OP_HEX);
7778
7779         case KEY_if:
7780             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7781                 return REPORT(0);
7782             pl_yylval.ival = CopLINE(PL_curcop);
7783             OPERATOR(IF);
7784
7785         case KEY_index:
7786             LOP(OP_INDEX,XTERM);
7787
7788         case KEY_int:
7789             UNI(OP_INT);
7790
7791         case KEY_ioctl:
7792             LOP(OP_IOCTL,XTERM);
7793
7794         case KEY_join:
7795             LOP(OP_JOIN,XTERM);
7796
7797         case KEY_keys:
7798             UNI(OP_KEYS);
7799
7800         case KEY_kill:
7801             LOP(OP_KILL,XTERM);
7802
7803         case KEY_last:
7804             PL_expect = XOPERATOR;
7805             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7806             LOOPX(OP_LAST);
7807
7808         case KEY_lc:
7809             UNI(OP_LC);
7810
7811         case KEY_lcfirst:
7812             UNI(OP_LCFIRST);
7813
7814         case KEY_local:
7815             pl_yylval.ival = 0;
7816             OPERATOR(LOCAL);
7817
7818         case KEY_length:
7819             UNI(OP_LENGTH);
7820
7821         case KEY_lt:
7822             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7823                 return REPORT(0);
7824             Rop(OP_SLT);
7825
7826         case KEY_le:
7827             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7828                 return REPORT(0);
7829             Rop(OP_SLE);
7830
7831         case KEY_localtime:
7832             UNI(OP_LOCALTIME);
7833
7834         case KEY_log:
7835             UNI(OP_LOG);
7836
7837         case KEY_link:
7838             LOP(OP_LINK,XTERM);
7839
7840         case KEY_listen:
7841             LOP(OP_LISTEN,XTERM);
7842
7843         case KEY_lock:
7844             UNI(OP_LOCK);
7845
7846         case KEY_lstat:
7847             UNI(OP_LSTAT);
7848
7849         case KEY_m:
7850             s = scan_pat(s,OP_MATCH);
7851             TERM(sublex_start());
7852
7853         case KEY_map:
7854             LOP(OP_MAPSTART, XREF);
7855
7856         case KEY_mkdir:
7857             LOP(OP_MKDIR,XTERM);
7858
7859         case KEY_msgctl:
7860             LOP(OP_MSGCTL,XTERM);
7861
7862         case KEY_msgget:
7863             LOP(OP_MSGGET,XTERM);
7864
7865         case KEY_msgrcv:
7866             LOP(OP_MSGRCV,XTERM);
7867
7868         case KEY_msgsnd:
7869             LOP(OP_MSGSND,XTERM);
7870
7871         case KEY_our:
7872         case KEY_my:
7873         case KEY_state:
7874             PL_in_my = (U16)tmp;
7875             s = SKIPSPACE1(s);
7876             if (isIDFIRST_lazy_if(s,UTF)) {
7877 #ifdef PERL_MAD
7878                 char* start = s;
7879 #endif
7880                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7881                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7882                 {
7883                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7884                         Perl_croak(aTHX_
7885                                   "Experimental \"%s\" subs not enabled",
7886                                    tmp == KEY_my    ? "my"    :
7887                                    tmp == KEY_state ? "state" : "our");
7888                     goto really_sub;
7889                 }
7890                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7891                 if (!PL_in_my_stash) {
7892                     char tmpbuf[1024];
7893                     PL_bufptr = s;
7894                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7895                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7896                 }
7897 #ifdef PERL_MAD
7898                 if (PL_madskills) {     /* just add type to declarator token */
7899                     sv_catsv(PL_thistoken, PL_nextwhite);
7900                     PL_nextwhite = 0;
7901                     sv_catpvn(PL_thistoken, start, s - start);
7902                 }
7903 #endif
7904             }
7905             pl_yylval.ival = 1;
7906             OPERATOR(MY);
7907
7908         case KEY_next:
7909             PL_expect = XOPERATOR;
7910             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7911             LOOPX(OP_NEXT);
7912
7913         case KEY_ne:
7914             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7915                 return REPORT(0);
7916             Eop(OP_SNE);
7917
7918         case KEY_no:
7919             s = tokenize_use(0, s);
7920             TERM(USE);
7921
7922         case KEY_not:
7923             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7924                 FUN1(OP_NOT);
7925             else {
7926                 if (!PL_lex_allbrackets &&
7927                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7928                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7929                 OPERATOR(NOTOP);
7930             }
7931
7932         case KEY_open:
7933             s = SKIPSPACE1(s);
7934             if (isIDFIRST_lazy_if(s,UTF)) {
7935                 const char *t;
7936                 for (d = s; isALNUM_lazy_if(d,UTF);) {
7937                     d += UTF ? UTF8SKIP(d) : 1;
7938                     if (UTF) {
7939                         while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7940                             d += UTF ? UTF8SKIP(d) : 1;
7941                         }
7942                     }
7943                 }
7944                 for (t=d; isSPACE(*t);)
7945                     t++;
7946                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7947                     /* [perl #16184] */
7948                     && !(t[0] == '=' && t[1] == '>')
7949                     && !(t[0] == ':' && t[1] == ':')
7950                     && !keyword(s, d-s, 0)
7951                 ) {
7952                     SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7953                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7954                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7955                            "Precedence problem: open %"SVf" should be open(%"SVf")",
7956                             SVfARG(tmpsv), SVfARG(tmpsv));
7957                 }
7958             }
7959             LOP(OP_OPEN,XTERM);
7960
7961         case KEY_or:
7962             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7963                 return REPORT(0);
7964             pl_yylval.ival = OP_OR;
7965             OPERATOR(OROP);
7966
7967         case KEY_ord:
7968             UNI(OP_ORD);
7969
7970         case KEY_oct:
7971             UNI(OP_OCT);
7972
7973         case KEY_opendir:
7974             LOP(OP_OPEN_DIR,XTERM);
7975
7976         case KEY_print:
7977             checkcomma(s,PL_tokenbuf,"filehandle");
7978             LOP(OP_PRINT,XREF);
7979
7980         case KEY_printf:
7981             checkcomma(s,PL_tokenbuf,"filehandle");
7982             LOP(OP_PRTF,XREF);
7983
7984         case KEY_prototype:
7985             UNI(OP_PROTOTYPE);
7986
7987         case KEY_push:
7988             LOP(OP_PUSH,XTERM);
7989
7990         case KEY_pop:
7991             UNIDOR(OP_POP);
7992
7993         case KEY_pos:
7994             UNIDOR(OP_POS);
7995
7996         case KEY_pack:
7997             LOP(OP_PACK,XTERM);
7998
7999         case KEY_package:
8000             s = force_word(s,WORD,FALSE,TRUE,FALSE);
8001             s = SKIPSPACE1(s);
8002             s = force_strict_version(s);
8003             PL_lex_expect = XBLOCK;
8004             OPERATOR(PACKAGE);
8005
8006         case KEY_pipe:
8007             LOP(OP_PIPE_OP,XTERM);
8008
8009         case KEY_q:
8010             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8011             if (!s)
8012                 missingterm(NULL);
8013             pl_yylval.ival = OP_CONST;
8014             TERM(sublex_start());
8015
8016         case KEY_quotemeta:
8017             UNI(OP_QUOTEMETA);
8018
8019         case KEY_qw: {
8020             OP *words = NULL;
8021             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8022             if (!s)
8023                 missingterm(NULL);
8024             PL_expect = XOPERATOR;
8025             if (SvCUR(PL_lex_stuff)) {
8026                 int warned_comma = !ckWARN(WARN_QW);
8027                 int warned_comment = warned_comma;
8028                 d = SvPV_force(PL_lex_stuff, len);
8029                 while (len) {
8030                     for (; isSPACE(*d) && len; --len, ++d)
8031                         /**/;
8032                     if (len) {
8033                         SV *sv;
8034                         const char *b = d;
8035                         if (!warned_comma || !warned_comment) {
8036                             for (; !isSPACE(*d) && len; --len, ++d) {
8037                                 if (!warned_comma && *d == ',') {
8038                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8039                                         "Possible attempt to separate words with commas");
8040                                     ++warned_comma;
8041                                 }
8042                                 else if (!warned_comment && *d == '#') {
8043                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8044                                         "Possible attempt to put comments in qw() list");
8045                                     ++warned_comment;
8046                                 }
8047                             }
8048                         }
8049                         else {
8050                             for (; !isSPACE(*d) && len; --len, ++d)
8051                                 /**/;
8052                         }
8053                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8054                         words = op_append_elem(OP_LIST, words,
8055                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8056                     }
8057                 }
8058             }
8059             if (!words)
8060                 words = newNULLLIST();
8061             if (PL_lex_stuff) {
8062                 SvREFCNT_dec(PL_lex_stuff);
8063                 PL_lex_stuff = NULL;
8064             }
8065             PL_expect = XOPERATOR;
8066             pl_yylval.opval = sawparens(words);
8067             TOKEN(QWLIST);
8068         }
8069
8070         case KEY_qq:
8071             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8072             if (!s)
8073                 missingterm(NULL);
8074             pl_yylval.ival = OP_STRINGIFY;
8075             if (SvIVX(PL_lex_stuff) == '\'')
8076                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8077             TERM(sublex_start());
8078
8079         case KEY_qr:
8080             s = scan_pat(s,OP_QR);
8081             TERM(sublex_start());
8082
8083         case KEY_qx:
8084             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8085             if (!s)
8086                 missingterm(NULL);
8087             readpipe_override();
8088             TERM(sublex_start());
8089
8090         case KEY_return:
8091             OLDLOP(OP_RETURN);
8092
8093         case KEY_require:
8094             s = SKIPSPACE1(s);
8095             PL_expect = XOPERATOR;
8096             if (isDIGIT(*s)) {
8097                 s = force_version(s, FALSE);
8098             }
8099             else if (*s != 'v' || !isDIGIT(s[1])
8100                     || (s = force_version(s, TRUE), *s == 'v'))
8101             {
8102                 *PL_tokenbuf = '\0';
8103                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8104                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8105                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8106                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8107                 else if (*s == '<')
8108                     yyerror("<> should be quotes");
8109             }
8110             if (orig_keyword == KEY_require) {
8111                 orig_keyword = 0;
8112                 pl_yylval.ival = 1;
8113             }
8114             else
8115                 pl_yylval.ival = 0;
8116             PL_expect = XTERM;
8117             PL_bufptr = s;
8118             PL_last_uni = PL_oldbufptr;
8119             PL_last_lop_op = OP_REQUIRE;
8120             s = skipspace(s);
8121             return REPORT( (int)REQUIRE );
8122
8123         case KEY_reset:
8124             UNI(OP_RESET);
8125
8126         case KEY_redo:
8127             PL_expect = XOPERATOR;
8128             s = force_word(s,WORD,TRUE,FALSE,FALSE);
8129             LOOPX(OP_REDO);
8130
8131         case KEY_rename:
8132             LOP(OP_RENAME,XTERM);
8133
8134         case KEY_rand:
8135             UNI(OP_RAND);
8136
8137         case KEY_rmdir:
8138             UNI(OP_RMDIR);
8139
8140         case KEY_rindex:
8141             LOP(OP_RINDEX,XTERM);
8142
8143         case KEY_read:
8144             LOP(OP_READ,XTERM);
8145
8146         case KEY_readdir:
8147             UNI(OP_READDIR);
8148
8149         case KEY_readline:
8150             UNIDOR(OP_READLINE);
8151
8152         case KEY_readpipe:
8153             UNIDOR(OP_BACKTICK);
8154
8155         case KEY_rewinddir:
8156             UNI(OP_REWINDDIR);
8157
8158         case KEY_recv:
8159             LOP(OP_RECV,XTERM);
8160
8161         case KEY_reverse:
8162             LOP(OP_REVERSE,XTERM);
8163
8164         case KEY_readlink:
8165             UNIDOR(OP_READLINK);
8166
8167         case KEY_ref:
8168             UNI(OP_REF);
8169
8170         case KEY_s:
8171             s = scan_subst(s);
8172             if (pl_yylval.opval)
8173                 TERM(sublex_start());
8174             else
8175                 TOKEN(1);       /* force error */
8176
8177         case KEY_say:
8178             checkcomma(s,PL_tokenbuf,"filehandle");
8179             LOP(OP_SAY,XREF);
8180
8181         case KEY_chomp:
8182             UNI(OP_CHOMP);
8183
8184         case KEY_scalar:
8185             UNI(OP_SCALAR);
8186
8187         case KEY_select:
8188             LOP(OP_SELECT,XTERM);
8189
8190         case KEY_seek:
8191             LOP(OP_SEEK,XTERM);
8192
8193         case KEY_semctl:
8194             LOP(OP_SEMCTL,XTERM);
8195
8196         case KEY_semget:
8197             LOP(OP_SEMGET,XTERM);
8198
8199         case KEY_semop:
8200             LOP(OP_SEMOP,XTERM);
8201
8202         case KEY_send:
8203             LOP(OP_SEND,XTERM);
8204
8205         case KEY_setpgrp:
8206             LOP(OP_SETPGRP,XTERM);
8207
8208         case KEY_setpriority:
8209             LOP(OP_SETPRIORITY,XTERM);
8210
8211         case KEY_sethostent:
8212             UNI(OP_SHOSTENT);
8213
8214         case KEY_setnetent:
8215             UNI(OP_SNETENT);
8216
8217         case KEY_setservent:
8218             UNI(OP_SSERVENT);
8219
8220         case KEY_setprotoent:
8221             UNI(OP_SPROTOENT);
8222
8223         case KEY_setpwent:
8224             FUN0(OP_SPWENT);
8225
8226         case KEY_setgrent:
8227             FUN0(OP_SGRENT);
8228
8229         case KEY_seekdir:
8230             LOP(OP_SEEKDIR,XTERM);
8231
8232         case KEY_setsockopt:
8233             LOP(OP_SSOCKOPT,XTERM);
8234
8235         case KEY_shift:
8236             UNIDOR(OP_SHIFT);
8237
8238         case KEY_shmctl:
8239             LOP(OP_SHMCTL,XTERM);
8240
8241         case KEY_shmget:
8242             LOP(OP_SHMGET,XTERM);
8243
8244         case KEY_shmread:
8245             LOP(OP_SHMREAD,XTERM);
8246
8247         case KEY_shmwrite:
8248             LOP(OP_SHMWRITE,XTERM);
8249
8250         case KEY_shutdown:
8251             LOP(OP_SHUTDOWN,XTERM);
8252
8253         case KEY_sin:
8254             UNI(OP_SIN);
8255
8256         case KEY_sleep:
8257             UNI(OP_SLEEP);
8258
8259         case KEY_socket:
8260             LOP(OP_SOCKET,XTERM);
8261
8262         case KEY_socketpair:
8263             LOP(OP_SOCKPAIR,XTERM);
8264
8265         case KEY_sort:
8266             checkcomma(s,PL_tokenbuf,"subroutine name");
8267             s = SKIPSPACE1(s);
8268             PL_expect = XTERM;
8269             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8270             LOP(OP_SORT,XREF);
8271
8272         case KEY_split:
8273             LOP(OP_SPLIT,XTERM);
8274
8275         case KEY_sprintf:
8276             LOP(OP_SPRINTF,XTERM);
8277
8278         case KEY_splice:
8279             LOP(OP_SPLICE,XTERM);
8280
8281         case KEY_sqrt:
8282             UNI(OP_SQRT);
8283
8284         case KEY_srand:
8285             UNI(OP_SRAND);
8286
8287         case KEY_stat:
8288             UNI(OP_STAT);
8289
8290         case KEY_study:
8291             UNI(OP_STUDY);
8292
8293         case KEY_substr:
8294             LOP(OP_SUBSTR,XTERM);
8295
8296         case KEY_format:
8297         case KEY_sub:
8298           really_sub:
8299             {
8300                 char * const tmpbuf = PL_tokenbuf + 1;
8301                 SSize_t tboffset = 0;
8302                 expectation attrful;
8303                 bool have_name, have_proto;
8304                 const int key = tmp;
8305
8306 #ifdef PERL_MAD
8307                 SV *tmpwhite = 0;
8308
8309                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8310                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8311                 PL_thistoken = 0;
8312
8313                 d = s;
8314                 s = SKIPSPACE2(s,tmpwhite);
8315 #else
8316                 d = s;
8317                 s = skipspace(s);
8318 #endif
8319
8320                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8321                     (*s == ':' && s[1] == ':'))
8322                 {
8323 #ifdef PERL_MAD
8324                     SV *nametoke = NULL;
8325 #endif
8326
8327                     PL_expect = XBLOCK;
8328                     attrful = XATTRBLOCK;
8329                     /* remember buffer pos'n for later force_word */
8330                     tboffset = s - PL_oldbufptr;
8331                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8332                                   &len);
8333 #ifdef PERL_MAD
8334                     if (PL_madskills)
8335                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8336 #endif
8337                     *PL_tokenbuf = '&';
8338                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8339                      || pad_findmy_pvn(
8340                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8341                         ) != NOT_IN_PAD)
8342                         sv_setpvn(PL_subname, tmpbuf, len);
8343                     else {
8344                         sv_setsv(PL_subname,PL_curstname);
8345                         sv_catpvs(PL_subname,"::");
8346                         sv_catpvn(PL_subname,tmpbuf,len);
8347                     }
8348                     if (SvUTF8(PL_linestr))
8349                         SvUTF8_on(PL_subname);
8350                     have_name = TRUE;
8351
8352
8353 #ifdef PERL_MAD
8354                     start_force(0);
8355                     CURMAD('X', nametoke);
8356                     CURMAD('_', tmpwhite);
8357                     force_ident_maybe_lex('&');
8358
8359                     s = SKIPSPACE2(d,tmpwhite);
8360 #else
8361                     s = skipspace(d);
8362 #endif
8363                 }
8364                 else {
8365                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8366                     {
8367                         *d = '\0';
8368                         /* diag_listed_as: Missing name in "%s sub" */
8369                         Perl_croak(aTHX_
8370                                   "Missing name in \"%s\"", PL_bufptr);
8371                     }
8372                     PL_expect = XTERMBLOCK;
8373                     attrful = XATTRTERM;
8374                     sv_setpvs(PL_subname,"?");
8375                     have_name = FALSE;
8376                 }
8377
8378                 if (key == KEY_format) {
8379 #ifdef PERL_MAD
8380                     PL_thistoken = subtoken;
8381                     s = d;
8382 #else
8383                     if (have_name)
8384                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8385                                           FALSE, TRUE, TRUE);
8386 #endif
8387                     PREBLOCK(FORMAT);
8388                 }
8389
8390                 /* Look for a prototype */
8391                 if (*s == '(') {
8392                     char *p;
8393                     bool bad_proto = FALSE;
8394                     bool in_brackets = FALSE;
8395                     char greedy_proto = ' ';
8396                     bool proto_after_greedy_proto = FALSE;
8397                     bool must_be_last = FALSE;
8398                     bool underscore = FALSE;
8399                     bool seen_underscore = FALSE;
8400                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8401                     STRLEN tmplen;
8402
8403                     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8404                     if (!s)
8405                         Perl_croak(aTHX_ "Prototype not terminated");
8406                     /* strip spaces and check for bad characters */
8407                     d = SvPV(PL_lex_stuff, tmplen);
8408                     tmp = 0;
8409                     for (p = d; tmplen; tmplen--, ++p) {
8410                         if (!isSPACE(*p)) {
8411                             d[tmp++] = *p;
8412
8413                             if (warnillegalproto) {
8414                                 if (must_be_last)
8415                                     proto_after_greedy_proto = TRUE;
8416                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8417                                     bad_proto = TRUE;
8418                                 }
8419                                 else {
8420                                     if ( underscore ) {
8421                                         if ( !strchr(";@%", *p) )
8422                                             bad_proto = TRUE;
8423                                         underscore = FALSE;
8424                                     }
8425                                     if ( *p == '[' ) {
8426                                         in_brackets = TRUE;
8427                                     }
8428                                     else if ( *p == ']' ) {
8429                                         in_brackets = FALSE;
8430                                     }
8431                                     else if ( (*p == '@' || *p == '%') &&
8432                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8433                                          !in_brackets ) {
8434                                         must_be_last = TRUE;
8435                                         greedy_proto = *p;
8436                                     }
8437                                     else if ( *p == '_' ) {
8438                                         underscore = seen_underscore = TRUE;
8439                                     }
8440                                 }
8441                             }
8442                         }
8443                     }
8444                     d[tmp] = '\0';
8445                     if (proto_after_greedy_proto)
8446                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8447                                     "Prototype after '%c' for %"SVf" : %s",
8448                                     greedy_proto, SVfARG(PL_subname), d);
8449                     if (bad_proto) {
8450                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8451                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8452                                     "Illegal character %sin prototype for %"SVf" : %s",
8453                                     seen_underscore ? "after '_' " : "",
8454                                     SVfARG(PL_subname),
8455                                     SvUTF8(PL_lex_stuff)
8456                                         ? sv_uni_display(dsv,
8457                                             newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8458                                             tmp,
8459                                             UNI_DISPLAY_ISPRINT)
8460                                         : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8461                                             PERL_PV_ESCAPE_NONASCII));
8462                     }
8463                     SvCUR_set(PL_lex_stuff, tmp);
8464                     have_proto = TRUE;
8465
8466 #ifdef PERL_MAD
8467                     start_force(0);
8468                     CURMAD('q', PL_thisopen);
8469                     CURMAD('_', tmpwhite);
8470                     CURMAD('=', PL_thisstuff);
8471                     CURMAD('Q', PL_thisclose);
8472                     NEXTVAL_NEXTTOKE.opval =
8473                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8474                     PL_lex_stuff = NULL;
8475                     force_next(THING);
8476
8477                     s = SKIPSPACE2(s,tmpwhite);
8478 #else
8479                     s = skipspace(s);
8480 #endif
8481                 }
8482                 else
8483                     have_proto = FALSE;
8484
8485                 if (*s == ':' && s[1] != ':')
8486                     PL_expect = attrful;
8487                 else if (*s != '{' && key == KEY_sub) {
8488                     if (!have_name)
8489                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8490                     else if (*s != ';' && *s != '}')
8491                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8492                 }
8493
8494 #ifdef PERL_MAD
8495                 start_force(0);
8496                 if (tmpwhite) {
8497                     if (PL_madskills)
8498                         curmad('^', newSVpvs(""));
8499                     CURMAD('_', tmpwhite);
8500                 }
8501                 force_next(0);
8502
8503                 PL_thistoken = subtoken;
8504 #else
8505                 if (have_proto) {
8506                     NEXTVAL_NEXTTOKE.opval =
8507                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8508                     PL_lex_stuff = NULL;
8509                     force_next(THING);
8510                 }
8511 #endif
8512                 if (!have_name) {
8513                     if (PL_curstash)
8514                         sv_setpvs(PL_subname, "__ANON__");
8515                     else
8516                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8517                     TOKEN(ANONSUB);
8518                 }
8519 #ifndef PERL_MAD
8520                 force_ident_maybe_lex('&');
8521 #endif
8522                 TOKEN(SUB);
8523             }
8524
8525         case KEY_system:
8526             LOP(OP_SYSTEM,XREF);
8527
8528         case KEY_symlink:
8529             LOP(OP_SYMLINK,XTERM);
8530
8531         case KEY_syscall:
8532             LOP(OP_SYSCALL,XTERM);
8533
8534         case KEY_sysopen:
8535             LOP(OP_SYSOPEN,XTERM);
8536
8537         case KEY_sysseek:
8538             LOP(OP_SYSSEEK,XTERM);
8539
8540         case KEY_sysread:
8541             LOP(OP_SYSREAD,XTERM);
8542
8543         case KEY_syswrite:
8544             LOP(OP_SYSWRITE,XTERM);
8545
8546         case KEY_tr:
8547         case KEY_y:
8548             s = scan_trans(s);
8549             TERM(sublex_start());
8550
8551         case KEY_tell:
8552             UNI(OP_TELL);
8553
8554         case KEY_telldir:
8555             UNI(OP_TELLDIR);
8556
8557         case KEY_tie:
8558             LOP(OP_TIE,XTERM);
8559
8560         case KEY_tied:
8561             UNI(OP_TIED);
8562
8563         case KEY_time:
8564             FUN0(OP_TIME);
8565
8566         case KEY_times:
8567             FUN0(OP_TMS);
8568
8569         case KEY_truncate:
8570             LOP(OP_TRUNCATE,XTERM);
8571
8572         case KEY_uc:
8573             UNI(OP_UC);
8574
8575         case KEY_ucfirst:
8576             UNI(OP_UCFIRST);
8577
8578         case KEY_untie:
8579             UNI(OP_UNTIE);
8580
8581         case KEY_until:
8582             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8583                 return REPORT(0);
8584             pl_yylval.ival = CopLINE(PL_curcop);
8585             OPERATOR(UNTIL);
8586
8587         case KEY_unless:
8588             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8589                 return REPORT(0);
8590             pl_yylval.ival = CopLINE(PL_curcop);
8591             OPERATOR(UNLESS);
8592
8593         case KEY_unlink:
8594             LOP(OP_UNLINK,XTERM);
8595
8596         case KEY_undef:
8597             UNIDOR(OP_UNDEF);
8598
8599         case KEY_unpack:
8600             LOP(OP_UNPACK,XTERM);
8601
8602         case KEY_utime:
8603             LOP(OP_UTIME,XTERM);
8604
8605         case KEY_umask:
8606             UNIDOR(OP_UMASK);
8607
8608         case KEY_unshift:
8609             LOP(OP_UNSHIFT,XTERM);
8610
8611         case KEY_use:
8612             s = tokenize_use(1, s);
8613             OPERATOR(USE);
8614
8615         case KEY_values:
8616             UNI(OP_VALUES);
8617
8618         case KEY_vec:
8619             LOP(OP_VEC,XTERM);
8620
8621         case KEY_when:
8622             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8623                 return REPORT(0);
8624             pl_yylval.ival = CopLINE(PL_curcop);
8625             OPERATOR(WHEN);
8626
8627         case KEY_while:
8628             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8629                 return REPORT(0);
8630             pl_yylval.ival = CopLINE(PL_curcop);
8631             OPERATOR(WHILE);
8632
8633         case KEY_warn:
8634             PL_hints |= HINT_BLOCK_SCOPE;
8635             LOP(OP_WARN,XTERM);
8636
8637         case KEY_wait:
8638             FUN0(OP_WAIT);
8639
8640         case KEY_waitpid:
8641             LOP(OP_WAITPID,XTERM);
8642
8643         case KEY_wantarray:
8644             FUN0(OP_WANTARRAY);
8645
8646         case KEY_write:
8647 #ifdef EBCDIC
8648         {
8649             char ctl_l[2];
8650             ctl_l[0] = toCTRL('L');
8651             ctl_l[1] = '\0';
8652             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8653         }
8654 #else
8655             /* Make sure $^L is defined */
8656             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8657 #endif
8658             UNI(OP_ENTERWRITE);
8659
8660         case KEY_x:
8661             if (PL_expect == XOPERATOR) {
8662                 if (*s == '=' && !PL_lex_allbrackets &&
8663                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8664                     return REPORT(0);
8665                 Mop(OP_REPEAT);
8666             }
8667             check_uni();
8668             goto just_a_word;
8669
8670         case KEY_xor:
8671             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8672                 return REPORT(0);
8673             pl_yylval.ival = OP_XOR;
8674             OPERATOR(OROP);
8675         }
8676     }}
8677 }
8678 #ifdef __SC__
8679 #pragma segment Main
8680 #endif
8681
8682 /*
8683   S_pending_ident
8684
8685   Looks up an identifier in the pad or in a package
8686
8687   Returns:
8688     PRIVATEREF if this is a lexical name.
8689     WORD       if this belongs to a package.
8690
8691   Structure:
8692       if we're in a my declaration
8693           croak if they tried to say my($foo::bar)
8694           build the ops for a my() declaration
8695       if it's an access to a my() variable
8696           build ops for access to a my() variable
8697       if in a dq string, and they've said @foo and we can't find @foo
8698           warn
8699       build ops for a bareword
8700 */
8701
8702 static int
8703 S_pending_ident(pTHX)
8704 {
8705     dVAR;
8706     PADOFFSET tmp = 0;
8707     const char pit = (char)pl_yylval.ival;
8708     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8709     /* All routes through this function want to know if there is a colon.  */
8710     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8711
8712     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8713           "### Pending identifier '%s'\n", PL_tokenbuf); });
8714
8715     /* if we're in a my(), we can't allow dynamics here.
8716        $foo'bar has already been turned into $foo::bar, so
8717        just check for colons.
8718
8719        if it's a legal name, the OP is a PADANY.
8720     */
8721     if (PL_in_my) {
8722         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8723             if (has_colon)
8724                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8725                                   "variable %s in \"our\"",
8726                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8727             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8728         }
8729         else {
8730             if (has_colon)
8731                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8732                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8733                             UTF ? SVf_UTF8 : 0);
8734
8735             pl_yylval.opval = newOP(OP_PADANY, 0);
8736             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8737                                                         UTF ? SVf_UTF8 : 0);
8738             return PRIVATEREF;
8739         }
8740     }
8741
8742     /*
8743        build the ops for accesses to a my() variable.
8744     */
8745
8746     if (!has_colon) {
8747         if (!PL_in_my)
8748             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8749                                     UTF ? SVf_UTF8 : 0);
8750         if (tmp != NOT_IN_PAD) {
8751             /* might be an "our" variable" */
8752             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8753                 /* build ops for a bareword */
8754                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8755                 HEK * const stashname = HvNAME_HEK(stash);
8756                 SV *  const sym = newSVhek(stashname);
8757                 sv_catpvs(sym, "::");
8758                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8759                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8760                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8761                 if (pit != '&')
8762                   gv_fetchsv(sym,
8763                     (PL_in_eval
8764                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8765                         : GV_ADDMULTI
8766                     ),
8767                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8768                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8769                      : SVt_PVHV));
8770                 return WORD;
8771             }
8772
8773             pl_yylval.opval = newOP(OP_PADANY, 0);
8774             pl_yylval.opval->op_targ = tmp;
8775             return PRIVATEREF;
8776         }
8777     }
8778
8779     /*
8780        Whine if they've said @foo in a doublequoted string,
8781        and @foo isn't a variable we can find in the symbol
8782        table.
8783     */
8784     if (ckWARN(WARN_AMBIGUOUS) &&
8785         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8786         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8787                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8788         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8789                 /* DO NOT warn for @- and @+ */
8790                 && !( PL_tokenbuf[2] == '\0' &&
8791                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8792            )
8793         {
8794             /* Downgraded from fatal to warning 20000522 mjd */
8795             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8796                         "Possible unintended interpolation of %"SVf" in string",
8797                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8798                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8799         }
8800     }
8801
8802     /* build ops for a bareword */
8803     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8804                                    newSVpvn_flags(PL_tokenbuf + 1,
8805                                                       tokenbuf_len - 1,
8806                                                       UTF ? SVf_UTF8 : 0 ));
8807     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8808     if (pit != '&')
8809         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8810                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8811                      | ( UTF ? SVf_UTF8 : 0 ),
8812                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8813                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8814                       : SVt_PVHV));
8815     return WORD;
8816 }
8817
8818 STATIC void
8819 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8820 {
8821     dVAR;
8822
8823     PERL_ARGS_ASSERT_CHECKCOMMA;
8824
8825     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8826         if (ckWARN(WARN_SYNTAX)) {
8827             int level = 1;
8828             const char *w;
8829             for (w = s+2; *w && level; w++) {
8830                 if (*w == '(')
8831                     ++level;
8832                 else if (*w == ')')
8833                     --level;
8834             }
8835             while (isSPACE(*w))
8836                 ++w;
8837             /* the list of chars below is for end of statements or
8838              * block / parens, boolean operators (&&, ||, //) and branch
8839              * constructs (or, and, if, until, unless, while, err, for).
8840              * Not a very solid hack... */
8841             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8842                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8843                             "%s (...) interpreted as function",name);
8844         }
8845     }
8846     while (s < PL_bufend && isSPACE(*s))
8847         s++;
8848     if (*s == '(')
8849         s++;
8850     while (s < PL_bufend && isSPACE(*s))
8851         s++;
8852     if (isIDFIRST_lazy_if(s,UTF)) {
8853         const char * const w = s;
8854         s += UTF ? UTF8SKIP(s) : 1;
8855         while (isALNUM_lazy_if(s,UTF))
8856             s += UTF ? UTF8SKIP(s) : 1;
8857         while (s < PL_bufend && isSPACE(*s))
8858             s++;
8859         if (*s == ',') {
8860             GV* gv;
8861             if (keyword(w, s - w, 0))
8862                 return;
8863
8864             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8865             if (gv && GvCVu(gv))
8866                 return;
8867             Perl_croak(aTHX_ "No comma allowed after %s", what);
8868         }
8869     }
8870 }
8871
8872 /* Either returns sv, or mortalizes sv and returns a new SV*.
8873    Best used as sv=new_constant(..., sv, ...).
8874    If s, pv are NULL, calls subroutine with one argument,
8875    and type is used with error messages only. */
8876
8877 STATIC SV *
8878 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8879                SV *sv, SV *pv, const char *type, STRLEN typelen)
8880 {
8881     dVAR; dSP;
8882     HV * table = GvHV(PL_hintgv);                /* ^H */
8883     SV *res;
8884     SV **cvp;
8885     SV *cv, *typesv;
8886     const char *why1 = "", *why2 = "", *why3 = "";
8887
8888     PERL_ARGS_ASSERT_NEW_CONSTANT;
8889
8890     /* charnames doesn't work well if there have been errors found */
8891     if (PL_error_count > 0 && strEQ(key,"charnames"))
8892         return &PL_sv_undef;
8893
8894     if (!table
8895         || ! (PL_hints & HINT_LOCALIZE_HH)
8896         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8897         || ! SvOK(*cvp))
8898     {
8899         SV *msg;
8900
8901         /* Here haven't found what we're looking for.  If it is charnames,
8902          * perhaps it needs to be loaded.  Try doing that before giving up */
8903         if (strEQ(key,"charnames")) {
8904             Perl_load_module(aTHX_
8905                             0,
8906                             newSVpvs("_charnames"),
8907                              /* version parameter; no need to specify it, as if
8908                               * we get too early a version, will fail anyway,
8909                               * not being able to find '_charnames' */
8910                             NULL,
8911                             newSVpvs(":full"),
8912                             newSVpvs(":short"),
8913                             NULL);
8914             SPAGAIN;
8915             table = GvHV(PL_hintgv);
8916             if (table
8917                 && (PL_hints & HINT_LOCALIZE_HH)
8918                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8919                 && SvOK(*cvp))
8920             {
8921                 goto now_ok;
8922             }
8923         }
8924         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8925             msg = Perl_newSVpvf(aTHX_
8926                             "Constant(%s) unknown", (type ? type: "undef"));
8927         }
8928         else {
8929         why1 = "$^H{";
8930         why2 = key;
8931         why3 = "} is not defined";
8932     report:
8933         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8934                             (type ? type: "undef"), why1, why2, why3);
8935         }
8936         yyerror(SvPVX_const(msg));
8937         SvREFCNT_dec(msg);
8938         return sv;
8939     }
8940 now_ok:
8941     sv_2mortal(sv);                     /* Parent created it permanently */
8942     cv = *cvp;
8943     if (!pv && s)
8944         pv = newSVpvn_flags(s, len, SVs_TEMP);
8945     if (type && pv)
8946         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8947     else
8948         typesv = &PL_sv_undef;
8949
8950     PUSHSTACKi(PERLSI_OVERLOAD);
8951     ENTER ;
8952     SAVETMPS;
8953
8954     PUSHMARK(SP) ;
8955     EXTEND(sp, 3);
8956     if (pv)
8957         PUSHs(pv);
8958     PUSHs(sv);
8959     if (pv)
8960         PUSHs(typesv);
8961     PUTBACK;
8962     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8963
8964     SPAGAIN ;
8965
8966     /* Check the eval first */
8967     if (!PL_in_eval && SvTRUE(ERRSV)) {
8968         sv_catpvs(ERRSV, "Propagated");
8969         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8970         (void)POPs;
8971         res = SvREFCNT_inc_simple(sv);
8972     }
8973     else {
8974         res = POPs;
8975         SvREFCNT_inc_simple_void(res);
8976     }
8977
8978     PUTBACK ;
8979     FREETMPS ;
8980     LEAVE ;
8981     POPSTACK;
8982
8983     if (!SvOK(res)) {
8984         why1 = "Call to &{$^H{";
8985         why2 = key;
8986         why3 = "}} did not return a defined value";
8987         sv = res;
8988         goto report;
8989     }
8990
8991     return res;
8992 }
8993
8994 /* Returns a NUL terminated string, with the length of the string written to
8995    *slp
8996    */
8997 STATIC char *
8998 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8999 {
9000     dVAR;
9001     char *d = dest;
9002     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9003
9004     PERL_ARGS_ASSERT_SCAN_WORD;
9005
9006     for (;;) {
9007         if (d >= e)
9008             Perl_croak(aTHX_ ident_too_long);
9009         if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
9010             *d++ = *s++;
9011         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
9012             *d++ = ':';
9013             *d++ = ':';
9014             s++;
9015         }
9016         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
9017             *d++ = *s++;
9018             *d++ = *s++;
9019         }
9020         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9021             char *t = s + UTF8SKIP(s);
9022             size_t len;
9023             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9024                 t += UTF8SKIP(t);
9025             len = t - s;
9026             if (d + len > e)
9027                 Perl_croak(aTHX_ ident_too_long);
9028             Copy(s, d, len, char);
9029             d += len;
9030             s = t;
9031         }
9032         else {
9033             *d = '\0';
9034             *slp = d - dest;
9035             return s;
9036         }
9037     }
9038 }
9039
9040 STATIC char *
9041 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9042 {
9043     dVAR;
9044     char *bracket = NULL;
9045     char funny = *s++;
9046     char *d = dest;
9047     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9048
9049     PERL_ARGS_ASSERT_SCAN_IDENT;
9050
9051     if (isSPACE(*s))
9052         s = PEEKSPACE(s);
9053     if (isDIGIT(*s)) {
9054         while (isDIGIT(*s)) {
9055             if (d >= e)
9056                 Perl_croak(aTHX_ ident_too_long);
9057             *d++ = *s++;
9058         }
9059     }
9060     else {
9061         for (;;) {
9062             if (d >= e)
9063                 Perl_croak(aTHX_ ident_too_long);
9064             if (isALNUM(*s))    /* UTF handled below */
9065                 *d++ = *s++;
9066             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9067                 *d++ = ':';
9068                 *d++ = ':';
9069                 s++;
9070             }
9071             else if (*s == ':' && s[1] == ':') {
9072                 *d++ = *s++;
9073                 *d++ = *s++;
9074             }
9075             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9076                 char *t = s + UTF8SKIP(s);
9077                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9078                     t += UTF8SKIP(t);
9079                 if (d + (t - s) > e)
9080                     Perl_croak(aTHX_ ident_too_long);
9081                 Copy(s, d, t - s, char);
9082                 d += t - s;
9083                 s = t;
9084             }
9085             else
9086                 break;
9087         }
9088     }
9089     *d = '\0';
9090     d = dest;
9091     if (*d) {
9092         if (PL_lex_state != LEX_NORMAL)
9093             PL_lex_state = LEX_INTERPENDMAYBE;
9094         return s;
9095     }
9096     if (*s == '$' && s[1] &&
9097         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9098     {
9099         return s;
9100     }
9101     if (*s == '{') {
9102         bracket = s;
9103         s++;
9104     }
9105     if (s < send) {
9106         if (UTF) {
9107             const STRLEN skip = UTF8SKIP(s);
9108             STRLEN i;
9109             d[skip] = '\0';
9110             for ( i = 0; i < skip; i++ )
9111                 d[i] = *s++;
9112         }
9113         else {
9114             *d = *s++;
9115             d[1] = '\0';
9116         }
9117     }
9118     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9119         *d = toCTRL(*s);
9120         s++;
9121     }
9122     else if (ck_uni && !bracket)
9123         check_uni();
9124     if (bracket) {
9125         if (isSPACE(s[-1])) {
9126             while (s < send) {
9127                 const char ch = *s++;
9128                 if (!SPACE_OR_TAB(ch)) {
9129                     *d = ch;
9130                     break;
9131                 }
9132             }
9133         }
9134         if (isIDFIRST_lazy_if(d,UTF)) {
9135             d += UTF8SKIP(d);
9136             if (UTF) {
9137                 char *end = s;
9138                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9139                     end += UTF8SKIP(end);
9140                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9141                         end += UTF8SKIP(end);
9142                 }
9143                 Copy(s, d, end - s, char);
9144                 d += end - s;
9145                 s = end;
9146             }
9147             else {
9148                 while ((isALNUM(*s) || *s == ':') && d < e)
9149                     *d++ = *s++;
9150                 if (d >= e)
9151                     Perl_croak(aTHX_ ident_too_long);
9152             }
9153             *d = '\0';
9154             while (s < send && SPACE_OR_TAB(*s))
9155                 s++;
9156             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9157                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9158                     const char * const brack =
9159                         (const char *)
9160                         ((*s == '[') ? "[...]" : "{...}");
9161    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9162                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9163                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9164                         funny, dest, brack, funny, dest, brack);
9165                 }
9166                 bracket++;
9167                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9168                 PL_lex_allbrackets++;
9169                 return s;
9170             }
9171         }
9172         /* Handle extended ${^Foo} variables
9173          * 1999-02-27 mjd-perl-patch@plover.com */
9174         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9175                  && isALNUM(*s))
9176         {
9177             d++;
9178             while (isALNUM(*s) && d < e) {
9179                 *d++ = *s++;
9180             }
9181             if (d >= e)
9182                 Perl_croak(aTHX_ ident_too_long);
9183             *d = '\0';
9184         }
9185         if (*s == '}') {
9186             s++;
9187             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9188                 PL_lex_state = LEX_INTERPEND;
9189                 PL_expect = XREF;
9190             }
9191             if (PL_lex_state == LEX_NORMAL) {
9192                 if (ckWARN(WARN_AMBIGUOUS) &&
9193                     (keyword(dest, d - dest, 0)
9194                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9195                 {
9196                     SV *tmp = newSVpvn_flags( dest, d - dest,
9197                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9198                     if (funny == '#')
9199                         funny = '@';
9200                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9201                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9202                         funny, tmp, funny, tmp);
9203                 }
9204             }
9205         }
9206         else {
9207             s = bracket;                /* let the parser handle it */
9208             *dest = '\0';
9209         }
9210     }
9211     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9212         PL_lex_state = LEX_INTERPEND;
9213     return s;
9214 }
9215
9216 static bool
9217 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9218
9219     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9220      * the parse starting at 's', based on the subset that are valid in this
9221      * context input to this routine in 'valid_flags'. Advances s.  Returns
9222      * TRUE if the input should be treated as a valid flag, so the next char
9223      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9224      * first call on the current regex.  This routine will set it to any
9225      * charset modifier found.  The caller shouldn't change it.  This way,
9226      * another charset modifier encountered in the parse can be detected as an
9227      * error, as we have decided to allow only one */
9228
9229     const char c = **s;
9230     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9231
9232     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9233         if (isALNUM_lazy_if(*s, UTF)) {
9234             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9235                        UTF ? SVf_UTF8 : 0);
9236             (*s) += charlen;
9237             /* Pretend that it worked, so will continue processing before
9238              * dieing */
9239             return TRUE;
9240         }
9241         return FALSE;
9242     }
9243
9244     switch (c) {
9245
9246         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9247         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9248         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9249         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9250         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9251         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9252         case LOCALE_PAT_MOD:
9253             if (*charset) {
9254                 goto multiple_charsets;
9255             }
9256             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9257             *charset = c;
9258             break;
9259         case UNICODE_PAT_MOD:
9260             if (*charset) {
9261                 goto multiple_charsets;
9262             }
9263             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9264             *charset = c;
9265             break;
9266         case ASCII_RESTRICT_PAT_MOD:
9267             if (! *charset) {
9268                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9269             }
9270             else {
9271
9272                 /* Error if previous modifier wasn't an 'a', but if it was, see
9273                  * if, and accept, a second occurrence (only) */
9274                 if (*charset != 'a'
9275                     || get_regex_charset(*pmfl)
9276                         != REGEX_ASCII_RESTRICTED_CHARSET)
9277                 {
9278                         goto multiple_charsets;
9279                 }
9280                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9281             }
9282             *charset = c;
9283             break;
9284         case DEPENDS_PAT_MOD:
9285             if (*charset) {
9286                 goto multiple_charsets;
9287             }
9288             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9289             *charset = c;
9290             break;
9291     }
9292
9293     (*s)++;
9294     return TRUE;
9295
9296     multiple_charsets:
9297         if (*charset != c) {
9298             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9299         }
9300         else if (c == 'a') {
9301             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9302         }
9303         else {
9304             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9305         }
9306
9307         /* Pretend that it worked, so will continue processing before dieing */
9308         (*s)++;
9309         return TRUE;
9310 }
9311
9312 STATIC char *
9313 S_scan_pat(pTHX_ char *start, I32 type)
9314 {
9315     dVAR;
9316     PMOP *pm;
9317     char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9318     const char * const valid_flags =
9319         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9320     char charset = '\0';    /* character set modifier */
9321 #ifdef PERL_MAD
9322     char *modstart;
9323 #endif
9324
9325     PERL_ARGS_ASSERT_SCAN_PAT;
9326
9327     /* this was only needed for the initial scan_str; set it to false
9328      * so that any (?{}) code blocks etc are parsed normally */
9329     PL_reg_state.re_reparsing = FALSE;
9330     if (!s) {
9331         const char * const delimiter = skipspace(start);
9332         Perl_croak(aTHX_
9333                    (const char *)
9334                    (*delimiter == '?'
9335                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9336                     : "Search pattern not terminated" ));
9337     }
9338
9339     pm = (PMOP*)newPMOP(type, 0);
9340     if (PL_multi_open == '?') {
9341         /* This is the only point in the code that sets PMf_ONCE:  */
9342         pm->op_pmflags |= PMf_ONCE;
9343
9344         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9345            allows us to restrict the list needed by reset to just the ??
9346            matches.  */
9347         assert(type != OP_TRANS);
9348         if (PL_curstash) {
9349             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9350             U32 elements;
9351             if (!mg) {
9352                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9353                                  0);
9354             }
9355             elements = mg->mg_len / sizeof(PMOP**);
9356             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9357             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9358             mg->mg_len = elements * sizeof(PMOP**);
9359             PmopSTASH_set(pm,PL_curstash);
9360         }
9361     }
9362 #ifdef PERL_MAD
9363     modstart = s;
9364 #endif
9365
9366     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9367      * anon CV. False positives like qr/[(?{]/ are harmless */
9368
9369     if (type == OP_QR) {
9370         STRLEN len;
9371         char *e, *p = SvPV(PL_lex_stuff, len);
9372         e = p + len;
9373         for (; p < e; p++) {
9374             if (p[0] == '(' && p[1] == '?'
9375                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9376             {
9377                 pm->op_pmflags |= PMf_HAS_CV;
9378                 break;
9379             }
9380         }
9381         pm->op_pmflags |= PMf_IS_QR;
9382     }
9383
9384     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9385 #ifdef PERL_MAD
9386     if (PL_madskills && modstart != s) {
9387         SV* tmptoken = newSVpvn(modstart, s - modstart);
9388         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9389     }
9390 #endif
9391     /* issue a warning if /c is specified,but /g is not */
9392     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9393     {
9394         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9395                        "Use of /c modifier is meaningless without /g" );
9396     }
9397
9398     PL_lex_op = (OP*)pm;
9399     pl_yylval.ival = OP_MATCH;
9400     return s;
9401 }
9402
9403 STATIC char *
9404 S_scan_subst(pTHX_ char *start)
9405 {
9406     dVAR;
9407     char *s;
9408     PMOP *pm;
9409     I32 first_start;
9410     I32 es = 0;
9411     char charset = '\0';    /* character set modifier */
9412 #ifdef PERL_MAD
9413     char *modstart;
9414 #endif
9415
9416     PERL_ARGS_ASSERT_SCAN_SUBST;
9417
9418     pl_yylval.ival = OP_NULL;
9419
9420     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9421
9422     if (!s)
9423         Perl_croak(aTHX_ "Substitution pattern not terminated");
9424
9425     if (s[-1] == PL_multi_open)
9426         s--;
9427 #ifdef PERL_MAD
9428     if (PL_madskills) {
9429         CURMAD('q', PL_thisopen);
9430         CURMAD('_', PL_thiswhite);
9431         CURMAD('E', PL_thisstuff);
9432         CURMAD('Q', PL_thisclose);
9433         PL_realtokenstart = s - SvPVX(PL_linestr);
9434     }
9435 #endif
9436
9437     first_start = PL_multi_start;
9438     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9439     if (!s) {
9440         if (PL_lex_stuff) {
9441             SvREFCNT_dec(PL_lex_stuff);
9442             PL_lex_stuff = NULL;
9443         }
9444         Perl_croak(aTHX_ "Substitution replacement not terminated");
9445     }
9446     PL_multi_start = first_start;       /* so whole substitution is taken together */
9447
9448     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9449
9450 #ifdef PERL_MAD
9451     if (PL_madskills) {
9452         CURMAD('z', PL_thisopen);
9453         CURMAD('R', PL_thisstuff);
9454         CURMAD('Z', PL_thisclose);
9455     }
9456     modstart = s;
9457 #endif
9458
9459     while (*s) {
9460         if (*s == EXEC_PAT_MOD) {
9461             s++;
9462             es++;
9463         }
9464         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9465         {
9466             break;
9467         }
9468     }
9469
9470 #ifdef PERL_MAD
9471     if (PL_madskills) {
9472         if (modstart != s)
9473             curmad('m', newSVpvn(modstart, s - modstart));
9474         append_madprops(PL_thismad, (OP*)pm, 0);
9475         PL_thismad = 0;
9476     }
9477 #endif
9478     if ((pm->op_pmflags & PMf_CONTINUE)) {
9479         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9480     }
9481
9482     if (es) {
9483         SV * const repl = newSVpvs("");
9484
9485         PL_multi_end = 0;
9486         pm->op_pmflags |= PMf_EVAL;
9487         while (es-- > 0) {
9488             if (es)
9489                 sv_catpvs(repl, "eval ");
9490             else
9491                 sv_catpvs(repl, "do ");
9492         }
9493         sv_catpvs(repl, "{");
9494         sv_catsv(repl, PL_sublex_info.repl);
9495         sv_catpvs(repl, "}");
9496         SvEVALED_on(repl);
9497         SvREFCNT_dec(PL_sublex_info.repl);
9498         PL_sublex_info.repl = repl;
9499     }
9500
9501     PL_lex_op = (OP*)pm;
9502     pl_yylval.ival = OP_SUBST;
9503     return s;
9504 }
9505
9506 STATIC char *
9507 S_scan_trans(pTHX_ char *start)
9508 {
9509     dVAR;
9510     char* s;
9511     OP *o;
9512     U8 squash;
9513     U8 del;
9514     U8 complement;
9515     bool nondestruct = 0;
9516 #ifdef PERL_MAD
9517     char *modstart;
9518 #endif
9519
9520     PERL_ARGS_ASSERT_SCAN_TRANS;
9521
9522     pl_yylval.ival = OP_NULL;
9523
9524     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9525     if (!s)
9526         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9527
9528     if (s[-1] == PL_multi_open)
9529         s--;
9530 #ifdef PERL_MAD
9531     if (PL_madskills) {
9532         CURMAD('q', PL_thisopen);
9533         CURMAD('_', PL_thiswhite);
9534         CURMAD('E', PL_thisstuff);
9535         CURMAD('Q', PL_thisclose);
9536         PL_realtokenstart = s - SvPVX(PL_linestr);
9537     }
9538 #endif
9539
9540     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9541     if (!s) {
9542         if (PL_lex_stuff) {
9543             SvREFCNT_dec(PL_lex_stuff);
9544             PL_lex_stuff = NULL;
9545         }
9546         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9547     }
9548     if (PL_madskills) {
9549         CURMAD('z', PL_thisopen);
9550         CURMAD('R', PL_thisstuff);
9551         CURMAD('Z', PL_thisclose);
9552     }
9553
9554     complement = del = squash = 0;
9555 #ifdef PERL_MAD
9556     modstart = s;
9557 #endif
9558     while (1) {
9559         switch (*s) {
9560         case 'c':
9561             complement = OPpTRANS_COMPLEMENT;
9562             break;
9563         case 'd':
9564             del = OPpTRANS_DELETE;
9565             break;
9566         case 's':
9567             squash = OPpTRANS_SQUASH;
9568             break;
9569         case 'r':
9570             nondestruct = 1;
9571             break;
9572         default:
9573             goto no_more;
9574         }
9575         s++;
9576     }
9577   no_more:
9578
9579     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9580     o->op_private &= ~OPpTRANS_ALL;
9581     o->op_private |= del|squash|complement|
9582       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9583       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9584
9585     PL_lex_op = o;
9586     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9587
9588 #ifdef PERL_MAD
9589     if (PL_madskills) {
9590         if (modstart != s)
9591             curmad('m', newSVpvn(modstart, s - modstart));
9592         append_madprops(PL_thismad, o, 0);
9593         PL_thismad = 0;
9594     }
9595 #endif
9596
9597     return s;
9598 }
9599
9600 /* scan_heredoc
9601    Takes a pointer to the first < in <<FOO.
9602    Returns a pointer to the byte following <<FOO.
9603
9604    This function scans a heredoc, which involves different methods
9605    depending on whether we are in a string eval, quoted construct, etc.
9606    This is because PL_linestr could containing a single line of input, or
9607    a whole string being evalled, or the contents of the current quote-
9608    like operator.
9609
9610    The two basic methods are:
9611     - Steal lines from the input stream
9612     - Scan the heredoc in PL_linestr and remove it therefrom
9613
9614    In a file scope or filtered eval, the first method is used; in a
9615    string eval, the second.
9616
9617    In a quote-like operator, we have to choose between the two,
9618    depending on where we can find a newline.  We peek into outer lex-
9619    ing scopes until we find one with a newline in it.  If we reach the
9620    outermost lexing scope and it is a file, we use the stream method.
9621    Otherwise it is treated as an eval.
9622 */
9623
9624 STATIC char *
9625 S_scan_heredoc(pTHX_ register char *s)
9626 {
9627     dVAR;
9628     I32 op_type = OP_SCALAR;
9629     I32 len;
9630     SV *tmpstr;
9631     char term;
9632     char *d;
9633     char *e;
9634     char *peek;
9635     const bool infile = PL_rsfp || PL_parser->filtered;
9636     LEXSHARED *shared = PL_parser->lex_shared;
9637 #ifdef PERL_MAD
9638     I32 stuffstart = s - SvPVX(PL_linestr);
9639     char *tstart;
9640
9641     PL_realtokenstart = -1;
9642 #endif
9643
9644     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9645
9646     s += 2;
9647     d = PL_tokenbuf + 1;
9648     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9649     *PL_tokenbuf = '\n';
9650     peek = s;
9651     while (SPACE_OR_TAB(*peek))
9652         peek++;
9653     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9654         s = peek;
9655         term = *s++;
9656         s = delimcpy(d, e, s, PL_bufend, term, &len);
9657         if (s == PL_bufend)
9658             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9659         d += len;
9660         s++;
9661     }
9662     else {
9663         if (*s == '\\')
9664             /* <<\FOO is equivalent to <<'FOO' */
9665             s++, term = '\'';
9666         else
9667             term = '"';
9668         if (!isALNUM_lazy_if(s,UTF))
9669             deprecate("bare << to mean <<\"\"");
9670         for (; isALNUM_lazy_if(s,UTF); s++) {
9671             if (d < e)
9672                 *d++ = *s;
9673         }
9674     }
9675     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9676         Perl_croak(aTHX_ "Delimiter for here document is too long");
9677     *d++ = '\n';
9678     *d = '\0';
9679     len = d - PL_tokenbuf;
9680
9681 #ifdef PERL_MAD
9682     if (PL_madskills) {
9683         tstart = PL_tokenbuf + 1;
9684         PL_thisclose = newSVpvn(tstart, len - 1);
9685         tstart = SvPVX(PL_linestr) + stuffstart;
9686         PL_thisopen = newSVpvn(tstart, s - tstart);
9687         stuffstart = s - SvPVX(PL_linestr);
9688     }
9689 #endif
9690 #ifndef PERL_STRICT_CR
9691     d = strchr(s, '\r');
9692     if (d) {
9693         char * const olds = s;
9694         s = d;
9695         while (s < PL_bufend) {
9696             if (*s == '\r') {
9697                 *d++ = '\n';
9698                 if (*++s == '\n')
9699                     s++;
9700             }
9701             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9702                 *d++ = *s++;
9703                 s++;
9704             }
9705             else
9706                 *d++ = *s++;
9707         }
9708         *d = '\0';
9709         PL_bufend = d;
9710         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9711         s = olds;
9712     }
9713 #endif
9714 #ifdef PERL_MAD
9715     if (PL_madskills) {
9716         tstart = SvPVX(PL_linestr) + stuffstart;
9717         if (PL_thisstuff)
9718             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9719         else
9720             PL_thisstuff = newSVpvn(tstart, s - tstart);
9721     }
9722
9723     stuffstart = s - SvPVX(PL_linestr);
9724 #endif
9725
9726     tmpstr = newSV_type(SVt_PVIV);
9727     SvGROW(tmpstr, 80);
9728     if (term == '\'') {
9729         op_type = OP_CONST;
9730         SvIV_set(tmpstr, -1);
9731     }
9732     else if (term == '`') {
9733         op_type = OP_BACKTICK;
9734         SvIV_set(tmpstr, '\\');
9735     }
9736
9737     PL_multi_start = CopLINE(PL_curcop) + 1;
9738     PL_multi_open = PL_multi_close = '<';
9739     /* inside a string eval or quote-like operator */
9740     if (!infile || PL_lex_inwhat) {
9741         SV *linestr;
9742         char *bufend;
9743         char * const olds = s;
9744         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9745         /* These two fields are not set until an inner lexing scope is
9746            entered.  But we need them set here. */
9747         shared->ls_bufptr  = s;
9748         shared->ls_linestr = PL_linestr;
9749         if (PL_lex_inwhat)
9750           /* Look for a newline.  If the current buffer does not have one,
9751              peek into the line buffer of the parent lexing scope, going
9752              up as many levels as necessary to find one with a newline
9753              after bufptr.
9754            */
9755           while (!(s = (char *)memchr(
9756                     (void *)shared->ls_bufptr, '\n',
9757                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9758                 ))) {
9759             shared = shared->ls_prev;
9760             /* shared is only null if we have gone beyond the outermost
9761                lexing scope.  In a file, we will have broken out of the
9762                loop in the previous iteration.  In an eval, the string buf-
9763                fer ends with "\n;", so the while condition below will have
9764                evaluated to false.  So shared can never be null. */
9765             assert(shared);
9766             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9767                most lexing scope.  In a file, shared->ls_linestr at that
9768                level is just one line, so there is no body to steal. */
9769             if (infile && !shared->ls_prev) {
9770                 s = olds;
9771                 goto streaming;
9772             }
9773           }
9774         else {  /* eval */
9775             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9776             assert(s);
9777         }
9778         linestr = shared->ls_linestr;
9779         bufend = SvEND(linestr);
9780         d = s;
9781         while (s < bufend &&
9782           (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9783             if (*s++ == '\n')
9784                 ++shared->herelines;
9785         }
9786         if (s >= bufend) {
9787             goto interminable;
9788         }
9789         sv_setpvn(tmpstr,d+1,s-d);
9790 #ifdef PERL_MAD
9791         if (PL_madskills) {
9792             if (PL_thisstuff)
9793                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9794             else
9795                 PL_thisstuff = newSVpvn(d + 1, s - d);
9796             stuffstart = s - SvPVX(PL_linestr);
9797         }
9798 #endif
9799         s += len - 1;
9800         /* the preceding stmt passes a newline */
9801         shared->herelines++;
9802
9803         /* s now points to the newline after the heredoc terminator.
9804            d points to the newline before the body of the heredoc.
9805          */
9806
9807         /* We are going to modify linestr in place here, so set
9808            aside copies of the string if necessary for re-evals or
9809            (caller $n)[6]. */
9810         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9811            check shared->re_eval_str. */
9812         if (shared->re_eval_start || shared->re_eval_str) {
9813             /* Set aside the rest of the regexp */
9814             if (!shared->re_eval_str)
9815                 shared->re_eval_str =
9816                        newSVpvn(shared->re_eval_start,
9817                                 bufend - shared->re_eval_start);
9818             shared->re_eval_start -= s-d;
9819         }
9820         if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9821          && cx->blk_eval.cur_text == linestr) {
9822             cx->blk_eval.cur_text = newSVsv(linestr);
9823             SvSCREAM_on(cx->blk_eval.cur_text);
9824         }
9825         /* Copy everything from s onwards back to d. */
9826         Move(s,d,bufend-s + 1,char);
9827         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9828         /* Setting PL_bufend only applies when we have not dug deeper
9829            into other scopes, because sublex_done sets PL_bufend to
9830            SvEND(PL_linestr). */
9831         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9832         s = olds;
9833     }
9834     else
9835     {
9836       SV *linestr_save;
9837      streaming:
9838       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9839       term = PL_tokenbuf[1];
9840       len--;
9841       linestr_save = PL_linestr; /* must restore this afterwards */
9842       d = s;                     /* and this */
9843       PL_linestr = newSVpvs("");
9844       PL_bufend = SvPVX(PL_linestr);
9845       while (1) {
9846 #ifdef PERL_MAD
9847         if (PL_madskills) {
9848             tstart = SvPVX(PL_linestr) + stuffstart;
9849             if (PL_thisstuff)
9850                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9851             else
9852                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9853         }
9854 #endif
9855         PL_bufptr = PL_bufend;
9856         CopLINE_set(PL_curcop,
9857                     PL_multi_start + shared->herelines);
9858         if (!lex_next_chunk(LEX_NO_TERM)
9859          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9860             SvREFCNT_dec(linestr_save);
9861             goto interminable;
9862         }
9863         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9864         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9865             lex_grow_linestr(SvCUR(PL_linestr) + 2);
9866             sv_catpvs(PL_linestr, "\n\0");
9867         }
9868         s = PL_bufptr;
9869 #ifdef PERL_MAD
9870         stuffstart = s - SvPVX(PL_linestr);
9871 #endif
9872         shared->herelines++;
9873         PL_last_lop = PL_last_uni = NULL;
9874 #ifndef PERL_STRICT_CR
9875         if (PL_bufend - PL_linestart >= 2) {
9876             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9877                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9878             {
9879                 PL_bufend[-2] = '\n';
9880                 PL_bufend--;
9881                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9882             }
9883             else if (PL_bufend[-1] == '\r')
9884                 PL_bufend[-1] = '\n';
9885         }
9886         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9887             PL_bufend[-1] = '\n';
9888 #endif
9889         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9890             SvREFCNT_dec(PL_linestr);
9891             PL_linestr = linestr_save;
9892             PL_linestart = SvPVX(linestr_save);
9893             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9894             s = d;
9895             break;
9896         }
9897         else {
9898             sv_catsv(tmpstr,PL_linestr);
9899         }
9900       }
9901     }
9902     PL_multi_end = CopLINE(PL_curcop);
9903     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9904         SvPV_shrink_to_cur(tmpstr);
9905     }
9906     if (!IN_BYTES) {
9907         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9908             SvUTF8_on(tmpstr);
9909         else if (PL_encoding)
9910             sv_recode_to_utf8(tmpstr, PL_encoding);
9911     }
9912     PL_lex_stuff = tmpstr;
9913     pl_yylval.ival = op_type;
9914     return s;
9915
9916   interminable:
9917     SvREFCNT_dec(tmpstr);
9918     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9919     missingterm(PL_tokenbuf + 1);
9920 }
9921
9922 /* scan_inputsymbol
9923    takes: current position in input buffer
9924    returns: new position in input buffer
9925    side-effects: pl_yylval and lex_op are set.
9926
9927    This code handles:
9928
9929    <>           read from ARGV
9930    <FH>         read from filehandle
9931    <pkg::FH>    read from package qualified filehandle
9932    <pkg'FH>     read from package qualified filehandle
9933    <$fh>        read from filehandle in $fh
9934    <*.h>        filename glob
9935
9936 */
9937
9938 STATIC char *
9939 S_scan_inputsymbol(pTHX_ char *start)
9940 {
9941     dVAR;
9942     char *s = start;            /* current position in buffer */
9943     char *end;
9944     I32 len;
9945     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9946     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9947
9948     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9949
9950     end = strchr(s, '\n');
9951     if (!end)
9952         end = PL_bufend;
9953     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9954
9955     /* die if we didn't have space for the contents of the <>,
9956        or if it didn't end, or if we see a newline
9957     */
9958
9959     if (len >= (I32)sizeof PL_tokenbuf)
9960         Perl_croak(aTHX_ "Excessively long <> operator");
9961     if (s >= end)
9962         Perl_croak(aTHX_ "Unterminated <> operator");
9963
9964     s++;
9965
9966     /* check for <$fh>
9967        Remember, only scalar variables are interpreted as filehandles by
9968        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9969        treated as a glob() call.
9970        This code makes use of the fact that except for the $ at the front,
9971        a scalar variable and a filehandle look the same.
9972     */
9973     if (*d == '$' && d[1]) d++;
9974
9975     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9976     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9977         d += UTF ? UTF8SKIP(d) : 1;
9978
9979     /* If we've tried to read what we allow filehandles to look like, and
9980        there's still text left, then it must be a glob() and not a getline.
9981        Use scan_str to pull out the stuff between the <> and treat it
9982        as nothing more than a string.
9983     */
9984
9985     if (d - PL_tokenbuf != len) {
9986         pl_yylval.ival = OP_GLOB;
9987         s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9988         if (!s)
9989            Perl_croak(aTHX_ "Glob not terminated");
9990         return s;
9991     }
9992     else {
9993         bool readline_overriden = FALSE;
9994         GV *gv_readline;
9995         GV **gvp;
9996         /* we're in a filehandle read situation */
9997         d = PL_tokenbuf;
9998
9999         /* turn <> into <ARGV> */
10000         if (!len)
10001             Copy("ARGV",d,5,char);
10002
10003         /* Check whether readline() is overriden */
10004         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10005         if ((gv_readline
10006                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10007                 ||
10008                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10009                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10010                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10011             readline_overriden = TRUE;
10012
10013         /* if <$fh>, create the ops to turn the variable into a
10014            filehandle
10015         */
10016         if (*d == '$') {
10017             /* try to find it in the pad for this block, otherwise find
10018                add symbol table ops
10019             */
10020             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10021             if (tmp != NOT_IN_PAD) {
10022                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10023                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10024                     HEK * const stashname = HvNAME_HEK(stash);
10025                     SV * const sym = sv_2mortal(newSVhek(stashname));
10026                     sv_catpvs(sym, "::");
10027                     sv_catpv(sym, d+1);
10028                     d = SvPVX(sym);
10029                     goto intro_sym;
10030                 }
10031                 else {
10032                     OP * const o = newOP(OP_PADSV, 0);
10033                     o->op_targ = tmp;
10034                     PL_lex_op = readline_overriden
10035                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10036                                 op_append_elem(OP_LIST, o,
10037                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10038                         : (OP*)newUNOP(OP_READLINE, 0, o);
10039                 }
10040             }
10041             else {
10042                 GV *gv;
10043                 ++d;
10044 intro_sym:
10045                 gv = gv_fetchpv(d,
10046                                 (PL_in_eval
10047                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10048                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10049                                 SVt_PV);
10050                 PL_lex_op = readline_overriden
10051                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10052                             op_append_elem(OP_LIST,
10053                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10054                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10055                     : (OP*)newUNOP(OP_READLINE, 0,
10056                             newUNOP(OP_RV2SV, 0,
10057                                 newGVOP(OP_GV, 0, gv)));
10058             }
10059             if (!readline_overriden)
10060                 PL_lex_op->op_flags |= OPf_SPECIAL;
10061             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10062             pl_yylval.ival = OP_NULL;
10063         }
10064
10065         /* If it's none of the above, it must be a literal filehandle
10066            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10067         else {
10068             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10069             PL_lex_op = readline_overriden
10070                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10071                         op_append_elem(OP_LIST,
10072                             newGVOP(OP_GV, 0, gv),
10073                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10074                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10075             pl_yylval.ival = OP_NULL;
10076         }
10077     }
10078
10079     return s;
10080 }
10081
10082
10083 /* scan_str
10084    takes: start position in buffer
10085           keep_quoted preserve \ on the embedded delimiter(s)
10086           keep_delims preserve the delimiters around the string
10087           re_reparse  compiling a run-time /(?{})/:
10088                         collapse // to /,  and skip encoding src
10089    returns: position to continue reading from buffer
10090    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10091         updates the read buffer.
10092
10093    This subroutine pulls a string out of the input.  It is called for:
10094         q               single quotes           q(literal text)
10095         '               single quotes           'literal text'
10096         qq              double quotes           qq(interpolate $here please)
10097         "               double quotes           "interpolate $here please"
10098         qx              backticks               qx(/bin/ls -l)
10099         `               backticks               `/bin/ls -l`
10100         qw              quote words             @EXPORT_OK = qw( func() $spam )
10101         m//             regexp match            m/this/
10102         s///            regexp substitute       s/this/that/
10103         tr///           string transliterate    tr/this/that/
10104         y///            string transliterate    y/this/that/
10105         ($*@)           sub prototypes          sub foo ($)
10106         (stuff)         sub attr parameters     sub foo : attr(stuff)
10107         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10108
10109    In most of these cases (all but <>, patterns and transliterate)
10110    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10111    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10112    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10113    calls scan_str().
10114
10115    It skips whitespace before the string starts, and treats the first
10116    character as the delimiter.  If the delimiter is one of ([{< then
10117    the corresponding "close" character )]}> is used as the closing
10118    delimiter.  It allows quoting of delimiters, and if the string has
10119    balanced delimiters ([{<>}]) it allows nesting.
10120
10121    On success, the SV with the resulting string is put into lex_stuff or,
10122    if that is already non-NULL, into lex_repl. The second case occurs only
10123    when parsing the RHS of the special constructs s/// and tr/// (y///).
10124    For convenience, the terminating delimiter character is stuffed into
10125    SvIVX of the SV.
10126 */
10127
10128 STATIC char *
10129 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10130 {
10131     dVAR;
10132     SV *sv;                             /* scalar value: string */
10133     const char *tmps;                   /* temp string, used for delimiter matching */
10134     char *s = start;            /* current position in the buffer */
10135     char term;                  /* terminating character */
10136     char *to;                   /* current position in the sv's data */
10137     I32 brackets = 1;                   /* bracket nesting level */
10138     bool has_utf8 = FALSE;              /* is there any utf8 content? */
10139     I32 termcode;                       /* terminating char. code */
10140     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
10141     STRLEN termlen;                     /* length of terminating string */
10142     int last_off = 0;                   /* last position for nesting bracket */
10143 #ifdef PERL_MAD
10144     int stuffstart;
10145     char *tstart;
10146 #endif
10147
10148     PERL_ARGS_ASSERT_SCAN_STR;
10149
10150     /* skip space before the delimiter */
10151     if (isSPACE(*s)) {
10152         s = PEEKSPACE(s);
10153     }
10154
10155 #ifdef PERL_MAD
10156     if (PL_realtokenstart >= 0) {
10157         stuffstart = PL_realtokenstart;
10158         PL_realtokenstart = -1;
10159     }
10160     else
10161         stuffstart = start - SvPVX(PL_linestr);
10162 #endif
10163     /* mark where we are, in case we need to report errors */
10164     CLINE;
10165
10166     /* after skipping whitespace, the next character is the terminator */
10167     term = *s;
10168     if (!UTF) {
10169         termcode = termstr[0] = term;
10170         termlen = 1;
10171     }
10172     else {
10173         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10174         Copy(s, termstr, termlen, U8);
10175         if (!UTF8_IS_INVARIANT(term))
10176             has_utf8 = TRUE;
10177     }
10178
10179     /* mark where we are */
10180     PL_multi_start = CopLINE(PL_curcop);
10181     PL_multi_open = term;
10182
10183     /* find corresponding closing delimiter */
10184     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10185         termcode = termstr[0] = term = tmps[5];
10186
10187     PL_multi_close = term;
10188
10189     /* create a new SV to hold the contents.  79 is the SV's initial length.
10190        What a random number. */
10191     sv = newSV_type(SVt_PVIV);
10192     SvGROW(sv, 80);
10193     SvIV_set(sv, termcode);
10194     (void)SvPOK_only(sv);               /* validate pointer */
10195
10196     /* move past delimiter and try to read a complete string */
10197     if (keep_delims)
10198         sv_catpvn(sv, s, termlen);
10199     s += termlen;
10200 #ifdef PERL_MAD
10201     tstart = SvPVX(PL_linestr) + stuffstart;
10202     if (!PL_thisopen && !keep_delims) {
10203         PL_thisopen = newSVpvn(tstart, s - tstart);
10204         stuffstart = s - SvPVX(PL_linestr);
10205     }
10206 #endif
10207     for (;;) {
10208         if (PL_encoding && !UTF && !re_reparse) {
10209             bool cont = TRUE;
10210
10211             while (cont) {
10212                 int offset = s - SvPVX_const(PL_linestr);
10213                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10214                                            &offset, (char*)termstr, termlen);
10215                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10216                 char * const svlast = SvEND(sv) - 1;
10217
10218                 for (; s < ns; s++) {
10219                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10220                         COPLINE_INC_WITH_HERELINES;
10221                 }
10222                 if (!found)
10223                     goto read_more_line;
10224                 else {
10225                     /* handle quoted delimiters */
10226                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10227                         const char *t;
10228                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10229                             t--;
10230                         if ((svlast-1 - t) % 2) {
10231                             if (!keep_quoted) {
10232                                 *(svlast-1) = term;
10233                                 *svlast = '\0';
10234                                 SvCUR_set(sv, SvCUR(sv) - 1);
10235                             }
10236                             continue;
10237                         }
10238                     }
10239                     if (PL_multi_open == PL_multi_close) {
10240                         cont = FALSE;
10241                     }
10242                     else {
10243                         const char *t;
10244                         char *w;
10245                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10246                             /* At here, all closes are "was quoted" one,
10247                                so we don't check PL_multi_close. */
10248                             if (*t == '\\') {
10249                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10250                                     t++;
10251                                 else
10252                                     *w++ = *t++;
10253                             }
10254                             else if (*t == PL_multi_open)
10255                                 brackets++;
10256
10257                             *w = *t;
10258                         }
10259                         if (w < t) {
10260                             *w++ = term;
10261                             *w = '\0';
10262                             SvCUR_set(sv, w - SvPVX_const(sv));
10263                         }
10264                         last_off = w - SvPVX(sv);
10265                         if (--brackets <= 0)
10266                             cont = FALSE;
10267                     }
10268                 }
10269             }
10270             if (!keep_delims) {
10271                 SvCUR_set(sv, SvCUR(sv) - 1);
10272                 *SvEND(sv) = '\0';
10273             }
10274             break;
10275         }
10276
10277         /* extend sv if need be */
10278         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10279         /* set 'to' to the next character in the sv's string */
10280         to = SvPVX(sv)+SvCUR(sv);
10281
10282         /* if open delimiter is the close delimiter read unbridle */
10283         if (PL_multi_open == PL_multi_close) {
10284             for (; s < PL_bufend; s++,to++) {
10285                 /* embedded newlines increment the current line number */
10286                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10287                     COPLINE_INC_WITH_HERELINES;
10288                 /* handle quoted delimiters */
10289                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10290                     if (!keep_quoted
10291                         && (s[1] == term
10292                             || (re_reparse && s[1] == '\\'))
10293                     )
10294                         s++;
10295                     /* any other quotes are simply copied straight through */
10296                     else
10297                         *to++ = *s++;
10298                 }
10299                 /* terminate when run out of buffer (the for() condition), or
10300                    have found the terminator */
10301                 else if (*s == term) {
10302                     if (termlen == 1)
10303                         break;
10304                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10305                         break;
10306                 }
10307                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10308                     has_utf8 = TRUE;
10309                 *to = *s;
10310             }
10311         }
10312
10313         /* if the terminator isn't the same as the start character (e.g.,
10314            matched brackets), we have to allow more in the quoting, and
10315            be prepared for nested brackets.
10316         */
10317         else {
10318             /* read until we run out of string, or we find the terminator */
10319             for (; s < PL_bufend; s++,to++) {
10320                 /* embedded newlines increment the line count */
10321                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10322                     COPLINE_INC_WITH_HERELINES;
10323                 /* backslashes can escape the open or closing characters */
10324                 if (*s == '\\' && s+1 < PL_bufend) {
10325                     if (!keep_quoted &&
10326                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10327                         s++;
10328                     else
10329                         *to++ = *s++;
10330                 }
10331                 /* allow nested opens and closes */
10332                 else if (*s == PL_multi_close && --brackets <= 0)
10333                     break;
10334                 else if (*s == PL_multi_open)
10335                     brackets++;
10336                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10337                     has_utf8 = TRUE;
10338                 *to = *s;
10339             }
10340         }
10341         /* terminate the copied string and update the sv's end-of-string */
10342         *to = '\0';
10343         SvCUR_set(sv, to - SvPVX_const(sv));
10344
10345         /*
10346          * this next chunk reads more into the buffer if we're not done yet
10347          */
10348
10349         if (s < PL_bufend)
10350             break;              /* handle case where we are done yet :-) */
10351
10352 #ifndef PERL_STRICT_CR
10353         if (to - SvPVX_const(sv) >= 2) {
10354             if ((to[-2] == '\r' && to[-1] == '\n') ||
10355                 (to[-2] == '\n' && to[-1] == '\r'))
10356             {
10357                 to[-2] = '\n';
10358                 to--;
10359                 SvCUR_set(sv, to - SvPVX_const(sv));
10360             }
10361             else if (to[-1] == '\r')
10362                 to[-1] = '\n';
10363         }
10364         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10365             to[-1] = '\n';
10366 #endif
10367
10368      read_more_line:
10369         /* if we're out of file, or a read fails, bail and reset the current
10370            line marker so we can report where the unterminated string began
10371         */
10372 #ifdef PERL_MAD
10373         if (PL_madskills) {
10374             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10375             if (PL_thisstuff)
10376                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10377             else
10378                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10379         }
10380 #endif
10381         COPLINE_INC_WITH_HERELINES;
10382         PL_bufptr = PL_bufend;
10383         if (!lex_next_chunk(0)) {
10384             sv_free(sv);
10385             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10386             return NULL;
10387         }
10388         s = PL_bufptr;
10389 #ifdef PERL_MAD
10390         stuffstart = 0;
10391 #endif
10392     }
10393
10394     /* at this point, we have successfully read the delimited string */
10395
10396     if (!PL_encoding || UTF || re_reparse) {
10397 #ifdef PERL_MAD
10398         if (PL_madskills) {
10399             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10400             const int len = s - tstart;
10401             if (PL_thisstuff)
10402                 sv_catpvn(PL_thisstuff, tstart, len);
10403             else
10404                 PL_thisstuff = newSVpvn(tstart, len);
10405             if (!PL_thisclose && !keep_delims)
10406                 PL_thisclose = newSVpvn(s,termlen);
10407         }
10408 #endif
10409
10410         if (keep_delims)
10411             sv_catpvn(sv, s, termlen);
10412         s += termlen;
10413     }
10414 #ifdef PERL_MAD
10415     else {
10416         if (PL_madskills) {
10417             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10418             const int len = s - tstart - termlen;
10419             if (PL_thisstuff)
10420                 sv_catpvn(PL_thisstuff, tstart, len);
10421             else
10422                 PL_thisstuff = newSVpvn(tstart, len);
10423             if (!PL_thisclose && !keep_delims)
10424                 PL_thisclose = newSVpvn(s - termlen,termlen);
10425         }
10426     }
10427 #endif
10428     if (has_utf8 || (PL_encoding && !re_reparse))
10429         SvUTF8_on(sv);
10430
10431     PL_multi_end = CopLINE(PL_curcop);
10432
10433     /* if we allocated too much space, give some back */
10434     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10435         SvLEN_set(sv, SvCUR(sv) + 1);
10436         SvPV_renew(sv, SvLEN(sv));
10437     }
10438
10439     /* decide whether this is the first or second quoted string we've read
10440        for this op
10441     */
10442
10443     if (PL_lex_stuff)
10444         PL_sublex_info.repl = sv;
10445     else
10446         PL_lex_stuff = sv;
10447     return s;
10448 }
10449
10450 /*
10451   scan_num
10452   takes: pointer to position in buffer
10453   returns: pointer to new position in buffer
10454   side-effects: builds ops for the constant in pl_yylval.op
10455
10456   Read a number in any of the formats that Perl accepts:
10457
10458   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10459   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10460   0b[01](_?[01])*
10461   0[0-7](_?[0-7])*
10462   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10463
10464   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10465   thing it reads.
10466
10467   If it reads a number without a decimal point or an exponent, it will
10468   try converting the number to an integer and see if it can do so
10469   without loss of precision.
10470 */
10471
10472 char *
10473 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10474 {
10475     dVAR;
10476     const char *s = start;      /* current position in buffer */
10477     char *d;                    /* destination in temp buffer */
10478     char *e;                    /* end of temp buffer */
10479     NV nv;                              /* number read, as a double */
10480     SV *sv = NULL;                      /* place to put the converted number */
10481     bool floatit;                       /* boolean: int or float? */
10482     const char *lastub = NULL;          /* position of last underbar */
10483     static char const number_too_long[] = "Number too long";
10484
10485     PERL_ARGS_ASSERT_SCAN_NUM;
10486
10487     /* We use the first character to decide what type of number this is */
10488
10489     switch (*s) {
10490     default:
10491         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10492
10493     /* if it starts with a 0, it could be an octal number, a decimal in
10494        0.13 disguise, or a hexadecimal number, or a binary number. */
10495     case '0':
10496         {
10497           /* variables:
10498              u          holds the "number so far"
10499              shift      the power of 2 of the base
10500                         (hex == 4, octal == 3, binary == 1)
10501              overflowed was the number more than we can hold?
10502
10503              Shift is used when we add a digit.  It also serves as an "are
10504              we in octal/hex/binary?" indicator to disallow hex characters
10505              when in octal mode.
10506            */
10507             NV n = 0.0;
10508             UV u = 0;
10509             I32 shift;
10510             bool overflowed = FALSE;
10511             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10512             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10513             static const char* const bases[5] =
10514               { "", "binary", "", "octal", "hexadecimal" };
10515             static const char* const Bases[5] =
10516               { "", "Binary", "", "Octal", "Hexadecimal" };
10517             static const char* const maxima[5] =
10518               { "",
10519                 "0b11111111111111111111111111111111",
10520                 "",
10521                 "037777777777",
10522                 "0xffffffff" };
10523             const char *base, *Base, *max;
10524
10525             /* check for hex */
10526             if (s[1] == 'x' || s[1] == 'X') {
10527                 shift = 4;
10528                 s += 2;
10529                 just_zero = FALSE;
10530             } else if (s[1] == 'b' || s[1] == 'B') {
10531                 shift = 1;
10532                 s += 2;
10533                 just_zero = FALSE;
10534             }
10535             /* check for a decimal in disguise */
10536             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10537                 goto decimal;
10538             /* so it must be octal */
10539             else {
10540                 shift = 3;
10541                 s++;
10542             }
10543
10544             if (*s == '_') {
10545                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10546                                "Misplaced _ in number");
10547                lastub = s++;
10548             }
10549
10550             base = bases[shift];
10551             Base = Bases[shift];
10552             max  = maxima[shift];
10553
10554             /* read the rest of the number */
10555             for (;;) {
10556                 /* x is used in the overflow test,
10557                    b is the digit we're adding on. */
10558                 UV x, b;
10559
10560                 switch (*s) {
10561
10562                 /* if we don't mention it, we're done */
10563                 default:
10564                     goto out;
10565
10566                 /* _ are ignored -- but warned about if consecutive */
10567                 case '_':
10568                     if (lastub && s == lastub + 1)
10569                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10570                                        "Misplaced _ in number");
10571                     lastub = s++;
10572                     break;
10573
10574                 /* 8 and 9 are not octal */
10575                 case '8': case '9':
10576                     if (shift == 3)
10577                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10578                     /* FALL THROUGH */
10579
10580                 /* octal digits */
10581                 case '2': case '3': case '4':
10582                 case '5': case '6': case '7':
10583                     if (shift == 1)
10584                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10585                     /* FALL THROUGH */
10586
10587                 case '0': case '1':
10588                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10589                     goto digit;
10590
10591                 /* hex digits */
10592                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10593                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10594                     /* make sure they said 0x */
10595                     if (shift != 4)
10596                         goto out;
10597                     b = (*s++ & 7) + 9;
10598
10599                     /* Prepare to put the digit we have onto the end
10600                        of the number so far.  We check for overflows.
10601                     */
10602
10603                   digit:
10604                     just_zero = FALSE;
10605                     if (!overflowed) {
10606                         x = u << shift; /* make room for the digit */
10607
10608                         if ((x >> shift) != u
10609                             && !(PL_hints & HINT_NEW_BINARY)) {
10610                             overflowed = TRUE;
10611                             n = (NV) u;
10612                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10613                                              "Integer overflow in %s number",
10614                                              base);
10615                         } else
10616                             u = x | b;          /* add the digit to the end */
10617                     }
10618                     if (overflowed) {
10619                         n *= nvshift[shift];
10620                         /* If an NV has not enough bits in its
10621                          * mantissa to represent an UV this summing of
10622                          * small low-order numbers is a waste of time
10623                          * (because the NV cannot preserve the
10624                          * low-order bits anyway): we could just
10625                          * remember when did we overflow and in the
10626                          * end just multiply n by the right
10627                          * amount. */
10628                         n += (NV) b;
10629                     }
10630                     break;
10631                 }
10632             }
10633
10634           /* if we get here, we had success: make a scalar value from
10635              the number.
10636           */
10637           out:
10638
10639             /* final misplaced underbar check */
10640             if (s[-1] == '_') {
10641                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10642             }
10643
10644             if (overflowed) {
10645                 if (n > 4294967295.0)
10646                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10647                                    "%s number > %s non-portable",
10648                                    Base, max);
10649                 sv = newSVnv(n);
10650             }
10651             else {
10652 #if UVSIZE > 4
10653                 if (u > 0xffffffff)
10654                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10655                                    "%s number > %s non-portable",
10656                                    Base, max);
10657 #endif
10658                 sv = newSVuv(u);
10659             }
10660             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10661                 sv = new_constant(start, s - start, "integer",
10662                                   sv, NULL, NULL, 0);
10663             else if (PL_hints & HINT_NEW_BINARY)
10664                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10665         }
10666         break;
10667
10668     /*
10669       handle decimal numbers.
10670       we're also sent here when we read a 0 as the first digit
10671     */
10672     case '1': case '2': case '3': case '4': case '5':
10673     case '6': case '7': case '8': case '9': case '.':
10674       decimal:
10675         d = PL_tokenbuf;
10676         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10677         floatit = FALSE;
10678
10679         /* read next group of digits and _ and copy into d */
10680         while (isDIGIT(*s) || *s == '_') {
10681             /* skip underscores, checking for misplaced ones
10682                if -w is on
10683             */
10684             if (*s == '_') {
10685                 if (lastub && s == lastub + 1)
10686                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10687                                    "Misplaced _ in number");
10688                 lastub = s++;
10689             }
10690             else {
10691                 /* check for end of fixed-length buffer */
10692                 if (d >= e)
10693                     Perl_croak(aTHX_ number_too_long);
10694                 /* if we're ok, copy the character */
10695                 *d++ = *s++;
10696             }
10697         }
10698
10699         /* final misplaced underbar check */
10700         if (lastub && s == lastub + 1) {
10701             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10702         }
10703
10704         /* read a decimal portion if there is one.  avoid
10705            3..5 being interpreted as the number 3. followed
10706            by .5
10707         */
10708         if (*s == '.' && s[1] != '.') {
10709             floatit = TRUE;
10710             *d++ = *s++;
10711
10712             if (*s == '_') {
10713                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10714                                "Misplaced _ in number");
10715                 lastub = s;
10716             }
10717
10718             /* copy, ignoring underbars, until we run out of digits.
10719             */
10720             for (; isDIGIT(*s) || *s == '_'; s++) {
10721                 /* fixed length buffer check */
10722                 if (d >= e)
10723                     Perl_croak(aTHX_ number_too_long);
10724                 if (*s == '_') {
10725                    if (lastub && s == lastub + 1)
10726                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10727                                       "Misplaced _ in number");
10728                    lastub = s;
10729                 }
10730                 else
10731                     *d++ = *s;
10732             }
10733             /* fractional part ending in underbar? */
10734             if (s[-1] == '_') {
10735                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10736                                "Misplaced _ in number");
10737             }
10738             if (*s == '.' && isDIGIT(s[1])) {
10739                 /* oops, it's really a v-string, but without the "v" */
10740                 s = start;
10741                 goto vstring;
10742             }
10743         }
10744
10745         /* read exponent part, if present */
10746         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10747             floatit = TRUE;
10748             s++;
10749
10750             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10751             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10752
10753             /* stray preinitial _ */
10754             if (*s == '_') {
10755                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10756                                "Misplaced _ in number");
10757                 lastub = s++;
10758             }
10759
10760             /* allow positive or negative exponent */
10761             if (*s == '+' || *s == '-')
10762                 *d++ = *s++;
10763
10764             /* stray initial _ */
10765             if (*s == '_') {
10766                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10767                                "Misplaced _ in number");
10768                 lastub = s++;
10769             }
10770
10771             /* read digits of exponent */
10772             while (isDIGIT(*s) || *s == '_') {
10773                 if (isDIGIT(*s)) {
10774                     if (d >= e)
10775                         Perl_croak(aTHX_ number_too_long);
10776                     *d++ = *s++;
10777                 }
10778                 else {
10779                    if (((lastub && s == lastub + 1) ||
10780                         (!isDIGIT(s[1]) && s[1] != '_')))
10781                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10782                                       "Misplaced _ in number");
10783                    lastub = s++;
10784                 }
10785             }
10786         }
10787
10788
10789         /*
10790            We try to do an integer conversion first if no characters
10791            indicating "float" have been found.
10792          */
10793
10794         if (!floatit) {
10795             UV uv;
10796             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10797
10798             if (flags == IS_NUMBER_IN_UV) {
10799               if (uv <= IV_MAX)
10800                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10801               else
10802                 sv = newSVuv(uv);
10803             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10804               if (uv <= (UV) IV_MIN)
10805                 sv = newSViv(-(IV)uv);
10806               else
10807                 floatit = TRUE;
10808             } else
10809               floatit = TRUE;
10810         }
10811         if (floatit) {
10812             /* terminate the string */
10813             *d = '\0';
10814             nv = Atof(PL_tokenbuf);
10815             sv = newSVnv(nv);
10816         }
10817
10818         if ( floatit
10819              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10820             const char *const key = floatit ? "float" : "integer";
10821             const STRLEN keylen = floatit ? 5 : 7;
10822             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10823                                 key, keylen, sv, NULL, NULL, 0);
10824         }
10825         break;
10826
10827     /* if it starts with a v, it could be a v-string */
10828     case 'v':
10829 vstring:
10830                 sv = newSV(5); /* preallocate storage space */
10831                 s = scan_vstring(s, PL_bufend, sv);
10832         break;
10833     }
10834
10835     /* make the op for the constant and return */
10836
10837     if (sv)
10838         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10839     else
10840         lvalp->opval = NULL;
10841
10842     return (char *)s;
10843 }
10844
10845 STATIC char *
10846 S_scan_formline(pTHX_ register char *s)
10847 {
10848     dVAR;
10849     char *eol;
10850     char *t;
10851     SV * const stuff = newSVpvs("");
10852     bool needargs = FALSE;
10853     bool eofmt = FALSE;
10854 #ifdef PERL_MAD
10855     char *tokenstart = s;
10856     SV* savewhite = NULL;
10857
10858     if (PL_madskills) {
10859         savewhite = PL_thiswhite;
10860         PL_thiswhite = 0;
10861     }
10862 #endif
10863
10864     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10865
10866     while (!needargs) {
10867         if (*s == '.') {
10868             t = s+1;
10869 #ifdef PERL_STRICT_CR
10870             while (SPACE_OR_TAB(*t))
10871                 t++;
10872 #else
10873             while (SPACE_OR_TAB(*t) || *t == '\r')
10874                 t++;
10875 #endif
10876             if (*t == '\n' || t == PL_bufend) {
10877                 eofmt = TRUE;
10878                 break;
10879             }
10880         }
10881         eol = (char *) memchr(s,'\n',PL_bufend-s);
10882         if (!eol++)
10883                 eol = PL_bufend;
10884         if (*s != '#') {
10885             for (t = s; t < eol; t++) {
10886                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10887                     needargs = FALSE;
10888                     goto enough;        /* ~~ must be first line in formline */
10889                 }
10890                 if (*t == '@' || *t == '^')
10891                     needargs = TRUE;
10892             }
10893             if (eol > s) {
10894                 sv_catpvn(stuff, s, eol-s);
10895 #ifndef PERL_STRICT_CR
10896                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10897                     char *end = SvPVX(stuff) + SvCUR(stuff);
10898                     end[-2] = '\n';
10899                     end[-1] = '\0';
10900                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10901                 }
10902 #endif
10903             }
10904             else
10905               break;
10906         }
10907         s = (char*)eol;
10908         if ((PL_rsfp || PL_parser->filtered)
10909          && PL_parser->form_lex_state == LEX_NORMAL) {
10910             bool got_some;
10911 #ifdef PERL_MAD
10912             if (PL_madskills) {
10913                 if (PL_thistoken)
10914                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10915                 else
10916                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10917             }
10918 #endif
10919             PL_bufptr = PL_bufend;
10920             COPLINE_INC_WITH_HERELINES;
10921             got_some = lex_next_chunk(0);
10922             CopLINE_dec(PL_curcop);
10923             s = PL_bufptr;
10924 #ifdef PERL_MAD
10925             tokenstart = PL_bufptr;
10926 #endif
10927             if (!got_some)
10928                 break;
10929         }
10930         incline(s);
10931     }
10932   enough:
10933     if (!SvCUR(stuff) || needargs)
10934         PL_lex_state = PL_parser->form_lex_state;
10935     if (SvCUR(stuff)) {
10936         PL_expect = XSTATE;
10937         if (needargs) {
10938             start_force(PL_curforce);
10939             NEXTVAL_NEXTTOKE.ival = 0;
10940             force_next(FORMLBRACK);
10941         }
10942         if (!IN_BYTES) {
10943             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10944                 SvUTF8_on(stuff);
10945             else if (PL_encoding)
10946                 sv_recode_to_utf8(stuff, PL_encoding);
10947         }
10948         start_force(PL_curforce);
10949         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10950         force_next(THING);
10951     }
10952     else {
10953         SvREFCNT_dec(stuff);
10954         if (eofmt)
10955             PL_lex_formbrack = 0;
10956     }
10957 #ifdef PERL_MAD
10958     if (PL_madskills) {
10959         if (PL_thistoken)
10960             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10961         else
10962             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10963         PL_thiswhite = savewhite;
10964     }
10965 #endif
10966     return s;
10967 }
10968
10969 I32
10970 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10971 {
10972     dVAR;
10973     const I32 oldsavestack_ix = PL_savestack_ix;
10974     CV* const outsidecv = PL_compcv;
10975
10976     SAVEI32(PL_subline);
10977     save_item(PL_subname);
10978     SAVESPTR(PL_compcv);
10979
10980     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10981     CvFLAGS(PL_compcv) |= flags;
10982
10983     PL_subline = CopLINE(PL_curcop);
10984     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10985     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10986     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10987     if (outsidecv && CvPADLIST(outsidecv))
10988         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10989
10990     return oldsavestack_ix;
10991 }
10992
10993 #ifdef __SC__
10994 #pragma segment Perl_yylex
10995 #endif
10996 static int
10997 S_yywarn(pTHX_ const char *const s, U32 flags)
10998 {
10999     dVAR;
11000
11001     PERL_ARGS_ASSERT_YYWARN;
11002
11003     PL_in_eval |= EVAL_WARNONLY;
11004     yyerror_pv(s, flags);
11005     PL_in_eval &= ~EVAL_WARNONLY;
11006     return 0;
11007 }
11008
11009 int
11010 Perl_yyerror(pTHX_ const char *const s)
11011 {
11012     PERL_ARGS_ASSERT_YYERROR;
11013     return yyerror_pvn(s, strlen(s), 0);
11014 }
11015
11016 int
11017 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11018 {
11019     PERL_ARGS_ASSERT_YYERROR_PV;
11020     return yyerror_pvn(s, strlen(s), flags);
11021 }
11022
11023 int
11024 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11025 {
11026     dVAR;
11027     const char *context = NULL;
11028     int contlen = -1;
11029     SV *msg;
11030     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11031     int yychar  = PL_parser->yychar;
11032     U32 is_utf8 = flags & SVf_UTF8;
11033
11034     PERL_ARGS_ASSERT_YYERROR_PVN;
11035
11036     if (!yychar || (yychar == ';' && !PL_rsfp))
11037         sv_catpvs(where_sv, "at EOF");
11038     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11039       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11040       PL_oldbufptr != PL_bufptr) {
11041         /*
11042                 Only for NetWare:
11043                 The code below is removed for NetWare because it abends/crashes on NetWare
11044                 when the script has error such as not having the closing quotes like:
11045                     if ($var eq "value)
11046                 Checking of white spaces is anyway done in NetWare code.
11047         */
11048 #ifndef NETWARE
11049         while (isSPACE(*PL_oldoldbufptr))
11050             PL_oldoldbufptr++;
11051 #endif
11052         context = PL_oldoldbufptr;
11053         contlen = PL_bufptr - PL_oldoldbufptr;
11054     }
11055     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11056       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11057         /*
11058                 Only for NetWare:
11059                 The code below is removed for NetWare because it abends/crashes on NetWare
11060                 when the script has error such as not having the closing quotes like:
11061                     if ($var eq "value)
11062                 Checking of white spaces is anyway done in NetWare code.
11063         */
11064 #ifndef NETWARE
11065         while (isSPACE(*PL_oldbufptr))
11066             PL_oldbufptr++;
11067 #endif
11068         context = PL_oldbufptr;
11069         contlen = PL_bufptr - PL_oldbufptr;
11070     }
11071     else if (yychar > 255)
11072         sv_catpvs(where_sv, "next token ???");
11073     else if (yychar == -2) { /* YYEMPTY */
11074         if (PL_lex_state == LEX_NORMAL ||
11075            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11076             sv_catpvs(where_sv, "at end of line");
11077         else if (PL_lex_inpat)
11078             sv_catpvs(where_sv, "within pattern");
11079         else
11080             sv_catpvs(where_sv, "within string");
11081     }
11082     else {
11083         sv_catpvs(where_sv, "next char ");
11084         if (yychar < 32)
11085             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11086         else if (isPRINT_LC(yychar)) {
11087             const char string = yychar;
11088             sv_catpvn(where_sv, &string, 1);
11089         }
11090         else
11091             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11092     }
11093     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
11094     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11095         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11096     if (context)
11097         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11098                             SVfARG(newSVpvn_flags(context, contlen,
11099                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11100     else
11101         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11102     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11103         Perl_sv_catpvf(aTHX_ msg,
11104         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11105                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11106         PL_multi_end = 0;
11107     }
11108     if (PL_in_eval & EVAL_WARNONLY) {
11109         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11110     }
11111     else
11112         qerror(msg);
11113     if (PL_error_count >= 10) {
11114         if (PL_in_eval && SvCUR(ERRSV))
11115             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11116                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
11117         else
11118             Perl_croak(aTHX_ "%s has too many errors.\n",
11119             OutCopFILE(PL_curcop));
11120     }
11121     PL_in_my = 0;
11122     PL_in_my_stash = NULL;
11123     return 0;
11124 }
11125 #ifdef __SC__
11126 #pragma segment Main
11127 #endif
11128
11129 STATIC char*
11130 S_swallow_bom(pTHX_ U8 *s)
11131 {
11132     dVAR;
11133     const STRLEN slen = SvCUR(PL_linestr);
11134
11135     PERL_ARGS_ASSERT_SWALLOW_BOM;
11136
11137     switch (s[0]) {
11138     case 0xFF:
11139         if (s[1] == 0xFE) {
11140             /* UTF-16 little-endian? (or UTF-32LE?) */
11141             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11142                 /* diag_listed_as: Unsupported script encoding %s */
11143                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11144 #ifndef PERL_NO_UTF16_FILTER
11145             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11146             s += 2;
11147             if (PL_bufend > (char*)s) {
11148                 s = add_utf16_textfilter(s, TRUE);
11149             }
11150 #else
11151             /* diag_listed_as: Unsupported script encoding %s */
11152             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11153 #endif
11154         }
11155         break;
11156     case 0xFE:
11157         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11158 #ifndef PERL_NO_UTF16_FILTER
11159             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11160             s += 2;
11161             if (PL_bufend > (char *)s) {
11162                 s = add_utf16_textfilter(s, FALSE);
11163             }
11164 #else
11165             /* diag_listed_as: Unsupported script encoding %s */
11166             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11167 #endif
11168         }
11169         break;
11170     case 0xEF:
11171         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11172             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11173             s += 3;                      /* UTF-8 */
11174         }
11175         break;
11176     case 0:
11177         if (slen > 3) {
11178              if (s[1] == 0) {
11179                   if (s[2] == 0xFE && s[3] == 0xFF) {
11180                        /* UTF-32 big-endian */
11181                        /* diag_listed_as: Unsupported script encoding %s */
11182                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11183                   }
11184              }
11185              else if (s[2] == 0 && s[3] != 0) {
11186                   /* Leading bytes
11187                    * 00 xx 00 xx
11188                    * are a good indicator of UTF-16BE. */
11189 #ifndef PERL_NO_UTF16_FILTER
11190                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11191                   s = add_utf16_textfilter(s, FALSE);
11192 #else
11193                   /* diag_listed_as: Unsupported script encoding %s */
11194                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11195 #endif
11196              }
11197         }
11198 #ifdef EBCDIC
11199     case 0xDD:
11200         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11201             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11202             s += 4;                      /* UTF-8 */
11203         }
11204         break;
11205 #endif
11206
11207     default:
11208          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11209                   /* Leading bytes
11210                    * xx 00 xx 00
11211                    * are a good indicator of UTF-16LE. */
11212 #ifndef PERL_NO_UTF16_FILTER
11213               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11214               s = add_utf16_textfilter(s, TRUE);
11215 #else
11216               /* diag_listed_as: Unsupported script encoding %s */
11217               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11218 #endif
11219          }
11220     }
11221     return (char*)s;
11222 }
11223
11224
11225 #ifndef PERL_NO_UTF16_FILTER
11226 static I32
11227 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11228 {
11229     dVAR;
11230     SV *const filter = FILTER_DATA(idx);
11231     /* We re-use this each time round, throwing the contents away before we
11232        return.  */
11233     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11234     SV *const utf8_buffer = filter;
11235     IV status = IoPAGE(filter);
11236     const bool reverse = cBOOL(IoLINES(filter));
11237     I32 retval;
11238
11239     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11240
11241     /* As we're automatically added, at the lowest level, and hence only called
11242        from this file, we can be sure that we're not called in block mode. Hence
11243        don't bother writing code to deal with block mode.  */
11244     if (maxlen) {
11245         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11246     }
11247     if (status < 0) {
11248         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11249     }
11250     DEBUG_P(PerlIO_printf(Perl_debug_log,
11251                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11252                           FPTR2DPTR(void *, S_utf16_textfilter),
11253                           reverse ? 'l' : 'b', idx, maxlen, status,
11254                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11255
11256     while (1) {
11257         STRLEN chars;
11258         STRLEN have;
11259         I32 newlen;
11260         U8 *end;
11261         /* First, look in our buffer of existing UTF-8 data:  */
11262         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11263
11264         if (nl) {
11265             ++nl;
11266         } else if (status == 0) {
11267             /* EOF */
11268             IoPAGE(filter) = 0;
11269             nl = SvEND(utf8_buffer);
11270         }
11271         if (nl) {
11272             STRLEN got = nl - SvPVX(utf8_buffer);
11273             /* Did we have anything to append?  */
11274             retval = got != 0;
11275             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11276             /* Everything else in this code works just fine if SVp_POK isn't
11277                set.  This, however, needs it, and we need it to work, else
11278                we loop infinitely because the buffer is never consumed.  */
11279             sv_chop(utf8_buffer, nl);
11280             break;
11281         }
11282
11283         /* OK, not a complete line there, so need to read some more UTF-16.
11284            Read an extra octect if the buffer currently has an odd number. */
11285         while (1) {
11286             if (status <= 0)
11287                 break;
11288             if (SvCUR(utf16_buffer) >= 2) {
11289                 /* Location of the high octet of the last complete code point.
11290                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11291                    *coupled* with all the benefits of partial reads and
11292                    endianness.  */
11293                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11294                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11295
11296                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11297                     break;
11298                 }
11299
11300                 /* We have the first half of a surrogate. Read more.  */
11301                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11302             }
11303
11304             status = FILTER_READ(idx + 1, utf16_buffer,
11305                                  160 + (SvCUR(utf16_buffer) & 1));
11306             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11307             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11308             if (status < 0) {
11309                 /* Error */
11310                 IoPAGE(filter) = status;
11311                 return status;
11312             }
11313         }
11314
11315         chars = SvCUR(utf16_buffer) >> 1;
11316         have = SvCUR(utf8_buffer);
11317         SvGROW(utf8_buffer, have + chars * 3 + 1);
11318
11319         if (reverse) {
11320             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11321                                          (U8*)SvPVX_const(utf8_buffer) + have,
11322                                          chars * 2, &newlen);
11323         } else {
11324             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11325                                 (U8*)SvPVX_const(utf8_buffer) + have,
11326                                 chars * 2, &newlen);
11327         }
11328         SvCUR_set(utf8_buffer, have + newlen);
11329         *end = '\0';
11330
11331         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11332            it's private to us, and utf16_to_utf8{,reversed} take a
11333            (pointer,length) pair, rather than a NUL-terminated string.  */
11334         if(SvCUR(utf16_buffer) & 1) {
11335             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11336             SvCUR_set(utf16_buffer, 1);
11337         } else {
11338             SvCUR_set(utf16_buffer, 0);
11339         }
11340     }
11341     DEBUG_P(PerlIO_printf(Perl_debug_log,
11342                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11343                           status,
11344                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11345     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11346     return retval;
11347 }
11348
11349 static U8 *
11350 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11351 {
11352     SV *filter = filter_add(S_utf16_textfilter, NULL);
11353
11354     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11355
11356     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11357     sv_setpvs(filter, "");
11358     IoLINES(filter) = reversed;
11359     IoPAGE(filter) = 1; /* Not EOF */
11360
11361     /* Sadly, we have to return a valid pointer, come what may, so we have to
11362        ignore any error return from this.  */
11363     SvCUR_set(PL_linestr, 0);
11364     if (FILTER_READ(0, PL_linestr, 0)) {
11365         SvUTF8_on(PL_linestr);
11366     } else {
11367         SvUTF8_on(PL_linestr);
11368     }
11369     PL_bufend = SvEND(PL_linestr);
11370     return (U8*)SvPVX(PL_linestr);
11371 }
11372 #endif
11373
11374 /*
11375 Returns a pointer to the next character after the parsed
11376 vstring, as well as updating the passed in sv.
11377
11378 Function must be called like
11379
11380         sv = newSV(5);
11381         s = scan_vstring(s,e,sv);
11382
11383 where s and e are the start and end of the string.
11384 The sv should already be large enough to store the vstring
11385 passed in, for performance reasons.
11386
11387 */
11388
11389 char *
11390 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11391 {
11392     dVAR;
11393     const char *pos = s;
11394     const char *start = s;
11395
11396     PERL_ARGS_ASSERT_SCAN_VSTRING;
11397
11398     if (*pos == 'v') pos++;  /* get past 'v' */
11399     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11400         pos++;
11401     if ( *pos != '.') {
11402         /* this may not be a v-string if followed by => */
11403         const char *next = pos;
11404         while (next < e && isSPACE(*next))
11405             ++next;
11406         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11407             /* return string not v-string */
11408             sv_setpvn(sv,(char *)s,pos-s);
11409             return (char *)pos;
11410         }
11411     }
11412
11413     if (!isALPHA(*pos)) {
11414         U8 tmpbuf[UTF8_MAXBYTES+1];
11415
11416         if (*s == 'v')
11417             s++;  /* get past 'v' */
11418
11419         sv_setpvs(sv, "");
11420
11421         for (;;) {
11422             /* this is atoi() that tolerates underscores */
11423             U8 *tmpend;
11424             UV rev = 0;
11425             const char *end = pos;
11426             UV mult = 1;
11427             while (--end >= s) {
11428                 if (*end != '_') {
11429                     const UV orev = rev;
11430                     rev += (*end - '0') * mult;
11431                     mult *= 10;
11432                     if (orev > rev)
11433                         /* diag_listed_as: Integer overflow in %s number */
11434                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11435                                          "Integer overflow in decimal number");
11436                 }
11437             }
11438 #ifdef EBCDIC
11439             if (rev > 0x7FFFFFFF)
11440                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11441 #endif
11442             /* Append native character for the rev point */
11443             tmpend = uvchr_to_utf8(tmpbuf, rev);
11444             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11445             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11446                  SvUTF8_on(sv);
11447             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11448                  s = ++pos;
11449             else {
11450                  s = pos;
11451                  break;
11452             }
11453             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11454                  pos++;
11455         }
11456         SvPOK_on(sv);
11457         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11458         SvRMAGICAL_on(sv);
11459     }
11460     return (char *)s;
11461 }
11462
11463 int
11464 Perl_keyword_plugin_standard(pTHX_
11465         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11466 {
11467     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11468     PERL_UNUSED_CONTEXT;
11469     PERL_UNUSED_ARG(keyword_ptr);
11470     PERL_UNUSED_ARG(keyword_len);
11471     PERL_UNUSED_ARG(op_ptr);
11472     return KEYWORD_PLUGIN_DECLINE;
11473 }
11474
11475 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11476 static void
11477 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11478 {
11479     SAVEI32(PL_lex_brackets);
11480     if (PL_lex_brackets > 100)
11481         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11482     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11483     SAVEI32(PL_lex_allbrackets);
11484     PL_lex_allbrackets = 0;
11485     SAVEI8(PL_lex_fakeeof);
11486     PL_lex_fakeeof = (U8)fakeeof;
11487     if(yyparse(gramtype) && !PL_parser->error_count)
11488         qerror(Perl_mess(aTHX_ "Parse error"));
11489 }
11490
11491 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11492 static OP *
11493 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11494 {
11495     OP *o;
11496     ENTER;
11497     SAVEVPTR(PL_eval_root);
11498     PL_eval_root = NULL;
11499     parse_recdescent(gramtype, fakeeof);
11500     o = PL_eval_root;
11501     LEAVE;
11502     return o;
11503 }
11504
11505 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11506 static OP *
11507 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11508 {
11509     OP *exprop;
11510     if (flags & ~PARSE_OPTIONAL)
11511         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11512     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11513     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11514         if (!PL_parser->error_count)
11515             qerror(Perl_mess(aTHX_ "Parse error"));
11516         exprop = newOP(OP_NULL, 0);
11517     }
11518     return exprop;
11519 }
11520
11521 /*
11522 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11523
11524 Parse a Perl arithmetic expression.  This may contain operators of precedence
11525 down to the bit shift operators.  The expression must be followed (and thus
11526 terminated) either by a comparison or lower-precedence operator or by
11527 something that would normally terminate an expression such as semicolon.
11528 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11529 otherwise it is mandatory.  It is up to the caller to ensure that the
11530 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11531 the source of the code to be parsed and the lexical context for the
11532 expression.
11533
11534 The op tree representing the expression is returned.  If an optional
11535 expression is absent, a null pointer is returned, otherwise the pointer
11536 will be non-null.
11537
11538 If an error occurs in parsing or compilation, in most cases a valid op
11539 tree is returned anyway.  The error is reflected in the parser state,
11540 normally resulting in a single exception at the top level of parsing
11541 which covers all the compilation errors that occurred.  Some compilation
11542 errors, however, will throw an exception immediately.
11543
11544 =cut
11545 */
11546
11547 OP *
11548 Perl_parse_arithexpr(pTHX_ U32 flags)
11549 {
11550     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11551 }
11552
11553 /*
11554 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11555
11556 Parse a Perl term expression.  This may contain operators of precedence
11557 down to the assignment operators.  The expression must be followed (and thus
11558 terminated) either by a comma or lower-precedence operator or by
11559 something that would normally terminate an expression such as semicolon.
11560 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11561 otherwise it is mandatory.  It is up to the caller to ensure that the
11562 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11563 the source of the code to be parsed and the lexical context for the
11564 expression.
11565
11566 The op tree representing the expression is returned.  If an optional
11567 expression is absent, a null pointer is returned, otherwise the pointer
11568 will be non-null.
11569
11570 If an error occurs in parsing or compilation, in most cases a valid op
11571 tree is returned anyway.  The error is reflected in the parser state,
11572 normally resulting in a single exception at the top level of parsing
11573 which covers all the compilation errors that occurred.  Some compilation
11574 errors, however, will throw an exception immediately.
11575
11576 =cut
11577 */
11578
11579 OP *
11580 Perl_parse_termexpr(pTHX_ U32 flags)
11581 {
11582     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11583 }
11584
11585 /*
11586 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11587
11588 Parse a Perl list expression.  This may contain operators of precedence
11589 down to the comma operator.  The expression must be followed (and thus
11590 terminated) either by a low-precedence logic operator such as C<or> or by
11591 something that would normally terminate an expression such as semicolon.
11592 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11593 otherwise it is mandatory.  It is up to the caller to ensure that the
11594 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11595 the source of the code to be parsed and the lexical context for the
11596 expression.
11597
11598 The op tree representing the expression is returned.  If an optional
11599 expression is absent, a null pointer is returned, otherwise the pointer
11600 will be non-null.
11601
11602 If an error occurs in parsing or compilation, in most cases a valid op
11603 tree is returned anyway.  The error is reflected in the parser state,
11604 normally resulting in a single exception at the top level of parsing
11605 which covers all the compilation errors that occurred.  Some compilation
11606 errors, however, will throw an exception immediately.
11607
11608 =cut
11609 */
11610
11611 OP *
11612 Perl_parse_listexpr(pTHX_ U32 flags)
11613 {
11614     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11615 }
11616
11617 /*
11618 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11619
11620 Parse a single complete Perl expression.  This allows the full
11621 expression grammar, including the lowest-precedence operators such
11622 as C<or>.  The expression must be followed (and thus terminated) by a
11623 token that an expression would normally be terminated by: end-of-file,
11624 closing bracketing punctuation, semicolon, or one of the keywords that
11625 signals a postfix expression-statement modifier.  If I<flags> includes
11626 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11627 mandatory.  It is up to the caller to ensure that the dynamic parser
11628 state (L</PL_parser> et al) is correctly set to reflect the source of
11629 the code to be parsed and the lexical context for the expression.
11630
11631 The op tree representing the expression is returned.  If an optional
11632 expression is absent, a null pointer is returned, otherwise the pointer
11633 will be non-null.
11634
11635 If an error occurs in parsing or compilation, in most cases a valid op
11636 tree is returned anyway.  The error is reflected in the parser state,
11637 normally resulting in a single exception at the top level of parsing
11638 which covers all the compilation errors that occurred.  Some compilation
11639 errors, however, will throw an exception immediately.
11640
11641 =cut
11642 */
11643
11644 OP *
11645 Perl_parse_fullexpr(pTHX_ U32 flags)
11646 {
11647     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11648 }
11649
11650 /*
11651 =for apidoc Amx|OP *|parse_block|U32 flags
11652
11653 Parse a single complete Perl code block.  This consists of an opening
11654 brace, a sequence of statements, and a closing brace.  The block
11655 constitutes a lexical scope, so C<my> variables and various compile-time
11656 effects can be contained within it.  It is up to the caller to ensure
11657 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11658 reflect the source of the code to be parsed and the lexical context for
11659 the statement.
11660
11661 The op tree representing the code block is returned.  This is always a
11662 real op, never a null pointer.  It will normally be a C<lineseq> list,
11663 including C<nextstate> or equivalent ops.  No ops to construct any kind
11664 of runtime scope are included by virtue of it being a block.
11665
11666 If an error occurs in parsing or compilation, in most cases a valid op
11667 tree (most likely null) is returned anyway.  The error is reflected in
11668 the parser state, normally resulting in a single exception at the top
11669 level of parsing which covers all the compilation errors that occurred.
11670 Some compilation errors, however, will throw an exception immediately.
11671
11672 The I<flags> parameter is reserved for future use, and must always
11673 be zero.
11674
11675 =cut
11676 */
11677
11678 OP *
11679 Perl_parse_block(pTHX_ U32 flags)
11680 {
11681     if (flags)
11682         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11683     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11684 }
11685
11686 /*
11687 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11688
11689 Parse a single unadorned Perl statement.  This may be a normal imperative
11690 statement or a declaration that has compile-time effect.  It does not
11691 include any label or other affixture.  It is up to the caller to ensure
11692 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11693 reflect the source of the code to be parsed and the lexical context for
11694 the statement.
11695
11696 The op tree representing the statement is returned.  This may be a
11697 null pointer if the statement is null, for example if it was actually
11698 a subroutine definition (which has compile-time side effects).  If not
11699 null, it will be ops directly implementing the statement, suitable to
11700 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11701 equivalent op (except for those embedded in a scope contained entirely
11702 within the statement).
11703
11704 If an error occurs in parsing or compilation, in most cases a valid op
11705 tree (most likely null) is returned anyway.  The error is reflected in
11706 the parser state, normally resulting in a single exception at the top
11707 level of parsing which covers all the compilation errors that occurred.
11708 Some compilation errors, however, will throw an exception immediately.
11709
11710 The I<flags> parameter is reserved for future use, and must always
11711 be zero.
11712
11713 =cut
11714 */
11715
11716 OP *
11717 Perl_parse_barestmt(pTHX_ U32 flags)
11718 {
11719     if (flags)
11720         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11721     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11722 }
11723
11724 /*
11725 =for apidoc Amx|SV *|parse_label|U32 flags
11726
11727 Parse a single label, possibly optional, of the type that may prefix a
11728 Perl statement.  It is up to the caller to ensure that the dynamic parser
11729 state (L</PL_parser> et al) is correctly set to reflect the source of
11730 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11731 label is optional, otherwise it is mandatory.
11732
11733 The name of the label is returned in the form of a fresh scalar.  If an
11734 optional label is absent, a null pointer is returned.
11735
11736 If an error occurs in parsing, which can only occur if the label is
11737 mandatory, a valid label is returned anyway.  The error is reflected in
11738 the parser state, normally resulting in a single exception at the top
11739 level of parsing which covers all the compilation errors that occurred.
11740
11741 =cut
11742 */
11743
11744 SV *
11745 Perl_parse_label(pTHX_ U32 flags)
11746 {
11747     if (flags & ~PARSE_OPTIONAL)
11748         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11749     if (PL_lex_state == LEX_KNOWNEXT) {
11750         PL_parser->yychar = yylex();
11751         if (PL_parser->yychar == LABEL) {
11752             SV *lsv;
11753             PL_parser->yychar = YYEMPTY;
11754             lsv = newSV_type(SVt_PV);
11755             sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11756             return lsv;
11757         } else {
11758             yyunlex();
11759             goto no_label;
11760         }
11761     } else {
11762         char *s, *t;
11763         STRLEN wlen, bufptr_pos;
11764         lex_read_space(0);
11765         t = s = PL_bufptr;
11766         if (!isIDFIRST_lazy_if(s, UTF))
11767             goto no_label;
11768         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11769         if (word_takes_any_delimeter(s, wlen))
11770             goto no_label;
11771         bufptr_pos = s - SvPVX(PL_linestr);
11772         PL_bufptr = t;
11773         lex_read_space(LEX_KEEP_PREVIOUS);
11774         t = PL_bufptr;
11775         s = SvPVX(PL_linestr) + bufptr_pos;
11776         if (t[0] == ':' && t[1] != ':') {
11777             PL_oldoldbufptr = PL_oldbufptr;
11778             PL_oldbufptr = s;
11779             PL_bufptr = t+1;
11780             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11781         } else {
11782             PL_bufptr = s;
11783             no_label:
11784             if (flags & PARSE_OPTIONAL) {
11785                 return NULL;
11786             } else {
11787                 qerror(Perl_mess(aTHX_ "Parse error"));
11788                 return newSVpvs("x");
11789             }
11790         }
11791     }
11792 }
11793
11794 /*
11795 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11796
11797 Parse a single complete Perl statement.  This may be a normal imperative
11798 statement or a declaration that has compile-time effect, and may include
11799 optional labels.  It is up to the caller to ensure that the dynamic
11800 parser state (L</PL_parser> et al) is correctly set to reflect the source
11801 of the code to be parsed and the lexical context for the statement.
11802
11803 The op tree representing the statement is returned.  This may be a
11804 null pointer if the statement is null, for example if it was actually
11805 a subroutine definition (which has compile-time side effects).  If not
11806 null, it will be the result of a L</newSTATEOP> call, normally including
11807 a C<nextstate> or equivalent op.
11808
11809 If an error occurs in parsing or compilation, in most cases a valid op
11810 tree (most likely null) is returned anyway.  The error is reflected in
11811 the parser state, normally resulting in a single exception at the top
11812 level of parsing which covers all the compilation errors that occurred.
11813 Some compilation errors, however, will throw an exception immediately.
11814
11815 The I<flags> parameter is reserved for future use, and must always
11816 be zero.
11817
11818 =cut
11819 */
11820
11821 OP *
11822 Perl_parse_fullstmt(pTHX_ U32 flags)
11823 {
11824     if (flags)
11825         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11826     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11827 }
11828
11829 /*
11830 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11831
11832 Parse a sequence of zero or more Perl statements.  These may be normal
11833 imperative statements, including optional labels, or declarations
11834 that have compile-time effect, or any mixture thereof.  The statement
11835 sequence ends when a closing brace or end-of-file is encountered in a
11836 place where a new statement could have validly started.  It is up to
11837 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11838 is correctly set to reflect the source of the code to be parsed and the
11839 lexical context for the statements.
11840
11841 The op tree representing the statement sequence is returned.  This may
11842 be a null pointer if the statements were all null, for example if there
11843 were no statements or if there were only subroutine definitions (which
11844 have compile-time side effects).  If not null, it will be a C<lineseq>
11845 list, normally including C<nextstate> or equivalent ops.
11846
11847 If an error occurs in parsing or compilation, in most cases a valid op
11848 tree is returned anyway.  The error is reflected in the parser state,
11849 normally resulting in a single exception at the top level of parsing
11850 which covers all the compilation errors that occurred.  Some compilation
11851 errors, however, will throw an exception immediately.
11852
11853 The I<flags> parameter is reserved for future use, and must always
11854 be zero.
11855
11856 =cut
11857 */
11858
11859 OP *
11860 Perl_parse_stmtseq(pTHX_ U32 flags)
11861 {
11862     OP *stmtseqop;
11863     I32 c;
11864     if (flags)
11865         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11866     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11867     c = lex_peek_unichar(0);
11868     if (c != -1 && c != /*{*/'}')
11869         qerror(Perl_mess(aTHX_ "Parse error"));
11870     return stmtseqop;
11871 }
11872
11873 /*
11874  * Local variables:
11875  * c-indentation-style: bsd
11876  * c-basic-offset: 4
11877  * indent-tabs-mode: nil
11878  * End:
11879  *
11880  * ex: set ts=8 sts=4 sw=4 et:
11881  */