toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
   /* Convenience wrapper around S_new_constant(): supplies aTHX_ and passes
    * argument c through STR_WITH_LEN(). */
   44 #define new_constant(a,b,c,d,e,f,g)     \
   45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
   46
      /* Shorthand for the yylval member of the current parser structure. */
   47 #define pl_yylval       (PL_parser->yylval)
   48
   49 /* XXX temporary backwards compatibility */
      /* Each PL_xxx macro below expands to the identically named member of
       * the structure that PL_parser currently points to, so all of them
       * dereference PL_parser at the point of use. */
   50 #define PL_lex_brackets         (PL_parser->lex_brackets)
   51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
   52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
   53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
   54 #define PL_lex_casemods         (PL_parser->lex_casemods)
   55 #define PL_lex_casestack        (PL_parser->lex_casestack)
   56 #define PL_lex_defer            (PL_parser->lex_defer)
   57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
   58 #define PL_lex_expect           (PL_parser->lex_expect)
   59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
   60 #define PL_lex_inpat            (PL_parser->lex_inpat)
   61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
   62 #define PL_lex_op               (PL_parser->lex_op)
   63 #define PL_lex_repl             (PL_parser->lex_repl)
   64 #define PL_lex_starts           (PL_parser->lex_starts)
   65 #define PL_lex_stuff            (PL_parser->lex_stuff)
   66 #define PL_multi_start          (PL_parser->multi_start)
   67 #define PL_multi_open           (PL_parser->multi_open)
   68 #define PL_multi_close          (PL_parser->multi_close)
   69 #define PL_preambled            (PL_parser->preambled)
   70 #define PL_sublex_info          (PL_parser->sublex_info)
   71 #define PL_linestr              (PL_parser->linestr)
   72 #define PL_expect               (PL_parser->expect)
   73 #define PL_copline              (PL_parser->copline)
   74 #define PL_bufptr               (PL_parser->bufptr)
   75 #define PL_oldbufptr            (PL_parser->oldbufptr)
   76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
   77 #define PL_linestart            (PL_parser->linestart)
   78 #define PL_bufend               (PL_parser->bufend)
   79 #define PL_last_uni             (PL_parser->last_uni)
   80 #define PL_last_lop             (PL_parser->last_lop)
   81 #define PL_last_lop_op          (PL_parser->last_lop_op)
   82 #define PL_lex_state            (PL_parser->lex_state)
   83 #define PL_rsfp                 (PL_parser->rsfp)
   84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
   85 #define PL_in_my                (PL_parser->in_my)
   86 #define PL_in_my_stash          (PL_parser->in_my_stash)
   87 #define PL_tokenbuf             (PL_parser->tokenbuf)
   88 #define PL_multi_end            (PL_parser->multi_end)
   89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
  /* Message text for over-long identifiers; its users lie outside this part
   * of the file. */
  113 static const char ident_too_long[] = "Identifier too long";
  114
      /* CURMAD(slot,sv): in PERL_MAD builds, when PL_madskills is set, hands
       * sv to curmad() and then zeroes the caller's sv variable; in other
       * builds it expands to nothing.
       * NEXTVAL_NEXTTOKE: the value slot for the token currently being
       * queued -- PL_nexttoke[PL_curforce].next_val under PERL_MAD,
       * PL_nextval[PL_nexttoke] otherwise. */
  115 #ifdef PERL_MAD
  116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
  117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
  118 #else
  119 #  define CURMAD(slot,sv)
  120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  121 #endif
  122
      /* Bit layout: XENUMMASK selects the low six bits; XFAKEEOF and
       * XFAKEBRACK are the two single-bit flags directly above it.
       * NOTE(review): presumably applied to the values kept in the bracket
       * stack -- confirm at the use sites, which are not visible here. */
  123 #define XENUMMASK  0x3f
  124 #define XFAKEEOF   0x40
  125 #define XFAKEBRACK 0x80
  126
      /* UTF: non-zero when the source being lexed is to be treated as UTF-8.
       * With USE_UTF8_SCRIPTS that is simply "not IN_BYTES".  Otherwise it is
       * true when PL_linestr exists and DO_UTF8() holds for it, or when
       * HINT_UTF8 is set in PL_hints and the parser's
       * LEX_IGNORE_UTF8_HINTS flag is clear. */
  127 #ifdef USE_UTF8_SCRIPTS
  128 #   define UTF (!IN_BYTES)
  129 #else
  130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
  131 #endif
  132
  133 /* The maximum number of characters preceding the unrecognized one to display */
  134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 141
  142 /* LEX_* are values for PL_lex_state, the state of the lexer.
  143  * They are arranged oddly so that the guard on the switch statement
  144  * can get by with a single comparison (if the compiler is smart enough).
  145  *
  146  * These values refer to the various states within a sublex parse,
  147  * i.e. within a double quotish string
  148  */
  149
  150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
  151
      /* The values below are contiguous from 10 down to 0; the debugging
       * name table further down is indexed directly by them, so the two
       * lists must be kept in step. */
  152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
  153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
  154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
  155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
  156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
  157
  158                                    /* at end of code, eg "$x" followed by:  */
  159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
  160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
  161
  162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
  163                                         string or after \E, $foo, etc       */
  164 #define LEX_INTERPCONST          2 /* NOT USED */
  165 #define LEX_FORMLINE             1 /* expecting a format line               */
  166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
  167
  168
      /* DEBUGGING builds only: printable names for the lexer states, where
       * entry i names the state whose LEX_* value is i (entry 0 is
       * LEX_KNOWNEXT, entry 10 is LEX_NORMAL).  There is no entry for
       * LEX_NOTPARSING (11), so that value must not be used as an index. */
  169 #ifdef DEBUGGING
  170 static const char* const lex_state_names[] = {
  171     "KNOWNEXT",
  172     "FORMLINE",
  173     "INTERPCONST",
  174     "INTERPCONCAT",
  175     "INTERPENDMAYBE",
  176     "INTERPEND",
  177     "INTERPSTART",
  178     "INTERPPUSH",
  179     "INTERPCASEMOD",
  180     "INTERPNORMAL",
  181     "NORMAL"
  182 };
  183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
  191 /* CLINE is a macro that ensures PL_copline has a sane value */
      /* Concretely: PL_copline is set to the smaller of its current value
       * and CopLINE(PL_curcop), and the expression yields the new value.
       * Any earlier definition of CLINE is discarded first. */
  192
  193 #ifdef CLINE
  194 #undef CLINE
  195 #endif
  196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
  197
      /* Whitespace-skipping front ends.  In PERL_MAD builds they map onto
       * skipspace0(), skipspace1() and skipspace2(); SKIPSPACE2 passes the
       * address of tsv and PEEKSPACE passes 0 in its place.  In other builds
       * all four collapse to plain skipspace(s) and tsv is not evaluated. */
  198 #ifdef PERL_MAD
  199 #  define SKIPSPACE0(s) skipspace0(s)
  200 #  define SKIPSPACE1(s) skipspace1(s)
  201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
  202 #  define PEEKSPACE(s) skipspace2(s,0)
  203 #else
  204 #  define SKIPSPACE0(s) skipspace(s)
  205 #  define SKIPSPACE1(s) skipspace(s)
  206 #  define SKIPSPACE2(s,tsv) skipspace(s)
  207 #  define PEEKSPACE(s) skipspace(s)
  208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN0OP       : zero-argument function, with its op created in this file
 225  * FUN1         : not used, except for not, which isn't a UNIOP
 226  * BOop         : bitwise or or xor
 227  * BAop         : bitwise and
 228  * SHop         : shift operator
 229  * PWop         : power operator
 230  * PMop         : pattern-matching operator
 231  * Aop          : addition-level operator
 232  * Mop          : multiplication-level operator
 233  * Eop          : equality-testing operator
 234  * Rop          : relational operator <= != gt
 235  *
 236  * Also see LOP and lop() below.
 237  */
 238
  239 #ifdef DEBUGGING /* Serve -DT. */
  240 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
  241 #else
  242 #   define REPORT(retval) (retval)
  243 #endif
  244
      /* Every macro below contains a `return`, so it leaves the enclosing
       * function, and every one stores the local scan pointer `s` into
       * PL_bufptr first; they are therefore only usable inside a function
       * that has a `char *s` in scope and returns the token number.
       * Most of them also set PL_expect for the next token, and those taking
       * an argument f store it in pl_yylval (ival, or opval for FUN0OP).
       * The variants wrapped in ao() pass the token through S_ao(), which
       * turns it into ASSIGNOP when the next input character is '='. */
  245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
  246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
  247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
  248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
  249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
  250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
  251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
  252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
  253 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
  254 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
  255 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
  256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
  257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
  258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
  259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
  260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
  261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
  262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
  263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
  264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
  265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
  267 /* This bit of chicanery makes a unary function followed by
  268  * a parenthesis into a function with one argument, highest precedence.
  269  * The UNIDOR macro is for unary functions that can be followed by the //
  270  * operator (such as C<shift // 0>).
  271  */
      /* UNI3(f,x,have_x): stores f in pl_yylval.ival and PL_last_lop_op,
       * sets PL_expect to x only when have_x is true, saves s in PL_bufptr
       * and PL_oldbufptr in PL_last_uni, then returns FUNC1 if the next
       * character (directly, or after PEEKSPACE) is '(' and UNIOP otherwise.
       * It expands to a bare brace block containing `return`, not to a
       * single statement, and relies on a local `s`. */
  272 #define UNI3(f,x,have_x) { \
  273         pl_yylval.ival = f; \
  274         if (have_x) PL_expect = x; \
  275         PL_bufptr = s; \
  276         PL_last_uni = PL_oldbufptr; \
  277         PL_last_lop_op = f; \
  278         if (*s == '(') \
  279             return REPORT( (int)FUNC1 ); \
  280         s = PEEKSPACE(s); \
  281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
  282         }
  283 #define UNI(f)    UNI3(f,XTERM,1)
  284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
      /* UNIPROTO(f,optional): records PL_oldbufptr in PL_last_uni when
       * `optional` is true, then returns through OPERATOR(f). */
  285 #define UNIPROTO(f,optional) { \
  286         if (optional) PL_last_uni = PL_oldbufptr; \
  287         OPERATOR(f); \
  288         }
  289
      /* UNIBRACK(f): UNI3 with have_x false, i.e. PL_expect is left alone. */
  290 #define UNIBRACK(f) UNI3(f,0,0)
  291
  292 /* grandfather return to old style */
      /* OLDLOP(f): returns LSTOP directly (without going through REPORT)
       * after storing f in pl_yylval.ival, setting PL_expect to XTERM and
       * saving s in PL_bufptr.  When PL_lex_allbrackets is zero it first
       * lowers PL_lex_fakeeof to LEX_FAKEEOF_LOWLOGIC if it was higher. */
  293 #define OLDLOP(f) \
  294         do { \
  295             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
  296                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
  297             pl_yylval.ival = (f); \
  298             PL_expect = XTERM; \
  299             PL_bufptr = s; \
  300             return (int)LSTOP; \
  301         } while(0)
  302
      /* Advances the line number of PL_curcop by one and, if the shared
       * lexer state has a non-zero herelines count pending, adds that count
       * as well and resets it to zero. */
  303 #define COPLINE_INC_WITH_HERELINES                  \
  304     STMT_START {                                     \
  305         CopLINE_inc(PL_curcop);                       \
  306         if (PL_parser->lex_shared->herelines)          \
  307             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
  308             PL_parser->lex_shared->herelines = 0;                    \
  309     } STMT_END
 310
 311
 312 #ifdef DEBUGGING
 313
  314 /* how to interpret the pl_yylval associated with the token */
  315 enum token_type {
  316     TOKENTYPE_NONE,
  317     TOKENTYPE_IVAL,
  318     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
  319     TOKENTYPE_PVAL,
  320     TOKENTYPE_OPVAL
  321 };
  322
      /* DEBUGGING-only lookup table pairing each named token with the way
       * its pl_yylval payload should be printed and with its display name.
       * The list ends with a sentinel entry whose token is 0 and whose name
       * is NULL; S_tokereport() scans it linearly up to that sentinel.
       * No entry currently uses TOKENTYPE_PVAL. */
  323 static struct debug_tokens {
  324     const int token;
  325     enum token_type type;
  326     const char *name;
  327 } const debug_tokens[] =
  328 {
  329     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
  330     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
  331     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
  332     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
  333     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
  334     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
  335     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
  336     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
  337     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
  338     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
  339     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
  340     { DO,               TOKENTYPE_NONE,         "DO" },
  341     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
  342     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
  343     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
  344     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
  345     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
  346     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
  347     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
  348     { FOR,              TOKENTYPE_IVAL,         "FOR" },
  349     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
  350     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
  351     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
  352     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
  353     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
  354     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
  355     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
  356     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
  357     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
  358     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
  359     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
  360     { IF,               TOKENTYPE_IVAL,         "IF" },
  361     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
  362     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
  363     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
  364     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
  365     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
  366     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
  367     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
  368     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
  369     { MY,               TOKENTYPE_IVAL,         "MY" },
  370     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
  371     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
  372     { OROP,             TOKENTYPE_IVAL,         "OROP" },
  373     { OROR,             TOKENTYPE_NONE,         "OROR" },
  374     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
  375     { PEG,              TOKENTYPE_NONE,         "PEG" },
  376     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
  377     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
  378     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
  379     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
  380     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
  381     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
  382     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
  383     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
  384     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
  385     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
  386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
  387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
  388     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
  389     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
  390     { SUB,              TOKENTYPE_NONE,         "SUB" },
  391     { THING,            TOKENTYPE_OPVAL,        "THING" },
  392     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
  393     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
  394     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
  395     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
  396     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
  397     { USE,              TOKENTYPE_IVAL,         "USE" },
  398     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
  399     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
  400     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
  401     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
  402     { 0,                TOKENTYPE_NONE,         NULL }
  403 };
 404
 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 406
 407 STATIC int
 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 409 {
 410     dVAR;
 411
 412     PERL_ARGS_ASSERT_TOKEREPORT;
 413
 414     if (DEBUG_T_TEST) {
 415         const char *name = NULL;
 416         enum token_type type = TOKENTYPE_NONE;
 417         const struct debug_tokens *p;
 418         SV* const report = newSVpvs("<== ");
 419
 420         for (p = debug_tokens; p->token; p++) {
 421             if (p->token == (int)rv) {
 422                 name = p->name;
 423                 type = p->type;
 424                 break;
 425             }
 426         }
 427         if (name)
 428             Perl_sv_catpv(aTHX_ report, name);
 429         else if ((char)rv > ' ' && (char)rv <= '~')
 430             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 431         else if (!rv)
 432             sv_catpvs(report, "EOF");
 433         else
 434             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 435         switch (type) {
 436         case TOKENTYPE_NONE:
 437             break;
 438         case TOKENTYPE_IVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 440             break;
 441         case TOKENTYPE_OPNUM:
 442             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 443                                     PL_op_name[lvalp->ival]);
 444             break;
 445         case TOKENTYPE_PVAL:
 446             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 447             break;
 448         case TOKENTYPE_OPVAL:
 449             if (lvalp->opval) {
 450                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 451                                     PL_op_name[lvalp->opval->op_type]);
 452                 if (lvalp->opval->op_type == OP_CONST) {
 453                     Perl_sv_catpvf(aTHX_ report, " %s",
 454                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 455                 }
 456
 457             }
 458             else
 459                 sv_catpvs(report, "(opval=null)");
 460             break;
 461         }
 462         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 463     };
 464     return (int)rv;
 465 }
 466
 467
 468 /* print the buffer with suitable escapes */
 469
 470 STATIC void
 471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 472 {
 473     SV* const tmp = newSVpvs("");
 474
 475     PERL_ARGS_ASSERT_PRINTBUF;
 476
 477     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 478     SvREFCNT_dec(tmp);
 479 }
 480
 481 #endif
 482
 483 static int
 484 S_deprecate_commaless_var_list(pTHX) {
 485     PL_expect = XTERM;
 486     deprecate("comma-less variable list");
 487     return REPORT(','); /* grandfather non-comma-format format */
 488 }
 489
 490 /*
 491  * S_ao
 492  *
 493  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 494  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 495  */
 496
 497 STATIC int
 498 S_ao(pTHX_ int toketype)
 499 {
 500     dVAR;
 501     if (*PL_bufptr == '=') {
 502         PL_bufptr++;
 503         if (toketype == ANDAND)
 504             pl_yylval.ival = OP_ANDASSIGN;
 505         else if (toketype == OROR)
 506             pl_yylval.ival = OP_ORASSIGN;
 507         else if (toketype == DORDOR)
 508             pl_yylval.ival = OP_DORASSIGN;
 509         toketype = ASSIGNOP;
 510     }
 511     return toketype;
 512 }
 513
 514 /*
 515  * S_no_op
 516  * When Perl expects an operator and finds something else, no_op
 517  * prints the warning.  It always prints "<something> found where
 518  * operator expected.  It prints "Missing semicolon on previous line?"
 519  * if the surprise occurs at the start of the line.  "do you need to
 520  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 521  * where the compiler doesn't know if foo is a method call or a function.
 522  * It prints "Missing operator before end of line" if there's nothing
 523  * after the missing operator, or "... before <...>" if there is something
 524  * after the missing operator.
 525  */
 526
 527 STATIC void
 528 S_no_op(pTHX_ const char *const what, char *s)
 529 {
 530     dVAR;
 531     char * const oldbp = PL_bufptr;
 532     const bool is_first = (PL_oldbufptr == PL_linestart);
 533
 534     PERL_ARGS_ASSERT_NO_OP;
 535
 536     if (!s)
 537         s = oldbp;
 538     else
 539         PL_bufptr = s;
 540     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 541     if (ckWARN_d(WARN_SYNTAX)) {
 542         if (is_first)
 543             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                     "\t(Missing semicolon on previous line?)\n");
 545         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 546             const char *t;
 547             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 548                                                             t += UTF ? UTF8SKIP(t) : 1)
 549                 NOOP;
 550             if (t < PL_bufptr && isSPACE(*t))
 551                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 552                         "\t(Do you need to predeclare %"SVf"?)\n",
 553                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 554                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 555         }
 556         else {
 557             assert(s >= oldbp);
 558             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 559                     "\t(Missing operator before %"SVf"?)\n",
 560                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 561                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 562         }
 563     }
 564     PL_bufptr = oldbp;
 565 }
 566
 567 /*
 568  * S_missingterm
 569  * Complain about missing quote/regexp/heredoc terminator.
 570  * If it's called with NULL then it cauterizes the line buffer.
 571  * If we're in a delimited string and the delimiter is a control
 572  * character, it's reformatted into a two-char sequence like ^C.
 573  * This is fatal.
 574  */
 575
 576 STATIC void
 577 S_missingterm(pTHX_ char *s)
 578 {
 579     dVAR;
 580     char tmpbuf[3];
 581     char q;
 582     if (s) {
 583         char * const nl = strrchr(s,'\n');
 584         if (nl)
 585             *nl = '\0';
 586     }
 587     else if (isCNTRL(PL_multi_close)) {
 588         *tmpbuf = '^';
 589         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 590         tmpbuf[2] = '\0';
 591         s = tmpbuf;
 592     }
 593     else {
 594         *tmpbuf = (char)PL_multi_close;
 595         tmpbuf[1] = '\0';
 596         s = tmpbuf;
 597     }
 598     q = strchr(s,'"') ? '\'' : '"';
 599     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 600 }
 601
 602 #include "feature.h"
 603
 604 /*
 605  * Check whether the named feature is enabled.
 606  */
 607 bool
 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 609 {
 610     dVAR;
 611     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 612
 613     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 614
 615     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 616
 617     if (namelen > MAX_FEATURE_LEN)
 618         return FALSE;
 619     memcpy(&he_name[8], name, namelen);
 620
 621     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 622                                      REFCOUNTED_HE_EXISTS));
 623 }
 624
 625 /*
 626  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 627  * utf16-to-utf8-reversed.
 628  */
 629
 630 #ifdef PERL_CR_FILTER
 631 static void
 632 strip_return(SV *sv)
 633 {
 634     const char *s = SvPVX_const(sv);
 635     const char * const e = s + SvCUR(sv);
 636
 637     PERL_ARGS_ASSERT_STRIP_RETURN;
 638
 639     /* outer loop optimized to do nothing if there are no CR-LFs */
 640     while (s < e) {
 641         if (*s++ == '\r' && *s == '\n') {
 642             /* hit a CR-LF, need to copy the rest */
 643             char *d = s - 1;
 644             *d++ = *s++;
 645             while (s < e) {
 646                 if (*s == '\r' && s[1] == '\n')
 647                     s++;
 648                 *d++ = *s++;
 649             }
 650             SvCUR(sv) -= s - d;
 651             return;
 652         }
 653     }
 654 }
 655
 656 STATIC I32
 657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 658 {
 659     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 660     if (count > 0 && !maxlen)
 661         strip_return(sv);
 662     return count;
 663 }
 664 #endif
 665
 666 /*
 667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 668
 669 Creates and initialises a new lexer/parser state object, supplying
 670 a context in which to lex and parse from a new source of Perl code.
 671 A pointer to the new state object is placed in L</PL_parser>.  An entry
 672 is made on the save stack so that upon unwinding the new state object
 673 will be destroyed and the former value of L</PL_parser> will be restored.
 674 Nothing else need be done to clean up the parsing context.
 675
 676 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 677 non-null, provides a string (in SV form) containing code to be parsed.
 678 A copy of the string is made, so subsequent modification of I<line>
 679 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 680 from which code will be read to be parsed.  If both are non-null, the
 681 code in I<line> comes first and must consist of complete lines of input,
 682 and I<rsfp> supplies the remainder of the source.
 683
 684 The I<flags> parameter is reserved for future use.  Currently it is only
 685 used by perl internally, so extensions should always pass zero.
 686
 687 =cut
 688 */
 689
 690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 691    can share filters with the current parser.
 692    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 693    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 694    destruction. This is used to handle the case of defaulting to reading the
 695    script from the standard input because no filename was given on the command
 696    line (without getting confused by situation where STDIN has been closed, so
 697    the script handle is opened on fd 0)  */
 698
 699 void
 700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 701 {
 702     dVAR;
 703     const char *s = NULL;
 704     yy_parser *parser, *oparser;
 705     if (flags && flags & ~LEX_START_FLAGS)
 706         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 707
 708     /* create and initialise a parser */
 709
 710     Newxz(parser, 1, yy_parser);
 711     parser->old_parser = oparser = PL_parser;
 712     PL_parser = parser;
 713
 714     parser->stack = NULL;
 715     parser->ps = NULL;
 716     parser->stack_size = 0;
 717
 718     /* on scope exit, free this parser and restore any outer one */
 719     SAVEPARSER(parser);
 720     parser->saved_curcop = PL_curcop;
 721
 722     /* initialise lexer state */
 723
 724 #ifdef PERL_MAD
 725     parser->curforce = -1;
 726 #else
 727     parser->nexttoke = 0;
 728 #endif
 729     parser->error_count = oparser ? oparser->error_count : 0;
 730     parser->copline = NOLINE;
 731     parser->lex_state = LEX_NORMAL;
 732     parser->expect = XSTATE;
 733     parser->rsfp = rsfp;
 734     parser->rsfp_filters =
 735       !(flags & LEX_START_SAME_FILTER) || !oparser
 736         ? NULL
 737         : MUTABLE_AV(SvREFCNT_inc(
 738             oparser->rsfp_filters
 739              ? oparser->rsfp_filters
 740              : (oparser->rsfp_filters = newAV())
 741           ));
 742
 743     Newx(parser->lex_brackstack, 120, char);
 744     Newx(parser->lex_casestack, 12, char);
 745     *parser->lex_casestack = '\0';
 746     Newxz(parser->lex_shared, 1, LEXSHARED);
 747
 748     if (line) {
 749         STRLEN len;
 750         s = SvPV_const(line, len);
 751         parser->linestr = flags & LEX_START_COPIED
 752                             ? SvREFCNT_inc_simple_NN(line)
 753                             : newSVpvn_flags(s, len, SvUTF8(line));
 754         sv_catpvs(parser->linestr, "\n;");
 755     } else {
 756         parser->linestr = newSVpvs("\n;");
 757     }
 758     parser->oldoldbufptr =
 759         parser->oldbufptr =
 760         parser->bufptr =
 761         parser->linestart = SvPVX(parser->linestr);
 762     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 763     parser->last_lop = parser->last_uni = NULL;
 764     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 765                                  |LEX_DONT_CLOSE_RSFP);
 766
 767     parser->in_pod = parser->filtered = 0;
 768 }
 769
 770
 771 /* delete a parser object */
 772
 773 void
 774 Perl_parser_free(pTHX_  const yy_parser *parser)
 775 {
 776     PERL_ARGS_ASSERT_PARSER_FREE;
 777
 778     PL_curcop = parser->saved_curcop;
 779     SvREFCNT_dec(parser->linestr);
 780
 781     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 782         PerlIO_clearerr(parser->rsfp);
 783     else if (parser->rsfp && (!parser->old_parser ||
 784                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 785         PerlIO_close(parser->rsfp);
 786     SvREFCNT_dec(parser->rsfp_filters);
 787
 788     Safefree(parser->lex_brackstack);
 789     Safefree(parser->lex_casestack);
 790     Safefree(parser->lex_shared);
 791     PL_parser = parser->old_parser;
 792     Safefree(parser);
 793 }
 794
 795
 796 /*
 797 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 798
 799 Buffer scalar containing the chunk currently under consideration of the
 800 text currently being lexed.  This is always a plain string scalar (for
 801 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 802 normal scalar means; instead refer to the buffer directly by the pointer
 803 variables described below.
 804
 805 The lexer maintains various C<char*> pointers to things in the
 806 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 807 reallocated, all of these pointers must be updated.  Don't attempt to
 808 do this manually, but rather use L</lex_grow_linestr> if you need to
 809 reallocate the buffer.
 810
 811 The content of the text chunk in the buffer is commonly exactly one
 812 complete line of input, up to and including a newline terminator,
 813 but there are situations where it is otherwise.  The octets of the
 814 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 815 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 816 flag on this scalar, which may disagree with it.
 817
 818 For direct examination of the buffer, the variable
 819 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 820 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 821 of these pointers is usually preferable to examination of the scalar
 822 through normal scalar means.
 823
 824 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 825
 826 Direct pointer to the end of the chunk of text currently being lexed, the
 827 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 828 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 829 always located at the end of the buffer, and does not count as part of
 830 the buffer's contents.
 831
 832 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 833
 834 Points to the current position of lexing inside the lexer buffer.
 835 Characters around this point may be freely examined, within
 836 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 837 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 838 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 839
 840 Lexing code (whether in the Perl core or not) moves this pointer past
 841 the characters that it consumes.  It is also expected to perform some
 842 bookkeeping whenever a newline character is consumed.  This movement
 843 can be more conveniently performed by the function L</lex_read_to>,
 844 which handles newlines appropriately.
 845
 846 Interpretation of the buffer's octets can be abstracted out by
 847 using the slightly higher-level functions L</lex_peek_unichar> and
 848 L</lex_read_unichar>.
 849
 850 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 851
 852 Points to the start of the current line inside the lexer buffer.
 853 This is useful for indicating at which column an error occurred, and
 854 not much else.  This must be updated by any lexing code that consumes
 855 a newline; the function L</lex_read_to> handles this detail.
 856
 857 =cut
 858 */
 859
 860 /*
 861 =for apidoc Amx|bool|lex_bufutf8
 862
 863 Indicates whether the octets in the lexer buffer
 864 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 865 of Unicode characters.  If not, they should be interpreted as Latin-1
 866 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 867
 868 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 869 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 870 encoding.
 871
 872 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 873 is significant, but not the whole story regarding the input character
 874 encoding.  Normally, when a file is being read, the scalar contains octets
 875 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 876 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 877 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 878 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 879 is in effect.  This logic may change in the future; use this function
 880 instead of implementing the logic yourself.
 881
 882 =cut
 883 */
 884
 885 bool
 886 Perl_lex_bufutf8(pTHX)
 887 {
 888     return UTF;
 889 }
 890
 891 /*
 892 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 893
 894 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 895 at least I<len> octets (including terminating NUL).  Returns a
 896 pointer to the reallocated buffer.  This is necessary before making
 897 any direct modification of the buffer that would increase its length.
 898 L</lex_stuff_pvn> provides a more convenient way to insert text into
 899 the buffer.
 900
 901 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 902 this function updates all of the lexer's variables that point directly
 903 into the buffer.
 904
 905 =cut
 906 */
 907
 908 char *
 909 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 910 {
 911     SV *linestr;
 912     char *buf;
 913     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 914     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 915     linestr = PL_parser->linestr;
 916     buf = SvPVX(linestr);
 917     if (len <= SvLEN(linestr))
 918         return buf;
 919     bufend_pos = PL_parser->bufend - buf;
 920     bufptr_pos = PL_parser->bufptr - buf;
 921     oldbufptr_pos = PL_parser->oldbufptr - buf;
 922     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 923     linestart_pos = PL_parser->linestart - buf;
 924     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 925     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 926     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 927                             PL_parser->lex_shared->re_eval_start - buf : 0;
 928
 929     buf = sv_grow(linestr, len);
 930
 931     PL_parser->bufend = buf + bufend_pos;
 932     PL_parser->bufptr = buf + bufptr_pos;
 933     PL_parser->oldbufptr = buf + oldbufptr_pos;
 934     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 935     PL_parser->linestart = buf + linestart_pos;
 936     if (PL_parser->last_uni)
 937         PL_parser->last_uni = buf + last_uni_pos;
 938     if (PL_parser->last_lop)
 939         PL_parser->last_lop = buf + last_lop_pos;
 940     if (PL_parser->lex_shared->re_eval_start)
 941         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 942     return buf;
 943 }
 944
 945 /*
 946 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 947
 948 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 949 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 950 reallocating the buffer if necessary.  This means that lexing code that
 951 runs later will see the characters as if they had appeared in the input.
 952 It is not recommended to do this as part of normal parsing, and most
 953 uses of this facility run the risk of the inserted characters being
 954 interpreted in an unintended manner.
 955
 956 The string to be inserted is represented by I<len> octets starting
 957 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 958 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 959 The characters are recoded for the lexer buffer, according to how the
 960 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 961 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 962 function is more convenient.
 963
 964 =cut
 965 */
 966
/*
 * Perl_lex_stuff_pvn
 * Insert the len octets at pv into the lexer buffer at PL_parser->bufptr,
 * recoding them when the encoding named by flags (LEX_STUFF_UTF8 or not)
 * differs from the buffer's current interpretation (UTF).  Croaks on any
 * flag other than LEX_STUFF_UTF8, and when UTF-8 input holds a character
 * that cannot be stored in a Latin-1 buffer.
 *
 * All three insertion paths follow the same pattern: grow the buffer,
 * shift the text from bufptr onwards (including the trailing NUL) to open
 * a gap, bump SvCUR and bufend by the gap size, then fill the gap.
 * bufptr itself is left where it was, so the inserted text is the next
 * thing to be lexed.
 */
void
Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
{
    dVAR;
    char *bufptr;
    PERL_ARGS_ASSERT_LEX_STUFF_PVN;
    if (flags & ~(LEX_STUFF_UTF8))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
    if (UTF) {
        if (flags & LEX_STUFF_UTF8) {
            /* UTF-8 into a UTF-8 buffer: no recoding needed */
            goto plain_copy;
        } else {
            /* Latin-1 into a UTF-8 buffer: each octet with the high bit
             * set becomes two octets, so count them to size the gap */
            STRLEN highhalf = 0;
            const char *p, *e = pv+len;
            for (p = pv; p != e; p++)
                highhalf += !!(((U8)*p) & 0x80);
            if (!highhalf)
                goto plain_copy;    /* pure ASCII: identical in both */
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
            /* fetch bufptr only after growing, as the buffer may move */
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
            SvCUR_set(PL_parser->linestr,
                SvCUR(PL_parser->linestr) + len+highhalf);
            PL_parser->bufend += len+highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c & 0x80) {
                    /* emit the two-octet form: top two bits of c in the
                     * lead octet, low six bits in the continuation */
                    *bufptr++ = (char)(0xc0 | (c >> 6));
                    *bufptr++ = (char)(0x80 | (c & 0x3f));
                } else {
                    *bufptr++ = (char)c;
                }
            }
        }
    } else {
        if (flags & LEX_STUFF_UTF8) {
            /* UTF-8 into a Latin-1 buffer: first pass validates the input
             * and counts the two-octet sequences that will shrink to one
             * octet each */
            STRLEN highhalf = 0;
            const char *p, *e = pv+len;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c >= 0xc4) {
                    /* lead octet of a character that does not fit in a
                     * single Latin-1 octet */
                    Perl_croak(aTHX_ "Lexing code attempted to stuff "
                                "non-Latin-1 character into Latin-1 input");
                } else if (c >= 0xc2 && p+1 != e &&
                            (((U8)p[1]) & 0xc0) == 0x80) {
                    /* lead octet 0xc2 or 0xc3 followed by a continuation
                     * octet: skip the continuation and count the pair */
                    p++;
                    highhalf++;
                } else if (c >= 0x80) {
                    /* malformed UTF-8 */
                    /* re-decode with PL_warnhook temporarily set to
                     * PERL_WARNHOOK_FATAL */
                    ENTER;
                    SAVESPTR(PL_warnhook);
                    PL_warnhook = PERL_WARNHOOK_FATAL;
                    utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
                    LEAVE;
                }
            }
            if (!highhalf)
                goto plain_copy;
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
            SvCUR_set(PL_parser->linestr,
                SvCUR(PL_parser->linestr) + len-highhalf);
            PL_parser->bufend += len-highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c & 0x80) {
                    /* fold lead + continuation back into one octet */
                    *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
                    p++;
                } else {
                    *bufptr++ = (char)c;
                }
            }
        } else {
            /* Latin-1 into a Latin-1 buffer; also the shared landing
             * point for every case above that needs no recoding */
            plain_copy:
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
            SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
            PL_parser->bufend += len;
            Copy(pv, bufptr, len, char);
        }
    }
}
1051
1052 /*
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1054
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary.  This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1062
1063 The string to be inserted is represented by octets starting at I<pv>
1064 and continuing to the first nul.  These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in I<flags>.  The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1076 {
1077     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078     lex_stuff_pvn(pv, strlen(pv), flags);
1079 }
1080
1081 /*
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1083
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary.  This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1091
1092 The string to be inserted is the string value of I<sv>.  The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1097
1098 =cut
1099 */
1100
1101 void
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 {
1104     char *pv;
1105     STRLEN len;
1106     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1107     if (flags)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1109     pv = SvPV(sv, len);
1110     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 }
1112
1113 /*
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1115
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1120
1121 This is not the normal way to consume lexed text.  For that, use
1122 L</lex_read_to>.
1123
1124 =cut
1125 */
1126
1127 void
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1129 {
1130     char *buf, *bufend;
1131     STRLEN unstuff_len;
1132     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133     buf = PL_parser->bufptr;
1134     if (ptr < buf)
1135         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1136     if (ptr == buf)
1137         return;
1138     bufend = PL_parser->bufend;
1139     if (ptr > bufend)
1140         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141     unstuff_len = ptr - buf;
1142     Move(ptr, buf, bufend+1-ptr, char);
1143     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144     PL_parser->bufend = bufend - unstuff_len;
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1149
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1154
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_read_to(pTHX_ char *ptr)
1164 {
1165     char *s;
1166     PERL_ARGS_ASSERT_LEX_READ_TO;
1167     s = PL_parser->bufptr;
1168     if (ptr < s || ptr > PL_parser->bufend)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170     for (; s != ptr; s++)
1171         if (*s == '\n') {
1172             COPLINE_INC_WITH_HERELINES;
1173             PL_parser->linestart = s+1;
1174         }
1175     PL_parser->bufptr = ptr;
1176 }
1177
1178 /*
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1180
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to I<ptr>.  The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately.  I<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1186
Normally it is not necessary to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it.  However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1194
1195 =cut
1196 */
1197
1198 void
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1200 {
1201     char *buf;
1202     STRLEN discard_len;
1203     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204     buf = SvPVX(PL_parser->linestr);
1205     if (ptr < buf)
1206         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1207     if (ptr == buf)
1208         return;
1209     if (ptr > PL_parser->bufptr)
1210         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211     discard_len = ptr - buf;
1212     if (PL_parser->oldbufptr < ptr)
1213         PL_parser->oldbufptr = ptr;
1214     if (PL_parser->oldoldbufptr < ptr)
1215         PL_parser->oldoldbufptr = ptr;
1216     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217         PL_parser->last_uni = NULL;
1218     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219         PL_parser->last_lop = NULL;
1220     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222     PL_parser->bufend -= discard_len;
1223     PL_parser->bufptr -= discard_len;
1224     PL_parser->oldbufptr -= discard_len;
1225     PL_parser->oldoldbufptr -= discard_len;
1226     if (PL_parser->last_uni)
1227         PL_parser->last_uni -= discard_len;
1228     if (PL_parser->last_lop)
1229         PL_parser->last_lop -= discard_len;
1230 }
1231
1232 /*
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1234
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more.  It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1240
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245 will not be discarded.  If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1247
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1250
1251 =cut
1252 */
1253
/* Extra flags understood by lex_next_chunk() in addition to
 * LEX_KEEP_PREVIOUS:
 *   LEX_FAKE_EOF - skip reading and go straight to end-of-input handling
 *   LEX_NO_TERM  - if nothing more can be read, return false rather than
 *                  appending the implicit terminating text
 */
#define LEX_FAKE_EOF 0x80000000
#define LEX_NO_TERM  0x40000000

/*
 * Perl_lex_next_chunk
 * Append the next chunk of input to PL_parser->linestr and re-derive all
 * lexer pointers into the buffer.  Returns true if text was added (which
 * includes the implicit terminator added at end of input), false
 * otherwise.  Croaks on unknown flags.
 */
bool
Perl_lex_next_chunk(pTHX_ U32 flags)
{
    SV *linestr;
    char *buf;
    STRLEN old_bufend_pos, new_bufend_pos;
    STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
    bool got_some_for_debugger = 0;
    bool got_some;
    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
    linestr = PL_parser->linestr;
    buf = SvPVX(linestr);
    if (!(flags & LEX_KEEP_PREVIOUS) &&
            PL_parser->bufptr == PL_parser->bufend) {
        /* Current chunk fully consumed and caller does not want it kept:
         * empty the buffer and restart every position at offset 0.
         * last_uni/last_lop survive only if they point exactly at the
         * old end of buffer. */
        old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
        linestart_pos = 0;
        if (PL_parser->last_uni != PL_parser->bufend)
            PL_parser->last_uni = NULL;
        if (PL_parser->last_lop != PL_parser->bufend)
            PL_parser->last_lop = NULL;
        last_uni_pos = last_lop_pos = 0;
        *buf = 0;
        SvCUR(linestr) = 0;
    } else {
        /* Keeping the existing text: remember each pointer as an offset
         * so it can be rebuilt after the buffer has possibly moved. */
        old_bufend_pos = PL_parser->bufend - buf;
        bufptr_pos = PL_parser->bufptr - buf;
        oldbufptr_pos = PL_parser->oldbufptr - buf;
        oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
        linestart_pos = PL_parser->linestart - buf;
        last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
        last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
    }
    if (flags & LEX_FAKE_EOF) {
        goto eof;
    } else if (!PL_parser->rsfp && !PL_parser->filtered) {
        /* no filehandle and no filtering in effect: nothing to read */
        got_some = 0;
    } else if (filter_gets(linestr, old_bufend_pos)) {
        /* presumably the new text lands at offset old_bufend_pos; the
         * debugger update below relies on that -- TODO confirm */
        got_some = 1;
        got_some_for_debugger = 1;
    } else if (flags & LEX_NO_TERM) {
        got_some = 0;
    } else {
        if (!SvPOK(linestr))   /* can get undefined by filter_gets */
            sv_setpvs(linestr, "");
        eof:
        /* End of real input.  Close filehandle (unless it was STDIN),
         * then add implicit termination.
         */
        if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
            PerlIO_clearerr(PL_parser->rsfp);
        else if (PL_parser->rsfp)
            (void)PerlIO_close(PL_parser->rsfp);
        PL_parser->rsfp = NULL;
        PL_parser->in_pod = PL_parser->filtered = 0;
#ifdef PERL_MAD
        if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
            PL_faketokens = 1;
#endif
        /* Outside eval, PL_minus_p / PL_minus_n select a longer closing
         * text; each flag is cleared once its text has been appended.
         * Otherwise a lone ";" is appended. */
        if (!PL_in_eval && PL_minus_p) {
            sv_catpvs(linestr,
                /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
            PL_minus_n = PL_minus_p = 0;
        } else if (!PL_in_eval && PL_minus_n) {
            sv_catpvs(linestr, /*{*/";}");
            PL_minus_n = 0;
        } else
            sv_catpvs(linestr, ";");
        got_some = 1;
    }
    /* Re-read the buffer address and rebuild every pointer from the
     * offsets saved above. */
    buf = SvPVX(linestr);
    new_bufend_pos = SvCUR(linestr);
    PL_parser->bufend = buf + new_bufend_pos;
    PL_parser->bufptr = buf + bufptr_pos;
    PL_parser->oldbufptr = buf + oldbufptr_pos;
    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
    PL_parser->linestart = buf + linestart_pos;
    if (PL_parser->last_uni)
        PL_parser->last_uni = buf + last_uni_pos;
    if (PL_parser->last_lop)
        PL_parser->last_lop = buf + last_lop_pos;
    if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
            PL_curstash != PL_debstash) {
        /* debugger active and we're not compiling the debugger code,
         * so store the line into the debugger's array of lines
         */
        update_debugger_info(NULL, buf+old_bufend_pos,
            new_bufend_pos-old_bufend_pos);
    }
    return got_some;
}
1349
1350 /*
1351 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1352
1353 Looks ahead one (Unicode) character in the text currently being lexed.
1354 Returns the codepoint (unsigned integer value) of the next character,
1355 or -1 if lexing has reached the end of the input text.  To consume the
1356 peeked character, use L</lex_read_unichar>.
1357
1358 If the next character is in (or extends into) the next chunk of input
1359 text, the next chunk will be read in.  Normally the current chunk will be
1360 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1361 then the current chunk will not be discarded.
1362
1363 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1364 is encountered, an exception is generated.
1365
1366 =cut
1367 */
1368
1369 I32
1370 Perl_lex_peek_unichar(pTHX_ U32 flags)
1371 {
1372     dVAR;
1373     char *s, *bufend;
1374     if (flags & ~(LEX_KEEP_PREVIOUS))
1375         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1376     s = PL_parser->bufptr;
1377     bufend = PL_parser->bufend;
1378     if (UTF) {
1379         U8 head;
1380         I32 unichar;
1381         STRLEN len, retlen;
1382         if (s == bufend) {
1383             if (!lex_next_chunk(flags))
1384                 return -1;
1385             s = PL_parser->bufptr;
1386             bufend = PL_parser->bufend;
1387         }
1388         head = (U8)*s;
1389         if (!(head & 0x80))
1390             return head;
1391         if (head & 0x40) {
1392             len = PL_utf8skip[head];
1393             while ((STRLEN)(bufend-s) < len) {
1394                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1395                     break;
1396                 s = PL_parser->bufptr;
1397                 bufend = PL_parser->bufend;
1398             }
1399         }
1400         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1401         if (retlen == (STRLEN)-1) {
1402             /* malformed UTF-8 */
1403             ENTER;
1404             SAVESPTR(PL_warnhook);
1405             PL_warnhook = PERL_WARNHOOK_FATAL;
1406             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1407             LEAVE;
1408         }
1409         return unichar;
1410     } else {
1411         if (s == bufend) {
1412             if (!lex_next_chunk(flags))
1413                 return -1;
1414             s = PL_parser->bufptr;
1415         }
1416         return (U8)*s;
1417     }
1418 }
1419
1420 /*
1421 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1422
1423 Reads the next (Unicode) character in the text currently being lexed.
1424 Returns the codepoint (unsigned integer value) of the character read,
1425 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1426 if lexing has reached the end of the input text.  To non-destructively
1427 examine the next character, use L</lex_peek_unichar> instead.
1428
1429 If the next character is in (or extends into) the next chunk of input
1430 text, the next chunk will be read in.  Normally the current chunk will be
1431 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1432 then the current chunk will not be discarded.
1433
1434 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1435 is encountered, an exception is generated.
1436
1437 =cut
1438 */
1439
1440 I32
1441 Perl_lex_read_unichar(pTHX_ U32 flags)
1442 {
1443     I32 c;
1444     if (flags & ~(LEX_KEEP_PREVIOUS))
1445         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1446     c = lex_peek_unichar(flags);
1447     if (c != -1) {
1448         if (c == '\n')
1449             COPLINE_INC_WITH_HERELINES;
1450         if (UTF)
1451             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1452         else
1453             ++(PL_parser->bufptr);
1454     }
1455     return c;
1456 }
1457
1458 /*
1459 =for apidoc Amx|void|lex_read_space|U32 flags
1460
1461 Reads optional spaces, in Perl style, in the text currently being
1462 lexed.  The spaces may include ordinary whitespace characters and
1463 Perl-style comments.  C<#line> directives are processed if encountered.
1464 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1465 at a non-space character (or the end of the input text).
1466
1467 If spaces extend into the next chunk of input text, the next chunk will
1468 be read in.  Normally the current chunk will be discarded at the same
1469 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1470 chunk will not be discarded.
1471
1472 =cut
1473 */
1474
/* Extra flag understood by lex_read_space(): stop at the end of the
 * current buffer instead of calling lex_next_chunk() for more text. */
#define LEX_NO_NEXT_CHUNK 0x80000000

/*
 * Perl_lex_read_space
 * Advance PL_parser->bufptr past whitespace and '#' comments, calling
 * incline() for each newline passed and fetching further chunks as
 * needed (unless LEX_NO_NEXT_CHUNK is given).  Croaks on unknown flags.
 * Under PERL_MAD with PL_madskills set, the skipped text is accumulated
 * in PL_skipwhite.
 */
void
Perl_lex_read_space(pTHX_ U32 flags)
{
    char *s, *bufend;
    /* set when a newline was the last octet of the buffer, so incline()
     * could not yet be given the start of the following line */
    bool need_incline = 0;
    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
#ifdef PERL_MAD
    if (PL_skipwhite) {
        sv_free(PL_skipwhite);
        PL_skipwhite = NULL;
    }
    if (PL_madskills)
        PL_skipwhite = newSVpvs("");
#endif /* PERL_MAD */
    s = PL_parser->bufptr;
    bufend = PL_parser->bufend;
    while (1) {
        char c = *s;
        if (c == '#') {
            /* comment: skip to the newline (left for the next iteration)
             * or to the NUL that sits at the end of the buffer */
            do {
                c = *++s;
            } while (!(c == '\n' || (c == 0 && s == bufend)));
        } else if (c == '\n') {
            s++;
            PL_parser->linestart = s;
            if (s == bufend)
                need_incline = 1;
            else
                incline(s);
        } else if (isSPACE(c)) {
            s++;
        } else if (c == 0 && s == bufend) {
            /* ran off the end of the current buffer */
            bool got_more;
#ifdef PERL_MAD
            if (PL_madskills)
                sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
#endif /* PERL_MAD */
            if (flags & LEX_NO_NEXT_CHUNK)
                break;
            PL_parser->bufptr = s;
            /* The line count is bumped only for the duration of the
             * lex_next_chunk() call and put back right after.
             * NOTE(review): presumably so that whatever is recorded while
             * reading sees the number of the line being read -- confirm. */
            COPLINE_INC_WITH_HERELINES;
            got_more = lex_next_chunk(flags);
            CopLINE_dec(PL_curcop);
            /* the buffer may have moved or been reset: reload pointers */
            s = PL_parser->bufptr;
            bufend = PL_parser->bufend;
            if (!got_more)
                break;
            /* run the deferred incline() now that the next line is
             * available, but only while a filehandle is still set */
            if (need_incline && PL_parser->rsfp) {
                incline(s);
                need_incline = 0;
            }
        } else {
            /* anything else ends the run of spaces */
            break;
        }
    }
#ifdef PERL_MAD
    if (PL_madskills)
        sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
#endif /* PERL_MAD */
    PL_parser->bufptr = s;
}
1539
1540 /*
1541  * S_incline
1542  * This subroutine has nothing to do with tilting, whether at windmills
1543  * or pinball tables.  Its name is short for "increment line".  It
1544  * increments the current line number in CopLINE(PL_curcop) and checks
1545  * to see whether the line starts with a comment of the form
1546  *    # line 500 "foo.pm"
1547  * If so, it sets the current line number and file to the values in the comment.
1548  */
1549 /* s points at the first character of the line being entered; it is only read. */
1550 STATIC void
1551 S_incline(pTHX_ const char *s)
1552 {
1553     dVAR;
1554     const char *t;      /* one past the last character of the file name */
1555     const char *n;      /* first digit of the line number */
1556     const char *e;      /* scan position after the file name */
1557     line_t line_num;
1558
1559     PERL_ARGS_ASSERT_INCLINE;
1560     /* Count the new line first; a "# line" directive found below overrides it. */
1561     COPLINE_INC_WITH_HERELINES;
1562     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1563      && s+1 == PL_bufend && *s == ';') {
1564         /* fake newline in string eval */
1565         CopLINE_dec(PL_curcop);
1566         return;
1567     }
1568     if (*s++ != '#')    /* not a comment line: nothing more to do */
1569         return;
1570     while (SPACE_OR_TAB(*s))
1571         s++;
1572     if (strnEQ(s, "line", 4))
1573         s += 4;
1574     else
1575         return;
1576     if (SPACE_OR_TAB(*s))       /* at least one blank must follow "line" */
1577         s++;
1578     else
1579         return;
1580     while (SPACE_OR_TAB(*s))
1581         s++;
1582     if (!isDIGIT(*s))
1583         return;
1584     /* Remember where the digits start; they are converted only after the rest validates. */
1585     n = s;
1586     while (isDIGIT(*s))
1587         s++;
1588     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1589         return;
1590     while (SPACE_OR_TAB(*s))
1591         s++;
1592     if (*s == '"' && (t = strchr(s+1, '"'))) {  /* quoted file name */
1593         s++;
1594         e = t + 1;
1595     }
1596     else {      /* bare file name (possibly empty): runs to the next whitespace */
1597         t = s;
1598         while (*t && !isSPACE(*t))      /* also stop at NUL, so the scan stays inside the buffer */
1599             t++;
1600         e = t;
1601     }
1602     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1603         e++;
1604     if (*e != '\n' && *e != '\0')
1605         return;         /* false alarm */
1606     /* Stored one lower than written; presumably the directive numbers the line that follows it. */
1607     line_num = atoi(n)-1;
1608
1609     if (t - s > 0) {    /* a non-empty file name was given */
1610         const STRLEN len = t - s;
1611         SV *const temp_sv = CopFILESV(PL_curcop);
1612         const char *cf;
1613         STRLEN tmplen;
1614
1615         if (temp_sv) {
1616             cf = SvPVX(temp_sv);
1617             tmplen = SvCUR(temp_sv);
1618         } else {
1619             cf = NULL;
1620             tmplen = 0;
1621         }
1622
1623         if (!PL_rsfp && !PL_parser->filtered) {
1624             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1625              * to *{"::_<newfilename"} */
1626             /* However, the long form of evals is only turned on by the
1627                debugger - usually they're "(eval %lu)" */
1628             char smallbuf[128];
1629             char *tmpbuf;
1630             GV **gvp;
1631             STRLEN tmplen2 = len;
1632             if (tmplen + 2 <= sizeof smallbuf)
1633                 tmpbuf = smallbuf;
1634             else
1635                 Newx(tmpbuf, tmplen + 2, char);
1636             tmpbuf[0] = '_';
1637             tmpbuf[1] = '<';
1638             memcpy(tmpbuf + 2, cf, tmplen);
1639             tmplen += 2;
1640             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1641             if (gvp) {
1642                 char *tmpbuf2;
1643                 GV *gv2;
1644
1645                 if (tmplen2 + 2 <= sizeof smallbuf)
1646                     tmpbuf2 = smallbuf;
1647                 else
1648                     Newx(tmpbuf2, tmplen2 + 2, char);
1649                 /* When both names share smallbuf, the "_<" prefix written above is still in place. */
1650                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1651                     /* Either they malloc'd it, or we malloc'd it,
1652                        so no prefix is present in ours.  */
1653                     tmpbuf2[0] = '_';
1654                     tmpbuf2[1] = '<';
1655                 }
1656
1657                 memcpy(tmpbuf2 + 2, s, tmplen2);
1658                 tmplen2 += 2;
1659
1660                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1661                 if (!isGV(gv2)) {
1662                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1663                     /* adjust ${"::_<newfilename"} to store the new file name */
1664                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1665                     /* The line number may differ. If that is the case,
1666                        alias the saved lines that are in the array.
1667                        Otherwise alias the whole array. */
1668                     if (CopLINE(PL_curcop) == line_num) {
1669                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1670                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1671                     }
1672                     else if (GvAV(*gvp)) {
1673                         AV * const av = GvAV(*gvp);
1674                         const I32 start = CopLINE(PL_curcop)+1;
1675                         I32 items = AvFILLp(av) - start;
1676                         if (items > 0) {
1677                             AV * const av2 = GvAVn(gv2);
1678                             SV **svp = AvARRAY(av) + start;
1679                             I32 l = (I32)line_num+1;
1680                             while (items--)
1681                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1682                         }
1683                     }
1684                 }
1685
1686                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1687             }
1688             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1689         }
1690         CopFILE_free(PL_curcop);        /* replace the recorded file name */
1691         CopFILE_setn(PL_curcop, s, len);
1692     }
1693     CopLINE_set(PL_curcop, line_num);
1694 }
1695
1696 #ifdef PERL_MAD
1697 /* Skip whitespace ahead of PL_thistoken.  When PL_madskills is set, the */
1698 /* skipped text is appended to PL_thiswhite and the token start is recorded. */
1699 STATIC char *
1700 S_skipspace0(pTHX_ register char *s)
1701 {
1702     PERL_ARGS_ASSERT_SKIPSPACE0;
1703
1704     s = skipspace(s);
1705     if (PL_madskills) {
1706         if (PL_skipwhite) {     /* hand the captured whitespace over */
1707             if (PL_thiswhite == NULL)
1708                 PL_thiswhite = newSVpvs("");
1709             sv_catsv(PL_thiswhite, PL_skipwhite);
1710             sv_free(PL_skipwhite);
1711             PL_skipwhite = NULL;
1712         }
1713         PL_realtokenstart = s - SvPVX(PL_linestr);
1714     }
1715     return s;
1716 }
1717
1718 /* Skip space after PL_thistoken.  With PL_madskills set this also captures */
1719 /* the token text (if still unset) and appends the whitespace to PL_nextwhite. */
1720 STATIC char *
1721 S_skipspace1(pTHX_ register char *s)
1722 {
1723     const char *start = s;
1724     I32 startoff = start - SvPVX(PL_linestr);   /* kept as an offset so the pointer can be rebuilt below */
1725
1726     PERL_ARGS_ASSERT_SKIPSPACE1;
1727
1728     s = skipspace(s);
1729     if (!PL_madskills)
1730         return s;
1731     start = SvPVX(PL_linestr) + startoff;       /* rebuild against the current buffer address */
1732     if (!PL_thistoken && PL_realtokenstart >= 0) {
1733         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1734         PL_thistoken = newSVpvn(tstart, start - tstart);        /* text from token start up to the whitespace */
1735     }
1736     PL_realtokenstart = -1;
1737     if (PL_skipwhite) {
1738         if (!PL_nextwhite)
1739             PL_nextwhite = newSVpvs("");
1740         sv_catsv(PL_nextwhite, PL_skipwhite);
1741         sv_free(PL_skipwhite);
1742         PL_skipwhite = 0;
1743     }
1744     return s;
1745 }
1746 /* Like skipspace(), but PL_bufptr is put back afterwards; under PL_madskills the whitespace is stored in *svp. */
1747 STATIC char *
1748 S_skipspace2(pTHX_ register char *s, SV **svp)
1749 {
1750     char *start;
1751     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);    /* offsets, turned back into pointers after skipspace() */
1752     const I32 startoff = s - SvPVX(PL_linestr);
1753
1754     PERL_ARGS_ASSERT_SKIPSPACE2;
1755
1756     s = skipspace(s);
1757     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1758     if (!PL_madskills || !svp)  /* nothing to capture, or nowhere to put it */
1759         return s;
1760     start = SvPVX(PL_linestr) + startoff;
1761     if (!PL_thistoken && PL_realtokenstart >= 0) {
1762         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1763         PL_thistoken = newSVpvn(tstart, start - tstart);
1764         PL_realtokenstart = -1;
1765     }
1766     if (PL_skipwhite) {
1767         if (!*svp)
1768             *svp = newSVpvs("");
1769         sv_setsv(*svp, PL_skipwhite);   /* assigned with sv_setsv(), where skipspace0/1 use sv_catsv() */
1770         sv_free(PL_skipwhite);
1771         PL_skipwhite = 0;
1772     }
1773
1774     return s;
1775 }
1776 #endif
1777 /* Store an SV holding one line of text at index CopLINE(PL_curcop) of CopFILEAVx(PL_curcop), if that AV exists. */
1778 STATIC void
1779 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1780 {
1781     AV * const file_lines = CopFILEAVx(PL_curcop);
1782     SV *line_sv;
1783     if (!file_lines)
1784         return;
1785     /* the text comes from orig_sv when given, otherwise from buf/len */
1786     line_sv = newSV_type(SVt_PVMG);
1787     if (orig_sv) { sv_setsv(line_sv, orig_sv); }
1788     else         { sv_setpvn(line_sv, buf, len); }
1789     (void)SvIOK_on(line_sv);
1790     SvIV_set(line_sv, 0);
1791     av_store(file_lines, (I32)CopLINE(PL_curcop), line_sv);
1792 }
1793
1794 /*
1795  * S_skipspace
1796  * Called to gobble the appropriate amount and type of whitespace.
1797  * Skips comments as well.  Returns the position after the skipped text.
1798  */
1799 /* While PL_lex_formbrack is set and PL_lex_brackets <= PL_lex_formbrack, only spaces/tabs before PL_bufend are skipped. */
1800 STATIC char *
1801 S_skipspace(pTHX_ register char *s)
1802 {
1803 #ifdef PERL_MAD
1804     char *start = s;
1805 #endif /* PERL_MAD */
1806     PERL_ARGS_ASSERT_SKIPSPACE;
1807 #ifdef PERL_MAD
1808     if (PL_skipwhite) {
1809         sv_free(PL_skipwhite);
1810         PL_skipwhite = NULL;
1811     }
1812 #endif /* PERL_MAD */
1813     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1814         while (s < PL_bufend && SPACE_OR_TAB(*s))
1815             s++;
1816     } else {
1817         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);      /* saved as an offset, rebuilt after the read */
1818         PL_bufptr = s;  /* lex_read_space() works from PL_bufptr */
1819         lex_read_space(LEX_KEEP_PREVIOUS |
1820                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1821                     LEX_NO_NEXT_CHUNK : 0));
1822         s = PL_bufptr;
1823         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1824         if (PL_linestart > PL_bufptr)   /* never leave PL_bufptr before the start of the line */
1825             PL_bufptr = PL_linestart;
1826         return s;       /* this path does not reach the PL_skipwhite capture below */
1827     }
1828 #ifdef PERL_MAD
1829     if (PL_madskills)
1830         PL_skipwhite = newSVpvn(start, s-start);
1831 #endif /* PERL_MAD */
1832     return s;
1833 }
1834
1835 /*
1836  * S_check_uni
1837  * Warns (WARN_AMBIGUOUS) when a unary operator was written without
1838  * parentheses and could be misread.  For instance
1839  *     rand + 5
1840  * does not mean rand() + 5: rand() is a unary operator, so the +5 is
1841  * its argument.  Silent if a '(' follows the name before PL_bufptr.
1842  */
1843
1844 STATIC void
1845 S_check_uni(pTHX)
1846 {
1847     dVAR;
1848     const char *name_end;       /* one past the operator's name */
1849     const char *paren;          /* first '(' at or after name_end, if any */
1850
1851     if (PL_last_uni != PL_oldoldbufptr)
1852         return;
1853     while (isSPACE(*PL_last_uni))
1854         PL_last_uni++;
1855     for (name_end = PL_last_uni;
1856          isALNUM_lazy_if(name_end,UTF) || *name_end == '-'; name_end++)
1857         continue;
1858     paren = strchr(name_end, '(');
1859     if (paren != NULL && paren < PL_bufptr)
1860         return;
1861     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1862                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1863                      (int)(name_end - PL_last_uni), PL_last_uni);
1864 }
1865
1866 /*
1867  * LOP : macro to build a list operator.  Its behaviour has been replaced
1868  * with a subroutine, S_lop() for which LOP is just another name.
1869  */
1870 /* Note: expands to a "return" statement and reads the caller's variable s. */
1871 #define LOP(f,x) return lop(f,x,s)
1872
1873 /*
1874  * S_lop
1875  * Returns the token for a (possible) list operator f, setting PL_expect
1876  * to x.  LSTOP is returned when a forced token is already pending or no
1877  * '(' follows at s (after optional whitespace); otherwise FUNC.  On the
1878  * LSTOP path PL_lex_fakeeof may be lowered to LEX_FAKEEOF_LOWLOGIC.
1879  */
1880
1881 STATIC I32
1882 S_lop(pTHX_ I32 f, int x, char *s)
1883 {
1884     dVAR;
1885
1886     PERL_ARGS_ASSERT_LOP;
1887
1888     /* record the operator and where it was seen */
1889     pl_yylval.ival = f;
1890     CLINE;
1891     PL_expect = x;
1892     PL_bufptr = s;
1893     PL_last_lop = PL_oldbufptr;
1894     PL_last_lop_op = (OPCODE)f;
1895
1896     /* nothing queued: a '(' (maybe after whitespace) means a function call */
1897 #ifdef PERL_MAD
1898     if (!PL_lasttoke)
1899 #else
1900     if (!PL_nexttoke)
1901 #endif
1902     {
1903         if (*s == '(')
1904             return REPORT(FUNC);
1905         s = PEEKSPACE(s);
1906         if (*s == '(')
1907             return REPORT(FUNC);
1908     }
1909     if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1910         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1911     return REPORT(LSTOP);
1912 }
1913
1914 #ifdef PERL_MAD
1915  /*
1916  * S_start_force
1917  * Sets up for an eventual force_next().  start_force(0) basically does
1918  * an unshift, while start_force(-1) does a push.  yylex removes items
1919  * on the "pop" end.
1920  */
1921 /* "where" is the PL_nexttoke[] index to prepare; it becomes PL_curforce. */
1922 STATIC void
1923 S_start_force(pTHX_ int where)
1924 {
1925     int i;
1926
1927     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1928         where = PL_lasttoke;
1929     assert(PL_curforce < 0 || PL_curforce == where);
1930     if (PL_curforce != where) {
1931         for (i = PL_lasttoke; i > where; --i) {        /* shift entries up to open a slot at "where" */
1932             PL_nexttoke[i] = PL_nexttoke[i-1];
1933         }
1934         PL_lasttoke++;
1935     }
1936     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1937         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1938     PL_curforce = where;
1939     if (PL_nextwhite) {
1940         if (PL_madskills)
1941             curmad('^', newSVpvs(""));  /* '^' entry: S_curmad() can later overwrite it with a real slot */
1942         CURMAD('_', PL_nextwhite);
1943     }
1944 }
1945 /* Attach sv under key "slot" to the MAD list of the token being forced, or to PL_thismad when PL_curforce < 0. */
1946 STATIC void
1947 S_curmad(pTHX_ char slot, SV *sv)
1948 {
1949     MADPROP **where;
1950     /* a NULL sv is silently ignored */
1951     if (!sv)
1952         return;
1953     if (PL_curforce < 0)
1954         where = &PL_thismad;
1955     else
1956         where = &PL_nexttoke[PL_curforce].next_mad;
1957     /* With PL_faketokens the text is blanked; otherwise it may be flagged or recoded as UTF-8. */
1958     if (PL_faketokens)
1959         sv_setpvs(sv, "");
1960     else {
1961         if (!IN_BYTES) {
1962             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1963                 SvUTF8_on(sv);
1964             else if (PL_encoding) {
1965                 sv_recode_to_utf8(sv, PL_encoding);
1966             }
1967         }
1968     }
1969
1970     /* keep a slot open for the head of the list? */
1971     if (slot != '_' && *where && (*where)->mad_key == '^') {    /* reuse the '^' placeholder entry */
1972         (*where)->mad_key = slot;
1973         sv_free(MUTABLE_SV(((*where)->mad_val)));
1974         (*where)->mad_val = (void*)sv;
1975     }
1976     else
1977         addmad(newMADsv(slot, sv), where, 0);
1978 }
1979 #else
1980 #  define start_force(where)    NOOP    /* without PERL_MAD there is no token queue slot to prepare */
1981 #  define curmad(slot, sv)      NOOP    /* without PERL_MAD no MAD properties are recorded */
1982 #endif
1983
1984 /*
1985  * S_force_next
1986  * When the lexer realizes it knows the next token (for instance,
1987  * it is reordering tokens for the parser) then it can call S_force_next
1988  * to record which token to return the next time the lexer is called.  Caller
1989  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1990  * and possibly PL_expect to ensure the lexer handles the token correctly.
1991  */
1992 /* "type" is the token to queue; on return PL_lex_state is LEX_KNOWNEXT. */
1993 STATIC void
1994 S_force_next(pTHX_ I32 type)
1995 {
1996     dVAR;
1997 #ifdef DEBUGGING
1998     if (DEBUG_T_TEST) {
1999         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2000         tokereport(type, &NEXTVAL_NEXTTOKE);
2001     }
2002 #endif
2003     /* Don’t let opslab_force_free snatch it */
2004     if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2005         assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2006         NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2007     }
2008 #ifdef PERL_MAD
2009     if (PL_curforce < 0)
2010         start_force(PL_lasttoke);
2011     PL_nexttoke[PL_curforce].next_type = type;
2012     if (PL_lex_state != LEX_KNOWNEXT)
2013         PL_lex_defer = PL_lex_state;
2014     PL_lex_state = LEX_KNOWNEXT;
2015     PL_lex_expect = PL_expect;  /* here PL_lex_expect is updated on every call */
2016     PL_curforce = -1;
2017 #else
2018     PL_nexttype[PL_nexttoke] = type;
2019     PL_nexttoke++;
2020     if (PL_lex_state != LEX_KNOWNEXT) {        /* state and expectation are saved only for the first queued token */
2021         PL_lex_defer = PL_lex_state;
2022         PL_lex_expect = PL_expect;
2023         PL_lex_state = LEX_KNOWNEXT;
2024     }
2025 #endif
2026 }
2027 /* Push the parser's lookahead token (PL_parser->yychar), if there is one, back as a forced token. */
2028 void
2029 Perl_yyunlex(pTHX)
2030 {
2031     int yyc = PL_parser->yychar;
2032     if (yyc != YYEMPTY) {
2033         if (yyc) {      /* token 0 is not re-queued */
2034             start_force(-1);
2035             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2036             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2037                 PL_lex_allbrackets--;   /* the bracket counters are stepped back for this token */
2038                 PL_lex_brackets--;
2039                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);   /* extra bits ride along in the type given to force_next() */
2040             } else if (yyc == '('/*)*/) {
2041                 PL_lex_allbrackets--;
2042                 yyc |= (2<<24);
2043             }
2044             force_next(yyc);
2045         }
2046         PL_parser->yychar = YYEMPTY;    /* the lookahead has been handed back */
2047     }
2048 }
2049 /* New SV from start/len; flagged UTF-8 only when !IN_BYTES, UTF is set and the bytes are non-ASCII valid UTF-8. */
2050 STATIC SV *
2051 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2052 {
2053     dVAR;
2054     const U8 * const bytes = (const U8*)start;
2055     /* the UTF-8 flag is wanted only if every condition below holds */
2056     const bool is_utf8 = !IN_BYTES && UTF
2057                          && !is_ascii_string(bytes, len)
2058                          && is_utf8_string(bytes, len);
2059     return newSVpvn_utf8(start, len, is_utf8);
2060 }
2061
2062 /*
2063  * S_force_word
2064  * When the lexer knows the next thing is a word (for instance, it has
2065  * just seen -> and it knows that the next char is a word char), then
2066  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2067  * lookahead.
2068  *
2069  * Arguments:
2070  *   char *start : buffer position (must be within PL_linestr)
2071  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2072  *   int check_keyword : if true, Perl checks to make sure the word isn't
2073  *       a keyword (do this if the word is a label, e.g. goto FOO)
2074  *   int allow_pack : if true, : characters will also be allowed (require,
2075  *       use, etc. do this)
2076  *   int allow_initial_tick : used by the "sub" lexer only.
2077  */
2078 /* Returns the position after the forced word; if nothing is forced, the position after the leading whitespace. */
2079 STATIC char *
2080 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2081 {
2082     dVAR;
2083     char *s;
2084     STRLEN len;
2085
2086     PERL_ARGS_ASSERT_FORCE_WORD;
2087
2088     start = SKIPSPACE1(start);
2089     s = start;
2090     if (isIDFIRST_lazy_if(s,UTF) ||
2091         (allow_pack && *s == ':') ||
2092         (allow_initial_tick && *s == '\'') )
2093     {
2094         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2095         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2096             return start;       /* a keyword: leave it for normal lexing */
2097         start_force(PL_curforce);
2098         if (PL_madskills)
2099             curmad('X', newSVpvn(start,s-start));
2100         if (token == METHOD) {
2101             s = SKIPSPACE1(s);
2102             if (*s == '(')
2103                 PL_expect = XTERM;
2104             else {
2105                 PL_expect = XOPERATOR;
2106             }
2107         }
2108         if (PL_madskills)
2109             curmad('g', newSVpvs( "forced" ));
2110         NEXTVAL_NEXTTOKE.opval
2111             = (OP*)newSVOP(OP_CONST,0,
2112                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2113         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;   /* mark the constant as a bareword */
2114         force_next(token);
2115     }
2116     return s;
2117 }
2118
2119 /*
2120  * S_force_ident
2121  * Called when the lexer wants $foo *foo &foo etc, but the program
2122  * text only contains the "foo" portion.  The first argument is a pointer
2123  * to the "foo", and the second argument is the type symbol to prefix.
2124  * Forces the next token to be a "WORD".
2125  * Creates the symbol if it didn't already exist (via gv_fetchpvn_flags()).
2126  */
2127 /* An empty string forces nothing; the symbol is fetched only when kind is nonzero. */
2128 STATIC void
2129 S_force_ident(pTHX_ register const char *s, int kind)
2130 {
2131     dVAR;
2132
2133     PERL_ARGS_ASSERT_FORCE_IDENT;
2134
2135     if (*s) {
2136         const STRLEN len = strlen(s);
2137         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2138                                                                 UTF ? SVf_UTF8 : 0));
2139         start_force(PL_curforce);
2140         NEXTVAL_NEXTTOKE.opval = o;
2141         force_next(WORD);
2142         if (kind) {
2143             o->op_private = OPpCONST_ENTERED;
2144             /* XXX see note in pp_entereval() for why we forgo typo
2145                warnings if the symbol must be introduced in an eval.
2146                GSAR 96-10-12 */
2147             gv_fetchpvn_flags(s, len,
2148                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2149                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2150                               kind == '$' ? SVt_PV :
2151                               kind == '@' ? SVt_PVAV :
2152                               kind == '%' ? SVt_PVHV :
2153                               SVt_PVGV
2154                               );
2155         }
2156     }
2157 }
2158 /* Queue a forced 'p' token whose value (ival) is the character pit. */
2159 static void
2160 S_force_ident_maybe_lex(pTHX_ char pit)
2161 {
2162     start_force(PL_curforce);
2163     NEXTVAL_NEXTTOKE.ival = pit;
2164     force_next('p');
2165 }
2166 /* Fold the characters of sv into one NV: the first character's value, plus each later one divided by a further 1000. */
2167 NV
2168 Perl_str_to_version(pTHX_ SV *sv)
2169 {
2170     NV retval = 0.0;
2171     NV nshift = 1.0;    /* divisor for the current character: 1, 1000, 1000000, ... */
2172     STRLEN len;
2173     const char *start = SvPV_const(sv,len);
2174     const char * const end = start + len;
2175     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2176
2177     PERL_ARGS_ASSERT_STR_TO_VERSION;
2178
2179     while (start < end) {
2180         STRLEN skip;
2181         UV n;
2182         if (utf)        /* offer the decoder only the bytes still unread, not the full length */
2183             n = utf8n_to_uvchr((U8*)start, (STRLEN)(end - start), &skip, 0);
2184         else {
2185             n = *(U8*)start;
2186             skip = 1;
2187         }
2188         retval += ((NV)n)/nshift;
2189         start += skip;
2190         nshift *= 1000;
2191     }
2192     return retval;
2193 }
2194
2195 /*
2196  * S_force_version
2197  * Forces the next token to be a version number.
2198  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2199  * and if "guessing" is TRUE, then no new token is created (and the caller
2200  * must use an alternative parsing method).
2201  */
2202 /* Returns the new scan position.  Unless the guessing path returns early, a WORD is forced; its opval is NULL if no version was parsed. */
2203 STATIC char *
2204 S_force_version(pTHX_ char *s, int guessing)
2205 {
2206     dVAR;
2207     OP *version = NULL;
2208     char *d;
2209 #ifdef PERL_MAD
2210     I32 startoff = s - SvPVX(PL_linestr);
2211 #endif
2212
2213     PERL_ARGS_ASSERT_FORCE_VERSION;
2214
2215     s = SKIPSPACE1(s);
2216
2217     d = s;      /* d looks ahead over a candidate version; s is not advanced yet */
2218     if (*d == 'v')
2219         d++;
2220     if (isDIGIT(*d)) {
2221         while (isDIGIT(*d) || *d == '_' || *d == '.')
2222             d++;
2223 #ifdef PERL_MAD
2224         if (PL_madskills) {
2225             start_force(PL_curforce);
2226             curmad('X', newSVpvn(s,d-s));
2227         }
2228 #endif
2229         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2230             SV *ver;
2231 #ifdef USE_LOCALE_NUMERIC
2232             char *loc = savepv(setlocale(LC_NUMERIC, NULL));   /* scan_num() runs under the "C" numeric locale */
2233             setlocale(LC_NUMERIC, "C");
2234 #endif
2235             s = scan_num(s, &pl_yylval);
2236 #ifdef USE_LOCALE_NUMERIC
2237             setlocale(LC_NUMERIC, loc);
2238             Safefree(loc);
2239 #endif
2240             version = pl_yylval.opval;
2241             ver = cSVOPx(version)->op_sv;
2242             if (SvPOK(ver) && !SvNIOK(ver)) {
2243                 SvUPGRADE(ver, SVt_PVNV);
2244                 SvNV_set(ver, str_to_version(ver));
2245                 SvNOK_on(ver);          /* hint that it is a version */
2246             }
2247         }
2248         else if (guessing) {
2249 #ifdef PERL_MAD
2250             if (PL_madskills) {
2251                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2252                 PL_nextwhite = 0;
2253                 s = SvPVX(PL_linestr) + startoff;
2254             }
2255 #endif
2256             return s;
2257         }
2258     }
2259
2260 #ifdef PERL_MAD
2261     if (PL_madskills && !version) {
2262         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2263         PL_nextwhite = 0;
2264         s = SvPVX(PL_linestr) + startoff;       /* rewind to where the caller started */
2265     }
2266 #endif
2267     /* NOTE: The parser sees the package name and the VERSION swapped */
2268     start_force(PL_curforce);
2269     NEXTVAL_NEXTTOKE.opval = version;
2270     force_next(WORD);
2271
2272     return s;
2273 }
2274
2275 /*
2276  * S_force_strict_version
2277  * Forces the next token to be a version number using strict syntax rules.
2278  */
2279 /* Returns the new scan position.  If the text is neither a strict version nor ';', '{' or '}', errstr (if set) is reported and nothing is forced. */
2280 STATIC char *
2281 S_force_strict_version(pTHX_ char *s)
2282 {
2283     dVAR;
2284     OP *version = NULL;
2285 #ifdef PERL_MAD
2286     I32 startoff = s - SvPVX(PL_linestr);
2287 #endif
2288     const char *errstr = NULL;
2289
2290     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2291
2292     while (isSPACE(*s)) /* leading whitespace */
2293         s++;
2294
2295     if (is_STRICT_VERSION(s,&errstr)) {
2296         SV *ver = newSV(0);
2297         s = (char *)scan_version(s, ver, 0);
2298         version = newSVOP(OP_CONST, 0, ver);
2299     }
2300     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2301             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2302     {
2303         PL_bufptr = s;
2304         if (errstr)
2305             yyerror(errstr); /* version required */
2306         return s;
2307     }
2308
2309 #ifdef PERL_MAD
2310     if (PL_madskills && !version) {
2311         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2312         PL_nextwhite = 0;
2313         s = SvPVX(PL_linestr) + startoff;
2314     }
2315 #endif
2316     /* NOTE: The parser sees the package name and the VERSION swapped */
2317     start_force(PL_curforce);
2318     NEXTVAL_NEXTTOKE.opval = version;   /* stays NULL when no version was present */
2319     force_next(WORD);
2320
2321     return s;
2322 }
2323
2324 /*
2325  * S_tokeq
2326  * Tokenize a quoted string passed in as an SV.  It finds the next
2327  * chunk, up to end of string or a backslash.  It may make a new
2328  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2329  * turns \\ into \.
2330  */
2331 /* Returns sv (edited in place), or the result of new_constant() when HINT_NEW_STRING is set. */
2332 STATIC SV *
2333 S_tokeq(pTHX_ SV *sv)
2334 {
2335     dVAR;
2336     char *s;
2337     char *send;
2338     char *d;
2339     STRLEN len = 0;
2340     SV *pv = sv;
2341
2342     PERL_ARGS_ASSERT_TOKEQ;
2343
2344     if (!SvLEN(sv))
2345         goto finish;
2346
2347     s = SvPV_force(sv, len);
2348     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2349         goto finish;
2350     send = s + len;
2351     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2352     while (s < send && !(*s == '\\' && s[1] == '\\'))
2353         s++;
2354     if (s == send)      /* no doubled backslash: nothing to rewrite */
2355         goto finish;
2356     d = s;
2357     if ( PL_hints & HINT_NEW_STRING ) {
2358         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));      /* copy taken before the in-place edit below */
2359     }
2360     while (s < send) {
2361         if (*s == '\\') {
2362             if (s + 1 < send && (s[1] == '\\'))
2363                 s++;            /* all that, just for this */
2364         }
2365         *d++ = *s++;
2366     }
2367     *d = '\0';
2368     SvCUR_set(sv, d - SvPVX_const(sv));
2369   finish:
2370     if ( PL_hints & HINT_NEW_STRING )
2371        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2372     return sv;
2373 }
2374
2375 /*
2376  * Now come three functions related to double-quote context,
2377  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2378  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2379  * interact with PL_lex_state, and create fake ( ... ) argument lists
2380  * to handle functions and concatenation.
2381  * For example,
2382  *   "foo\lbar"
2383  * is tokenised as
2384  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2385  */
2386
2387 /*
2388  * S_sublex_start
2389  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2390  *
2391  * Pattern matching will set PL_lex_op to the pattern-matching op to
2392  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2393  *
2394  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2395  *
2396  * Everything else becomes a FUNC.
2397  *
2398  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2399  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2400  * call to S_sublex_push().
2401  */
2402 /* OP_BACKTICK with PL_lex_op set also returns THING without entering LEX_INTERPPUSH. */
2403 STATIC I32
2404 S_sublex_start(pTHX)
2405 {
2406     dVAR;
2407     const I32 op_type = pl_yylval.ival;
2408
2409     if (op_type == OP_NULL) {
2410         pl_yylval.opval = PL_lex_op;
2411         PL_lex_op = NULL;
2412         return THING;
2413     }
2414     if (op_type == OP_CONST || op_type == OP_READLINE) {
2415         SV *sv = tokeq(PL_lex_stuff);
2416
2417         if (SvTYPE(sv) == SVt_PVIV) {
2418             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2419             STRLEN len;
2420             const char * const p = SvPV_const(sv, len);
2421             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2422             SvREFCNT_dec(sv);
2423             sv = nsv;
2424         }
2425         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2426         PL_lex_stuff = NULL;
2427         /* Allow <FH> // "foo" */
2428         if (op_type == OP_READLINE)
2429             PL_expect = XTERMORDORDOR;
2430         return THING;
2431     }
2432     else if (op_type == OP_BACKTICK && PL_lex_op) {
2433         /* readpipe() was overridden */
2434         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2435         pl_yylval.opval = PL_lex_op;
2436         PL_lex_op = NULL;
2437         PL_lex_stuff = NULL;
2438         return THING;
2439     }
2440     /* Everything else: remember the current state for S_sublex_push() and switch to LEX_INTERPPUSH. */
2441     PL_sublex_info.super_state = PL_lex_state;
2442     PL_sublex_info.sub_inwhat = (U16)op_type;
2443     PL_sublex_info.sub_op = PL_lex_op;
2444     PL_lex_state = LEX_INTERPPUSH;
2445
2446     PL_expect = XTERM;
2447     if (PL_lex_op) {
2448         pl_yylval.opval = PL_lex_op;
2449         PL_lex_op = NULL;
2450         return PMFUNC;
2451     }
2452     else
2453         return FUNC;
2454 }
2455
2456 /*
2457  * S_sublex_push
2458  * Create a new scope to save the lexing state.  The scope will be
2459  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2460  * to the uc, lc, etc. found before.
2461  * Sets PL_lex_state to LEX_INTERPCONCAT.
2462  */
2463 /* The text lexed inside the new scope is PL_lex_stuff, which becomes PL_linestr. */
2464 STATIC I32
2465 S_sublex_push(pTHX)
2466 {
2467     dVAR;
2468     LEXSHARED *shared;
2469     ENTER;
2470     /* Restore the state stashed by S_sublex_start() first, so that SAVEI8(PL_lex_state) below records it. */
2471     PL_lex_state = PL_sublex_info.super_state;
2472     SAVEBOOL(PL_lex_dojoin);
2473     SAVEI32(PL_lex_brackets);
2474     SAVEI32(PL_lex_allbrackets);
2475     SAVEI32(PL_lex_formbrack);
2476     SAVEI8(PL_lex_fakeeof);
2477     SAVEI32(PL_lex_casemods);
2478     SAVEI32(PL_lex_starts);
2479     SAVEI8(PL_lex_state);
2480     SAVESPTR(PL_lex_repl);
2481     SAVEVPTR(PL_lex_inpat);
2482     SAVEI16(PL_lex_inwhat);
2483     SAVECOPLINE(PL_curcop);
2484     SAVEPPTR(PL_bufptr);
2485     SAVEPPTR(PL_bufend);
2486     SAVEPPTR(PL_oldbufptr);
2487     SAVEPPTR(PL_oldoldbufptr);
2488     SAVEPPTR(PL_last_lop);
2489     SAVEPPTR(PL_last_uni);
2490     SAVEPPTR(PL_linestart);
2491     SAVESPTR(PL_linestr);
2492     SAVEGENERICPV(PL_lex_brackstack);
2493     SAVEGENERICPV(PL_lex_casestack);
2494     SAVEGENERICPV(PL_parser->lex_shared);
2495
2496     /* The here-doc parser needs to be able to peek into outer lexing
2497        scopes to find the body of the here-doc.  So we put PL_linestr and
2498        PL_bufptr into lex_shared, to ‘share’ those values.
2499      */
2500     PL_parser->lex_shared->ls_linestr = PL_linestr;
2501     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2502     /* Switch the lexer's input over to the quoted text (and its replacement part, if any). */
2503     PL_linestr = PL_lex_stuff;
2504     PL_lex_repl = PL_sublex_info.repl;
2505     PL_lex_stuff = NULL;
2506     PL_sublex_info.repl = NULL;
2507
2508     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2509         = SvPVX(PL_linestr);
2510     PL_bufend += SvCUR(PL_linestr);
2511     PL_last_lop = PL_last_uni = NULL;
2512     SAVEFREESV(PL_linestr);
2513     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2514     /* Start the inner scope with fresh bracket and case-modifier bookkeeping. */
2515     PL_lex_dojoin = FALSE;
2516     PL_lex_brackets = PL_lex_formbrack = 0;
2517     PL_lex_allbrackets = 0;
2518     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2519     Newx(PL_lex_brackstack, 120, char);
2520     Newx(PL_lex_casestack, 12, char);
2521     PL_lex_casemods = 0;
2522     *PL_lex_casestack = '\0';
2523     PL_lex_starts = 0;
2524     PL_lex_state = LEX_INTERPCONCAT;
2525     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2526     /* Push a zeroed LEXSHARED for this scope, linked to the outer one through ls_prev. */
2527     Newxz(shared, 1, LEXSHARED);
2528     shared->ls_prev = PL_parser->lex_shared;
2529     PL_parser->lex_shared = shared;
2530
2531     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2532     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;   /* OP_TRANSR is handled as OP_TRANS from here on */
2533     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2534         PL_lex_inpat = PL_sublex_info.sub_op;
2535     else
2536         PL_lex_inpat = NULL;
2537
2538     return '(';
2539 }
2540
2541 /*
2542  * S_sublex_done
2543  * Restores lexer state after a S_sublex_push.
2544  */
2545
2546 STATIC I32
2547 S_sublex_done(pTHX)
2548 {
2549     dVAR;
2550     if (!PL_lex_starts++) {
2551         SV * const sv = newSVpvs("");
2552         if (SvUTF8(PL_linestr))
2553             SvUTF8_on(sv);
2554         PL_expect = XOPERATOR;
2555         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2556         return THING;
2557     }
2558
2559     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2560         PL_lex_state = LEX_INTERPCASEMOD;
2561         return yylex();
2562     }
2563
2564     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2565     assert(PL_lex_inwhat != OP_TRANSR);
2566     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2567         PL_linestr = PL_lex_repl;
2568         PL_lex_inpat = 0;
2569         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2570         PL_bufend += SvCUR(PL_linestr);
2571         PL_last_lop = PL_last_uni = NULL;
2572         PL_lex_dojoin = FALSE;
2573         PL_lex_brackets = 0;
2574         PL_lex_allbrackets = 0;
2575         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2576         PL_lex_casemods = 0;
2577         *PL_lex_casestack = '\0';
2578         PL_lex_starts = 0;
2579         if (SvEVALED(PL_lex_repl)) {
2580             PL_lex_state = LEX_INTERPNORMAL;
2581             PL_lex_starts++;
2582             /*  we don't clear PL_lex_repl here, so that we can check later
2583                 whether this is an evalled subst; that means we rely on the
2584                 logic to ensure sublex_done() is called again only via the
2585                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2586         }
2587         else {
2588             PL_lex_state = LEX_INTERPCONCAT;
2589             PL_lex_repl = NULL;
2590         }
2591         return ',';
2592     }
2593     else {
2594 #ifdef PERL_MAD
2595         if (PL_madskills) {
2596             if (PL_thiswhite) {
2597                 if (!PL_endwhite)
2598                     PL_endwhite = newSVpvs("");
2599                 sv_catsv(PL_endwhite, PL_thiswhite);
2600                 PL_thiswhite = 0;
2601             }
2602             if (PL_thistoken)
2603                 sv_setpvs(PL_thistoken,"");
2604             else
2605                 PL_realtokenstart = -1;
2606         }
2607 #endif
2608         LEAVE;
2609         PL_bufend = SvPVX(PL_linestr);
2610         PL_bufend += SvCUR(PL_linestr);
2611         PL_expect = XOPERATOR;
2612         PL_sublex_info.sub_inwhat = 0;
2613         return ')';
2614     }
2615 }
2616
2617 /*
2618   scan_const
2619
2620   Extracts the next constant part of a pattern, double-quoted string,
2621   or transliteration.  This is terrifying code.
2622
2623   For example, in parsing the double-quoted string "ab\x63$d", it would
2624   stop at the '$' and return an OP_CONST containing 'abc'.
2625
2626   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2627   processing a pattern (PL_lex_inpat is true), a transliteration
2628   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2629
2630   Returns a pointer to the character scanned up to. If this is
2631   advanced from the start pointer supplied (i.e. if anything was
2632   successfully parsed), will leave an OP_CONST for the substring scanned
2633   in pl_yylval. Caller must intuit reason for not parsing further
2634   by looking at the next characters herself.
2635
2636   In patterns:
2637     expand:
2638       \N{ABC}  => \N{U+41.42.43}
2639
2640     pass through:
2641         all other \-char, including \N and \N{ apart from \N{ABC}
2642
2643     stops on:
2644         @ and $ where it appears to be a var, but not for $ as tail anchor
2645         \l \L \u \U \Q \E \F
2646         (?{  or  (??{
2647
2648
2649   In transliterations:
2650     characters are VERY literal, except for - not at the start or end
2651     of the string, which indicates a range. If the range is in bytes,
2652     scan_const expands the range to the full set of intermediate
2653     characters. If the range is in utf8, the hyphen is replaced with
2654     a certain range mark which will be handled by pmtrans() in op.c.
2655
2656   In double-quoted strings:
2657     backslashes:
2658       double-quoted style: \r and \n
2659       constants: \x31, etc.
2660       deprecated backrefs: \1 (in substitution replacements)
2661       case and quoting: \U \Q \E
2662     stops on @ and $
2663
2664   scan_const does *not* construct ops to handle interpolated strings.
2665   It stops processing as soon as it finds an embedded $ or @ variable
2666   and leaves it to the caller to work out what's going on.
2667
2668   embedded arrays (whether in pattern or not) could be:
2669       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2670
2671   $ in double-quoted strings must be the symbol of an embedded scalar.
2672
2673   $ in pattern could be $foo or could be tail anchor.  Assumption:
2674   it's a tail anchor if $ is the last thing in the string, or if it's
2675   followed by one of "()| \r\n\t"
2676
2677   \1 (backreferences) are turned into $1 in substitutions
2678
2679   The structure of the code is
2680       while (there's a character to process) {
2681           handle transliteration ranges
2682           skip regexp comments /(?#comment)/ and codes /(?{code})/
2683           skip #-initiated comments in //x patterns
2684           check for embedded arrays
2685           check for embedded scalars
2686           if (backslash) {
2687               deprecate \1 in substitution replacements
2688               handle string-changing backslashes \l \U \Q \E, etc.
2689               switch (what was escaped) {
2690                   handle \- in a transliteration (becomes a literal -)
2691                   if a pattern and not \N{, go treat as regular character
2692                   handle \132 (octal characters)
2693                   handle \x15 and \x{1234} (hex characters)
2694                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2695                   handle \cV (control characters)
2696                   handle printf-style backslashes (\f, \r, \n, etc)
2697               } (end switch)
2698               continue
2699           } (end if backslash)
2700           handle regular character
2701     } (end while character to read)
2702
2703 */
2704
2705 STATIC char *
2706 S_scan_const(pTHX_ char *start)
2707 {
2708     dVAR;
2709     char *send = PL_bufend;             /* end of the constant */
2710     SV *sv = newSV(send - start);               /* sv for the constant.  See
2711                                                    note below on sizing. */
2712     char *s = start;                    /* start of the constant */
2713     char *d = SvPVX(sv);                /* destination for copies */
2714     bool dorange = FALSE;                       /* are we in a translit range? */
2715     bool didrange = FALSE;                      /* did we just finish a range? */
2716     bool in_charclass = FALSE;                  /* within /[...]/ */
2717     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2718     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2719                                                    to be UTF8?  But, this can
2720                                                    show as true when the source
2721                                                    isn't utf8, as for example
2722                                                    when it is entirely composed
2723                                                    of hex constants */
2724
2725     /* Note on sizing:  The scanned constant is placed into sv, which is
2726      * initialized by newSV() assuming one byte of output for every byte of
2727      * input.  This routine expects newSV() to allocate an extra byte for a
2728      * trailing NUL, which this routine will append if it gets to the end of
2729      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2730      * CAPITAL LETTER A}), or more output than input if the constant ends up
2731      * recoded to utf8, but each time a construct is found that might increase
2732      * the needed size, SvGROW() is called.  Its size parameter each time is
2733      * based on the best guess estimate at the time, namely the length used so
2734      * far, plus the length the current construct will occupy, plus room for
2735      * the trailing NUL, plus one byte for every input byte still unscanned */
2736
2737     UV uv;
2738 #ifdef EBCDIC
2739     UV literal_endpoint = 0;
2740     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2741 #endif
2742
2743     PERL_ARGS_ASSERT_SCAN_CONST;
2744
2745     assert(PL_lex_inwhat != OP_TRANSR);
2746     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2747         /* If we are doing a trans and we know we want UTF8 set expectation */
2748         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2749         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2750     }
2751
2752
2753     while (s < send || dorange) {
2754
2755         /* get transliterations out of the way (they're most literal) */
2756         if (PL_lex_inwhat == OP_TRANS) {
2757             /* expand a range A-Z to the full set of characters.  AIE! */
2758             if (dorange) {
2759                 I32 i;                          /* current expanded character */
2760                 I32 min;                        /* first character in range */
2761                 I32 max;                        /* last character in range */
2762
2763 #ifdef EBCDIC
2764                 UV uvmax = 0;
2765 #endif
2766
2767                 if (has_utf8
2768 #ifdef EBCDIC
2769                     && !native_range
2770 #endif
2771                     ) {
2772                     char * const c = (char*)utf8_hop((U8*)d, -1);
2773                     char *e = d++;
2774                     while (e-- > c)
2775                         *(e + 1) = *e;
2776                     *c = (char)UTF_TO_NATIVE(0xff);
2777                     /* mark the range as done, and continue */
2778                     dorange = FALSE;
2779                     didrange = TRUE;
2780                     continue;
2781                 }
2782
2783                 i = d - SvPVX_const(sv);                /* remember current offset */
2784 #ifdef EBCDIC
2785                 SvGROW(sv,
2786                        SvLEN(sv) + (has_utf8 ?
2787                                     (512 - UTF_CONTINUATION_MARK +
2788                                      UNISKIP(0x100))
2789                                     : 256));
2790                 /* How many two-byte within 0..255: 128 in UTF-8,
2791                  * 96 in UTF-8-mod. */
2792 #else
2793                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2794 #endif
2795                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2796 #ifdef EBCDIC
2797                 if (has_utf8) {
2798                     int j;
2799                     for (j = 0; j <= 1; j++) {
2800                         char * const c = (char*)utf8_hop((U8*)d, -1);
2801                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2802                         if (j)
2803                             min = (U8)uv;
2804                         else if (uv < 256)
2805                             max = (U8)uv;
2806                         else {
2807                             max = (U8)0xff; /* only to \xff */
2808                             uvmax = uv; /* \x{100} to uvmax */
2809                         }
2810                         d = c; /* eat endpoint chars */
2811                      }
2812                 }
2813                else {
2814 #endif
2815                    d -= 2;              /* eat the first char and the - */
2816                    min = (U8)*d;        /* first char in range */
2817                    max = (U8)d[1];      /* last char in range  */
2818 #ifdef EBCDIC
2819                }
2820 #endif
2821
2822                 if (min > max) {
2823                     SvREFCNT_dec(sv);
2824                     Perl_croak(aTHX_
2825                                "Invalid range \"%c-%c\" in transliteration operator",
2826                                (char)min, (char)max);
2827                 }
2828
2829 #ifdef EBCDIC
2830                 if (literal_endpoint == 2 &&
2831                     ((isLOWER(min) && isLOWER(max)) ||
2832                      (isUPPER(min) && isUPPER(max)))) {
2833                     if (isLOWER(min)) {
2834                         for (i = min; i <= max; i++)
2835                             if (isLOWER(i))
2836                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2837                     } else {
2838                         for (i = min; i <= max; i++)
2839                             if (isUPPER(i))
2840                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2841                     }
2842                 }
2843                 else
2844 #endif
2845                     for (i = min; i <= max; i++)
2846 #ifdef EBCDIC
2847                         if (has_utf8) {
2848                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2849                             if (UNI_IS_INVARIANT(ch))
2850                                 *d++ = (U8)i;
2851                             else {
2852                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2853                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2854                             }
2855                         }
2856                         else
2857 #endif
2858                             *d++ = (char)i;
2859
2860 #ifdef EBCDIC
2861                 if (uvmax) {
2862                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2863                     if (uvmax > 0x101)
2864                         *d++ = (char)UTF_TO_NATIVE(0xff);
2865                     if (uvmax > 0x100)
2866                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2867                 }
2868 #endif
2869
2870                 /* mark the range as done, and continue */
2871                 dorange = FALSE;
2872                 didrange = TRUE;
2873 #ifdef EBCDIC
2874                 literal_endpoint = 0;
2875 #endif
2876                 continue;
2877             }
2878
2879             /* range begins (ignore - as first or last char) */
2880             else if (*s == '-' && s+1 < send  && s != start) {
2881                 if (didrange) {
2882                     SvREFCNT_dec(sv);
2883                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2884                 }
2885                 if (has_utf8
2886 #ifdef EBCDIC
2887                     && !native_range
2888 #endif
2889                     ) {
2890                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2891                     s++;
2892                     continue;
2893                 }
2894                 dorange = TRUE;
2895                 s++;
2896             }
2897             else {
2898                 didrange = FALSE;
2899 #ifdef EBCDIC
2900                 literal_endpoint = 0;
2901                 native_range = TRUE;
2902 #endif
2903             }
2904         }
2905
2906         /* if we get here, we're not doing a transliteration */
2907
2908         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2909             char *s1 = s-1;
2910             int esc = 0;
2911             while (s1 >= start && *s1-- == '\\')
2912                 esc = !esc;
2913             if (!esc)
2914                 in_charclass = TRUE;
2915         }
2916
2917         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2918             char *s1 = s-1;
2919             int esc = 0;
2920             while (s1 >= start && *s1-- == '\\')
2921                 esc = !esc;
2922             if (!esc)
2923                 in_charclass = FALSE;
2924         }
2925
2926         /* skip for regexp comments /(?#comment)/, except for the last
2927          * char, which will be done separately.
2928          * Stop on (?{..}) and friends */
2929
2930         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2931             if (s[2] == '#') {
2932                 while (s+1 < send && *s != ')')
2933                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2934             }
2935             else if (!PL_lex_casemods && !in_charclass &&
2936                      (    s[2] == '{' /* This should match regcomp.c */
2937                       || (s[2] == '?' && s[3] == '{')))
2938             {
2939                 break;
2940             }
2941         }
2942
2943         /* likewise skip #-initiated comments in //x patterns */
2944         else if (*s == '#' && PL_lex_inpat &&
2945           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2946             while (s+1 < send && *s != '\n')
2947                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2948         }
2949
2950         /* no further processing of single-quoted regex */
2951         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2952             goto default_action;
2953
2954         /* check for embedded arrays
2955            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2956            */
2957         else if (*s == '@' && s[1]) {
2958             if (isALNUM_lazy_if(s+1,UTF))
2959                 break;
2960             if (strchr(":'{$", s[1]))
2961                 break;
2962             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2963                 break; /* in regexp, neither @+ nor @- are interpolated */
2964         }
2965
2966         /* check for embedded scalars.  only stop if we're sure it's a
2967            variable.
2968         */
2969         else if (*s == '$') {
2970             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2971                 break;
2972             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2973                 if (s[1] == '\\') {
2974                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2975                                    "Possible unintended interpolation of $\\ in regex");
2976                 }
2977                 break;          /* in regexp, $ might be tail anchor */
2978             }
2979         }
2980
2981         /* End of else if chain - OP_TRANS rejoin rest */
2982
2983         /* backslashes */
2984         if (*s == '\\' && s+1 < send) {
2985             char* e;    /* Can be used for ending '}', etc. */
2986
2987             s++;
2988
2989             /* warn on \1 - \9 in substitution replacements, but note that \11
2990              * is an octal; and \19 is \1 followed by '9' */
2991             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2992                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2993             {
2994                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2995                 *--s = '$';
2996                 break;
2997             }
2998
2999             /* string-change backslash escapes */
3000             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3001                 --s;
3002                 break;
3003             }
3004             /* In a pattern, process \N, but skip any other backslash escapes.
3005              * This is because we don't want to translate an escape sequence
3006              * into a meta symbol and have the regex compiler use the meta
3007              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3008              * in spite of this, we do have to process \N here while the proper
3009              * charnames handler is in scope.  See bugs #56444 and #62056.
3010              * There is a complication because \N in a pattern may also stand
3011              * for 'match a non-nl', and not mean a charname, in which case its
3012              * processing should be deferred to the regex compiler.  To be a
3013              * charname it must be followed immediately by a '{', and not look
3014              * like \N followed by a curly quantifier, i.e., not something like
3015              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3016              * quantifier */
3017             else if (PL_lex_inpat
3018                     && (*s != 'N'
3019                         || s[1] != '{'
3020                         || regcurly(s + 1)))
3021             {
3022                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3023                 goto default_action;
3024             }
3025
3026             switch (*s) {
3027
3028             /* quoted - in transliterations */
3029             case '-':
3030                 if (PL_lex_inwhat == OP_TRANS) {
3031                     *d++ = *s++;
3032                     continue;
3033                 }
3034                 /* FALL THROUGH */
3035             default:
3036                 {
3037                     if ((isALNUMC(*s)))
3038                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3039                                        "Unrecognized escape \\%c passed through",
3040                                        *s);
3041                     /* default action is to copy the quoted character */
3042                     goto default_action;
3043                 }
3044
3045             /* eg. \132 indicates the octal constant 0132 */
3046             case '0': case '1': case '2': case '3':
3047             case '4': case '5': case '6': case '7':
3048                 {
3049                     I32 flags = 0;
3050                     STRLEN len = 3;
3051                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3052                     s += len;
3053                 }
3054                 goto NUM_ESCAPE_INSERT;
3055
3056             /* eg. \o{24} indicates the octal constant \024 */
3057             case 'o':
3058                 {
3059                     STRLEN len;
3060                     const char* error;
3061
3062                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3063                     s += len;
3064                     if (! valid) {
3065                         yyerror(error);
3066                         continue;
3067                     }
3068                     goto NUM_ESCAPE_INSERT;
3069                 }
3070
3071             /* eg. \x24 indicates the hex constant 0x24 */
3072             case 'x':
3073                 {
3074                     STRLEN len;
3075                     const char* error;
3076
3077                     bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3078                     s += len;
3079                     if (! valid) {
3080                         yyerror(error);
3081                         continue;
3082                     }
3083                 }
3084
3085               NUM_ESCAPE_INSERT:
3086                 /* Insert oct or hex escaped character.  There will always be
3087                  * enough room in sv since such escapes will be longer than any
3088                  * UTF-8 sequence they can end up as, except if they force us
3089                  * to recode the rest of the string into utf8 */
3090
3091                 /* Here uv is the ordinal of the next character being added in
3092                  * unicode (converted from native). */
3093                 if (!UNI_IS_INVARIANT(uv)) {
3094                     if (!has_utf8 && uv > 255) {
3095                         /* Might need to recode whatever we have accumulated so
3096                          * far if it contains any chars variant in utf8 or
3097                          * utf-ebcdic. */
3098
3099                         SvCUR_set(sv, d - SvPVX_const(sv));
3100                         SvPOK_on(sv);
3101                         *d = '\0';
3102                         /* See Note on sizing above.  */
3103                         sv_utf8_upgrade_flags_grow(sv,
3104                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3105                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3106                         d = SvPVX(sv) + SvCUR(sv);
3107                         has_utf8 = TRUE;
3108                     }
3109
3110                     if (has_utf8) {
3111                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3112                         if (PL_lex_inwhat == OP_TRANS &&
3113                             PL_sublex_info.sub_op) {
3114                             PL_sublex_info.sub_op->op_private |=
3115                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3116                                              : OPpTRANS_TO_UTF);
3117                         }
3118 #ifdef EBCDIC
3119                         if (uv > 255 && !dorange)
3120                             native_range = FALSE;
3121 #endif
3122                     }
3123                     else {
3124                         *d++ = (char)uv;
3125                     }
3126                 }
3127                 else {
3128                     *d++ = (char) uv;
3129                 }
3130                 continue;
3131
3132             case 'N':
3133                 /* In a non-pattern \N must be a named character, like \N{LATIN
3134                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3135                  * mean to match a non-newline.  For non-patterns, named
3136                  * characters are converted to their string equivalents. In
3137                  * patterns, named characters are not converted to their
3138                  * ultimate forms for the same reasons that other escapes
3139                  * aren't.  Instead, they are converted to the \N{U+...} form
3140                  * to get the value from the charnames that is in effect right
3141                  * now, while preserving the fact that it was a named character
3142                  * so that the regex compiler knows this */
3143
3144                 /* This section of code doesn't generally use the
3145                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3146                  * a close examination of this macro and determined it is a
3147                  * no-op except on utfebcdic variant characters.  Every
3148                  * character generated by this that would normally need to be
3149                  * enclosed by this macro is invariant, so the macro is not
3150                  * needed, and would complicate use of copy().  XXX There are
3151                  * other parts of this file where the macro is used
3152                  * inconsistently, but are saved by it being a no-op */
3153
3154                 /* The structure of this section of code (besides checking for
3155                  * errors and upgrading to utf8) is:
3156                  *  Further disambiguate between the two meanings of \N, and if
3157                  *      not a charname, go process it elsewhere
3158                  *  If of form \N{U+...}, pass it through if a pattern;
3159                  *      otherwise convert to utf8
3160                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3161                  *  pattern; otherwise convert to utf8 */
3162
3163                 /* Here, s points to the 'N'; the test below is guaranteed to
3164                  * succeed if we are being called on a pattern as we already
3165                  * know from a test above that the next character is a '{'.
3166                  * On a non-pattern \N must mean 'named sequence, which
3167                  * requires braces */
3168                 s++;
3169                 if (*s != '{') {
3170                     yyerror("Missing braces on \\N{}");
3171                     continue;
3172                 }
3173                 s++;
3174
3175                 /* If there is no matching '}', it is an error. */
3176                 if (! (e = strchr(s, '}'))) {
3177                     if (! PL_lex_inpat) {
3178                         yyerror("Missing right brace on \\N{}");
3179                     } else {
3180                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3181                     }
3182                     continue;
3183                 }
3184
3185                 /* Here it looks like a named character */
3186
3187                 if (PL_lex_inpat) {
3188
3189                     /* XXX This block is temporary code.  \N{} implies that the
3190                      * pattern is to have Unicode semantics, and therefore
3191                      * currently has to be encoded in utf8.  By putting it in
3192                      * utf8 now, we save a whole pass in the regular expression
3193                      * compiler.  Once that code is changed so Unicode
3194                      * semantics doesn't necessarily have to be in utf8, this
3195                      * block should be removed.  However, the code that parses
3196                      * the output of this would have to be changed to not
3197                      * necessarily expect utf8 */
3198                     if (!has_utf8) {
3199                         SvCUR_set(sv, d - SvPVX_const(sv));
3200                         SvPOK_on(sv);
3201                         *d = '\0';
3202                         /* See Note on sizing above.  */
3203                         sv_utf8_upgrade_flags_grow(sv,
3204                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3205                                         /* 5 = '\N{' + cur char + NUL */
3206                                         (STRLEN)(send - s) + 5);
3207                         d = SvPVX(sv) + SvCUR(sv);
3208                         has_utf8 = TRUE;
3209                     }
3210                 }
3211
3212                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3213                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3214                                 | PERL_SCAN_DISALLOW_PREFIX;
3215                     STRLEN len;
3216
3217                     /* For \N{U+...}, the '...' is a unicode value even on
3218                      * EBCDIC machines */
3219                     s += 2;         /* Skip to next char after the 'U+' */
3220                     len = e - s;
3221                     uv = grok_hex(s, &len, &flags, NULL);
3222                     if (len == 0 || len != (STRLEN)(e - s)) {
3223                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3224                         s = e + 1;
3225                         continue;
3226                     }
3227
3228                     if (PL_lex_inpat) {
3229
3230                         /* On non-EBCDIC platforms, pass through to the regex
3231                          * compiler unchanged.  The reason we evaluated the
3232                          * number above is to make sure there wasn't a syntax
3233                          * error.  But on EBCDIC we convert to native so
3234                          * downstream code can continue to assume it's native
3235                          */
3236                         s -= 5;     /* Include the '\N{U+' */
3237 #ifdef EBCDIC
3238                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3239                                                                and the \0 */
3240                                     "\\N{U+%X}",
3241                                     (unsigned int) UNI_TO_NATIVE(uv));
3242 #else
3243                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3244                         d += e - s + 1;
3245 #endif
3246                     }
3247                     else {  /* Not a pattern: convert the hex to string */
3248
3249                          /* If destination is not in utf8, unconditionally
3250                           * recode it to be so.  This is because \N{} implies
3251                           * Unicode semantics, and scalars have to be in utf8
3252                           * to guarantee those semantics */
3253                         if (! has_utf8) {
3254                             SvCUR_set(sv, d - SvPVX_const(sv));
3255                             SvPOK_on(sv);
3256                             *d = '\0';
3257                             /* See Note on sizing above.  */
3258                             sv_utf8_upgrade_flags_grow(
3259                                         sv,
3260                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3261                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3262                             d = SvPVX(sv) + SvCUR(sv);
3263                             has_utf8 = TRUE;
3264                         }
3265
3266                         /* Add the string to the output */
3267                         if (UNI_IS_INVARIANT(uv)) {
3268                             *d++ = (char) uv;
3269                         }
3270                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3271                     }
3272                 }
3273                 else { /* Here is \N{NAME} but not \N{U+...}. */
3274
3275                     SV *res;            /* result from charnames */
3276                     const char *str;    /* the string in 'res' */
3277                     STRLEN len;         /* its length */
3278
3279                     /* Get the value for NAME */
3280                     res = newSVpvn(s, e - s);
3281                     res = new_constant( NULL, 0, "charnames",
3282                                         /* includes all of: \N{...} */
3283                                         res, NULL, s - 3, e - s + 4 );
3284
3285                     /* Most likely res will be in utf8 already since the
3286                      * standard charnames uses pack U, but a custom translator
3287                      * can leave it otherwise, so make sure.  XXX This can be
3288                      * revisited to not have charnames use utf8 for characters
3289                      * that don't need it when regexes don't have to be in utf8
3290                      * for Unicode semantics.  If doing so, remember EBCDIC */
3291                     if (SvPOK(res)) {
3292                     sv_utf8_upgrade(res);
3293                     str = SvPV_const(res, len);
3294
3295                     /* Don't accept malformed input */
3296                     if (! is_utf8_string((U8 *) str, len)) {
3297                         yyerror("Malformed UTF-8 returned by \\N");
3298                     }
3299                     else if (PL_lex_inpat) {
3300
3301                         if (! len) { /* The name resolved to an empty string */
3302                             Copy("\\N{}", d, 4, char);
3303                             d += 4;
3304                         }
3305                         else {
3306                             /* In order to not lose information for the regex
3307                             * compiler, pass the result in the specially made
3308                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3309                             * the code points in hex of each character
3310                             * returned by charnames */
3311
3312                             const char *str_end = str + len;
3313                             STRLEN char_length;     /* cur char's byte length */
3314                             STRLEN output_length;   /* and the number of bytes
3315                                                        after this is translated
3316                                                        into hex digits */
3317                             const STRLEN off = d - SvPVX_const(sv);
3318
3319                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3320                              * max('U+', '.'); and 1 for NUL */
3321                             char hex_string[2 * UTF8_MAXBYTES + 5];
3322
3323                             /* Get the first character of the result. */
3324                             U32 uv = utf8n_to_uvuni((U8 *) str,
3325                                                     len,
3326                                                     &char_length,
3327                                                     UTF8_ALLOW_ANYUV);
3328
3329                             /* The call to is_utf8_string() above hopefully
3330                              * guarantees that there won't be an error.  But
3331                              * it's easy here to make sure.  The function just
3332                              * above warns and returns 0 if invalid utf8, but
3333                              * it can also return 0 if the input is validly a
3334                              * NUL. Disambiguate */
3335                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3336                                 uv = UNICODE_REPLACEMENT;
3337                             }
3338
3339                             /* Convert first code point to hex, including the
3340                              * boiler plate before it.  For all these, we
3341                              * convert to native format so that downstream code
3342                              * can continue to assume the input is native */
3343                             output_length =
3344                                 my_snprintf(hex_string, sizeof(hex_string),
3345                                             "\\N{U+%X",
3346                                             (unsigned int) UNI_TO_NATIVE(uv));
3347
3348                             /* Make sure there is enough space to hold it */
3349                             d = off + SvGROW(sv, off
3350                                                  + output_length
3351                                                  + (STRLEN)(send - e)
3352                                                  + 2);  /* '}' + NUL */
3353                             /* And output it */
3354                             Copy(hex_string, d, output_length, char);
3355                             d += output_length;
3356
3357                             /* For each subsequent character, append dot and
3358                              * its ordinal in hex */
3359                             while ((str += char_length) < str_end) {
3360                                 const STRLEN off = d - SvPVX_const(sv);
3361                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3362                                                         str_end - str,
3363                                                         &char_length,
3364                                                         UTF8_ALLOW_ANYUV);
3365                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3366                                     uv = UNICODE_REPLACEMENT;
3367                                 }
3368
3369                                 output_length =
3370                                     my_snprintf(hex_string, sizeof(hex_string),
3371                                             ".%X",
3372                                             (unsigned int) UNI_TO_NATIVE(uv));
3373
3374                                 d = off + SvGROW(sv, off
3375                                                      + output_length
3376                                                      + (STRLEN)(send - e)
3377                                                      + 2);      /* '}' +  NUL */
3378                                 Copy(hex_string, d, output_length, char);
3379                                 d += output_length;
3380                             }
3381
3382                             *d++ = '}'; /* Done.  Add the trailing brace */
3383                         }
3384                     }
3385                     else { /* Here, not in a pattern.  Convert the name to a
3386                             * string. */
3387
3388                          /* If destination is not in utf8, unconditionally
3389                           * recode it to be so.  This is because \N{} implies
3390                           * Unicode semantics, and scalars have to be in utf8
3391                           * to guarantee those semantics */
3392                         if (! has_utf8) {
3393                             SvCUR_set(sv, d - SvPVX_const(sv));
3394                             SvPOK_on(sv);
3395                             *d = '\0';
3396                             /* See Note on sizing above.  */
3397                             sv_utf8_upgrade_flags_grow(sv,
3398                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3399                                                 len + (STRLEN)(send - s) + 1);
3400                             d = SvPVX(sv) + SvCUR(sv);
3401                             has_utf8 = TRUE;
3402                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3403
3404                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3405                              * set correctly here). */
3406                             const STRLEN off = d - SvPVX_const(sv);
3407                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3408                         }
3409                         Copy(str, d, len, char);
3410                         d += len;
3411                     }
3412                     SvREFCNT_dec(res);
3413
3414                     /* Deprecate non-approved name syntax */
3415                     if (ckWARN_d(WARN_DEPRECATED)) {
3416                         bool problematic = FALSE;
3417                         char* i = s;
3418
3419                         /* For non-utf8 input, look to see that the first
3420                          * character is an alpha, then loop through the rest
3421                          * checking that each is a continuation */
3422                         if (! this_utf8) {
3423                             if (! isALPHAU(*i)) problematic = TRUE;
3424                             else for (i = s + 1; i < e; i++) {
3425                                 if (isCHARNAME_CONT(*i)) continue;
3426                                 problematic = TRUE;
3427                                 break;
3428                             }
3429                         }
3430                         else {
3431                             /* Similarly for utf8.  For invariants can check
3432                              * directly.  We accept anything above the latin1
3433                              * range because it is immaterial to Perl if it is
3434                              * correct or not, and is expensive to check.  But
3435                              * it is fairly easy in the latin1 range to convert
3436                              * the variants into a single character and check
3437                              * those */
3438                             if (UTF8_IS_INVARIANT(*i)) {
3439                                 if (! isALPHAU(*i)) problematic = TRUE;
3440                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3441                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3442                                                                             *(i+1)))))
3443                                 {
3444                                     problematic = TRUE;
3445                                 }
3446                             }
3447                             if (! problematic) for (i = s + UTF8SKIP(s);
3448                                                     i < e;
3449                                                     i+= UTF8SKIP(i))
3450                             {
3451                                 if (UTF8_IS_INVARIANT(*i)) {
3452                                     if (isCHARNAME_CONT(*i)) continue;
3453                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3454                                     continue;
3455                                 } else if (isCHARNAME_CONT(
3456                                             UNI_TO_NATIVE(
3457                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3458                                 {
3459                                     continue;
3460                                 }
3461                                 problematic = TRUE;
3462                                 break;
3463                             }
3464                         }
3465                         if (problematic) {
3466                             /* The e-i passed to the final %.*s makes sure that
3467                              * should the trailing NUL be missing that this
3468                              * print won't run off the end of the string */
3469                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3470                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3471                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3472                         }
3473                     }
3474                 }
3475                 } /* End \N{NAME} */
3476 #ifdef EBCDIC
3477                 if (!dorange)
3478                     native_range = FALSE; /* \N{} is defined to be Unicode */
3479 #endif
3480                 s = e + 1;  /* Point to just after the '}' */
3481                 continue;
3482
3483             /* \c is a control character */
3484             case 'c':
3485                 s++;
3486                 if (s < send) {
3487                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3488                 }
3489                 else {
3490                     yyerror("Missing control char name in \\c");
3491                 }
3492                 continue;
3493
3494             /* printf-style backslashes, formfeeds, newlines, etc */
3495             case 'b':
3496                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3497                 break;
3498             case 'n':
3499                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3500                 break;
3501             case 'r':
3502                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3503                 break;
3504             case 'f':
3505                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3506                 break;
3507             case 't':
3508                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3509                 break;
3510             case 'e':
3511                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3512                 break;
3513             case 'a':
3514                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3515                 break;
3516             } /* end switch */
3517
3518             s++;
3519             continue;
3520         } /* end if (backslash) */
3521 #ifdef EBCDIC
3522         else
3523             literal_endpoint++;
3524 #endif
3525
3526     default_action:
3527         /* If we started with encoded form, or already know we want it,
3528            then encode the next character */
3529         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3530             STRLEN len  = 1;
3531
3532
3533             /* One might think that it is wasted effort in the case of the
3534              * source being utf8 (this_utf8 == TRUE) to take the next character
3535              * in the source, convert it to an unsigned value, and then convert
3536              * it back again.  But the source has not been validated here.  The
3537              * routine that does the conversion checks for errors like
3538              * malformed utf8 */
3539
3540             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3541             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3542             if (!has_utf8) {
3543                 SvCUR_set(sv, d - SvPVX_const(sv));
3544                 SvPOK_on(sv);
3545                 *d = '\0';
3546                 /* See Note on sizing above.  */
3547                 sv_utf8_upgrade_flags_grow(sv,
3548                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3549                                         need + (STRLEN)(send - s) + 1);
3550                 d = SvPVX(sv) + SvCUR(sv);
3551                 has_utf8 = TRUE;
3552             } else if (need > len) {
3553                 /* encoded value larger than old, may need extra space (NOTE:
3554                  * SvCUR() is not set correctly here).   See Note on sizing
3555                  * above.  */
3556                 const STRLEN off = d - SvPVX_const(sv);
3557                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3558             }
3559             s += len;
3560
3561             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3562 #ifdef EBCDIC
3563             if (uv > 255 && !dorange)
3564                 native_range = FALSE;
3565 #endif
3566         }
3567         else {
3568             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3569         }
3570     } /* while loop to process each character */
3571
3572     /* terminate the string and set up the sv */
3573     *d = '\0';
3574     SvCUR_set(sv, d - SvPVX_const(sv));
3575     if (SvCUR(sv) >= SvLEN(sv))
3576         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3577                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3578
3579     SvPOK_on(sv);
3580     if (PL_encoding && !has_utf8) {
3581         sv_recode_to_utf8(sv, PL_encoding);
3582         if (SvUTF8(sv))
3583             has_utf8 = TRUE;
3584     }
3585     if (has_utf8) {
3586         SvUTF8_on(sv);
3587         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3588             PL_sublex_info.sub_op->op_private |=
3589                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3590         }
3591     }
3592
3593     /* shrink the sv if we allocated more than we used */
3594     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3595         SvPV_shrink_to_cur(sv);
3596     }
3597
3598     /* return the substring (via pl_yylval) only if we parsed anything */
3599     if (s > PL_bufptr) {
3600         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3601             const char *const key = PL_lex_inpat ? "qr" : "q";
3602             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3603             const char *type;
3604             STRLEN typelen;
3605
3606             if (PL_lex_inwhat == OP_TRANS) {
3607                 type = "tr";
3608                 typelen = 2;
3609             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3610                 type = "s";
3611                 typelen = 1;
3612             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3613                 type = "q";
3614                 typelen = 1;
3615             } else  {
3616                 type = "qq";
3617                 typelen = 2;
3618             }
3619
3620             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3621                                 type, typelen);
3622         }
3623         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3624     } else
3625         SvREFCNT_dec(sv);
3626     return s;
3627 }
3628
3629 /* S_intuit_more
3630  * Returns TRUE if there's more to the expression (e.g., a subscript),
3631  * FALSE otherwise.
3632  *
3633  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3634  *
3635  * ->[ and ->{ return TRUE
3636  * { and [ outside a pattern are always subscripts, so return TRUE
3637  * if we're outside a pattern and it's not { or [, then return FALSE
3638  * if we're in a pattern and the first char is a {
3639  *   {4,5} (any digits around the comma) returns FALSE
3640  * if we're in a pattern and the first char is a [
3641  *   [] returns FALSE
3642  *   [SOMETHING] has a funky algorithm to decide whether it's a
3643  *      character class or not.  It has to deal with things like
3644  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3645  * anything else returns TRUE
3646  */
3647
3648 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3649
3650 STATIC int
3651 S_intuit_more(pTHX_ register char *s)
3652 {
         /* Guess whether the text at s continues the preceding variable
          * expression.  Returns TRUE when it looks like a subscript or a
          * ->[ / ->{ dereference, FALSE when it looks like ordinary text
          * (in a pattern: a {n,m} quantifier or a [...] character class).
          * s is only read; the caller's pointer is not changed. */
3653     dVAR;
3654
3655     PERL_ARGS_ASSERT_INTUIT_MORE;
3656
3657     if (PL_lex_brackets)
3658         return TRUE;
3659     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3660         return TRUE;
3661     if (*s != '{' && *s != '[')
3662         return FALSE;
3663     if (!PL_lex_inpat)
3664         return TRUE;
3665
3666     /* In a pattern, so maybe we have {n,m}. */
3667     if (*s == '{') {
3668         if (regcurly(s)) {
3669             return FALSE;
3670         }
3671         return TRUE;
3672     }
3673
3674     /* On the other hand, maybe we have a character class */
3675
3676     s++;
3677     if (*s == ']' || *s == '^')
3678         return FALSE;
3679     else {
3680         /* this is terrifying, and it works */
             /* Scoring: evidence for a subscript lowers weight, evidence for
              * a character class raises it.  A final weight >= 0 is taken to
              * mean "character class" (return FALSE). */
3681         int weight = 2;         /* let's weigh the evidence */
3682         char seen[256];         /* how often each byte value occurred */
3683         unsigned char un_char = 255, last_un_char;
3684         const char * const send = strchr(s,']'); /* first ']' after '[' */
3685         char tmpbuf[sizeof PL_tokenbuf * 4];     /* for scan_ident() only */
3686
3687         if (!send)              /* has to be an expression */
3688             return TRUE;
3689
3690         Zero(seen,256,char);
             /* A leading '$', or a subscript made of one or two digits only,
              * counts towards "subscript" before the scan starts. */
3691         if (*s == '$')
3692             weight -= 3;
3693         else if (isDIGIT(*s)) {
3694             if (s[1] != ']') {
3695                 if (isDIGIT(s[1]) && s[2] == ']')
3696                     weight -= 10;
3697             }
3698             else
3699                 weight -= 100;
3700         }
             /* Walk the bracket contents.  Note that some cases below advance
              * s themselves in addition to the loop's own s++. */
3701         for (; s < send; s++) {
3702             last_un_char = un_char;
3703             un_char = (unsigned char)*s;
3704             switch (*s) {
3705             case '@':
3706             case '&':
3707             case '$':
                     /* Sigils: repeated ones, and ones followed by an
                      * identifier (more so if gv_fetchpvn_flags() finds that
                      * name), count towards "subscript". */
3708                 weight -= seen[un_char] * 10;
3709                 if (isALNUM_lazy_if(s+1,UTF)) {
3710                     int len;
3711                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3712                     len = (int)strlen(tmpbuf);
3713                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3714                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3715                         weight -= 100;
3716                     else
3717                         weight -= 10;
3718                 }
3719                 else if (*s == '$' && s[1] &&
3720                   strchr("[#!%*<>()-=",s[1])) {
3721                     if (/*{*/ strchr("])} =",s[2]))
3722                         weight -= 10;
3723                     else
3724                         weight -= 1;
3725                 }
3726                 break;
3727             case '\\':
                     /* Backslash escapes count towards "character class".
                      * 254 is stored as a stand-in value for "escape" in
                      * un_char and seen[]. */
3728                 un_char = 254;
3729                 if (s[1]) {
3730                     if (strchr("wds]",s[1]))
3731                         weight += 100;
3732                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3733                         weight += 1;
3734                     else if (strchr("rnftbxcav",s[1]))
3735                         weight += 40;
3736                     else if (isDIGIT(s[1])) {
3737                         weight += 40;
3738                         while (s[1] && isDIGIT(s[1]))
3739                             s++;
3740                     }
3741                 }
3742                 else
3743                     weight += 100;
3744                 break;
3745             case '-':
                     /* A '-' between typical range endpoints counts towards
                      * "character class"; a leading '-' before a digit or
                      * '$' counts towards "subscript". */
3746                 if (s[1] == '\\')
3747                     weight += 50;
3748                 if (strchr("aA01! ",last_un_char))
3749                     weight += 30;
3750                 if (strchr("zZ79~",s[1]))
3751                     weight += 30;
3752                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3753                     weight -= 5;        /* cope with negative subscript */
3754                 break;
3755             default:
3756                 if (!isALNUM(last_un_char)
3757                     && !(last_un_char == '$' || last_un_char == '@'
3758                          || last_un_char == '&')
3759                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
                         /* A bare word of two or more letters: a Perl keyword
                          * here counts strongly towards "subscript".  The
                          * word is looked up where it stands rather than
                          * copied into tmpbuf, because the run of letters has
                          * no length limit and a copy could write past the
                          * end of that fixed-size buffer.  s still ends up
                          * just past the word, as before. */
3760                     const char * const word = s;
3761                     while (isALPHA(*s))
3762                         s++;
3763                     /* keyword() is given an explicit length, no NUL needed */
3764                     if (keyword(word, s - word, 0))
3765                         weight -= 150;
3766                 }
3767                 if (un_char == last_un_char + 1)
3768                     weight += 5;
3769                 weight -= seen[un_char];
3770                 break;
3771             }
3772             seen[un_char]++;
3773         }
3774         if (weight >= 0)        /* probably a character class */
3775             return FALSE;
3776     }
3777
3778     return TRUE;
3779 }
3780
3781 /*
3782  * S_intuit_method
3783  *
3784  * Does all the checking to disambiguate
3785  *   foo bar
3786  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3787  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3788  *
3789  * First argument is the stuff after the first token, e.g. "bar".
3790  *
3791  * Not a method if foo is a filehandle.
3792  * Not a method if foo is a subroutine prototyped to take a filehandle.
3793  * Not a method if it's really "Foo $bar"
3794  * Method if it's "foo $bar"
3795  * Not a method if it's really "print foo $bar"
3796  * Method if it's really "foo package::" (interpreted as package->foo)
3797  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3798  * Not a method if bar is a filehandle or package, but is quoted with
3799  *   =>
3800  */
3801
3802 STATIC int
3803 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3804 {
         /* Decide whether "foo bar" is an indirect-object method call.
          * start points at the text after foo; gv and cv are the glob and
          * sub (either may be NULL) that were found for foo.
          * Returns 0 for "not a method"; otherwise METHOD, or FUNCMETH when
          * the object is followed by '('.  On a non-zero return PL_bufptr
          * and PL_expect have been updated, and for a bareword object a
          * WORD token has been queued. */
3805     dVAR;
3806     char *s = start + (*start == '$');      /* step over a leading '$' */
3807     char tmpbuf[sizeof PL_tokenbuf];
3808     STRLEN len;
3809     GV* indirgv;
3810 #ifdef PERL_MAD
3811     int soff;
3812 #endif
3813
3814     PERL_ARGS_ASSERT_INTUIT_METHOD;
3815
         /* foo is a glob with an IO slot, i.e. a filehandle: not a method */
3816     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3817             return 0;
         /* foo is a sub whose prototype starts with '*' (optionally after a
          * leading ';'), i.e. it takes a filehandle: not a method */
3818     if (cv && SvPOK(cv)) {
3819                 const char *proto = CvPROTO(cv);
3820                 if (proto) {
3821                     if (*proto == ';')
3822                         proto++;
3823                     if (*proto == '*')
3824                         return 0;
3825                 }
3826     }
3827     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3828     /* start is the beginning of the possible filehandle/object,
3829      * and s is the end of it
3830      * tmpbuf is a copy of it
3831      */
3832
         /* The object is a scalar variable ("foo $bar").  It is not a method
          * call when foo is a known sub, when the last list operator was
          * print or say, or when PL_tokenbuf starts with an upper-case
          * letter. */
3833     if (*start == '$') {
3834         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3835                 isUPPER(*PL_tokenbuf))
3836             return 0;
             /* Under PERL_MAD, start is kept as an offset into PL_linestr
              * across PEEKSPACE and rebuilt afterwards.
              * NOTE(review): presumably because PEEKSPACE can move the
              * buffer in that configuration -- confirm. */
3837 #ifdef PERL_MAD
3838         len = start - SvPVX(PL_linestr);
3839 #endif
3840         s = PEEKSPACE(s);
3841 #ifdef PERL_MAD
3842         start = SvPVX(PL_linestr) + len;
3843 #endif
3844         PL_bufptr = start;      /* lexing resumes at the '$' */
3845         PL_expect = XREF;
3846         return *s == '(' ? FUNCMETH : METHOD;
3847     }
         /* The object is a bareword; only non-keywords are considered */
3848     if (!keyword(tmpbuf, len, 0)) {
             /* "foo Package::" -- drop the trailing '::' and go straight to
              * treating the rest as a package name */
3849         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3850             len -= 2;
3851             tmpbuf[len] = '\0';
3852 #ifdef PERL_MAD
3853             soff = s - SvPVX(PL_linestr);
3854 #endif
3855             goto bare_package;
3856         }
3857         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
             /* bar is itself a known subroutine: "foo bar" is foo(bar) */
3858         if (indirgv && GvCVu(indirgv))
3859             return 0;
3860         /* filehandle or package name makes it a method */
             /* (as does foo not being a known sub at all: !cv) */
3861         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3862 #ifdef PERL_MAD
3863             soff = s - SvPVX(PL_linestr);
3864 #endif
3865             s = PEEKSPACE(s);
3866             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3867                 return 0;       /* no assumptions -- "=>" quotes bareword */
                 /* Queue the object name as a bareword constant WORD token
                  * so the parser sees it next. */
3868       bare_package:
3869             start_force(PL_curforce);
3870             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3871                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3872             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3873             if (PL_madskills)
3874                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3875                                                             ( UTF ? SVf_UTF8 : 0 )));
3876             PL_expect = XTERM;
3877             force_next(WORD);
3878             PL_bufptr = s;      /* lexing resumes after the object name */
3879 #ifdef PERL_MAD
3880             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3881 #endif
3882             return *s == '(' ? FUNCMETH : METHOD;
3883         }
3884     }
3885     return 0;
3886 }
3887
3888 /* Encoded script support. filter_add() effectively inserts a
3889  * 'pre-processing' function into the current source input stream.
3890  * Note that the filter function only applies to the current source file
3891  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3892  *
3893  * The datasv parameter (which may be NULL) can be used to pass
3894  * private data to this instance of the filter. The filter function
3895  * can recover the SV using the FILTER_DATA macro and use it to
3896  * store private buffers and state information.
3897  *
3898  * The supplied datasv parameter is upgraded to a PVIO type
3899  * and the IoDIRP/IoANY field is used to store the function pointer,
3900  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3901  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3902  * private use must be set using malloc'd pointers.
3903  */
3904
3905 SV *
3906 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3907 {
         /* Register funcp as a source filter for the current parse and
          * return the SV that carries it.
          * Returns NULL, registering nothing, when funcp is NULL or there is
          * no PL_parser.  Croaks when the parser has LEX_IGNORE_UTF8_HINTS
          * set.  If datasv is NULL a new SV is allocated; otherwise the
          * caller's SV is upgraded in place and returned. */
3908     dVAR;
3909     if (!funcp)
3910         return NULL;
3911
3912     if (!PL_parser)
3913         return NULL;
3914
3915     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3916         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3917
3918     if (!PL_rsfp_filters)
3919         PL_rsfp_filters = newAV();      /* first filter: create the list */
3920     if (!datasv)
3921         datasv = newSV(0);
3922     SvUPGRADE(datasv, SVt_PVIO);
3923     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3924     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3925     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3926                           FPTR2DPTR(void *, IoANY(datasv)),
3927                           SvPV_nolen(datasv)));
         /* The new filter goes in at index 0, ahead of any existing ones */
3928     av_unshift(PL_rsfp_filters, 1);
3929     av_store(PL_rsfp_filters, 0, datasv) ;
         /* Only for the first filter added under LEX_EVALBYTES, and only if
          * unread text remains in the buffer: cut the parser's line buffer
          * off after the first newline at or after PL_bufptr.  If no newline
          * is found nothing is changed and 'filtered' stays unset. */
3930     if (
3931         !PL_parser->filtered
3932      && PL_parser->lex_flags & LEX_EVALBYTES
3933      && PL_bufptr < PL_bufend
3934     ) {
3935         const char *s = PL_bufptr;
3936         while (s < PL_bufend) {
3937             if (*s == '\n') {
                     /* Remember every parser pointer as an offset into the
                      * old buffer, so it can be re-pointed at the new one */
3938                 SV *linestr = PL_parser->linestr;
3939                 char *buf = SvPVX(linestr);
3940                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3941                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3942                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3943                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3944                 STRLEN const last_uni_pos =
3945                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3946                 STRLEN const last_lop_pos =
3947                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
                     /* The old buffer SV is appended to the end of the
                      * filter list; the parser gets a fresh SV holding only
                      * the text up to and including the newline (s is
                      * advanced past it here). */
3948                 av_push(PL_rsfp_filters, linestr);
3949                 PL_parser->linestr =
3950                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3951                 buf = SvPVX(PL_parser->linestr);
3952                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3953                 PL_parser->bufptr = buf + bufptr_pos;
3954                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3955                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3956                 PL_parser->linestart = buf + linestart_pos;
3957                 if (PL_parser->last_uni)
3958                     PL_parser->last_uni = buf + last_uni_pos;
3959                 if (PL_parser->last_lop)
3960                     PL_parser->last_lop = buf + last_lop_pos;
                     /* On the old SV, the LEN field is overwritten with the
                      * old CUR and CUR with the offset just past the newline.
                      * NOTE(review): presumably the filter read path uses
                      * these as end-of-data and read position for the
                      * leftover text -- confirm. */
3961                 SvLEN(linestr) = SvCUR(linestr);
3962                 SvCUR(linestr) = s-SvPVX(linestr);
3963                 PL_parser->filtered = 1;
3964                 break;
3965             }
3966             s++;
3967         }
3968     }
3969     return(datasv);
3970 }
3971
3972
3973 /* Delete most recently added instance of this filter function. */
3974 void
3975 Perl_filter_del(pTHX_ filter_t funcp)
3976 {
3977     dVAR;
3978     SV *datasv;
3979
3980     PERL_ARGS_ASSERT_FILTER_DEL;
3981
3982 #ifdef DEBUGGING
3983     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3984                           FPTR2DPTR(void*, funcp)));
3985 #endif
3986     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3987         return;
3988     /* if filter is on top of stack (usual case) just pop it off */
3989     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3990     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3991         sv_free(av_pop(PL_rsfp_filters));
3992
3993         return;
3994     }
3995     /* we need to search for the correct entry and clear it     */
3996     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3997 }
3998
3999
4000 /* Invoke the idxth filter function for the current rsfp.        */
4001 /* maxlen 0 = read one text line */
4002 I32
4003 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4004 {
4005     dVAR;
4006     filter_t funcp;
4007     SV *datasv = NULL;
4008     /* This API is bad. It should have been using unsigned int for maxlen.
4009        Not sure if we want to change the API, but if not we should sanity
4010        check the value here.  */
4011     unsigned int correct_length
4012         = maxlen < 0 ?
4013 #ifdef PERL_MICRO
4014         0x7FFFFFFF
4015 #else
4016         INT_MAX
4017 #endif
4018         : maxlen;
4019
4020     PERL_ARGS_ASSERT_FILTER_READ;
4021
4022     if (!PL_parser || !PL_rsfp_filters)
4023         return -1;
4024     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4025         /* Provide a default input filter to make life easy.    */
4026         /* Note that we append to the line. This is handy.      */
4027         DEBUG_P(PerlIO_printf(Perl_debug_log,
4028                               "filter_read %d: from rsfp\n", idx));
4029         if (correct_length) {
4030             /* Want a block */
4031             int len ;
4032             const int old_len = SvCUR(buf_sv);
4033
4034             /* ensure buf_sv is large enough */
4035             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4036             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4037                                    correct_length)) <= 0) {
4038                 if (PerlIO_error(PL_rsfp))
4039                     return -1;          /* error */
4040                 else
4041                     return 0 ;          /* end of file */
4042             }
4043             SvCUR_set(buf_sv, old_len + len) ;
4044             SvPVX(buf_sv)[old_len + len] = '\0';
4045         } else {
4046             /* Want a line */
4047             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4048                 if (PerlIO_error(PL_rsfp))
4049                     return -1;          /* error */
4050                 else
4051                     return 0 ;          /* end of file */
4052             }
4053         }
4054         return SvCUR(buf_sv);
4055     }
4056     /* Skip this filter slot if filter has been deleted */
4057     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4058         DEBUG_P(PerlIO_printf(Perl_debug_log,
4059                               "filter_read %d: skipped (filter deleted)\n",
4060                               idx));
4061         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4062     }
4063     if (SvTYPE(datasv) != SVt_PVIO) {
4064         if (correct_length) {
4065             /* Want a block */
4066             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4067             if (!remainder) return 0; /* eof */
4068             if (correct_length > remainder) correct_length = remainder;
4069             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4070             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4071         } else {
4072             /* Want a line */
4073             const char *s = SvEND(datasv);
4074             const char *send = SvPVX(datasv) + SvLEN(datasv);
4075             while (s < send) {
4076                 if (*s == '\n') {
4077                     s++;
4078                     break;
4079                 }
4080                 s++;
4081             }
4082             if (s == send) return 0; /* eof */
4083             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4084             SvCUR_set(datasv, s-SvPVX(datasv));
4085         }
4086         return SvCUR(buf_sv);
4087     }
4088     /* Get function pointer hidden within datasv        */
4089     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4090     DEBUG_P(PerlIO_printf(Perl_debug_log,
4091                           "filter_read %d: via function %p (%s)\n",
4092                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4093     /* Call function. The function is expected to       */
4094     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4095     /* Return: <0:error, =0:eof, >0:not eof             */
4096     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4097 }
4098
4099 STATIC char *
4100 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4101 {
4102     dVAR;
4103
4104     PERL_ARGS_ASSERT_FILTER_GETS;
4105
4106 #ifdef PERL_CR_FILTER
4107     if (!PL_rsfp_filters) {
4108         filter_add(S_cr_textfilter,NULL);
4109     }
4110 #endif
4111     if (PL_rsfp_filters) {
4112         if (!append)
4113             SvCUR_set(sv, 0);   /* start with empty line        */
4114         if (FILTER_READ(0, sv, 0) > 0)
4115             return ( SvPVX(sv) ) ;
4116         else
4117             return NULL ;
4118     }
4119     else
4120         return (sv_gets(sv, PL_rsfp, append));
4121 }
4122
4123 STATIC HV *
4124 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4125 {
4126     dVAR;
4127     GV *gv;
4128
4129     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4130
4131     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4132         return PL_curstash;
4133
4134     if (len > 2 &&
4135         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4136         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4137     {
4138         return GvHV(gv);                        /* Foo:: */
4139     }
4140
4141     /* use constant CLASS => 'MyClass' */
4142     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4143     if (gv && GvCV(gv)) {
4144         SV * const sv = cv_const_sv(GvCV(gv));
4145         if (sv)
4146             pkgname = SvPV_const(sv, len);
4147     }
4148
4149     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4150 }
4151
4152 /*
4153  * S_readpipe_override
4154  * Check whether readpipe() is overridden, and generates the appropriate
4155  * optree, provided sublex_start() is called afterwards.
4156  */
4157 STATIC void
4158 S_readpipe_override(pTHX)
4159 {
4160     GV **gvp;
4161     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4162     pl_yylval.ival = OP_BACKTICK;
4163     if ((gv_readpipe
4164                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4165             ||
4166             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4167              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4168              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4169     {
4170         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4171             op_append_elem(OP_LIST,
4172                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4173                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4174     }
4175 }
4176
4177 #ifdef PERL_MAD
4178  /*
4179  * Perl_madlex
4180  * The intent of this yylex wrapper is to minimize the changes to the
4181  * tokener when we aren't interested in collecting madprops.  It remains
4182  * to be seen how successful this strategy will be...
      *
      * Outline of what the code below does:
      *   - clears PL_thiswhite and PL_thismad, and adopts PL_nextwhite as
      *     this token's leading whitespace when PL_lasttoke is zero;
      *   - calls yylex() to obtain the real token type;
      *   - unless PL_thismad is already set with a key other than '^',
      *     makes sure PL_thistoken exists: it is built from the text
      *     between PL_realtokenstart (an offset into PL_linestr) and
      *     PL_bufptr, or is an empty string when that offset is negative
      *     or CopLINE(PL_curcop) is zero;
      *   - when PL_thismad is still empty, passes token text, whitespace,
      *     quoted material and quote delimiters to CURMAD under the keys
      *     'X'/'q', '_', '=' and 'Q';
      *   - for the token types in the first case group of the switch, the
      *     collected madprops are appended to pl_yylval.opval and the type
      *     is returned at once; for all other types pl_yylval.tkval is set
      *     to a newTOKEN() wrapping the value and the madprops.
      *
      * Returns the token type from yylex().  A 0 from yylex() is returned
      * as 0 only when there is no madprop and no whitespace to report;
      * otherwise it is turned into PEG.
4183  */
4184
4185 int
4186 Perl_madlex(pTHX)
4187 {
4188     int optype;
4189     char *s = PL_bufptr;
4190
4191     /* make sure PL_thiswhite is initialized */
4192     PL_thiswhite = 0;
4193     PL_thismad = 0;
4194
4195     /* previous token ate up our whitespace? */
4196     if (!PL_lasttoke && PL_nextwhite) {
4197         PL_thiswhite = PL_nextwhite;
4198         PL_nextwhite = 0;
4199     }
4200
4201     /* isolate the token, and figure out where it is without whitespace */
4202     PL_realtokenstart = -1;
4203     PL_thistoken = 0;
4204     optype = yylex();
         /* from here on s is the current PL_bufptr; it is used as the end
            of the token text below */
4205     s = PL_bufptr;
4206     assert(PL_curforce < 0);
4207
4208     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4209         if (!PL_thistoken) {
4210             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4211                 PL_thistoken = newSVpvs("");
4212             else {
4213                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4214                 PL_thistoken = newSVpvn(tstart, s - tstart);
4215             }
4216         }
4217         if (PL_thismad) /* install head */
4218             CURMAD('X', PL_thistoken);
4219     }
4220
4221     /* last whitespace of a sublex? */
4222     if (optype == ')' && PL_endwhite) {
4223         CURMAD('X', PL_endwhite);
4224     }
4225
4226     if (!PL_thismad) {
4227
4228         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4229         if (!PL_thiswhite && !PL_endwhite && !optype) {
4230             sv_free(PL_thistoken);
4231             PL_thistoken = 0;
4232             return 0;
4233         }
4234
4235         /* put off final whitespace till peg */
4236         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4237             PL_nextwhite = PL_thiswhite;
4238             PL_thiswhite = 0;
4239         }
4240         else if (PL_thisopen) {
                 /* an opening quote was recorded: store it under 'q' and
                    discard the plain token text */
4241             CURMAD('q', PL_thisopen);
4242             if (PL_thistoken)
4243                 sv_free(PL_thistoken);
4244             PL_thistoken = 0;
4245         }
4246         else {
4247             /* Store actual token text as madprop X */
4248             CURMAD('X', PL_thistoken);
4249         }
4250
4251         if (PL_thiswhite) {
4252             /* add preceding whitespace as madprop _ */
4253             CURMAD('_', PL_thiswhite);
4254         }
4255
4256         if (PL_thisstuff) {
4257             /* add quoted material as madprop = */
4258             CURMAD('=', PL_thisstuff);
4259         }
4260
4261         if (PL_thisclose) {
4262             /* add terminating quote as madprop Q */
4263             CURMAD('Q', PL_thisclose);
4264         }
4265     }
4266
4267     /* special processing based on optype */
4268
4269     switch (optype) {
4270
4271     /* opval doesn't need a TOKEN since it can already store mp */
4272     case WORD:
4273     case METHOD:
4274     case FUNCMETH:
4275     case THING:
4276     case PMFUNC:
4277     case PRIVATEREF:
4278     case FUNC0SUB:
4279     case UNIOPSUB:
4280     case LSTOPSUB:
4281     case LABEL:
4282         if (pl_yylval.opval)
4283             append_madprops(PL_thismad, pl_yylval.opval, 0);
4284         PL_thismad = 0;
4285         return optype;
4286
4287     /* fake EOF */
4288     case 0:
4289         optype = PEG;
4290         if (PL_endwhite) {
4291             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4292             PL_endwhite = 0;
4293         }
4294         break;
4295
4296     case ']':
4297     case '}':
4298         if (PL_faketokens)
4299             break;
4300         /* remember any fake bracket that lexer is about to discard */
4301         if (PL_lex_brackets == 1 &&
4302             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4303         {
4304             s = PL_bufptr;
4305             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4306                 s++;
4307             if (*s == '}') {
                     /* the recorded text runs from PL_bufptr through the
                        '}' inclusive; PL_bufptr is then left pointing at
                        that '}' */
4308                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4309                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4310                 PL_thiswhite = 0;
4311                 PL_bufptr = s - 1;
4312                 break;  /* don't bother looking for trailing comment */
4313             }
4314             else
4315                 s = PL_bufptr;
4316         }
4317         if (optype == ']')
4318             break;
4319         /* FALLTHROUGH */
4320
4321     /* attach a trailing comment to its statement instead of next token */
4322     case ';':
4323         if (PL_faketokens)
4324             break;
             /* only when the character just before PL_bufptr is the token
                itself, i.e. the ';' or '}' is really in the buffer */
4325         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4326             s = PL_bufptr;
4327             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4328                 s++;
4329             if (*s == '\n' || *s == '#') {
                     /* consume the rest of the line, newline included */
4330                 while (s < PL_bufend && *s != '\n')
4331                     s++;
4332                 if (s < PL_bufend)
4333                     s++;
4334                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4335                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4336                 PL_thiswhite = 0;
4337                 PL_bufptr = s;
4338             }
4339         }
4340         break;
4341
4342     /* ival */
4343     default:
4344         break;
4345
4346     }
4347
4348     /* Create new token struct.  Note: opvals return early above. */
4349     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4350     PL_thismad = 0;
4351     return optype;
4352 }
4353 #endif
4354
4355 STATIC char *
4356 S_tokenize_use(pTHX_ int is_use, char *s) {
4357     dVAR;
4358
4359     PERL_ARGS_ASSERT_TOKENIZE_USE;
4360
4361     if (PL_expect != XSTATE)
4362         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4363                     is_use ? "use" : "no"));
4364     PL_expect = XTERM;
4365     s = SKIPSPACE1(s);
4366     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4367         s = force_version(s, TRUE);
4368         if (*s == ';' || *s == '}'
4369                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4370             start_force(PL_curforce);
4371             NEXTVAL_NEXTTOKE.opval = NULL;
4372             force_next(WORD);
4373         }
4374         else if (*s == 'v') {
4375             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4376             s = force_version(s, FALSE);
4377         }
4378     }
4379     else {
4380         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4381         s = force_version(s, FALSE);
4382     }
4383     pl_yylval.ival = is_use;
4384     return s;
4385 }
#ifdef DEBUGGING
    /* Names printed after "X" in the lexer's debug trace; the table is
     * indexed by PL_expect. */
    static const char* const exp_name[] = {
        "OPERATOR",
        "TERM",
        "REF",
        "STATE",
        "BLOCK",
        "ATTRBLOCK",
        "ATTRTERM",
        "TERMBLOCK",
        "TERMORDORDOR"
    };
#endif
4392
#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
/* Is the len-byte word at p one of the quote-like operators that accept
 * an arbitrary delimiter: m, s, y, q, tr, qq, qw, qx or qr?
 *
 * p need not be NUL-terminated at len; only the first len bytes are
 * examined (and none at all unless len is 1 or 2). */
STATIC bool
S_word_takes_any_delimeter(char *p, STRLEN len)
{
    /* strchr() treats the terminating NUL of the set string as part of
     * it, so a NUL byte has to be rejected explicitly or it would be
     * reported as a match. */
    if (len == 1)
        return p[0] != '\0' && strchr("msyq", p[0]) != NULL;

    if (len == 2) {
        if (p[0] == 't')
            return p[1] == 'r';
        if (p[0] == 'q')
            return p[1] != '\0' && strchr("qwxr", p[1]) != NULL;
    }

    return 0;
}
4402
4403 /*
4404   yylex
4405
4406   Works out what to call the token just pulled out of the input
4407   stream.  The yacc parser takes care of taking the ops we return and
4408   stitching them into a tree.
4409
4410   Returns:
4411     The type of the next token
4412
4413   Structure:
4414       Switch based on the current state:
4415           - if we already built the token before, use it
4416           - if we have a case modifier in a string, deal with that
4417           - handle other cases of interpolation inside a string
4418           - scan the next line if we are inside a format
4419       In the normal state switch on the next character:
4420           - default:
4421             if alphabetic, go to key lookup
4422             unrecognized character - croak
4423           - 0/4/26: handle end-of-line or EOF
4424           - cases for whitespace
4425           - \n and #: handle comments and line numbers
4426           - various operators, brackets and sigils
4427           - numbers
4428           - quotes
4429           - 'v': vstrings (or go to key lookup)
4430           - 'x' repetition operator (or go to key lookup)
4431           - other ASCII alphanumerics (key lookup begins here):
4432               word before => ?
4433               keyword plugin
4434               scan built-in keyword (but do nothing with it yet)
4435               check for statement label
4436               check for lexical subs
4437                   goto just_a_word if there is one
4438               see whether built-in keyword is overridden
4439               switch on keyword number:
4440                   - default: just_a_word:
4441                       not a built-in keyword; handle bareword lookup
4442                       disambiguate between method and sub call
4443                       fall back to bareword
4444                   - cases for built-in keywords
4445 */
4446
4447
4448 #ifdef __SC__
4449 #pragma segment Perl_yylex
4450 #endif
4451 int
4452 Perl_yylex(pTHX)
4453 {
4454     dVAR;
4455     char *s = PL_bufptr;
4456     char *d;
4457     STRLEN len;
4458     bool bof = FALSE;
4459     U8 formbrack = 0;
4460     U32 fake_eof = 0;
4461
4462     /* orig_keyword, gvp, and gv are initialized here because
4463      * jump to the label just_a_word_zero can bypass their
4464      * initialization later. */
4465     I32 orig_keyword = 0;
4466     GV *gv = NULL;
4467     GV **gvp = NULL;
4468
4469     DEBUG_T( {
4470         SV* tmp = newSVpvs("");
4471         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4472             (IV)CopLINE(PL_curcop),
4473             lex_state_names[PL_lex_state],
4474             exp_name[PL_expect],
4475             pv_display(tmp, s, strlen(s), 0, 60));
4476         SvREFCNT_dec(tmp);
4477     } );
4478
4479     switch (PL_lex_state) {
4480 #ifdef COMMENTARY
4481     case LEX_NORMAL:            /* Some compilers will produce faster */
4482     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4483         break;
4484 #endif
4485
4486     /* when we've already built the next token, just pull it out of the queue */
4487     case LEX_KNOWNEXT:
4488 #ifdef PERL_MAD
4489         PL_lasttoke--;
4490         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4491         if (PL_madskills) {
4492             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4493             PL_nexttoke[PL_lasttoke].next_mad = 0;
4494             if (PL_thismad && PL_thismad->mad_key == '_') {
4495                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4496                 PL_thismad->mad_val = 0;
4497                 mad_free(PL_thismad);
4498                 PL_thismad = 0;
4499             }
4500         }
4501         if (!PL_lasttoke) {
4502             PL_lex_state = PL_lex_defer;
4503             PL_expect = PL_lex_expect;
4504             PL_lex_defer = LEX_NORMAL;
4505             if (!PL_nexttoke[PL_lasttoke].next_type)
4506                 return yylex();
4507         }
4508 #else
4509         PL_nexttoke--;
4510         pl_yylval = PL_nextval[PL_nexttoke];
4511         if (!PL_nexttoke) {
4512             PL_lex_state = PL_lex_defer;
4513             PL_expect = PL_lex_expect;
4514             PL_lex_defer = LEX_NORMAL;
4515         }
4516 #endif
4517         {
4518             I32 next_type;
4519 #ifdef PERL_MAD
4520             next_type = PL_nexttoke[PL_lasttoke].next_type;
4521 #else
4522             next_type = PL_nexttype[PL_nexttoke];
4523 #endif
4524             if (next_type & (7<<24)) {
4525                 if (next_type & (1<<24)) {
4526                     if (PL_lex_brackets > 100)
4527                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4528                     PL_lex_brackstack[PL_lex_brackets++] =
4529                         (char) ((next_type >> 16) & 0xff);
4530                 }
4531                 if (next_type & (2<<24))
4532                     PL_lex_allbrackets++;
4533                 if (next_type & (4<<24))
4534                     PL_lex_allbrackets--;
4535                 next_type &= 0xffff;
4536             }
4537             if (S_is_opval_token(next_type) && pl_yylval.opval)
4538                 pl_yylval.opval->op_savefree = 0; /* release */
4539             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4540         }
4541
4542     /* interpolated case modifiers like \L \U, including \Q and \E.
4543        when we get here, PL_bufptr is at the \
4544     */
4545     case LEX_INTERPCASEMOD:
4546 #ifdef DEBUGGING
4547         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4548             Perl_croak(aTHX_
4549                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4550                        PL_bufptr, PL_bufend, *PL_bufptr);
4551 #endif
4552         /* handle \E or end of string */
4553         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4554             /* if at a \E */
4555             if (PL_lex_casemods) {
4556                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4557                 PL_lex_casestack[PL_lex_casemods] = '\0';
4558
4559                 if (PL_bufptr != PL_bufend
4560                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4561                         || oldmod == 'F')) {
4562                     PL_bufptr += 2;
4563                     PL_lex_state = LEX_INTERPCONCAT;
4564 #ifdef PERL_MAD
4565                     if (PL_madskills)
4566                         PL_thistoken = newSVpvs("\\E");
4567 #endif
4568                 }
4569                 PL_lex_allbrackets--;
4570                 return REPORT(')');
4571             }
4572             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4573                /* Got an unpaired \E */
4574                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4575                         "Useless use of \\E");
4576             }
4577 #ifdef PERL_MAD
4578             while (PL_bufptr != PL_bufend &&
4579               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4580                 if (!PL_thiswhite)
4581                     PL_thiswhite = newSVpvs("");
4582                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4583                 PL_bufptr += 2;
4584             }
4585 #else
4586             if (PL_bufptr != PL_bufend)
4587                 PL_bufptr += 2;
4588 #endif
4589             PL_lex_state = LEX_INTERPCONCAT;
4590             return yylex();
4591         }
4592         else {
4593             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4594               "### Saw case modifier\n"); });
4595             s = PL_bufptr + 1;
4596             if (s[1] == '\\' && s[2] == 'E') {
4597 #ifdef PERL_MAD
4598                 if (!PL_thiswhite)
4599                     PL_thiswhite = newSVpvs("");
4600                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4601 #endif
4602                 PL_bufptr = s + 3;
4603                 PL_lex_state = LEX_INTERPCONCAT;
4604                 return yylex();
4605             }
4606             else {
4607                 I32 tmp;
4608                 if (!PL_madskills) /* when just compiling don't need correct */
4609                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4610                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4611                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4612                     (strchr(PL_lex_casestack, 'L')
4613                         || strchr(PL_lex_casestack, 'U')
4614                         || strchr(PL_lex_casestack, 'F'))) {
4615                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4616                     PL_lex_allbrackets--;
4617                     return REPORT(')');
4618                 }
4619                 if (PL_lex_casemods > 10)
4620                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4621                 PL_lex_casestack[PL_lex_casemods++] = *s;
4622                 PL_lex_casestack[PL_lex_casemods] = '\0';
4623                 PL_lex_state = LEX_INTERPCONCAT;
4624                 start_force(PL_curforce);
4625                 NEXTVAL_NEXTTOKE.ival = 0;
4626                 force_next((2<<24)|'(');
4627                 start_force(PL_curforce);
4628                 if (*s == 'l')
4629                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4630                 else if (*s == 'u')
4631                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4632                 else if (*s == 'L')
4633                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4634                 else if (*s == 'U')
4635                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4636                 else if (*s == 'Q')
4637                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4638                 else if (*s == 'F')
4639                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4640                 else
4641                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4642                 if (PL_madskills) {
4643                     SV* const tmpsv = newSVpvs("\\ ");
4644                     /* replace the space with the character we want to escape
4645                      */
4646                     SvPVX(tmpsv)[1] = *s;
4647                     curmad('_', tmpsv);
4648                 }
4649                 PL_bufptr = s + 1;
4650             }
4651             force_next(FUNC);
4652             if (PL_lex_starts) {
4653                 s = PL_bufptr;
4654                 PL_lex_starts = 0;
4655 #ifdef PERL_MAD
4656                 if (PL_madskills) {
4657                     if (PL_thistoken)
4658                         sv_free(PL_thistoken);
4659                     PL_thistoken = newSVpvs("");
4660                 }
4661 #endif
4662                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4663                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4664                     OPERATOR(',');
4665                 else
4666                     Aop(OP_CONCAT);
4667             }
4668             else
4669                 return yylex();
4670         }
4671
4672     case LEX_INTERPPUSH:
4673         return REPORT(sublex_push());
4674
4675     case LEX_INTERPSTART:
4676         if (PL_bufptr == PL_bufend)
4677             return REPORT(sublex_done());
4678         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4679               "### Interpolated variable\n"); });
4680         PL_expect = XTERM;
4681         PL_lex_dojoin = (*PL_bufptr == '@');
4682         PL_lex_state = LEX_INTERPNORMAL;
4683         if (PL_lex_dojoin) {
4684             start_force(PL_curforce);
4685             NEXTVAL_NEXTTOKE.ival = 0;
4686             force_next(',');
4687             start_force(PL_curforce);
4688             force_ident("\"", '$');
4689             start_force(PL_curforce);
4690             NEXTVAL_NEXTTOKE.ival = 0;
4691             force_next('$');
4692             start_force(PL_curforce);
4693             NEXTVAL_NEXTTOKE.ival = 0;
4694             force_next((2<<24)|'(');
4695             start_force(PL_curforce);
4696             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4697             force_next(FUNC);
4698         }
4699         /* Convert (?{...}) and friends to 'do {...}' */
4700         if (PL_lex_inpat && *PL_bufptr == '(') {
4701             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4702             PL_bufptr += 2;
4703             if (*PL_bufptr != '{')
4704                 PL_bufptr++;
4705             start_force(PL_curforce);
4706             /* XXX probably need a CURMAD(something) here */
4707             PL_expect = XTERMBLOCK;
4708             force_next(DO);
4709         }
4710
4711         if (PL_lex_starts++) {
4712             s = PL_bufptr;
4713 #ifdef PERL_MAD
4714             if (PL_madskills) {
4715                 if (PL_thistoken)
4716                     sv_free(PL_thistoken);
4717                 PL_thistoken = newSVpvs("");
4718             }
4719 #endif
4720             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4721             if (!PL_lex_casemods && PL_lex_inpat)
4722                 OPERATOR(',');
4723             else
4724                 Aop(OP_CONCAT);
4725         }
4726         return yylex();
4727
4728     case LEX_INTERPENDMAYBE:
4729         if (intuit_more(PL_bufptr)) {
4730             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4731             break;
4732         }
4733         /* FALL THROUGH */
4734
4735     case LEX_INTERPEND:
4736         if (PL_lex_dojoin) {
4737             PL_lex_dojoin = FALSE;
4738             PL_lex_state = LEX_INTERPCONCAT;
4739 #ifdef PERL_MAD
4740             if (PL_madskills) {
4741                 if (PL_thistoken)
4742                     sv_free(PL_thistoken);
4743                 PL_thistoken = newSVpvs("");
4744             }
4745 #endif
4746             PL_lex_allbrackets--;
4747             return REPORT(')');
4748         }
4749         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4750             && SvEVALED(PL_lex_repl))
4751         {
4752             if (PL_bufptr != PL_bufend)
4753                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4754             PL_lex_repl = NULL;
4755         }
4756         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4757            re_eval_str.  If the here-doc body’s length equals the previous
4758            value of re_eval_start, re_eval_start will now be null.  So
4759            check re_eval_str as well. */
4760         if (PL_parser->lex_shared->re_eval_start
4761          || PL_parser->lex_shared->re_eval_str) {
4762             SV *sv;
4763             if (*PL_bufptr != ')')
4764                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4765             PL_bufptr++;
4766             /* having compiled a (?{..}) expression, return the original
4767              * text too, as a const */
4768             if (PL_parser->lex_shared->re_eval_str) {
4769                 sv = PL_parser->lex_shared->re_eval_str;
4770                 PL_parser->lex_shared->re_eval_str = NULL;
4771                 SvCUR_set(sv,
4772                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4773                 SvPV_shrink_to_cur(sv);
4774             }
4775             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4776                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4777             start_force(PL_curforce);
4778             /* XXX probably need a CURMAD(something) here */
4779             NEXTVAL_NEXTTOKE.opval =
4780                     (OP*)newSVOP(OP_CONST, 0,
4781                                  sv);
4782             force_next(THING);
4783             PL_parser->lex_shared->re_eval_start = NULL;
4784             PL_expect = XTERM;
4785             return REPORT(',');
4786         }
4787
4788         /* FALLTHROUGH */
4789     case LEX_INTERPCONCAT:
4790 #ifdef DEBUGGING
4791         if (PL_lex_brackets)
4792             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4793                        (long) PL_lex_brackets);
4794 #endif
4795         if (PL_bufptr == PL_bufend)
4796             return REPORT(sublex_done());
4797
4798         /* m'foo' still needs to be parsed for possible (?{...}) */
4799         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4800             SV *sv = newSVsv(PL_linestr);
4801             sv = tokeq(sv);
4802             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4803             s = PL_bufend;
4804         }
4805         else {
4806             s = scan_const(PL_bufptr);
4807             if (*s == '\\')
4808                 PL_lex_state = LEX_INTERPCASEMOD;
4809             else
4810                 PL_lex_state = LEX_INTERPSTART;
4811         }
4812
4813         if (s != PL_bufptr) {
4814             start_force(PL_curforce);
4815             if (PL_madskills) {
4816                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4817             }
4818             NEXTVAL_NEXTTOKE = pl_yylval;
4819             PL_expect = XTERM;
4820             force_next(THING);
4821             if (PL_lex_starts++) {
4822 #ifdef PERL_MAD
4823                 if (PL_madskills) {
4824                     if (PL_thistoken)
4825                         sv_free(PL_thistoken);
4826                     PL_thistoken = newSVpvs("");
4827                 }
4828 #endif
4829                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4830                 if (!PL_lex_casemods && PL_lex_inpat)
4831                     OPERATOR(',');
4832                 else
4833                     Aop(OP_CONCAT);
4834             }
4835             else {
4836                 PL_bufptr = s;
4837                 return yylex();
4838             }
4839         }
4840
4841         return yylex();
4842     case LEX_FORMLINE:
4843         s = scan_formline(PL_bufptr);
4844         if (!PL_lex_formbrack)
4845         {
4846             formbrack = 1;
4847             goto rightbracket;
4848         }
4849         PL_bufptr = s;
4850         return yylex();
4851     }
4852
4853     s = PL_bufptr;
4854     PL_oldoldbufptr = PL_oldbufptr;
4855     PL_oldbufptr = s;
4856
4857   retry:
4858 #ifdef PERL_MAD
4859     if (PL_thistoken) {
4860         sv_free(PL_thistoken);
4861         PL_thistoken = 0;
4862     }
4863     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4864 #endif
4865     switch (*s) {
4866     default:
4867         if (isIDFIRST_lazy_if(s,UTF))
4868             goto keylookup;
4869         {
4870         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4871         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4872                                                     UTF8SKIP(s),
4873                                                     SVs_TEMP | SVf_UTF8),
4874                                             10, UNI_DISPLAY_ISPRINT))
4875                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4876         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4877         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4878             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4879         } else {
4880             d = PL_linestart;
4881         }
4882         *s = '\0';
4883         sv_setpv(dsv, d);
4884         if (UTF)
4885             SvUTF8_on(dsv);
4886         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4887     }
4888     case 4:
4889     case 26:
4890         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4891     case 0:
4892 #ifdef PERL_MAD
4893         if (PL_madskills)
4894             PL_faketokens = 0;
4895 #endif
4896         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4897             PL_last_uni = 0;
4898             PL_last_lop = 0;
4899             if (PL_lex_brackets &&
4900                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4901                 yyerror((const char *)
4902                         (PL_lex_formbrack
4903                          ? "Format not terminated"
4904                          : "Missing right curly or square bracket"));
4905             }
4906             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4907                         "### Tokener got EOF\n");
4908             } );
4909             TOKEN(0);
4910         }
4911         if (s++ < PL_bufend)
4912             goto retry;                 /* ignore stray nulls */
4913         PL_last_uni = 0;
4914         PL_last_lop = 0;
4915         if (!PL_in_eval && !PL_preambled) {
4916             PL_preambled = TRUE;
4917 #ifdef PERL_MAD
4918             if (PL_madskills)
4919                 PL_faketokens = 1;
4920 #endif
4921             if (PL_perldb) {
4922                 /* Generate a string of Perl code to load the debugger.
4923                  * If PERL5DB is set, it will return the contents of that,
4924                  * otherwise a compile-time require of perl5db.pl.  */
4925
4926                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4927
4928                 if (pdb) {
4929                     sv_setpv(PL_linestr, pdb);
4930                     sv_catpvs(PL_linestr,";");
4931                 } else {
4932                     SETERRNO(0,SS_NORMAL);
4933                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4934                 }
4935             } else
4936                 sv_setpvs(PL_linestr,"");
4937             if (PL_preambleav) {
4938                 SV **svp = AvARRAY(PL_preambleav);
4939                 SV **const end = svp + AvFILLp(PL_preambleav);
4940                 while(svp <= end) {
4941                     sv_catsv(PL_linestr, *svp);
4942                     ++svp;
4943                     sv_catpvs(PL_linestr, ";");
4944                 }
4945                 sv_free(MUTABLE_SV(PL_preambleav));
4946                 PL_preambleav = NULL;
4947             }
4948             if (PL_minus_E)
4949                 sv_catpvs(PL_linestr,
4950                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4951             if (PL_minus_n || PL_minus_p) {
4952                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4953                 if (PL_minus_l)
4954                     sv_catpvs(PL_linestr,"chomp;");
4955                 if (PL_minus_a) {
4956                     if (PL_minus_F) {
4957                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4958                              || *PL_splitstr == '"')
4959                               && strchr(PL_splitstr + 1, *PL_splitstr))
4960                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4961                         else {
4962                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4963                                bytes can be used as quoting characters.  :-) */
4964                             const char *splits = PL_splitstr;
4965                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4966                             do {
4967                                 /* Need to backslash-escape backslashes */
4968                                 if (*splits == '\\')
4969                                     sv_catpvn(PL_linestr, splits, 1);
4970                                 sv_catpvn(PL_linestr, splits, 1);
4971                             } while (*splits++);
4972                             /* This loop will append the trailing NUL of
4973                                PL_splitstr to PL_linestr as the last thing
4974                                it does before terminating.  */
4975                             sv_catpvs(PL_linestr, ");");
4976                         }
4977                     }
4978                     else
4979                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4980                 }
4981             }
4982             sv_catpvs(PL_linestr, "\n");
4983             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4984             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4985             PL_last_lop = PL_last_uni = NULL;
4986             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4987                 update_debugger_info(PL_linestr, NULL, 0);
4988             goto retry;
4989         }
4990         do {
4991             fake_eof = 0;
4992             bof = PL_rsfp ? TRUE : FALSE;
4993             if (0) {
4994               fake_eof:
4995                 fake_eof = LEX_FAKE_EOF;
4996             }
4997             PL_bufptr = PL_bufend;
4998             COPLINE_INC_WITH_HERELINES;
4999             if (!lex_next_chunk(fake_eof)) {
5000                 CopLINE_dec(PL_curcop);
5001                 s = PL_bufptr;
5002                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5003             }
5004             CopLINE_dec(PL_curcop);
5005 #ifdef PERL_MAD
5006             if (!PL_rsfp)
5007                 PL_realtokenstart = -1;
5008 #endif
5009             s = PL_bufptr;
5010             /* If it looks like the start of a BOM or raw UTF-16,
5011              * check if it in fact is. */
5012             if (bof && PL_rsfp &&
5013                      (*s == 0 ||
5014                       *(U8*)s == 0xEF ||
5015                       *(U8*)s >= 0xFE ||
5016                       s[1] == 0)) {
5017                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5018                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5019 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5020                 /* offset may include swallowed CR */
5021                 if (!bof)
5022                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5023 #endif
5024                 if (bof) {
5025                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5026                     s = swallow_bom((U8*)s);
5027                 }
5028             }
5029             if (PL_parser->in_pod) {
5030                 /* Incest with pod. */
5031 #ifdef PERL_MAD
5032                 if (PL_madskills)
5033                     sv_catsv(PL_thiswhite, PL_linestr);
5034 #endif
5035                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5036                     sv_setpvs(PL_linestr, "");
5037                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5038                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5039                     PL_last_lop = PL_last_uni = NULL;
5040                     PL_parser->in_pod = 0;
5041                 }
5042             }
5043             if (PL_rsfp || PL_parser->filtered)
5044                 incline(s);
5045         } while (PL_parser->in_pod);
5046         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5047         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5048         PL_last_lop = PL_last_uni = NULL;
5049         if (CopLINE(PL_curcop) == 1) {
5050             while (s < PL_bufend && isSPACE(*s))
5051                 s++;
5052             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5053                 s++;
5054 #ifdef PERL_MAD
5055             if (PL_madskills)
5056                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5057 #endif
5058             d = NULL;
5059             if (!PL_in_eval) {
5060                 if (*s == '#' && *(s+1) == '!')
5061                     d = s + 2;
5062 #ifdef ALTERNATE_SHEBANG
5063                 else {
5064                     static char const as[] = ALTERNATE_SHEBANG;
5065                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5066                         d = s + (sizeof(as) - 1);
5067                 }
5068 #endif /* ALTERNATE_SHEBANG */
5069             }
5070             if (d) {
5071                 char *ipath;
5072                 char *ipathend;
5073
5074                 while (isSPACE(*d))
5075                     d++;
5076                 ipath = d;
5077                 while (*d && !isSPACE(*d))
5078                     d++;
5079                 ipathend = d;
5080
5081 #ifdef ARG_ZERO_IS_SCRIPT
5082                 if (ipathend > ipath) {
5083                     /*
5084                      * HP-UX (at least) sets argv[0] to the script name,
5085                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5086                      * at least, set argv[0] to the basename of the Perl
5087                      * interpreter. So, having found "#!", we'll set it right.
5088                      */
5089                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5090                                                     SVt_PV)); /* $^X */
5091                     assert(SvPOK(x) || SvGMAGICAL(x));
5092                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5093                         sv_setpvn(x, ipath, ipathend - ipath);
5094                         SvSETMAGIC(x);
5095                     }
5096                     else {
5097                         STRLEN blen;
5098                         STRLEN llen;
5099                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5100                         const char * const lstart = SvPV_const(x,llen);
5101                         if (llen < blen) {
5102                             bstart += blen - llen;
5103                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5104                                 sv_setpvn(x, ipath, ipathend - ipath);
5105                                 SvSETMAGIC(x);
5106                             }
5107                         }
5108                     }
5109                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5110                 }
5111 #endif /* ARG_ZERO_IS_SCRIPT */
5112
5113                 /*
5114                  * Look for options.
5115                  */
5116                 d = instr(s,"perl -");
5117                 if (!d) {
5118                     d = instr(s,"perl");
5119 #if defined(DOSISH)
5120                     /* avoid getting into infinite loops when shebang
5121                      * line contains "Perl" rather than "perl" */
5122                     if (!d) {
5123                         for (d = ipathend-4; d >= ipath; --d) {
5124                             if ((*d == 'p' || *d == 'P')
5125                                 && !ibcmp(d, "perl", 4))
5126                             {
5127                                 break;
5128                             }
5129                         }
5130                         if (d < ipath)
5131                             d = NULL;
5132                     }
5133 #endif
5134                 }
5135 #ifdef ALTERNATE_SHEBANG
5136                 /*
5137                  * If the ALTERNATE_SHEBANG on this system starts with a
5138                  * character that can be part of a Perl expression, then if
5139                  * we see it but not "perl", we're probably looking at the
5140                  * start of Perl code, not a request to hand off to some
5141                  * other interpreter.  Similarly, if "perl" is there, but
5142                  * not in the first 'word' of the line, we assume the line
5143                  * contains the start of the Perl program.
5144                  */
5145                 if (d && *s != '#') {
5146                     const char *c = ipath;
5147                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5148                         c++;
5149                     if (c < d)
5150                         d = NULL;       /* "perl" not in first word; ignore */
5151                     else
5152                         *s = '#';       /* Don't try to parse shebang line */
5153                 }
5154 #endif /* ALTERNATE_SHEBANG */
5155                 if (!d &&
5156                     *s == '#' &&
5157                     ipathend > ipath &&
5158                     !PL_minus_c &&
5159                     !instr(s,"indir") &&
5160                     instr(PL_origargv[0],"perl"))
5161                 {
5162                     dVAR;
5163                     char **newargv;
5164
5165                     *ipathend = '\0';
5166                     s = ipathend + 1;
5167                     while (s < PL_bufend && isSPACE(*s))
5168                         s++;
5169                     if (s < PL_bufend) {
5170                         Newx(newargv,PL_origargc+3,char*);
5171                         newargv[1] = s;
5172                         while (s < PL_bufend && !isSPACE(*s))
5173                             s++;
5174                         *s = '\0';
5175                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5176                     }
5177                     else
5178                         newargv = PL_origargv;
5179                     newargv[0] = ipath;
5180                     PERL_FPU_PRE_EXEC
5181                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5182                     PERL_FPU_POST_EXEC
5183                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5184                 }
5185                 if (d) {
5186                     while (*d && !isSPACE(*d))
5187                         d++;
5188                     while (SPACE_OR_TAB(*d))
5189                         d++;
5190
5191                     if (*d++ == '-') {
5192                         const bool switches_done = PL_doswitches;
5193                         const U32 oldpdb = PL_perldb;
5194                         const bool oldn = PL_minus_n;
5195                         const bool oldp = PL_minus_p;
5196                         const char *d1 = d;
5197
5198                         do {
5199                             bool baduni = FALSE;
5200                             if (*d1 == 'C') {
5201                                 const char *d2 = d1 + 1;
5202                                 if (parse_unicode_opts((const char **)&d2)
5203                                     != PL_unicode)
5204                                     baduni = TRUE;
5205                             }
5206                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5207                                 const char * const m = d1;
5208                                 while (*d1 && !isSPACE(*d1))
5209                                     d1++;
5210                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5211                                       (int)(d1 - m), m);
5212                             }
5213                             d1 = moreswitches(d1);
5214                         } while (d1);
5215                         if (PL_doswitches && !switches_done) {
5216                             int argc = PL_origargc;
5217                             char **argv = PL_origargv;
5218                             do {
5219                                 argc--,argv++;
5220                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5221                             init_argv_symbols(argc,argv);
5222                         }
5223                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5224                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5225                               /* if we have already added "LINE: while (<>) {",
5226                                  we must not do it again */
5227                         {
5228                             sv_setpvs(PL_linestr, "");
5229                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5230                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5231                             PL_last_lop = PL_last_uni = NULL;
5232                             PL_preambled = FALSE;
5233                             if (PERLDB_LINE || PERLDB_SAVESRC)
5234                                 (void)gv_fetchfile(PL_origfilename);
5235                             goto retry;
5236                         }
5237                     }
5238                 }
5239             }
5240         }
5241         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5242             PL_lex_state = LEX_FORMLINE;
5243             start_force(PL_curforce);
5244             NEXTVAL_NEXTTOKE.ival = 0;
5245             force_next(FORMRBRACK);
5246             TOKEN(';');
5247         }
5248         goto retry;
5249     case '\r':
5250 #ifdef PERL_STRICT_CR
5251         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5252         Perl_croak(aTHX_
5253       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5254 #endif
5255     case ' ': case '\t': case '\f': case 013:
5256 #ifdef PERL_MAD
5257         PL_realtokenstart = -1;
5258         if (!PL_thiswhite)
5259             PL_thiswhite = newSVpvs("");
5260         sv_catpvn(PL_thiswhite, s, 1);
5261 #endif
5262         s++;
5263         goto retry;
5264     case '#':
5265     case '\n':
5266 #ifdef PERL_MAD
5267         PL_realtokenstart = -1;
5268         if (PL_madskills)
5269             PL_faketokens = 0;
5270 #endif
5271         if (PL_lex_state != LEX_NORMAL ||
5272              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5273             if (*s == '#' && s == PL_linestart && PL_in_eval
5274              && !PL_rsfp && !PL_parser->filtered) {
5275                 /* handle eval qq[#line 1 "foo"\n ...] */
5276                 CopLINE_dec(PL_curcop);
5277                 incline(s);
5278             }
5279             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5280                 s = SKIPSPACE0(s);
5281                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5282                     incline(s);
5283             }
5284             else {
5285                 const bool in_comment = *s == '#';
5286                 d = s;
5287                 while (d < PL_bufend && *d != '\n')
5288                     d++;
5289                 if (d < PL_bufend)
5290                     d++;
5291                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5292                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5293                                d, PL_bufend);
5294 #ifdef PERL_MAD
5295                 if (PL_madskills)
5296                     PL_thiswhite = newSVpvn(s, d - s);
5297 #endif
5298                 s = d;
5299                 if (in_comment && d == PL_bufend
5300                  && PL_lex_state == LEX_INTERPNORMAL
5301                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5302                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5303                 else incline(s);
5304             }
5305             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5306                 PL_lex_state = LEX_FORMLINE;
5307                 start_force(PL_curforce);
5308                 NEXTVAL_NEXTTOKE.ival = 0;
5309                 force_next(FORMRBRACK);
5310                 TOKEN(';');
5311             }
5312         }
5313         else {
5314 #ifdef PERL_MAD
5315             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5316                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5317                     PL_faketokens = 0;
5318                     s = SKIPSPACE0(s);
5319                     TOKEN(PEG); /* make sure any #! line is accessible */
5320                 }
5321                 s = SKIPSPACE0(s);
5322             }
5323             else {
5324 /*              if (PL_madskills && PL_lex_formbrack) { */
5325                     d = s;
5326                     while (d < PL_bufend && *d != '\n')
5327                         d++;
5328                     if (d < PL_bufend)
5329                         d++;
5330                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5331                       Perl_croak(aTHX_ "panic: input overflow");
5332                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5333                         if (!PL_thiswhite)
5334                             PL_thiswhite = newSVpvs("");
5335                         if (CopLINE(PL_curcop) == 1) {
5336                             sv_setpvs(PL_thiswhite, "");
5337                             PL_faketokens = 0;
5338                         }
5339                         sv_catpvn(PL_thiswhite, s, d - s);
5340                     }
5341                     s = d;
5342 /*              }
5343                 *s = '\0';
5344                 PL_bufend = s; */
5345             }
5346 #else
5347             *s = '\0';
5348             PL_bufend = s;
5349 #endif
5350         }
5351         goto retry;
5352     case '-':
5353         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5354             I32 ftst = 0;
5355             char tmp;
5356
5357             s++;
5358             PL_bufptr = s;
5359             tmp = *s++;
5360
5361             while (s < PL_bufend && SPACE_OR_TAB(*s))
5362                 s++;
5363
5364             if (strnEQ(s,"=>",2)) {
5365                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5366                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5367                 OPERATOR('-');          /* unary minus */
5368             }
5369             PL_last_uni = PL_oldbufptr;
5370             switch (tmp) {
5371             case 'r': ftst = OP_FTEREAD;        break;
5372             case 'w': ftst = OP_FTEWRITE;       break;
5373             case 'x': ftst = OP_FTEEXEC;        break;
5374             case 'o': ftst = OP_FTEOWNED;       break;
5375             case 'R': ftst = OP_FTRREAD;        break;
5376             case 'W': ftst = OP_FTRWRITE;       break;
5377             case 'X': ftst = OP_FTREXEC;        break;
5378             case 'O': ftst = OP_FTROWNED;       break;
5379             case 'e': ftst = OP_FTIS;           break;
5380             case 'z': ftst = OP_FTZERO;         break;
5381             case 's': ftst = OP_FTSIZE;         break;
5382             case 'f': ftst = OP_FTFILE;         break;
5383             case 'd': ftst = OP_FTDIR;          break;
5384             case 'l': ftst = OP_FTLINK;         break;
5385             case 'p': ftst = OP_FTPIPE;         break;
5386             case 'S': ftst = OP_FTSOCK;         break;
5387             case 'u': ftst = OP_FTSUID;         break;
5388             case 'g': ftst = OP_FTSGID;         break;
5389             case 'k': ftst = OP_FTSVTX;         break;
5390             case 'b': ftst = OP_FTBLK;          break;
5391             case 'c': ftst = OP_FTCHR;          break;
5392             case 't': ftst = OP_FTTTY;          break;
5393             case 'T': ftst = OP_FTTEXT;         break;
5394             case 'B': ftst = OP_FTBINARY;       break;
5395             case 'M': case 'A': case 'C':
5396                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5397                 switch (tmp) {
5398                 case 'M': ftst = OP_FTMTIME;    break;
5399                 case 'A': ftst = OP_FTATIME;    break;
5400                 case 'C': ftst = OP_FTCTIME;    break;
5401                 default:                        break;
5402                 }
5403                 break;
5404             default:
5405                 break;
5406             }
5407             if (ftst) {
5408                 PL_last_lop_op = (OPCODE)ftst;
5409                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5410                         "### Saw file test %c\n", (int)tmp);
5411                 } );
5412                 FTST(ftst);
5413             }
5414             else {
5415                 /* Assume it was a minus followed by a one-letter named
5416                  * subroutine call (or a -bareword), then. */
5417                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5418                         "### '-%c' looked like a file test but was not\n",
5419                         (int) tmp);
5420                 } );
5421                 s = --PL_bufptr;
5422             }
5423         }
5424         {
5425             const char tmp = *s++;
5426             if (*s == tmp) {
5427                 s++;
5428                 if (PL_expect == XOPERATOR)
5429                     TERM(POSTDEC);
5430                 else
5431                     OPERATOR(PREDEC);
5432             }
5433             else if (*s == '>') {
5434                 s++;
5435                 s = SKIPSPACE1(s);
5436                 if (isIDFIRST_lazy_if(s,UTF)) {
5437                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5438                     TOKEN(ARROW);
5439                 }
5440                 else if (*s == '$')
5441                     OPERATOR(ARROW);
5442                 else
5443                     TERM(ARROW);
5444             }
5445             if (PL_expect == XOPERATOR) {
5446                 if (*s == '=' && !PL_lex_allbrackets &&
5447                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5448                     s--;
5449                     TOKEN(0);
5450                 }
5451                 Aop(OP_SUBTRACT);
5452             }
5453             else {
5454                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5455                     check_uni();
5456                 OPERATOR('-');          /* unary minus */
5457             }
5458         }
5459
5460     case '+':
5461         {
5462             const char tmp = *s++;
5463             if (*s == tmp) {
5464                 s++;
5465                 if (PL_expect == XOPERATOR)
5466                     TERM(POSTINC);
5467                 else
5468                     OPERATOR(PREINC);
5469             }
5470             if (PL_expect == XOPERATOR) {
5471                 if (*s == '=' && !PL_lex_allbrackets &&
5472                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5473                     s--;
5474                     TOKEN(0);
5475                 }
5476                 Aop(OP_ADD);
5477             }
5478             else {
5479                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5480                     check_uni();
5481                 OPERATOR('+');
5482             }
5483         }
5484
5485     case '*':
5486         if (PL_expect != XOPERATOR) {
5487             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5488             PL_expect = XOPERATOR;
5489             force_ident(PL_tokenbuf, '*');
5490             if (!*PL_tokenbuf)
5491                 PREREF('*');
5492             TERM('*');
5493         }
5494         s++;
5495         if (*s == '*') {
5496             s++;
5497             if (*s == '=' && !PL_lex_allbrackets &&
5498                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5499                 s -= 2;
5500                 TOKEN(0);
5501             }
5502             PWop(OP_POW);
5503         }
5504         if (*s == '=' && !PL_lex_allbrackets &&
5505                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5506             s--;
5507             TOKEN(0);
5508         }
5509         Mop(OP_MULTIPLY);
5510
5511     case '%':
5512         if (PL_expect == XOPERATOR) {
5513             if (s[1] == '=' && !PL_lex_allbrackets &&
5514                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5515                 TOKEN(0);
5516             ++s;
5517             Mop(OP_MODULO);
5518         }
5519         PL_tokenbuf[0] = '%';
5520         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5521                 sizeof PL_tokenbuf - 1, FALSE);
5522         if (!PL_tokenbuf[1]) {
5523             PREREF('%');
5524         }
5525         PL_expect = XOPERATOR;
5526         force_ident_maybe_lex('%');
5527         TERM('%');
5528
5529     case '^':
5530         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5531                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5532             TOKEN(0);
5533         s++;
5534         BOop(OP_BIT_XOR);
5535     case '[':
5536         if (PL_lex_brackets > 100)
5537             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5538         PL_lex_brackstack[PL_lex_brackets++] = 0;
5539         PL_lex_allbrackets++;
5540         {
5541             const char tmp = *s++;
5542             OPERATOR(tmp);
5543         }
5544     case '~':
5545         if (s[1] == '~'
5546             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5547         {
5548             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5549                 TOKEN(0);
5550             s += 2;
5551             Eop(OP_SMARTMATCH);
5552         }
5553         s++;
5554         OPERATOR('~');
5555     case ',':
5556         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5557             TOKEN(0);
5558         s++;
5559         OPERATOR(',');
5560     case ':':
5561         if (s[1] == ':') {
5562             len = 0;
5563             goto just_a_word_zero_gv;
5564         }
5565         s++;
5566         switch (PL_expect) {
5567             OP *attrs;
5568 #ifdef PERL_MAD
5569             I32 stuffstart;
5570 #endif
5571         case XOPERATOR:
5572             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5573                 break;
5574             PL_bufptr = s;      /* update in case we back off */
5575             if (*s == '=') {
5576                 Perl_croak(aTHX_
5577                            "Use of := for an empty attribute list is not allowed");
5578             }
5579             goto grabattrs;
5580         case XATTRBLOCK:
5581             PL_expect = XBLOCK;
5582             goto grabattrs;
5583         case XATTRTERM:
5584             PL_expect = XTERMBLOCK;
5585          grabattrs:
5586 #ifdef PERL_MAD
5587             stuffstart = s - SvPVX(PL_linestr) - 1;
5588 #endif
5589             s = PEEKSPACE(s);
5590             attrs = NULL;
5591             while (isIDFIRST_lazy_if(s,UTF)) {
5592                 I32 tmp;
5593                 SV *sv;
5594                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5595                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5596                     if (tmp < 0) tmp = -tmp;
5597                     switch (tmp) {
5598                     case KEY_or:
5599                     case KEY_and:
5600                     case KEY_for:
5601                     case KEY_foreach:
5602                     case KEY_unless:
5603                     case KEY_if:
5604                     case KEY_while:
5605                     case KEY_until:
5606                         goto got_attrs;
5607                     default:
5608                         break;
5609                     }
5610                 }
5611                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5612                 if (*d == '(') {
5613                     d = scan_str(d,TRUE,TRUE,FALSE);
5614                     if (!d) {
5615                         /* MUST advance bufptr here to avoid bogus
5616                            "at end of line" context messages from yyerror().
5617                          */
5618                         PL_bufptr = s + len;
5619                         yyerror("Unterminated attribute parameter in attribute list");
5620                         if (attrs)
5621                             op_free(attrs);
5622                         sv_free(sv);
5623                         return REPORT(0);       /* EOF indicator */
5624                     }
5625                 }
5626                 if (PL_lex_stuff) {
5627                     sv_catsv(sv, PL_lex_stuff);
5628                     attrs = op_append_elem(OP_LIST, attrs,
5629                                         newSVOP(OP_CONST, 0, sv));
5630                     SvREFCNT_dec(PL_lex_stuff);
5631                     PL_lex_stuff = NULL;
5632                 }
5633                 else {
5634                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5635                         sv_free(sv);
5636                         if (PL_in_my == KEY_our) {
5637                             deprecate(":unique");
5638                         }
5639                         else
5640                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5641                     }
5642
5643                     /* NOTE: any CV attrs applied here need to be part of
5644                        the CVf_BUILTIN_ATTRS define in cv.h! */
5645                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5646                         sv_free(sv);
5647                         CvLVALUE_on(PL_compcv);
5648                     }
5649                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5650                         sv_free(sv);
5651                         deprecate(":locked");
5652                     }
5653                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5654                         sv_free(sv);
5655                         CvMETHOD_on(PL_compcv);
5656                     }
5657                     /* After we've set the flags, it could be argued that
5658                        we don't need to do the attributes.pm-based setting
5659                        process, and shouldn't bother appending recognized
5660                        flags.  To experiment with that, uncomment the
5661                        following "else".  (Note that's already been
5662                        uncommented.  That keeps the above-applied built-in
5663                        attributes from being intercepted (and possibly
5664                        rejected) by a package's attribute routines, but is
5665                        justified by the performance win for the common case
5666                        of applying only built-in attributes.) */
5667                     else
5668                         attrs = op_append_elem(OP_LIST, attrs,
5669                                             newSVOP(OP_CONST, 0,
5670                                                     sv));
5671                 }
5672                 s = PEEKSPACE(d);
5673                 if (*s == ':' && s[1] != ':')
5674                     s = PEEKSPACE(s+1);
5675                 else if (s == d)
5676                     break;      /* require real whitespace or :'s */
5677                 /* XXX losing whitespace on sequential attributes here */
5678             }
5679             {
5680                 const char tmp
5681                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5682                 if (*s != ';' && *s != '}' && *s != tmp
5683                     && (tmp != '=' || *s != ')')) {
5684                     const char q = ((*s == '\'') ? '"' : '\'');
5685                     /* If here for an expression, and parsed no attrs, back
5686                        off. */
5687                     if (tmp == '=' && !attrs) {
5688                         s = PL_bufptr;
5689                         break;
5690                     }
5691                     /* MUST advance bufptr here to avoid bogus "at end of line"
5692                        context messages from yyerror().
5693                     */
5694                     PL_bufptr = s;
5695                     yyerror( (const char *)
5696                              (*s
5697                               ? Perl_form(aTHX_ "Invalid separator character "
5698                                           "%c%c%c in attribute list", q, *s, q)
5699                               : "Unterminated attribute list" ) );
5700                     if (attrs)
5701                         op_free(attrs);
5702                     OPERATOR(':');
5703                 }
5704             }
5705         got_attrs:
5706             if (attrs) {
5707                 start_force(PL_curforce);
5708                 NEXTVAL_NEXTTOKE.opval = attrs;
5709                 CURMAD('_', PL_nextwhite);
5710                 force_next(THING);
5711             }
5712 #ifdef PERL_MAD
5713             if (PL_madskills) {
5714                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5715                                      (s - SvPVX(PL_linestr)) - stuffstart);
5716             }
5717 #endif
5718             TOKEN(COLONATTR);
5719         }
5720         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5721             s--;
5722             TOKEN(0);
5723         }
5724         PL_lex_allbrackets--;
5725         OPERATOR(':');
5726     case '(':
5727         s++;
5728         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5729             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5730         else
5731             PL_expect = XTERM;
5732         s = SKIPSPACE1(s);
5733         PL_lex_allbrackets++;
5734         TOKEN('(');
5735     case ';':
5736         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5737             TOKEN(0);
5738         CLINE;
5739         s++;
5740         OPERATOR(';');
5741     case ')':
5742         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5743             TOKEN(0);
5744         s++;
5745         PL_lex_allbrackets--;
5746         s = SKIPSPACE1(s);
5747         if (*s == '{')
5748             PREBLOCK(')');
5749         TERM(')');
5750     case ']':
5751         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5752             TOKEN(0);
5753         s++;
5754         if (PL_lex_brackets <= 0)
5755             yyerror("Unmatched right square bracket");
5756         else
5757             --PL_lex_brackets;
5758         PL_lex_allbrackets--;
5759         if (PL_lex_state == LEX_INTERPNORMAL) {
5760             if (PL_lex_brackets == 0) {
5761                 if (*s == '-' && s[1] == '>')
5762                     PL_lex_state = LEX_INTERPENDMAYBE;
5763                 else if (*s != '[' && *s != '{')
5764                     PL_lex_state = LEX_INTERPEND;
5765             }
5766         }
5767         TERM(']');
5768     case '{':
5769         s++;
5770       leftbracket:
5771         if (PL_lex_brackets > 100) {
5772             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5773         }
5774         switch (PL_expect) {
5775         case XTERM:
5776             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5777             PL_lex_allbrackets++;
5778             OPERATOR(HASHBRACK);
5779         case XOPERATOR:
5780             while (s < PL_bufend && SPACE_OR_TAB(*s))
5781                 s++;
5782             d = s;
5783             PL_tokenbuf[0] = '\0';
5784             if (d < PL_bufend && *d == '-') {
5785                 PL_tokenbuf[0] = '-';
5786                 d++;
5787                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5788                     d++;
5789             }
5790             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5791                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5792                               FALSE, &len);
5793                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5794                     d++;
5795                 if (*d == '}') {
5796                     const char minus = (PL_tokenbuf[0] == '-');
5797                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5798                     if (minus)
5799                         force_next('-');
5800                 }
5801             }
5802             /* FALL THROUGH */
5803         case XATTRBLOCK:
5804         case XBLOCK:
5805             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5806             PL_lex_allbrackets++;
5807             PL_expect = XSTATE;
5808             break;
5809         case XATTRTERM:
5810         case XTERMBLOCK:
5811             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5812             PL_lex_allbrackets++;
5813             PL_expect = XSTATE;
5814             break;
5815         default: {
5816                 const char *t;
5817                 if (PL_oldoldbufptr == PL_last_lop)
5818                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5819                 else
5820                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5821                 PL_lex_allbrackets++;
5822                 s = SKIPSPACE1(s);
5823                 if (*s == '}') {
5824                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5825                         PL_expect = XTERM;
5826                         /* This hack is to get the ${} in the message. */
5827                         PL_bufptr = s+1;
5828                         yyerror("syntax error");
5829                         break;
5830                     }
5831                     OPERATOR(HASHBRACK);
5832                 }
5833                 /* This hack serves to disambiguate a pair of curlies
5834                  * as being a block or an anon hash.  Normally, expectation
5835                  * determines that, but in cases where we're not in a
5836                  * position to expect anything in particular (like inside
5837                  * eval"") we have to resolve the ambiguity.  This code
5838                  * covers the case where the first term in the curlies is a
5839                  * quoted string.  Most other cases need to be explicitly
5840                  * disambiguated by prepending a "+" before the opening
5841                  * curly in order to force resolution as an anon hash.
5842                  *
5843                  * XXX should probably propagate the outer expectation
5844                  * into eval"" to rely less on this hack, but that could
5845                  * potentially break current behavior of eval"".
5846                  * GSAR 97-07-21
5847                  */
5848                 t = s;
5849                 if (*s == '\'' || *s == '"' || *s == '`') {
5850                     /* common case: get past first string, handling escapes */
5851                     for (t++; t < PL_bufend && *t != *s;)
5852                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5853                             t++;
5854                     t++;
5855                 }
5856                 else if (*s == 'q') {
5857                     if (++t < PL_bufend
5858                         && (!isALNUM(*t)
5859                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5860                                 && !isALNUM(*t))))
5861                     {
5862                         /* skip q//-like construct */
5863                         const char *tmps;
5864                         char open, close, term;
5865                         I32 brackets = 1;
5866
5867                         while (t < PL_bufend && isSPACE(*t))
5868                             t++;
5869                         /* check for q => */
5870                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5871                             OPERATOR(HASHBRACK);
5872                         }
5873                         term = *t;
5874                         open = term;
5875                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5876                             term = tmps[5];
5877                         close = term;
5878                         if (open == close)
5879                             for (t++; t < PL_bufend; t++) {
5880                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5881                                     t++;
5882                                 else if (*t == open)
5883                                     break;
5884                             }
5885                         else {
5886                             for (t++; t < PL_bufend; t++) {
5887                                 if (*t == '\\' && t+1 < PL_bufend)
5888                                     t++;
5889                                 else if (*t == close && --brackets <= 0)
5890                                     break;
5891                                 else if (*t == open)
5892                                     brackets++;
5893                             }
5894                         }
5895                         t++;
5896                     }
5897                     else
5898                         /* skip plain q word */
5899                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5900                              t += UTF8SKIP(t);
5901                 }
5902                 else if (isALNUM_lazy_if(t,UTF)) {
5903                     t += UTF8SKIP(t);
5904                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5905                          t += UTF8SKIP(t);
5906                 }
5907                 while (t < PL_bufend && isSPACE(*t))
5908                     t++;
5909                 /* if comma follows first term, call it an anon hash */
5910                 /* XXX it could be a comma expression with loop modifiers */
5911                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5912                                    || (*t == '=' && t[1] == '>')))
5913                     OPERATOR(HASHBRACK);
5914                 if (PL_expect == XREF)
5915                     PL_expect = XTERM;
5916                 else {
5917                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5918                     PL_expect = XSTATE;
5919                 }
5920             }
5921             break;
5922         }
5923         pl_yylval.ival = CopLINE(PL_curcop);
5924         if (isSPACE(*s) || *s == '#')
5925             PL_copline = NOLINE;   /* invalidate current command line number */
5926         TOKEN(formbrack ? '=' : '{');
5927     case '}':
5928         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5929             TOKEN(0);
5930       rightbracket:
5931         s++;
5932         if (PL_lex_brackets <= 0)
5933             yyerror("Unmatched right curly bracket");
5934         else
5935             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5936         PL_lex_allbrackets--;
5937         if (PL_lex_state == LEX_INTERPNORMAL) {
5938             if (PL_lex_brackets == 0) {
5939                 if (PL_expect & XFAKEBRACK) {
5940                     PL_expect &= XENUMMASK;
5941                     PL_lex_state = LEX_INTERPEND;
5942                     PL_bufptr = s;
5943 #if 0
5944                     if (PL_madskills) {
5945                         if (!PL_thiswhite)
5946                             PL_thiswhite = newSVpvs("");
5947                         sv_catpvs(PL_thiswhite,"}");
5948                     }
5949 #endif
5950                     return yylex();     /* ignore fake brackets */
5951                 }
5952                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5953                  && SvEVALED(PL_lex_repl))
5954                     PL_lex_state = LEX_INTERPEND;
5955                 else if (*s == '-' && s[1] == '>')
5956                     PL_lex_state = LEX_INTERPENDMAYBE;
5957                 else if (*s != '[' && *s != '{')
5958                     PL_lex_state = LEX_INTERPEND;
5959             }
5960         }
5961         if (PL_expect & XFAKEBRACK) {
5962             PL_expect &= XENUMMASK;
5963             PL_bufptr = s;
5964             return yylex();             /* ignore fake brackets */
5965         }
5966         start_force(PL_curforce);
5967         if (PL_madskills) {
5968             curmad('X', newSVpvn(s-1,1));
5969             CURMAD('_', PL_thiswhite);
5970         }
5971         force_next(formbrack ? '.' : '}');
5972         if (formbrack) LEAVE;
5973 #ifdef PERL_MAD
5974         if (!PL_thistoken)
5975             PL_thistoken = newSVpvs("");
5976 #endif
5977         if (formbrack == 2) { /* means . where arguments were expected */
5978             start_force(PL_curforce);
5979             force_next(';');
5980             TOKEN(FORMRBRACK);
5981         }
5982         TOKEN(';');
5983     case '&':
5984         s++;
5985         if (*s++ == '&') {
5986             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5987                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5988                 s -= 2;
5989                 TOKEN(0);
5990             }
5991             AOPERATOR(ANDAND);
5992         }
5993         s--;
5994         if (PL_expect == XOPERATOR) {
5995             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5996                 && isIDFIRST_lazy_if(s,UTF))
5997             {
5998                 CopLINE_dec(PL_curcop);
5999                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6000                 CopLINE_inc(PL_curcop);
6001             }
6002             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6003                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6004                 s--;
6005                 TOKEN(0);
6006             }
6007             BAop(OP_BIT_AND);
6008         }
6009
6010         PL_tokenbuf[0] = '&';
6011         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6012                        sizeof PL_tokenbuf - 1, TRUE);
6013         if (PL_tokenbuf[1]) {
6014             PL_expect = XOPERATOR;
6015             force_ident_maybe_lex('&');
6016         }
6017         else
6018             PREREF('&');
6019         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6020         TERM('&');
6021
6022     case '|':
6023         s++;
6024         if (*s++ == '|') {
6025             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6026                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6027                 s -= 2;
6028                 TOKEN(0);
6029             }
6030             AOPERATOR(OROR);
6031         }
6032         s--;
6033         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6034                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6035             s--;
6036             TOKEN(0);
6037         }
6038         BOop(OP_BIT_OR);
6039     case '=':
6040         s++;
6041         {
6042             const char tmp = *s++;
6043             if (tmp == '=') {
6044                 if (!PL_lex_allbrackets &&
6045                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6046                     s -= 2;
6047                     TOKEN(0);
6048                 }
6049                 Eop(OP_EQ);
6050             }
6051             if (tmp == '>') {
6052                 if (!PL_lex_allbrackets &&
6053                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6054                     s -= 2;
6055                     TOKEN(0);
6056                 }
6057                 OPERATOR(',');
6058             }
6059             if (tmp == '~')
6060                 PMop(OP_MATCH);
6061             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6062                 && strchr("+-*/%.^&|<",tmp))
6063                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6064                             "Reversed %c= operator",(int)tmp);
6065             s--;
6066             if (PL_expect == XSTATE && isALPHA(tmp) &&
6067                 (s == PL_linestart+1 || s[-2] == '\n') )
6068                 {
6069                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6070                         || PL_lex_state != LEX_NORMAL) {
6071                         d = PL_bufend;
6072                         while (s < d) {
6073                             if (*s++ == '\n') {
6074                                 incline(s);
6075                                 if (strnEQ(s,"=cut",4)) {
6076                                     s = strchr(s,'\n');
6077                                     if (s)
6078                                         s++;
6079                                     else
6080                                         s = d;
6081                                     incline(s);
6082                                     goto retry;
6083                                 }
6084                             }
6085                         }
6086                         goto retry;
6087                     }
6088 #ifdef PERL_MAD
6089                     if (PL_madskills) {
6090                         if (!PL_thiswhite)
6091                             PL_thiswhite = newSVpvs("");
6092                         sv_catpvn(PL_thiswhite, PL_linestart,
6093                                   PL_bufend - PL_linestart);
6094                     }
6095 #endif
6096                     s = PL_bufend;
6097                     PL_parser->in_pod = 1;
6098                     goto retry;
6099                 }
6100         }
6101         if (PL_expect == XBLOCK) {
6102             const char *t = s;
6103 #ifdef PERL_STRICT_CR
6104             while (SPACE_OR_TAB(*t))
6105 #else
6106             while (SPACE_OR_TAB(*t) || *t == '\r')
6107 #endif
6108                 t++;
6109             if (*t == '\n' || *t == '#') {
6110                 formbrack = 1;
6111                 ENTER;
6112                 SAVEI8(PL_parser->form_lex_state);
6113                 SAVEI32(PL_lex_formbrack);
6114                 PL_parser->form_lex_state = PL_lex_state;
6115                 PL_lex_formbrack = PL_lex_brackets + 1;
6116                 goto leftbracket;
6117             }
6118         }
6119         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6120             s--;
6121             TOKEN(0);
6122         }
6123         pl_yylval.ival = 0;
6124         OPERATOR(ASSIGNOP);
6125     case '!':
6126         s++;
6127         {
6128             const char tmp = *s++;
6129             if (tmp == '=') {
6130                 /* was this !=~ where !~ was meant?
6131                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6132
6133                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6134                     const char *t = s+1;
6135
6136                     while (t < PL_bufend && isSPACE(*t))
6137                         ++t;
6138
6139                     if (*t == '/' || *t == '?' ||
6140                         ((*t == 'm' || *t == 's' || *t == 'y')
6141                          && !isALNUM(t[1])) ||
6142                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6143                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6144                                     "!=~ should be !~");
6145                 }
6146                 if (!PL_lex_allbrackets &&
6147                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6148                     s -= 2;
6149                     TOKEN(0);
6150                 }
6151                 Eop(OP_NE);
6152             }
6153             if (tmp == '~')
6154                 PMop(OP_NOT);
6155         }
6156         s--;
6157         OPERATOR('!');
6158     case '<':
6159         if (PL_expect != XOPERATOR) {
6160             if (s[1] != '<' && !strchr(s,'>'))
6161                 check_uni();
6162             if (s[1] == '<')
6163                 s = scan_heredoc(s);
6164             else
6165                 s = scan_inputsymbol(s);
6166             PL_expect = XOPERATOR;
6167             TOKEN(sublex_start());
6168         }
6169         s++;
6170         {
6171             char tmp = *s++;
6172             if (tmp == '<') {
6173                 if (*s == '=' && !PL_lex_allbrackets &&
6174                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6175                     s -= 2;
6176                     TOKEN(0);
6177                 }
6178                 SHop(OP_LEFT_SHIFT);
6179             }
6180             if (tmp == '=') {
6181                 tmp = *s++;
6182                 if (tmp == '>') {
6183                     if (!PL_lex_allbrackets &&
6184                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6185                         s -= 3;
6186                         TOKEN(0);
6187                     }
6188                     Eop(OP_NCMP);
6189                 }
6190                 s--;
6191                 if (!PL_lex_allbrackets &&
6192                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6193                     s -= 2;
6194                     TOKEN(0);
6195                 }
6196                 Rop(OP_LE);
6197             }
6198         }
6199         s--;
6200         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6201             s--;
6202             TOKEN(0);
6203         }
6204         Rop(OP_LT);
6205     case '>':
6206         s++;
6207         {
6208             const char tmp = *s++;
6209             if (tmp == '>') {
6210                 if (*s == '=' && !PL_lex_allbrackets &&
6211                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6212                     s -= 2;
6213                     TOKEN(0);
6214                 }
6215                 SHop(OP_RIGHT_SHIFT);
6216             }
6217             else if (tmp == '=') {
6218                 if (!PL_lex_allbrackets &&
6219                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6220                     s -= 2;
6221                     TOKEN(0);
6222                 }
6223                 Rop(OP_GE);
6224             }
6225         }
6226         s--;
6227         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6228             s--;
6229             TOKEN(0);
6230         }
6231         Rop(OP_GT);
6232
6233     case '$':
6234         CLINE;
6235
6236         if (PL_expect == XOPERATOR) {
6237             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6238                 return deprecate_commaless_var_list();
6239             }
6240         }
6241
6242         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6243             PL_tokenbuf[0] = '@';
6244             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6245                            sizeof PL_tokenbuf - 1, FALSE);
6246             if (PL_expect == XOPERATOR)
6247                 no_op("Array length", s);
6248             if (!PL_tokenbuf[1])
6249                 PREREF(DOLSHARP);
6250             PL_expect = XOPERATOR;
6251             force_ident_maybe_lex('#');
6252             TOKEN(DOLSHARP);
6253         }
6254
6255         PL_tokenbuf[0] = '$';
6256         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6257                        sizeof PL_tokenbuf - 1, FALSE);
6258         if (PL_expect == XOPERATOR)
6259             no_op("Scalar", s);
6260         if (!PL_tokenbuf[1]) {
6261             if (s == PL_bufend)
6262                 yyerror("Final $ should be \\$ or $name");
6263             PREREF('$');
6264         }
6265
6266         d = s;
6267         {
6268             const char tmp = *s;
6269             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6270                 s = SKIPSPACE1(s);
6271
6272             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6273                 && intuit_more(s)) {
6274                 if (*s == '[') {
6275                     PL_tokenbuf[0] = '@';
6276                     if (ckWARN(WARN_SYNTAX)) {
6277                         char *t = s+1;
6278
6279                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6280                             t++;
6281                         if (*t++ == ',') {
6282                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6283                             while (t < PL_bufend && *t != ']')
6284                                 t++;
6285                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6286                                         "Multidimensional syntax %.*s not supported",
6287                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6288                         }
6289                     }
6290                 }
6291                 else if (*s == '{') {
6292                     char *t;
6293                     PL_tokenbuf[0] = '%';
6294                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6295                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6296                         {
6297                             char tmpbuf[sizeof PL_tokenbuf];
6298                             do {
6299                                 t++;
6300                             } while (isSPACE(*t));
6301                             if (isIDFIRST_lazy_if(t,UTF)) {
6302                                 STRLEN len;
6303                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6304                                               &len);
6305                                 while (isSPACE(*t))
6306                                     t++;
6307                                 if (*t == ';'
6308                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6309                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6310                                                 "You need to quote \"%"SVf"\"",
6311                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6312                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6313                             }
6314                         }
6315                 }
6316             }
6317
6318             PL_expect = XOPERATOR;
6319             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6320                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6321                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6322                     PL_expect = XOPERATOR;
6323                 else if (strchr("$@\"'`q", *s))
6324                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6325                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6326                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6327                 else if (isIDFIRST_lazy_if(s,UTF)) {
6328                     char tmpbuf[sizeof PL_tokenbuf];
6329                     int t2;
6330                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6331                     if ((t2 = keyword(tmpbuf, len, 0))) {
6332                         /* binary operators exclude handle interpretations */
6333                         switch (t2) {
6334                         case -KEY_x:
6335                         case -KEY_eq:
6336                         case -KEY_ne:
6337                         case -KEY_gt:
6338                         case -KEY_lt:
6339                         case -KEY_ge:
6340                         case -KEY_le:
6341                         case -KEY_cmp:
6342                             break;
6343                         default:
6344                             PL_expect = XTERM;  /* e.g. print $fh length() */
6345                             break;
6346                         }
6347                     }
6348                     else {
6349                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6350                     }
6351                 }
6352                 else if (isDIGIT(*s))
6353                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6354                 else if (*s == '.' && isDIGIT(s[1]))
6355                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6356                 else if ((*s == '?' || *s == '-' || *s == '+')
6357                          && !isSPACE(s[1]) && s[1] != '=')
6358                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6359                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6360                          && s[1] != '/')
6361                     PL_expect = XTERM;          /* e.g. print $fh /.../
6362                                                    XXX except DORDOR operator
6363                                                 */
6364                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6365                          && s[2] != '=')
6366                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6367             }
6368         }
6369         force_ident_maybe_lex('$');
6370         TOKEN('$');
6371
6372     case '@':
6373         if (PL_expect == XOPERATOR)
6374             no_op("Array", s);
6375         PL_tokenbuf[0] = '@';
6376         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6377         if (!PL_tokenbuf[1]) {
6378             PREREF('@');
6379         }
6380         if (PL_lex_state == LEX_NORMAL)
6381             s = SKIPSPACE1(s);
6382         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6383             if (*s == '{')
6384                 PL_tokenbuf[0] = '%';
6385
6386             /* Warn about @ where they meant $. */
6387             if (*s == '[' || *s == '{') {
6388                 if (ckWARN(WARN_SYNTAX)) {
6389                     const char *t = s + 1;
6390                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6391                         t += UTF ? UTF8SKIP(t) : 1;
6392                     if (*t == '}' || *t == ']') {
6393                         t++;
6394                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6395        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6396                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6397                             "Scalar value %"SVf" better written as $%"SVf,
6398                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6399                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6400                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6401                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6402                     }
6403                 }
6404             }
6405         }
6406         PL_expect = XOPERATOR;
6407         force_ident_maybe_lex('@');
6408         TERM('@');
6409
6410      case '/':                  /* may be division, defined-or, or pattern */
6411         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6412             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6413                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6414                 TOKEN(0);
6415             s += 2;
6416             AOPERATOR(DORDOR);
6417         }
6418      case '?':                  /* may either be conditional or pattern */
6419         if (PL_expect == XOPERATOR) {
6420              char tmp = *s++;
6421              if(tmp == '?') {
6422                 if (!PL_lex_allbrackets &&
6423                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6424                     s--;
6425                     TOKEN(0);
6426                 }
6427                 PL_lex_allbrackets++;
6428                 OPERATOR('?');
6429              }
6430              else {
6431                  tmp = *s++;
6432                  if(tmp == '/') {
6433                      /* A // operator. */
6434                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6435                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6436                                             LEX_FAKEEOF_LOGIC)) {
6437                         s -= 2;
6438                         TOKEN(0);
6439                     }
6440                     AOPERATOR(DORDOR);
6441                  }
6442                  else {
6443                      s--;
6444                      if (*s == '=' && !PL_lex_allbrackets &&
6445                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6446                          s--;
6447                          TOKEN(0);
6448                      }
6449                      Mop(OP_DIVIDE);
6450                  }
6451              }
6452          }
6453          else {
6454              /* Disable warning on "study /blah/" */
6455              if (PL_oldoldbufptr == PL_last_uni
6456               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6457                   || memNE(PL_last_uni, "study", 5)
6458                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6459               ))
6460                  check_uni();
6461              if (*s == '?')
6462                  deprecate("?PATTERN? without explicit operator");
6463              s = scan_pat(s,OP_MATCH);
6464              TERM(sublex_start());
6465          }
6466
6467     case '.':
6468         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6469 #ifdef PERL_STRICT_CR
6470             && s[1] == '\n'
6471 #else
6472             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6473 #endif
6474             && (s == PL_linestart || s[-1] == '\n') )
6475         {
6476             PL_expect = XSTATE;
6477             formbrack = 2; /* dot seen where arguments expected */
6478             goto rightbracket;
6479         }
6480         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6481             s += 3;
6482             OPERATOR(YADAYADA);
6483         }
6484         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6485             char tmp = *s++;
6486             if (*s == tmp) {
6487                 if (!PL_lex_allbrackets &&
6488                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6489                     s--;
6490                     TOKEN(0);
6491                 }
6492                 s++;
6493                 if (*s == tmp) {
6494                     s++;
6495                     pl_yylval.ival = OPf_SPECIAL;
6496                 }
6497                 else
6498                     pl_yylval.ival = 0;
6499                 OPERATOR(DOTDOT);
6500             }
6501             if (*s == '=' && !PL_lex_allbrackets &&
6502                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6503                 s--;
6504                 TOKEN(0);
6505             }
6506             Aop(OP_CONCAT);
6507         }
6508         /* FALL THROUGH */
6509     case '0': case '1': case '2': case '3': case '4':
6510     case '5': case '6': case '7': case '8': case '9':
6511         s = scan_num(s, &pl_yylval);
6512         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6513         if (PL_expect == XOPERATOR)
6514             no_op("Number",s);
6515         TERM(THING);
6516
6517     case '\'':
6518         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6519         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6520         if (PL_expect == XOPERATOR) {
6521             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6522                 return deprecate_commaless_var_list();
6523             }
6524             else
6525                 no_op("String",s);
6526         }
6527         if (!s)
6528             missingterm(NULL);
6529         pl_yylval.ival = OP_CONST;
6530         TERM(sublex_start());
6531
6532     case '"':
6533         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6534         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6535         if (PL_expect == XOPERATOR) {
6536             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6537                 return deprecate_commaless_var_list();
6538             }
6539             else
6540                 no_op("String",s);
6541         }
6542         if (!s)
6543             missingterm(NULL);
6544         pl_yylval.ival = OP_CONST;
6545         /* FIXME. I think that this can be const if char *d is replaced by
6546            more localised variables.  */
6547         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6548             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6549                 pl_yylval.ival = OP_STRINGIFY;
6550                 break;
6551             }
6552         }
6553         TERM(sublex_start());
6554
6555     case '`':
6556         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6557         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6558         if (PL_expect == XOPERATOR)
6559             no_op("Backticks",s);
6560         if (!s)
6561             missingterm(NULL);
6562         readpipe_override();
6563         TERM(sublex_start());
6564
6565     case '\\':
6566         s++;
6567         if (PL_lex_inwhat && isDIGIT(*s))
6568             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6569                            *s, *s);
6570         if (PL_expect == XOPERATOR)
6571             no_op("Backslash",s);
6572         OPERATOR(REFGEN);
6573
6574     case 'v':
6575         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6576             char *start = s + 2;
6577             while (isDIGIT(*start) || *start == '_')
6578                 start++;
6579             if (*start == '.' && isDIGIT(start[1])) {
6580                 s = scan_num(s, &pl_yylval);
6581                 TERM(THING);
6582             }
6583             else if ((*start == ':' && start[1] == ':')
6584                   || (PL_expect == XSTATE && *start == ':'))
6585                 goto keylookup;
6586             else if (PL_expect == XSTATE) {
6587                 d = start;
6588                 while (d < PL_bufend && isSPACE(*d)) d++;
6589                 if (*d == ':') goto keylookup;
6590             }
6591             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6592             if (!isALPHA(*start) && (PL_expect == XTERM
6593                         || PL_expect == XREF || PL_expect == XSTATE
6594                         || PL_expect == XTERMORDORDOR)) {
6595                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6596                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6597                 if (!gv) {
6598                     s = scan_num(s, &pl_yylval);
6599                     TERM(THING);
6600                 }
6601             }
6602         }
6603         goto keylookup;
6604     case 'x':
6605         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6606             s++;
6607             Mop(OP_REPEAT);
6608         }
6609         goto keylookup;
6610
6611     case '_':
6612     case 'a': case 'A':
6613     case 'b': case 'B':
6614     case 'c': case 'C':
6615     case 'd': case 'D':
6616     case 'e': case 'E':
6617     case 'f': case 'F':
6618     case 'g': case 'G':
6619     case 'h': case 'H':
6620     case 'i': case 'I':
6621     case 'j': case 'J':
6622     case 'k': case 'K':
6623     case 'l': case 'L':
6624     case 'm': case 'M':
6625     case 'n': case 'N':
6626     case 'o': case 'O':
6627     case 'p': case 'P':
6628     case 'q': case 'Q':
6629     case 'r': case 'R':
6630     case 's': case 'S':
6631     case 't': case 'T':
6632     case 'u': case 'U':
6633               case 'V':
6634     case 'w': case 'W':
6635               case 'X':
6636     case 'y': case 'Y':
6637     case 'z': case 'Z':
6638
6639       keylookup: {
6640         bool anydelim;
6641         bool lex;
6642         I32 tmp;
6643         SV *sv;
6644         CV *cv;
6645         PADOFFSET off;
6646         OP *rv2cv_op;
6647
6648         lex = FALSE;
6649         orig_keyword = 0;
6650         off = 0;
6651         sv = NULL;
6652         cv = NULL;
6653         gv = NULL;
6654         gvp = NULL;
6655         rv2cv_op = NULL;
6656
6657         PL_bufptr = s;
6658         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6659
6660         /* Some keywords can be followed by any delimiter, including ':' */
6661         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6662
6663         /* x::* is just a word, unless x is "CORE" */
6664         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6665             goto just_a_word;
6666
6667         d = s;
6668         while (d < PL_bufend && isSPACE(*d))
6669                 d++;    /* no comments skipped here, or s### is misparsed */
6670
6671         /* Is this a word before a => operator? */
6672         if (*d == '=' && d[1] == '>') {
6673             CLINE;
6674             pl_yylval.opval
6675                 = (OP*)newSVOP(OP_CONST, 0,
6676                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6677             pl_yylval.opval->op_private = OPpCONST_BARE;
6678             TERM(WORD);
6679         }
6680
6681         /* Check for plugged-in keyword */
6682         {
6683             OP *o;
6684             int result;
6685             char *saved_bufptr = PL_bufptr;
6686             PL_bufptr = s;
6687             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6688             s = PL_bufptr;
6689             if (result == KEYWORD_PLUGIN_DECLINE) {
6690                 /* not a plugged-in keyword */
6691                 PL_bufptr = saved_bufptr;
6692             } else if (result == KEYWORD_PLUGIN_STMT) {
6693                 pl_yylval.opval = o;
6694                 CLINE;
6695                 PL_expect = XSTATE;
6696                 return REPORT(PLUGSTMT);
6697             } else if (result == KEYWORD_PLUGIN_EXPR) {
6698                 pl_yylval.opval = o;
6699                 CLINE;
6700                 PL_expect = XOPERATOR;
6701                 return REPORT(PLUGEXPR);
6702             } else {
6703                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6704                                         PL_tokenbuf);
6705             }
6706         }
6707
6708         /* Check for built-in keyword */
6709         tmp = keyword(PL_tokenbuf, len, 0);
6710
6711         /* Is this a label? */
6712         if (!anydelim && PL_expect == XSTATE
6713               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6714             s = d + 1;
6715             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6716                                             newSVpvn_flags(PL_tokenbuf,
6717                                                         len, UTF ? SVf_UTF8 : 0));
6718             CLINE;
6719             TOKEN(LABEL);
6720         }
6721
6722         /* Check for lexical sub */
6723         if (PL_expect != XOPERATOR) {
6724             char tmpbuf[sizeof PL_tokenbuf + 1];
6725             *tmpbuf = '&';
6726             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6727             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6728             if (off != NOT_IN_PAD) {
6729                 assert(off); /* we assume this is boolean-true below */
6730                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6731                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6732                     HEK * const stashname = HvNAME_HEK(stash);
6733                     sv = newSVhek(stashname);
6734                     sv_catpvs(sv, "::");
6735                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6736                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6737                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6738                                     SVt_PVCV);
6739                     off = 0;
6740                 }
6741                 else {
6742                     rv2cv_op = newOP(OP_PADANY, 0);
6743                     rv2cv_op->op_targ = off;
6744                     rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6745                     cv = (CV *)PAD_SV(off);
6746                 }
6747                 lex = TRUE;
6748                 goto just_a_word;
6749             }
6750             off = 0;
6751         }
6752
6753         if (tmp < 0) {                  /* second-class keyword? */
6754             GV *ogv = NULL;     /* override (winner) */
6755             GV *hgv = NULL;     /* hidden (loser) */
6756             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6757                 CV *cv;
6758                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6759                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6760                     (cv = GvCVu(gv)))
6761                 {
6762                     if (GvIMPORTED_CV(gv))
6763                         ogv = gv;
6764                     else if (! CvMETHOD(cv))
6765                         hgv = gv;
6766                 }
6767                 if (!ogv &&
6768                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6769                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6770                     (gv = *gvp) && isGV_with_GP(gv) &&
6771                     GvCVu(gv) && GvIMPORTED_CV(gv))
6772                 {
6773                     ogv = gv;
6774                 }
6775             }
6776             if (ogv) {
6777                 orig_keyword = tmp;
6778                 tmp = 0;                /* overridden by import or by GLOBAL */
6779             }
6780             else if (gv && !gvp
6781                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6782                      && GvCVu(gv))
6783             {
6784                 tmp = 0;                /* any sub overrides "weak" keyword */
6785             }
6786             else {                      /* no override */
6787                 tmp = -tmp;
6788                 if (tmp == KEY_dump) {
6789                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6790                                    "dump() better written as CORE::dump()");
6791                 }
6792                 gv = NULL;
6793                 gvp = 0;
6794                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6795                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6796                                    "Ambiguous call resolved as CORE::%s(), "
6797                                    "qualify as such or use &",
6798                                    GvENAME(hgv));
6799             }
6800         }
6801
6802       reserved_word:
6803         switch (tmp) {
6804
6805         default:                        /* not a keyword */
6806             /* Trade off - by using this evil construction we can pull the
6807                variable gv into the block labelled keylookup. If not, then
6808                we have to give it function scope so that the goto from the
6809                earlier ':' case doesn't bypass the initialisation.  */
6810             if (0) {
6811             just_a_word_zero_gv:
6812                 sv = NULL;
6813                 cv = NULL;
6814                 gv = NULL;
6815                 gvp = NULL;
6816                 rv2cv_op = NULL;
6817                 orig_keyword = 0;
6818                 lex = 0;
6819                 off = 0;
6820             }
6821           just_a_word: {
6822                 int pkgname = 0;
6823                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6824                 const char penultchar =
6825                     lastchar && PL_bufptr - 2 >= PL_linestart
6826                          ? PL_bufptr[-2]
6827                          : 0;
6828 #ifdef PERL_MAD
6829                 SV *nextPL_nextwhite = 0;
6830 #endif
6831
6832
6833                 /* Get the rest if it looks like a package qualifier */
6834
6835                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6836                     STRLEN morelen;
6837                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6838                                   TRUE, &morelen);
6839                     if (!morelen)
6840                         Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6841                                         SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6842                                             (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6843                                 *s == '\'' ? "'" : "::");
6844                     len += morelen;
6845                     pkgname = 1;
6846                 }
6847
6848                 if (PL_expect == XOPERATOR) {
6849                     if (PL_bufptr == PL_linestart) {
6850                         CopLINE_dec(PL_curcop);
6851                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6852                         CopLINE_inc(PL_curcop);
6853                     }
6854                     else
6855                         no_op("Bareword",s);
6856                 }
6857
6858                 /* Look for a subroutine with this name in current package,
6859                    unless this is a lexical sub, or name is "Foo::",
6860                    in which case Foo is a bareword
6861                    (and a package name). */
6862
6863                 if (len > 2 && !PL_madskills &&
6864                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6865                 {
6866                     if (ckWARN(WARN_BAREWORD)
6867                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6868                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6869                             "Bareword \"%"SVf"\" refers to nonexistent package",
6870                              SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6871                                         (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6872                     len -= 2;
6873                     PL_tokenbuf[len] = '\0';
6874                     gv = NULL;
6875                     gvp = 0;
6876                 }
6877                 else {
6878                     if (!lex && !gv) {
6879                         /* Mustn't actually add anything to a symbol table.
6880                            But also don't want to "initialise" any placeholder
6881                            constants that might already be there into full
6882                            blown PVGVs with attached PVCV.  */
6883                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6884                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6885                                                SVt_PVCV);
6886                     }
6887                     len = 0;
6888                 }
6889
6890                 /* if we saw a global override before, get the right name */
6891
6892                 if (!sv)
6893                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6894                     len ? len : strlen(PL_tokenbuf));
6895                 if (gvp) {
6896                     SV * const tmp_sv = sv;
6897                     sv = newSVpvs("CORE::GLOBAL::");
6898                     sv_catsv(sv, tmp_sv);
6899                     SvREFCNT_dec(tmp_sv);
6900                 }
6901
6902 #ifdef PERL_MAD
6903                 if (PL_madskills && !PL_thistoken) {
6904                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6905                     PL_thistoken = newSVpvn(start,s - start);
6906                     PL_realtokenstart = s - SvPVX(PL_linestr);
6907                 }
6908 #endif
6909
6910                 /* Presume this is going to be a bareword of some sort. */
6911                 CLINE;
6912                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6913                 pl_yylval.opval->op_private = OPpCONST_BARE;
6914
6915                 /* And if "Foo::", then that's what it certainly is. */
6916                 if (len)
6917                     goto safe_bareword;
6918
6919                 if (!off)
6920                 {
6921                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6922                     const_op->op_private = OPpCONST_BARE;
6923                     rv2cv_op = newCVREF(0, const_op);
6924                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6925                 }
6926
6927                 /* See if it's the indirect object for a list operator. */
6928
6929                 if (PL_oldoldbufptr &&
6930                     PL_oldoldbufptr < PL_bufptr &&
6931                     (PL_oldoldbufptr == PL_last_lop
6932                      || PL_oldoldbufptr == PL_last_uni) &&
6933                     /* NO SKIPSPACE BEFORE HERE! */
6934                     (PL_expect == XREF ||
6935                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6936                 {
6937                     bool immediate_paren = *s == '(';
6938
6939                     /* (Now we can afford to cross potential line boundary.) */
6940                     s = SKIPSPACE2(s,nextPL_nextwhite);
6941 #ifdef PERL_MAD
6942                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6943 #endif
6944
6945                     /* Two barewords in a row may indicate method call. */
6946
6947                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6948                         (tmp = intuit_method(s, gv, cv))) {
6949                         op_free(rv2cv_op);
6950                         if (tmp == METHOD && !PL_lex_allbrackets &&
6951                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6952                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6953                         return REPORT(tmp);
6954                     }
6955
6956                     /* If not a declared subroutine, it's an indirect object. */
6957                     /* (But it's an indir obj regardless for sort.) */
6958                     /* Also, if "_" follows a filetest operator, it's a bareword */
6959
6960                     if (
6961                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6962                          (!cv &&
6963                         (PL_last_lop_op != OP_MAPSTART &&
6964                          PL_last_lop_op != OP_GREPSTART))))
6965                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6966                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6967                        )
6968                     {
6969                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6970                         goto bareword;
6971                     }
6972                 }
6973
6974                 PL_expect = XOPERATOR;
6975 #ifdef PERL_MAD
6976                 if (isSPACE(*s))
6977                     s = SKIPSPACE2(s,nextPL_nextwhite);
6978                 PL_nextwhite = nextPL_nextwhite;
6979 #else
6980                 s = skipspace(s);
6981 #endif
6982
6983                 /* Is this a word before a => operator? */
6984                 if (*s == '=' && s[1] == '>' && !pkgname) {
6985                     op_free(rv2cv_op);
6986                     CLINE;
6987                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6988                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6989                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6990                     TERM(WORD);
6991                 }
6992
6993                 /* If followed by a paren, it's certainly a subroutine. */
6994                 if (*s == '(') {
6995                     CLINE;
6996                     if (cv) {
6997                         d = s + 1;
6998                         while (SPACE_OR_TAB(*d))
6999                             d++;
7000                         if (*d == ')' && (sv = cv_const_sv(cv))) {
7001                             s = d + 1;
7002                             goto its_constant;
7003                         }
7004                     }
7005 #ifdef PERL_MAD
7006                     if (PL_madskills) {
7007                         PL_nextwhite = PL_thiswhite;
7008                         PL_thiswhite = 0;
7009                     }
7010                     start_force(PL_curforce);
7011 #endif
7012                     NEXTVAL_NEXTTOKE.opval =
7013                         off ? rv2cv_op : pl_yylval.opval;
7014                     PL_expect = XOPERATOR;
7015 #ifdef PERL_MAD
7016                     if (PL_madskills) {
7017                         PL_nextwhite = nextPL_nextwhite;
7018                         curmad('X', PL_thistoken);
7019                         PL_thistoken = newSVpvs("");
7020                     }
7021 #endif
7022                     if (off)
7023                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7024                     else op_free(rv2cv_op),        force_next(WORD);
7025                     pl_yylval.ival = 0;
7026                     TOKEN('&');
7027                 }
7028
7029                 /* If followed by var or block, call it a method (unless sub) */
7030
7031                 if ((*s == '$' || *s == '{') && !cv) {
7032                     op_free(rv2cv_op);
7033                     PL_last_lop = PL_oldbufptr;
7034                     PL_last_lop_op = OP_METHOD;
7035                     if (!PL_lex_allbrackets &&
7036                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7037                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7038                     PREBLOCK(METHOD);
7039                 }
7040
7041                 /* If followed by a bareword, see if it looks like indir obj. */
7042
7043                 if (!orig_keyword
7044                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7045                         && (tmp = intuit_method(s, gv, cv))) {
7046                     op_free(rv2cv_op);
7047                     if (tmp == METHOD && !PL_lex_allbrackets &&
7048                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7049                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7050                     return REPORT(tmp);
7051                 }
7052
7053                 /* Not a method, so call it a subroutine (if defined) */
7054
7055                 if (cv) {
7056                     if (lastchar == '-' && penultchar != '-') {
7057                         const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7058                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7059                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7060                                 SVfARG(tmpsv), SVfARG(tmpsv));
7061                     }
7062                     /* Check for a constant sub */
7063                     if ((sv = cv_const_sv(cv))) {
7064                   its_constant:
7065                         op_free(rv2cv_op);
7066                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7067                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7068                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
7069                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
7070                         TOKEN(WORD);
7071                     }
7072
7073                     op_free(pl_yylval.opval);
7074                     pl_yylval.opval = rv2cv_op;
7075                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7076                     PL_last_lop = PL_oldbufptr;
7077                     PL_last_lop_op = OP_ENTERSUB;
7078                     /* Is there a prototype? */
7079                     if (
7080 #ifdef PERL_MAD
7081                         cv &&
7082 #endif
7083                         SvPOK(cv))
7084                     {
7085                         STRLEN protolen = CvPROTOLEN(cv);
7086                         const char *proto = CvPROTO(cv);
7087                         bool optional;
7088                         if (!protolen)
7089                             TERM(FUNC0SUB);
7090                         if ((optional = *proto == ';'))
7091                           do
7092                             proto++;
7093                           while (*proto == ';');
7094                         if (
7095                             (
7096                                 (
7097                                     *proto == '$' || *proto == '_'
7098                                  || *proto == '*' || *proto == '+'
7099                                 )
7100                              && proto[1] == '\0'
7101                             )
7102                          || (
7103                              *proto == '\\' && proto[1] && proto[2] == '\0'
7104                             )
7105                         )
7106                             UNIPROTO(UNIOPSUB,optional);
7107                         if (*proto == '\\' && proto[1] == '[') {
7108                             const char *p = proto + 2;
7109                             while(*p && *p != ']')
7110                                 ++p;
7111                             if(*p == ']' && !p[1])
7112                                 UNIPROTO(UNIOPSUB,optional);
7113                         }
7114                         if (*proto == '&' && *s == '{') {
7115                             if (PL_curstash)
7116                                 sv_setpvs(PL_subname, "__ANON__");
7117                             else
7118                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7119                             if (!PL_lex_allbrackets &&
7120                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7121                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7122                             PREBLOCK(LSTOPSUB);
7123                         }
7124                     }
7125 #ifdef PERL_MAD
7126                     {
7127                         if (PL_madskills) {
7128                             PL_nextwhite = PL_thiswhite;
7129                             PL_thiswhite = 0;
7130                         }
7131                         start_force(PL_curforce);
7132                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7133                         PL_expect = XTERM;
7134                         if (PL_madskills) {
7135                             PL_nextwhite = nextPL_nextwhite;
7136                             curmad('X', PL_thistoken);
7137                             PL_thistoken = newSVpvs("");
7138                         }
7139                         force_next(off ? PRIVATEREF : WORD);
7140                         if (!PL_lex_allbrackets &&
7141                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7142                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7143                         TOKEN(NOAMP);
7144                     }
7145                 }
7146
7147                 /* Guess harder when madskills require "best effort". */
7148                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7149                     int probable_sub = 0;
7150                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7151                         probable_sub = 1;
7152                     else if (isALPHA(*s)) {
7153                         char tmpbuf[1024];
7154                         STRLEN tmplen;
7155                         d = s;
7156                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7157                         if (!keyword(tmpbuf, tmplen, 0))
7158                             probable_sub = 1;
7159                         else {
7160                             while (d < PL_bufend && isSPACE(*d))
7161                                 d++;
7162                             if (*d == '=' && d[1] == '>')
7163                                 probable_sub = 1;
7164                         }
7165                     }
7166                     if (probable_sub) {
7167                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7168                                         SVt_PVCV);
7169                         op_free(pl_yylval.opval);
7170                         pl_yylval.opval = rv2cv_op;
7171                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7172                         PL_last_lop = PL_oldbufptr;
7173                         PL_last_lop_op = OP_ENTERSUB;
7174                         PL_nextwhite = PL_thiswhite;
7175                         PL_thiswhite = 0;
7176                         start_force(PL_curforce);
7177                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7178                         PL_expect = XTERM;
7179                         PL_nextwhite = nextPL_nextwhite;
7180                         curmad('X', PL_thistoken);
7181                         PL_thistoken = newSVpvs("");
7182                         force_next(off ? PRIVATEREF : WORD);
7183                         if (!PL_lex_allbrackets &&
7184                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7185                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7186                         TOKEN(NOAMP);
7187                     }
7188 #else
7189                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7190                     PL_expect = XTERM;
7191                     force_next(off ? PRIVATEREF : WORD);
7192                     if (!PL_lex_allbrackets &&
7193                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7194                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7195                     TOKEN(NOAMP);
7196 #endif
7197                 }
7198
7199                 /* Call it a bare word */
7200
7201                 if (PL_hints & HINT_STRICT_SUBS)
7202                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7203                 else {
7204                 bareword:
7205                     /* after "print" and similar functions (corresponding to
7206                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7207                      * a filehandle should be subject to "strict subs".
7208                      * Likewise for the optional indirect-object argument to system
7209                      * or exec, which can't be a bareword */
7210                     if ((PL_last_lop_op == OP_PRINT
7211                             || PL_last_lop_op == OP_PRTF
7212                             || PL_last_lop_op == OP_SAY
7213                             || PL_last_lop_op == OP_SYSTEM
7214                             || PL_last_lop_op == OP_EXEC)
7215                             && (PL_hints & HINT_STRICT_SUBS))
7216                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7217                     if (lastchar != '-') {
7218                         if (ckWARN(WARN_RESERVED)) {
7219                             d = PL_tokenbuf;
7220                             while (isLOWER(*d))
7221                                 d++;
7222                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7223                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7224                                        PL_tokenbuf);
7225                         }
7226                     }
7227                 }
7228                 op_free(rv2cv_op);
7229
7230             safe_bareword:
7231                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7232                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7233                                      "Operator or semicolon missing before %c%"SVf,
7234                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7235                                                     strlen(PL_tokenbuf),
7236                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7237                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7238                                      "Ambiguous use of %c resolved as operator %c",
7239                                      lastchar, lastchar);
7240                 }
7241                 TOKEN(WORD);
7242             }
7243
7244         case KEY___FILE__:
7245             FUN0OP(
7246                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7247             );
7248
7249         case KEY___LINE__:
7250             FUN0OP(
7251                 (OP*)newSVOP(OP_CONST, 0,
7252                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7253             );
7254
7255         case KEY___PACKAGE__:
7256             FUN0OP(
7257                 (OP*)newSVOP(OP_CONST, 0,
7258                                         (PL_curstash
7259                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7260                                          : &PL_sv_undef))
7261             );
7262
7263         case KEY___DATA__:
7264         case KEY___END__: {
7265             GV *gv;
7266             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7267                 const char *pname = "main";
7268                 STRLEN plen = 4;
7269                 U32 putf8 = 0;
7270                 if (PL_tokenbuf[2] == 'D')
7271                 {
7272                     HV * const stash =
7273                         PL_curstash ? PL_curstash : PL_defstash;
7274                     pname = HvNAME_get(stash);
7275                     plen  = HvNAMELEN (stash);
7276                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7277                 }
7278                 gv = gv_fetchpvn_flags(
7279                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7280                         plen+6, GV_ADD|putf8, SVt_PVIO
7281                 );
7282                 GvMULTI_on(gv);
7283                 if (!GvIO(gv))
7284                     GvIOp(gv) = newIO();
7285                 IoIFP(GvIOp(gv)) = PL_rsfp;
7286 #if defined(HAS_FCNTL) && defined(F_SETFD)
7287                 {
7288                     const int fd = PerlIO_fileno(PL_rsfp);
7289                     fcntl(fd,F_SETFD,fd >= 3);
7290                 }
7291 #endif
7292                 /* Mark this internal pseudo-handle as clean */
7293                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7294                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7295                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7296                 else
7297                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7298 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7299                 /* if the script was opened in binmode, we need to revert
7300                  * it to text mode for compatibility; but only if it has CRs.
7301                  * XXX this is a questionable hack at best. */
7302                 if (PL_bufend-PL_bufptr > 2
7303                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7304                 {
7305                     Off_t loc = 0;
7306                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7307                         loc = PerlIO_tell(PL_rsfp);
7308                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7309                     }
7310 #ifdef NETWARE
7311                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7312 #else
7313                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7314 #endif  /* NETWARE */
7315                         if (loc > 0)
7316                             PerlIO_seek(PL_rsfp, loc, 0);
7317                     }
7318                 }
7319 #endif
7320 #ifdef PERLIO_LAYERS
7321                 if (!IN_BYTES) {
7322                     if (UTF)
7323                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7324                     else if (PL_encoding) {
7325                         SV *name;
7326                         dSP;
7327                         ENTER;
7328                         SAVETMPS;
7329                         PUSHMARK(sp);
7330                         EXTEND(SP, 1);
7331                         XPUSHs(PL_encoding);
7332                         PUTBACK;
7333                         call_method("name", G_SCALAR);
7334                         SPAGAIN;
7335                         name = POPs;
7336                         PUTBACK;
7337                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7338                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7339                                                       SVfARG(name)));
7340                         FREETMPS;
7341                         LEAVE;
7342                     }
7343                 }
7344 #endif
7345 #ifdef PERL_MAD
7346                 if (PL_madskills) {
7347                     if (PL_realtokenstart >= 0) {
7348                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7349                         if (!PL_endwhite)
7350                             PL_endwhite = newSVpvs("");
7351                         sv_catsv(PL_endwhite, PL_thiswhite);
7352                         PL_thiswhite = 0;
7353                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7354                         PL_realtokenstart = -1;
7355                     }
7356                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7357                            != NULL) ;
7358                 }
7359 #endif
7360                 PL_rsfp = NULL;
7361             }
7362             goto fake_eof;
7363         }
7364
7365         case KEY___SUB__:
7366             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7367
7368         case KEY_AUTOLOAD:
7369         case KEY_DESTROY:
7370         case KEY_BEGIN:
7371         case KEY_UNITCHECK:
7372         case KEY_CHECK:
7373         case KEY_INIT:
7374         case KEY_END:
7375             if (PL_expect == XSTATE) {
7376                 s = PL_bufptr;
7377                 goto really_sub;
7378             }
7379             goto just_a_word;
7380
7381         case KEY_CORE:
7382             if (*s == ':' && s[1] == ':') {
7383                 STRLEN olen = len;
7384                 d = s;
7385                 s += 2;
7386                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7387                 if ((*s == ':' && s[1] == ':')
7388                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7389                 {
7390                     s = d;
7391                     len = olen;
7392                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7393                     goto just_a_word;
7394                 }
7395                 if (!tmp)
7396                     Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7397                                     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7398                                                 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7399                 if (tmp < 0)
7400                     tmp = -tmp;
7401                 else if (tmp == KEY_require || tmp == KEY_do
7402                       || tmp == KEY_glob)
7403                     /* that's a way to remember we saw "CORE::" */
7404                     orig_keyword = tmp;
7405                 goto reserved_word;
7406             }
7407             goto just_a_word;
7408
7409         case KEY_abs:
7410             UNI(OP_ABS);
7411
7412         case KEY_alarm:
7413             UNI(OP_ALARM);
7414
7415         case KEY_accept:
7416             LOP(OP_ACCEPT,XTERM);
7417
7418         case KEY_and:
7419             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7420                 return REPORT(0);
7421             OPERATOR(ANDOP);
7422
7423         case KEY_atan2:
7424             LOP(OP_ATAN2,XTERM);
7425
7426         case KEY_bind:
7427             LOP(OP_BIND,XTERM);
7428
7429         case KEY_binmode:
7430             LOP(OP_BINMODE,XTERM);
7431
7432         case KEY_bless:
7433             LOP(OP_BLESS,XTERM);
7434
7435         case KEY_break:
7436             FUN0(OP_BREAK);
7437
7438         case KEY_chop:
7439             UNI(OP_CHOP);
7440
7441         case KEY_continue:
7442                     /* We have to disambiguate the two senses of
7443                       "continue". If the next token is a '{' then
7444                       treat it as the start of a continue block;
7445                       otherwise treat it as a control operator.
7446                      */
7447                     s = skipspace(s);
7448                     if (*s == '{')
7449             PREBLOCK(CONTINUE);
7450                     else
7451                         FUN0(OP_CONTINUE);
7452
7453         case KEY_chdir:
7454             /* may use HOME */
7455             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7456             UNI(OP_CHDIR);
7457
7458         case KEY_close:
7459             UNI(OP_CLOSE);
7460
7461         case KEY_closedir:
7462             UNI(OP_CLOSEDIR);
7463
7464         case KEY_cmp:
7465             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7466                 return REPORT(0);
7467             Eop(OP_SCMP);
7468
7469         case KEY_caller:
7470             UNI(OP_CALLER);
7471
7472         case KEY_crypt:
7473 #ifdef FCRYPT
7474             if (!PL_cryptseen) {
7475                 PL_cryptseen = TRUE;
7476                 init_des();
7477             }
7478 #endif
7479             LOP(OP_CRYPT,XTERM);
7480
7481         case KEY_chmod:
7482             LOP(OP_CHMOD,XTERM);
7483
7484         case KEY_chown:
7485             LOP(OP_CHOWN,XTERM);
7486
7487         case KEY_connect:
7488             LOP(OP_CONNECT,XTERM);
7489
7490         case KEY_chr:
7491             UNI(OP_CHR);
7492
7493         case KEY_cos:
7494             UNI(OP_COS);
7495
7496         case KEY_chroot:
7497             UNI(OP_CHROOT);
7498
7499         case KEY_default:
7500             PREBLOCK(DEFAULT);
7501
7502         case KEY_do:
7503             s = SKIPSPACE1(s);
7504             if (*s == '{')
7505                 PRETERMBLOCK(DO);
7506             if (*s != '\'') {
7507                 *PL_tokenbuf = '&';
7508                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7509                               1, &len);
7510                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7511                     d = SKIPSPACE1(d);
7512                     if (*d == '(') {
7513                         force_ident_maybe_lex('&');
7514                         s = d;
7515                     }
7516                 }
7517             }
7518             if (orig_keyword == KEY_do) {
7519                 orig_keyword = 0;
7520                 pl_yylval.ival = 1;
7521             }
7522             else
7523                 pl_yylval.ival = 0;
7524             OPERATOR(DO);
7525
7526         case KEY_die:
7527             PL_hints |= HINT_BLOCK_SCOPE;
7528             LOP(OP_DIE,XTERM);
7529
7530         case KEY_defined:
7531             UNI(OP_DEFINED);
7532
7533         case KEY_delete:
7534             UNI(OP_DELETE);
7535
7536         case KEY_dbmopen:
7537             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7538                               STR_WITH_LEN("NDBM_File::"),
7539                               STR_WITH_LEN("DB_File::"),
7540                               STR_WITH_LEN("GDBM_File::"),
7541                               STR_WITH_LEN("SDBM_File::"),
7542                               STR_WITH_LEN("ODBM_File::"),
7543                               NULL);
7544             LOP(OP_DBMOPEN,XTERM);
7545
7546         case KEY_dbmclose:
7547             UNI(OP_DBMCLOSE);
7548
7549         case KEY_dump:
7550             PL_expect = XOPERATOR;
7551             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7552             LOOPX(OP_DUMP);
7553
7554         case KEY_else:
7555             PREBLOCK(ELSE);
7556
7557         case KEY_elsif:
7558             pl_yylval.ival = CopLINE(PL_curcop);
7559             OPERATOR(ELSIF);
7560
7561         case KEY_eq:
7562             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7563                 return REPORT(0);
7564             Eop(OP_SEQ);
7565
7566         case KEY_exists:
7567             UNI(OP_EXISTS);
7568
7569         case KEY_exit:
7570             if (PL_madskills)
7571                 UNI(OP_INT);
7572             UNI(OP_EXIT);
7573
7574         case KEY_eval:
7575             s = SKIPSPACE1(s);
7576             if (*s == '{') { /* block eval */
7577                 PL_expect = XTERMBLOCK;
7578                 UNIBRACK(OP_ENTERTRY);
7579             }
7580             else { /* string eval */
7581                 PL_expect = XTERM;
7582                 UNIBRACK(OP_ENTEREVAL);
7583             }
7584
7585         case KEY_evalbytes:
7586             PL_expect = XTERM;
7587             UNIBRACK(-OP_ENTEREVAL);
7588
7589         case KEY_eof:
7590             UNI(OP_EOF);
7591
7592         case KEY_exp:
7593             UNI(OP_EXP);
7594
7595         case KEY_each:
7596             UNI(OP_EACH);
7597
7598         case KEY_exec:
7599             LOP(OP_EXEC,XREF);
7600
7601         case KEY_endhostent:
7602             FUN0(OP_EHOSTENT);
7603
7604         case KEY_endnetent:
7605             FUN0(OP_ENETENT);
7606
7607         case KEY_endservent:
7608             FUN0(OP_ESERVENT);
7609
7610         case KEY_endprotoent:
7611             FUN0(OP_EPROTOENT);
7612
7613         case KEY_endpwent:
7614             FUN0(OP_EPWENT);
7615
7616         case KEY_endgrent:
7617             FUN0(OP_EGRENT);
7618
7619         case KEY_for:
7620         case KEY_foreach:
7621             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7622                 return REPORT(0);
7623             pl_yylval.ival = CopLINE(PL_curcop);
7624             s = SKIPSPACE1(s);
7625             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7626                 char *p = s;
7627 #ifdef PERL_MAD
7628                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7629 #endif
7630
7631                 if ((PL_bufend - p) >= 3 &&
7632                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7633                     p += 2;
7634                 else if ((PL_bufend - p) >= 4 &&
7635                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7636                     p += 3;
7637                 p = PEEKSPACE(p);
7638                 if (isIDFIRST_lazy_if(p,UTF)) {
7639                     p = scan_ident(p, PL_bufend,
7640                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7641                     p = PEEKSPACE(p);
7642                 }
7643                 if (*p != '$')
7644                     Perl_croak(aTHX_ "Missing $ on loop variable");
7645 #ifdef PERL_MAD
7646                 s = SvPVX(PL_linestr) + soff;
7647 #endif
7648             }
7649             OPERATOR(FOR);
7650
7651         case KEY_formline:
7652             LOP(OP_FORMLINE,XTERM);
7653
7654         case KEY_fork:
7655             FUN0(OP_FORK);
7656
7657         case KEY_fc:
7658             UNI(OP_FC);
7659
7660         case KEY_fcntl:
7661             LOP(OP_FCNTL,XTERM);
7662
7663         case KEY_fileno:
7664             UNI(OP_FILENO);
7665
7666         case KEY_flock:
7667             LOP(OP_FLOCK,XTERM);
7668
7669         case KEY_gt:
7670             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7671                 return REPORT(0);
7672             Rop(OP_SGT);
7673
7674         case KEY_ge:
7675             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7676                 return REPORT(0);
7677             Rop(OP_SGE);
7678
7679         case KEY_grep:
7680             LOP(OP_GREPSTART, XREF);
7681
7682         case KEY_goto:
7683             PL_expect = XOPERATOR;
7684             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7685             LOOPX(OP_GOTO);
7686
7687         case KEY_gmtime:
7688             UNI(OP_GMTIME);
7689
7690         case KEY_getc:
7691             UNIDOR(OP_GETC);
7692
7693         case KEY_getppid:
7694             FUN0(OP_GETPPID);
7695
7696         case KEY_getpgrp:
7697             UNI(OP_GETPGRP);
7698
7699         case KEY_getpriority:
7700             LOP(OP_GETPRIORITY,XTERM);
7701
7702         case KEY_getprotobyname:
7703             UNI(OP_GPBYNAME);
7704
7705         case KEY_getprotobynumber:
7706             LOP(OP_GPBYNUMBER,XTERM);
7707
7708         case KEY_getprotoent:
7709             FUN0(OP_GPROTOENT);
7710
7711         case KEY_getpwent:
7712             FUN0(OP_GPWENT);
7713
7714         case KEY_getpwnam:
7715             UNI(OP_GPWNAM);
7716
7717         case KEY_getpwuid:
7718             UNI(OP_GPWUID);
7719
7720         case KEY_getpeername:
7721             UNI(OP_GETPEERNAME);
7722
7723         case KEY_gethostbyname:
7724             UNI(OP_GHBYNAME);
7725
7726         case KEY_gethostbyaddr:
7727             LOP(OP_GHBYADDR,XTERM);
7728
7729         case KEY_gethostent:
7730             FUN0(OP_GHOSTENT);
7731
7732         case KEY_getnetbyname:
7733             UNI(OP_GNBYNAME);
7734
7735         case KEY_getnetbyaddr:
7736             LOP(OP_GNBYADDR,XTERM);
7737
7738         case KEY_getnetent:
7739             FUN0(OP_GNETENT);
7740
7741         case KEY_getservbyname:
7742             LOP(OP_GSBYNAME,XTERM);
7743
7744         case KEY_getservbyport:
7745             LOP(OP_GSBYPORT,XTERM);
7746
7747         case KEY_getservent:
7748             FUN0(OP_GSERVENT);
7749
7750         case KEY_getsockname:
7751             UNI(OP_GETSOCKNAME);
7752
7753         case KEY_getsockopt:
7754             LOP(OP_GSOCKOPT,XTERM);
7755
7756         case KEY_getgrent:
7757             FUN0(OP_GGRENT);
7758
7759         case KEY_getgrnam:
7760             UNI(OP_GGRNAM);
7761
7762         case KEY_getgrgid:
7763             UNI(OP_GGRGID);
7764
7765         case KEY_getlogin:
7766             FUN0(OP_GETLOGIN);
7767
7768         case KEY_given:
7769             pl_yylval.ival = CopLINE(PL_curcop);
7770             OPERATOR(GIVEN);
7771
7772         case KEY_glob:
7773             LOP(
7774              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7775              XTERM
7776             );
7777
7778         case KEY_hex:
7779             UNI(OP_HEX);
7780
7781         case KEY_if:
7782             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7783                 return REPORT(0);
7784             pl_yylval.ival = CopLINE(PL_curcop);
7785             OPERATOR(IF);
7786
7787         case KEY_index:
7788             LOP(OP_INDEX,XTERM);
7789
7790         case KEY_int:
7791             UNI(OP_INT);
7792
7793         case KEY_ioctl:
7794             LOP(OP_IOCTL,XTERM);
7795
7796         case KEY_join:
7797             LOP(OP_JOIN,XTERM);
7798
7799         case KEY_keys:
7800             UNI(OP_KEYS);
7801
7802         case KEY_kill:
7803             LOP(OP_KILL,XTERM);
7804
7805         case KEY_last:
7806             PL_expect = XOPERATOR;
7807             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7808             LOOPX(OP_LAST);
7809
7810         case KEY_lc:
7811             UNI(OP_LC);
7812
7813         case KEY_lcfirst:
7814             UNI(OP_LCFIRST);
7815
7816         case KEY_local:
7817             pl_yylval.ival = 0;
7818             OPERATOR(LOCAL);
7819
7820         case KEY_length:
7821             UNI(OP_LENGTH);
7822
7823         case KEY_lt:
7824             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7825                 return REPORT(0);
7826             Rop(OP_SLT);
7827
7828         case KEY_le:
7829             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7830                 return REPORT(0);
7831             Rop(OP_SLE);
7832
7833         case KEY_localtime:
7834             UNI(OP_LOCALTIME);
7835
7836         case KEY_log:
7837             UNI(OP_LOG);
7838
7839         case KEY_link:
7840             LOP(OP_LINK,XTERM);
7841
7842         case KEY_listen:
7843             LOP(OP_LISTEN,XTERM);
7844
7845         case KEY_lock:
7846             UNI(OP_LOCK);
7847
7848         case KEY_lstat:
7849             UNI(OP_LSTAT);
7850
7851         case KEY_m:
7852             s = scan_pat(s,OP_MATCH);
7853             TERM(sublex_start());
7854
7855         case KEY_map:
7856             LOP(OP_MAPSTART, XREF);
7857
7858         case KEY_mkdir:
7859             LOP(OP_MKDIR,XTERM);
7860
7861         case KEY_msgctl:
7862             LOP(OP_MSGCTL,XTERM);
7863
7864         case KEY_msgget:
7865             LOP(OP_MSGGET,XTERM);
7866
7867         case KEY_msgrcv:
7868             LOP(OP_MSGRCV,XTERM);
7869
7870         case KEY_msgsnd:
7871             LOP(OP_MSGSND,XTERM);
7872
7873         case KEY_our:
7874         case KEY_my:
7875         case KEY_state:
7876             PL_in_my = (U16)tmp;
7877             s = SKIPSPACE1(s);
7878             if (isIDFIRST_lazy_if(s,UTF)) {
7879 #ifdef PERL_MAD
7880                 char* start = s;
7881 #endif
7882                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7883                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7884                 {
7885                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7886                         Perl_croak(aTHX_
7887                                   "Experimental \"%s\" subs not enabled",
7888                                    tmp == KEY_my    ? "my"    :
7889                                    tmp == KEY_state ? "state" : "our");
7890                     goto really_sub;
7891                 }
7892                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7893                 if (!PL_in_my_stash) {
7894                     char tmpbuf[1024];
7895                     PL_bufptr = s;
7896                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7897                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7898                 }
7899 #ifdef PERL_MAD
7900                 if (PL_madskills) {     /* just add type to declarator token */
7901                     sv_catsv(PL_thistoken, PL_nextwhite);
7902                     PL_nextwhite = 0;
7903                     sv_catpvn(PL_thistoken, start, s - start);
7904                 }
7905 #endif
7906             }
7907             pl_yylval.ival = 1;
7908             OPERATOR(MY);
7909
7910         case KEY_next:
7911             PL_expect = XOPERATOR;
7912             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7913             LOOPX(OP_NEXT);
7914
7915         case KEY_ne:
7916             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7917                 return REPORT(0);
7918             Eop(OP_SNE);
7919
7920         case KEY_no:
7921             s = tokenize_use(0, s);
7922             TERM(USE);
7923
7924         case KEY_not:
7925             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7926                 FUN1(OP_NOT);
7927             else {
7928                 if (!PL_lex_allbrackets &&
7929                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7930                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7931                 OPERATOR(NOTOP);
7932             }
7933
7934         case KEY_open:
7935             s = SKIPSPACE1(s);
7936             if (isIDFIRST_lazy_if(s,UTF)) {
7937                 const char *t;
7938                 for (d = s; isALNUM_lazy_if(d,UTF);) {
7939                     d += UTF ? UTF8SKIP(d) : 1;
7940                     if (UTF) {
7941                         while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7942                             d += UTF ? UTF8SKIP(d) : 1;
7943                         }
7944                     }
7945                 }
7946                 for (t=d; isSPACE(*t);)
7947                     t++;
7948                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7949                     /* [perl #16184] */
7950                     && !(t[0] == '=' && t[1] == '>')
7951                     && !(t[0] == ':' && t[1] == ':')
7952                     && !keyword(s, d-s, 0)
7953                 ) {
7954                     SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7955                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7956                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7957                            "Precedence problem: open %"SVf" should be open(%"SVf")",
7958                             SVfARG(tmpsv), SVfARG(tmpsv));
7959                 }
7960             }
7961             LOP(OP_OPEN,XTERM);
7962
7963         case KEY_or:
7964             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7965                 return REPORT(0);
7966             pl_yylval.ival = OP_OR;
7967             OPERATOR(OROP);
7968
7969         case KEY_ord:
7970             UNI(OP_ORD);
7971
7972         case KEY_oct:
7973             UNI(OP_OCT);
7974
7975         case KEY_opendir:
7976             LOP(OP_OPEN_DIR,XTERM);
7977
7978         case KEY_print:
7979             checkcomma(s,PL_tokenbuf,"filehandle");
7980             LOP(OP_PRINT,XREF);
7981
7982         case KEY_printf:
7983             checkcomma(s,PL_tokenbuf,"filehandle");
7984             LOP(OP_PRTF,XREF);
7985
7986         case KEY_prototype:
7987             UNI(OP_PROTOTYPE);
7988
7989         case KEY_push:
7990             LOP(OP_PUSH,XTERM);
7991
7992         case KEY_pop:
7993             UNIDOR(OP_POP);
7994
7995         case KEY_pos:
7996             UNIDOR(OP_POS);
7997
7998         case KEY_pack:
7999             LOP(OP_PACK,XTERM);
8000
8001         case KEY_package:
8002             s = force_word(s,WORD,FALSE,TRUE,FALSE);
8003             s = SKIPSPACE1(s);
8004             s = force_strict_version(s);
8005             PL_lex_expect = XBLOCK;
8006             OPERATOR(PACKAGE);
8007
8008         case KEY_pipe:
8009             LOP(OP_PIPE_OP,XTERM);
8010
8011         case KEY_q:
8012             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8013             if (!s)
8014                 missingterm(NULL);
8015             pl_yylval.ival = OP_CONST;
8016             TERM(sublex_start());
8017
8018         case KEY_quotemeta:
8019             UNI(OP_QUOTEMETA);
8020
8021         case KEY_qw: {
8022             OP *words = NULL;
8023             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8024             if (!s)
8025                 missingterm(NULL);
8026             PL_expect = XOPERATOR;
8027             if (SvCUR(PL_lex_stuff)) {
8028                 int warned_comma = !ckWARN(WARN_QW);
8029                 int warned_comment = warned_comma;
8030                 d = SvPV_force(PL_lex_stuff, len);
8031                 while (len) {
8032                     for (; isSPACE(*d) && len; --len, ++d)
8033                         /**/;
8034                     if (len) {
8035                         SV *sv;
8036                         const char *b = d;
8037                         if (!warned_comma || !warned_comment) {
8038                             for (; !isSPACE(*d) && len; --len, ++d) {
8039                                 if (!warned_comma && *d == ',') {
8040                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8041                                         "Possible attempt to separate words with commas");
8042                                     ++warned_comma;
8043                                 }
8044                                 else if (!warned_comment && *d == '#') {
8045                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8046                                         "Possible attempt to put comments in qw() list");
8047                                     ++warned_comment;
8048                                 }
8049                             }
8050                         }
8051                         else {
8052                             for (; !isSPACE(*d) && len; --len, ++d)
8053                                 /**/;
8054                         }
8055                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8056                         words = op_append_elem(OP_LIST, words,
8057                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8058                     }
8059                 }
8060             }
8061             if (!words)
8062                 words = newNULLLIST();
8063             if (PL_lex_stuff) {
8064                 SvREFCNT_dec(PL_lex_stuff);
8065                 PL_lex_stuff = NULL;
8066             }
8067             PL_expect = XOPERATOR;
8068             pl_yylval.opval = sawparens(words);
8069             TOKEN(QWLIST);
8070         }
8071
8072         case KEY_qq:
8073             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8074             if (!s)
8075                 missingterm(NULL);
8076             pl_yylval.ival = OP_STRINGIFY;
8077             if (SvIVX(PL_lex_stuff) == '\'')
8078                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8079             TERM(sublex_start());
8080
8081         case KEY_qr:
8082             s = scan_pat(s,OP_QR);
8083             TERM(sublex_start());
8084
8085         case KEY_qx:
8086             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8087             if (!s)
8088                 missingterm(NULL);
8089             readpipe_override();
8090             TERM(sublex_start());
8091
8092         case KEY_return:
8093             OLDLOP(OP_RETURN);
8094
8095         case KEY_require:
8096             s = SKIPSPACE1(s);
8097             PL_expect = XOPERATOR;
8098             if (isDIGIT(*s)) {
8099                 s = force_version(s, FALSE);
8100             }
8101             else if (*s != 'v' || !isDIGIT(s[1])
8102                     || (s = force_version(s, TRUE), *s == 'v'))
8103             {
8104                 *PL_tokenbuf = '\0';
8105                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8106                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8107                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8108                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8109                 else if (*s == '<')
8110                     yyerror("<> should be quotes");
8111             }
8112             if (orig_keyword == KEY_require) {
8113                 orig_keyword = 0;
8114                 pl_yylval.ival = 1;
8115             }
8116             else
8117                 pl_yylval.ival = 0;
8118             PL_expect = XTERM;
8119             PL_bufptr = s;
8120             PL_last_uni = PL_oldbufptr;
8121             PL_last_lop_op = OP_REQUIRE;
8122             s = skipspace(s);
8123             return REPORT( (int)REQUIRE );
8124
8125         case KEY_reset:
8126             UNI(OP_RESET);
8127
8128         case KEY_redo:
8129             PL_expect = XOPERATOR;
8130             s = force_word(s,WORD,TRUE,FALSE,FALSE);
8131             LOOPX(OP_REDO);
8132
8133         case KEY_rename:
8134             LOP(OP_RENAME,XTERM);
8135
8136         case KEY_rand:
8137             UNI(OP_RAND);
8138
8139         case KEY_rmdir:
8140             UNI(OP_RMDIR);
8141
8142         case KEY_rindex:
8143             LOP(OP_RINDEX,XTERM);
8144
8145         case KEY_read:
8146             LOP(OP_READ,XTERM);
8147
8148         case KEY_readdir:
8149             UNI(OP_READDIR);
8150
8151         case KEY_readline:
8152             UNIDOR(OP_READLINE);
8153
8154         case KEY_readpipe:
8155             UNIDOR(OP_BACKTICK);
8156
8157         case KEY_rewinddir:
8158             UNI(OP_REWINDDIR);
8159
8160         case KEY_recv:
8161             LOP(OP_RECV,XTERM);
8162
8163         case KEY_reverse:
8164             LOP(OP_REVERSE,XTERM);
8165
8166         case KEY_readlink:
8167             UNIDOR(OP_READLINK);
8168
8169         case KEY_ref:
8170             UNI(OP_REF);
8171
8172         case KEY_s:
8173             s = scan_subst(s);
8174             if (pl_yylval.opval)
8175                 TERM(sublex_start());
8176             else
8177                 TOKEN(1);       /* force error */
8178
8179         case KEY_say:
8180             checkcomma(s,PL_tokenbuf,"filehandle");
8181             LOP(OP_SAY,XREF);
8182
8183         case KEY_chomp:
8184             UNI(OP_CHOMP);
8185
8186         case KEY_scalar:
8187             UNI(OP_SCALAR);
8188
8189         case KEY_select:
8190             LOP(OP_SELECT,XTERM);
8191
8192         case KEY_seek:
8193             LOP(OP_SEEK,XTERM);
8194
8195         case KEY_semctl:
8196             LOP(OP_SEMCTL,XTERM);
8197
8198         case KEY_semget:
8199             LOP(OP_SEMGET,XTERM);
8200
8201         case KEY_semop:
8202             LOP(OP_SEMOP,XTERM);
8203
8204         case KEY_send:
8205             LOP(OP_SEND,XTERM);
8206
8207         case KEY_setpgrp:
8208             LOP(OP_SETPGRP,XTERM);
8209
8210         case KEY_setpriority:
8211             LOP(OP_SETPRIORITY,XTERM);
8212
8213         case KEY_sethostent:
8214             UNI(OP_SHOSTENT);
8215
8216         case KEY_setnetent:
8217             UNI(OP_SNETENT);
8218
8219         case KEY_setservent:
8220             UNI(OP_SSERVENT);
8221
8222         case KEY_setprotoent:
8223             UNI(OP_SPROTOENT);
8224
8225         case KEY_setpwent:
8226             FUN0(OP_SPWENT);
8227
8228         case KEY_setgrent:
8229             FUN0(OP_SGRENT);
8230
8231         case KEY_seekdir:
8232             LOP(OP_SEEKDIR,XTERM);
8233
8234         case KEY_setsockopt:
8235             LOP(OP_SSOCKOPT,XTERM);
8236
8237         case KEY_shift:
8238             UNIDOR(OP_SHIFT);
8239
8240         case KEY_shmctl:
8241             LOP(OP_SHMCTL,XTERM);
8242
8243         case KEY_shmget:
8244             LOP(OP_SHMGET,XTERM);
8245
8246         case KEY_shmread:
8247             LOP(OP_SHMREAD,XTERM);
8248
8249         case KEY_shmwrite:
8250             LOP(OP_SHMWRITE,XTERM);
8251
8252         case KEY_shutdown:
8253             LOP(OP_SHUTDOWN,XTERM);
8254
8255         case KEY_sin:
8256             UNI(OP_SIN);
8257
8258         case KEY_sleep:
8259             UNI(OP_SLEEP);
8260
8261         case KEY_socket:
8262             LOP(OP_SOCKET,XTERM);
8263
8264         case KEY_socketpair:
8265             LOP(OP_SOCKPAIR,XTERM);
8266
8267         case KEY_sort:
8268             checkcomma(s,PL_tokenbuf,"subroutine name");
8269             s = SKIPSPACE1(s);
8270             PL_expect = XTERM;
8271             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8272             LOP(OP_SORT,XREF);
8273
8274         case KEY_split:
8275             LOP(OP_SPLIT,XTERM);
8276
8277         case KEY_sprintf:
8278             LOP(OP_SPRINTF,XTERM);
8279
8280         case KEY_splice:
8281             LOP(OP_SPLICE,XTERM);
8282
8283         case KEY_sqrt:
8284             UNI(OP_SQRT);
8285
8286         case KEY_srand:
8287             UNI(OP_SRAND);
8288
8289         case KEY_stat:
8290             UNI(OP_STAT);
8291
8292         case KEY_study:
8293             UNI(OP_STUDY);
8294
8295         case KEY_substr:
8296             LOP(OP_SUBSTR,XTERM);
8297
8298         case KEY_format:
8299         case KEY_sub:
8300           really_sub:
8301             {
8302                 char * const tmpbuf = PL_tokenbuf + 1;
8303                 SSize_t tboffset = 0;
8304                 expectation attrful;
8305                 bool have_name, have_proto;
8306                 const int key = tmp;
8307
8308 #ifdef PERL_MAD
8309                 SV *tmpwhite = 0;
8310
8311                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8312                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8313                 PL_thistoken = 0;
8314
8315                 d = s;
8316                 s = SKIPSPACE2(s,tmpwhite);
8317 #else
8318                 d = s;
8319                 s = skipspace(s);
8320 #endif
8321
8322                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8323                     (*s == ':' && s[1] == ':'))
8324                 {
8325 #ifdef PERL_MAD
8326                     SV *nametoke = NULL;
8327 #endif
8328
8329                     PL_expect = XBLOCK;
8330                     attrful = XATTRBLOCK;
8331                     /* remember buffer pos'n for later force_word */
8332                     tboffset = s - PL_oldbufptr;
8333                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8334                                   &len);
8335 #ifdef PERL_MAD
8336                     if (PL_madskills)
8337                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8338 #endif
8339                     *PL_tokenbuf = '&';
8340                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8341                      || pad_findmy_pvn(
8342                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8343                         ) != NOT_IN_PAD)
8344                         sv_setpvn(PL_subname, tmpbuf, len);
8345                     else {
8346                         sv_setsv(PL_subname,PL_curstname);
8347                         sv_catpvs(PL_subname,"::");
8348                         sv_catpvn(PL_subname,tmpbuf,len);
8349                     }
8350                     if (SvUTF8(PL_linestr))
8351                         SvUTF8_on(PL_subname);
8352                     have_name = TRUE;
8353
8354
8355 #ifdef PERL_MAD
8356                     start_force(0);
8357                     CURMAD('X', nametoke);
8358                     CURMAD('_', tmpwhite);
8359                     force_ident_maybe_lex('&');
8360
8361                     s = SKIPSPACE2(d,tmpwhite);
8362 #else
8363                     s = skipspace(d);
8364 #endif
8365                 }
8366                 else {
8367                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8368                     {
8369                         *d = '\0';
8370                         /* diag_listed_as: Missing name in "%s sub" */
8371                         Perl_croak(aTHX_
8372                                   "Missing name in \"%s\"", PL_bufptr);
8373                     }
8374                     PL_expect = XTERMBLOCK;
8375                     attrful = XATTRTERM;
8376                     sv_setpvs(PL_subname,"?");
8377                     have_name = FALSE;
8378                 }
8379
8380                 if (key == KEY_format) {
8381 #ifdef PERL_MAD
8382                     PL_thistoken = subtoken;
8383                     s = d;
8384 #else
8385                     if (have_name)
8386                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8387                                           FALSE, TRUE, TRUE);
8388 #endif
8389                     PREBLOCK(FORMAT);
8390                 }
8391
8392                 /* Look for a prototype */
8393                 if (*s == '(') {
8394                     char *p;
8395                     bool bad_proto = FALSE;
8396                     bool in_brackets = FALSE;
8397                     char greedy_proto = ' ';
8398                     bool proto_after_greedy_proto = FALSE;
8399                     bool must_be_last = FALSE;
8400                     bool underscore = FALSE;
8401                     bool seen_underscore = FALSE;
8402                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8403                     STRLEN tmplen;
8404
8405                     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8406                     if (!s)
8407                         Perl_croak(aTHX_ "Prototype not terminated");
8408                     /* strip spaces and check for bad characters */
8409                     d = SvPV(PL_lex_stuff, tmplen);
8410                     tmp = 0;
8411                     for (p = d; tmplen; tmplen--, ++p) {
8412                         if (!isSPACE(*p)) {
8413                             d[tmp++] = *p;
8414
8415                             if (warnillegalproto) {
8416                                 if (must_be_last)
8417                                     proto_after_greedy_proto = TRUE;
8418                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8419                                     bad_proto = TRUE;
8420                                 }
8421                                 else {
8422                                     if ( underscore ) {
8423                                         if ( !strchr(";@%", *p) )
8424                                             bad_proto = TRUE;
8425                                         underscore = FALSE;
8426                                     }
8427                                     if ( *p == '[' ) {
8428                                         in_brackets = TRUE;
8429                                     }
8430                                     else if ( *p == ']' ) {
8431                                         in_brackets = FALSE;
8432                                     }
8433                                     else if ( (*p == '@' || *p == '%') &&
8434                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8435                                          !in_brackets ) {
8436                                         must_be_last = TRUE;
8437                                         greedy_proto = *p;
8438                                     }
8439                                     else if ( *p == '_' ) {
8440                                         underscore = seen_underscore = TRUE;
8441                                     }
8442                                 }
8443                             }
8444                         }
8445                     }
8446                     d[tmp] = '\0';
8447                     if (proto_after_greedy_proto)
8448                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8449                                     "Prototype after '%c' for %"SVf" : %s",
8450                                     greedy_proto, SVfARG(PL_subname), d);
8451                     if (bad_proto) {
8452                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8453                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8454                                     "Illegal character %sin prototype for %"SVf" : %s",
8455                                     seen_underscore ? "after '_' " : "",
8456                                     SVfARG(PL_subname),
8457                                     SvUTF8(PL_lex_stuff)
8458                                         ? sv_uni_display(dsv,
8459                                             newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8460                                             tmp,
8461                                             UNI_DISPLAY_ISPRINT)
8462                                         : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8463                                             PERL_PV_ESCAPE_NONASCII));
8464                     }
8465                     SvCUR_set(PL_lex_stuff, tmp);
8466                     have_proto = TRUE;
8467
8468 #ifdef PERL_MAD
8469                     start_force(0);
8470                     CURMAD('q', PL_thisopen);
8471                     CURMAD('_', tmpwhite);
8472                     CURMAD('=', PL_thisstuff);
8473                     CURMAD('Q', PL_thisclose);
8474                     NEXTVAL_NEXTTOKE.opval =
8475                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8476                     PL_lex_stuff = NULL;
8477                     force_next(THING);
8478
8479                     s = SKIPSPACE2(s,tmpwhite);
8480 #else
8481                     s = skipspace(s);
8482 #endif
8483                 }
8484                 else
8485                     have_proto = FALSE;
8486
8487                 if (*s == ':' && s[1] != ':')
8488                     PL_expect = attrful;
8489                 else if (*s != '{' && key == KEY_sub) {
8490                     if (!have_name)
8491                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8492                     else if (*s != ';' && *s != '}')
8493                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8494                 }
8495
8496 #ifdef PERL_MAD
8497                 start_force(0);
8498                 if (tmpwhite) {
8499                     if (PL_madskills)
8500                         curmad('^', newSVpvs(""));
8501                     CURMAD('_', tmpwhite);
8502                 }
8503                 force_next(0);
8504
8505                 PL_thistoken = subtoken;
8506 #else
8507                 if (have_proto) {
8508                     NEXTVAL_NEXTTOKE.opval =
8509                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8510                     PL_lex_stuff = NULL;
8511                     force_next(THING);
8512                 }
8513 #endif
8514                 if (!have_name) {
8515                     if (PL_curstash)
8516                         sv_setpvs(PL_subname, "__ANON__");
8517                     else
8518                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8519                     TOKEN(ANONSUB);
8520                 }
8521 #ifndef PERL_MAD
8522                 force_ident_maybe_lex('&');
8523 #endif
8524                 TOKEN(SUB);
8525             }
8526
8527         case KEY_system:
8528             LOP(OP_SYSTEM,XREF);
8529
8530         case KEY_symlink:
8531             LOP(OP_SYMLINK,XTERM);
8532
8533         case KEY_syscall:
8534             LOP(OP_SYSCALL,XTERM);
8535
8536         case KEY_sysopen:
8537             LOP(OP_SYSOPEN,XTERM);
8538
8539         case KEY_sysseek:
8540             LOP(OP_SYSSEEK,XTERM);
8541
8542         case KEY_sysread:
8543             LOP(OP_SYSREAD,XTERM);
8544
8545         case KEY_syswrite:
8546             LOP(OP_SYSWRITE,XTERM);
8547
8548         case KEY_tr:
8549         case KEY_y:
8550             s = scan_trans(s);
8551             TERM(sublex_start());
8552
8553         case KEY_tell:
8554             UNI(OP_TELL);
8555
8556         case KEY_telldir:
8557             UNI(OP_TELLDIR);
8558
8559         case KEY_tie:
8560             LOP(OP_TIE,XTERM);
8561
8562         case KEY_tied:
8563             UNI(OP_TIED);
8564
8565         case KEY_time:
8566             FUN0(OP_TIME);
8567
8568         case KEY_times:
8569             FUN0(OP_TMS);
8570
8571         case KEY_truncate:
8572             LOP(OP_TRUNCATE,XTERM);
8573
8574         case KEY_uc:
8575             UNI(OP_UC);
8576
8577         case KEY_ucfirst:
8578             UNI(OP_UCFIRST);
8579
8580         case KEY_untie:
8581             UNI(OP_UNTIE);
8582
8583         case KEY_until:
8584             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8585                 return REPORT(0);
8586             pl_yylval.ival = CopLINE(PL_curcop);
8587             OPERATOR(UNTIL);
8588
8589         case KEY_unless:
8590             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8591                 return REPORT(0);
8592             pl_yylval.ival = CopLINE(PL_curcop);
8593             OPERATOR(UNLESS);
8594
8595         case KEY_unlink:
8596             LOP(OP_UNLINK,XTERM);
8597
8598         case KEY_undef:
8599             UNIDOR(OP_UNDEF);
8600
8601         case KEY_unpack:
8602             LOP(OP_UNPACK,XTERM);
8603
8604         case KEY_utime:
8605             LOP(OP_UTIME,XTERM);
8606
8607         case KEY_umask:
8608             UNIDOR(OP_UMASK);
8609
8610         case KEY_unshift:
8611             LOP(OP_UNSHIFT,XTERM);
8612
8613         case KEY_use:
8614             s = tokenize_use(1, s);
8615             OPERATOR(USE);
8616
8617         case KEY_values:
8618             UNI(OP_VALUES);
8619
8620         case KEY_vec:
8621             LOP(OP_VEC,XTERM);
8622
8623         case KEY_when:
8624             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8625                 return REPORT(0);
8626             pl_yylval.ival = CopLINE(PL_curcop);
8627             OPERATOR(WHEN);
8628
8629         case KEY_while:
8630             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8631                 return REPORT(0);
8632             pl_yylval.ival = CopLINE(PL_curcop);
8633             OPERATOR(WHILE);
8634
8635         case KEY_warn:
8636             PL_hints |= HINT_BLOCK_SCOPE;
8637             LOP(OP_WARN,XTERM);
8638
8639         case KEY_wait:
8640             FUN0(OP_WAIT);
8641
8642         case KEY_waitpid:
8643             LOP(OP_WAITPID,XTERM);
8644
8645         case KEY_wantarray:
8646             FUN0(OP_WANTARRAY);
8647
8648         case KEY_write:
8649 #ifdef EBCDIC
8650         {
8651             char ctl_l[2];
8652             ctl_l[0] = toCTRL('L');
8653             ctl_l[1] = '\0';
8654             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8655         }
8656 #else
8657             /* Make sure $^L is defined */
8658             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8659 #endif
8660             UNI(OP_ENTERWRITE);
8661
8662         case KEY_x:
8663             if (PL_expect == XOPERATOR) {
8664                 if (*s == '=' && !PL_lex_allbrackets &&
8665                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8666                     return REPORT(0);
8667                 Mop(OP_REPEAT);
8668             }
8669             check_uni();
8670             goto just_a_word;
8671
8672         case KEY_xor:
8673             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8674                 return REPORT(0);
8675             pl_yylval.ival = OP_XOR;
8676             OPERATOR(OROP);
8677         }
8678     }}
8679 }
8680 #ifdef __SC__
8681 #pragma segment Main
8682 #endif
8683
8684 /*
8685   S_pending_ident
8686
8687   Looks up an identifier in the pad or in a package
8688
8689   Returns:
8690     PRIVATEREF if this is a lexical name.
8691     WORD       if this belongs to a package.
8692
8693   Structure:
8694       if we're in a my declaration
8695           complain (yyerror) if they tried to say my($foo::bar)
8696           build the ops for a my() declaration
8697       if it's an access to a my() variable
8698           build ops for access to a my() variable
8699       if in a dq string, and they've said @foo and we can't find @foo
8700           warn
8701       build ops for a bareword
8702 */
8703
8704 static int
8705 S_pending_ident(pTHX)
8706 {
8707     dVAR;
8708     PADOFFSET tmp = 0;
8709     const char pit = (char)pl_yylval.ival;
8710     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8711     /* All routes through this function want to know if there is a colon.  */
8712     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8713
8714     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8715           "### Pending identifier '%s'\n", PL_tokenbuf); });
8716
8717     /* if we're in a my(), we can't allow dynamics here.
8718        $foo'bar has already been turned into $foo::bar, so
8719        just check for colons.
8720
8721        if it's a legal name, the OP is a PADANY.
8722     */
8723     if (PL_in_my) {
8724         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8725             if (has_colon)
8726                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8727                                   "variable %s in \"our\"",
8728                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8729             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8730         }
8731         else {
8732             if (has_colon)
8733                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8734                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8735                             UTF ? SVf_UTF8 : 0);
8736
8737             pl_yylval.opval = newOP(OP_PADANY, 0);
8738             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8739                                                         UTF ? SVf_UTF8 : 0);
8740             return PRIVATEREF;
8741         }
8742     }
8743
8744     /*
8745        build the ops for accesses to a my() variable.
8746     */
8747
8748     if (!has_colon) {
8749         if (!PL_in_my)
8750             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8751                                     UTF ? SVf_UTF8 : 0);
8752         if (tmp != NOT_IN_PAD) {
8753             /* might be an "our" variable" */
8754             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8755                 /* build ops for a bareword */
8756                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8757                 HEK * const stashname = HvNAME_HEK(stash);
8758                 SV *  const sym = newSVhek(stashname);
8759                 sv_catpvs(sym, "::");
8760                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8761                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8762                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8763                 if (pit != '&')
8764                   gv_fetchsv(sym,
8765                     (PL_in_eval
8766                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8767                         : GV_ADDMULTI
8768                     ),
8769                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8770                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8771                      : SVt_PVHV));
8772                 return WORD;
8773             }
8774
8775             pl_yylval.opval = newOP(OP_PADANY, 0);
8776             pl_yylval.opval->op_targ = tmp;
8777             return PRIVATEREF;
8778         }
8779     }
8780
8781     /*
8782        Whine if they've said @foo in a doublequoted string,
8783        and @foo isn't a variable we can find in the symbol
8784        table.
8785     */
8786     if (ckWARN(WARN_AMBIGUOUS) &&
8787         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8788         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8789                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8790         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8791                 /* DO NOT warn for @- and @+ */
8792                 && !( PL_tokenbuf[2] == '\0' &&
8793                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8794            )
8795         {
8796             /* Downgraded from fatal to warning 20000522 mjd */
8797             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8798                         "Possible unintended interpolation of %"SVf" in string",
8799                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8800                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8801         }
8802     }
8803
8804     /* build ops for a bareword */
8805     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8806                                    newSVpvn_flags(PL_tokenbuf + 1,
8807                                                       tokenbuf_len - 1,
8808                                                       UTF ? SVf_UTF8 : 0 ));
8809     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8810     if (pit != '&')
8811         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8812                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8813                      | ( UTF ? SVf_UTF8 : 0 ),
8814                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8815                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8816                       : SVt_PVHV));
8817     return WORD;
8818 }
8819
8820 STATIC void
8821 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8822 {
8823     dVAR;
8824
8825     PERL_ARGS_ASSERT_CHECKCOMMA;
8826
8827     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8828         if (ckWARN(WARN_SYNTAX)) {
8829             int level = 1;
8830             const char *w;
8831             for (w = s+2; *w && level; w++) {
8832                 if (*w == '(')
8833                     ++level;
8834                 else if (*w == ')')
8835                     --level;
8836             }
8837             while (isSPACE(*w))
8838                 ++w;
8839             /* the list of chars below is for end of statements or
8840              * block / parens, boolean operators (&&, ||, //) and branch
8841              * constructs (or, and, if, until, unless, while, err, for).
8842              * Not a very solid hack... */
8843             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8844                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8845                             "%s (...) interpreted as function",name);
8846         }
8847     }
8848     while (s < PL_bufend && isSPACE(*s))
8849         s++;
8850     if (*s == '(')
8851         s++;
8852     while (s < PL_bufend && isSPACE(*s))
8853         s++;
8854     if (isIDFIRST_lazy_if(s,UTF)) {
8855         const char * const w = s;
8856         s += UTF ? UTF8SKIP(s) : 1;
8857         while (isALNUM_lazy_if(s,UTF))
8858             s += UTF ? UTF8SKIP(s) : 1;
8859         while (s < PL_bufend && isSPACE(*s))
8860             s++;
8861         if (*s == ',') {
8862             GV* gv;
8863             if (keyword(w, s - w, 0))
8864                 return;
8865
8866             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8867             if (gv && GvCVu(gv))
8868                 return;
8869             Perl_croak(aTHX_ "No comma allowed after %s", what);
8870         }
8871     }
8872 }
8873
8874 /* Either returns sv, or mortalizes sv and returns a new SV*.
8875    Best used as sv=new_constant(..., sv, ...).
8876    If s, pv are NULL, calls subroutine with one argument,
8877    and type is used with error messages only. */
8878
8879 STATIC SV *
8880 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8881                SV *sv, SV *pv, const char *type, STRLEN typelen)
8882 {
8883     dVAR; dSP;
8884     HV * table = GvHV(PL_hintgv);                /* ^H */
8885     SV *res;
8886     SV **cvp;
8887     SV *cv, *typesv;
8888     const char *why1 = "", *why2 = "", *why3 = "";
8889
8890     PERL_ARGS_ASSERT_NEW_CONSTANT;
8891
8892     /* charnames doesn't work well if there have been errors found */
8893     if (PL_error_count > 0 && strEQ(key,"charnames"))
8894         return &PL_sv_undef;
8895
8896     if (!table
8897         || ! (PL_hints & HINT_LOCALIZE_HH)
8898         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8899         || ! SvOK(*cvp))
8900     {
8901         SV *msg;
8902
8903         /* Here haven't found what we're looking for.  If it is charnames,
8904          * perhaps it needs to be loaded.  Try doing that before giving up */
8905         if (strEQ(key,"charnames")) {
8906             Perl_load_module(aTHX_
8907                             0,
8908                             newSVpvs("_charnames"),
8909                              /* version parameter; no need to specify it, as if
8910                               * we get too early a version, will fail anyway,
8911                               * not being able to find '_charnames' */
8912                             NULL,
8913                             newSVpvs(":full"),
8914                             newSVpvs(":short"),
8915                             NULL);
8916             SPAGAIN;
8917             table = GvHV(PL_hintgv);
8918             if (table
8919                 && (PL_hints & HINT_LOCALIZE_HH)
8920                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8921                 && SvOK(*cvp))
8922             {
8923                 goto now_ok;
8924             }
8925         }
8926         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8927             msg = Perl_newSVpvf(aTHX_
8928                             "Constant(%s) unknown", (type ? type: "undef"));
8929         }
8930         else {
8931             why1 = "$^H{";
8932             why2 = key;
8933             why3 = "} is not defined";
8934         report:
8935             if (strEQ(key,"charnames")) {
8936                 msg = Perl_newSVpvf(aTHX_
8937                         /* The +3 is for '\N{'; -4 for that, plus '}' */
8938                         "Unknown charname '%.*s'", (int)typelen - 4, type + 3);
8939             }
8940             else {
8941                 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8942                                 (type ? type: "undef"), why1, why2, why3);
8943             }
8944         }
8945         yyerror(SvPVX_const(msg));
8946         SvREFCNT_dec(msg);
8947         return sv;
8948     }
8949 now_ok:
8950     sv_2mortal(sv);                     /* Parent created it permanently */
8951     cv = *cvp;
8952     if (!pv && s)
8953         pv = newSVpvn_flags(s, len, SVs_TEMP);
8954     if (type && pv)
8955         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8956     else
8957         typesv = &PL_sv_undef;
8958
8959     PUSHSTACKi(PERLSI_OVERLOAD);
8960     ENTER ;
8961     SAVETMPS;
8962
8963     PUSHMARK(SP) ;
8964     EXTEND(sp, 3);
8965     if (pv)
8966         PUSHs(pv);
8967     PUSHs(sv);
8968     if (pv)
8969         PUSHs(typesv);
8970     PUTBACK;
8971     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8972
8973     SPAGAIN ;
8974
8975     /* Check the eval first */
8976     if (!PL_in_eval && SvTRUE(ERRSV)) {
8977         sv_catpvs(ERRSV, "Propagated");
8978         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8979         (void)POPs;
8980         res = SvREFCNT_inc_simple(sv);
8981     }
8982     else {
8983         res = POPs;
8984         SvREFCNT_inc_simple_void(res);
8985     }
8986
8987     PUTBACK ;
8988     FREETMPS ;
8989     LEAVE ;
8990     POPSTACK;
8991
8992     if (!SvOK(res)) {
8993         why1 = "Call to &{$^H{";
8994         why2 = key;
8995         why3 = "}} did not return a defined value";
8996         sv = res;
8997         goto report;
8998     }
8999
9000     return res;
9001 }
9002
9003 /* Returns a NUL terminated string, with the length of the string written to
9004    *slp
9005    */
9006 STATIC char *
9007 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9008 {
9009     dVAR;
9010     char *d = dest;
9011     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9012
9013     PERL_ARGS_ASSERT_SCAN_WORD;
9014
9015     for (;;) {
9016         if (d >= e)
9017             Perl_croak(aTHX_ ident_too_long);
9018         if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
9019             *d++ = *s++;
9020         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
9021             *d++ = ':';
9022             *d++ = ':';
9023             s++;
9024         }
9025         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
9026             *d++ = *s++;
9027             *d++ = *s++;
9028         }
9029         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9030             char *t = s + UTF8SKIP(s);
9031             size_t len;
9032             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9033                 t += UTF8SKIP(t);
9034             len = t - s;
9035             if (d + len > e)
9036                 Perl_croak(aTHX_ ident_too_long);
9037             Copy(s, d, len, char);
9038             d += len;
9039             s = t;
9040         }
9041         else {
9042             *d = '\0';
9043             *slp = d - dest;
9044             return s;
9045         }
9046     }
9047 }
9048
9049 STATIC char *
9050 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9051 {
9052     dVAR;
9053     char *bracket = NULL;
9054     char funny = *s++;
9055     char *d = dest;
9056     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9057
9058     PERL_ARGS_ASSERT_SCAN_IDENT;
9059
9060     if (isSPACE(*s))
9061         s = PEEKSPACE(s);
9062     if (isDIGIT(*s)) {
9063         while (isDIGIT(*s)) {
9064             if (d >= e)
9065                 Perl_croak(aTHX_ ident_too_long);
9066             *d++ = *s++;
9067         }
9068     }
9069     else {
9070         for (;;) {
9071             if (d >= e)
9072                 Perl_croak(aTHX_ ident_too_long);
9073             if (isALNUM(*s))    /* UTF handled below */
9074                 *d++ = *s++;
9075             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9076                 *d++ = ':';
9077                 *d++ = ':';
9078                 s++;
9079             }
9080             else if (*s == ':' && s[1] == ':') {
9081                 *d++ = *s++;
9082                 *d++ = *s++;
9083             }
9084             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9085                 char *t = s + UTF8SKIP(s);
9086                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9087                     t += UTF8SKIP(t);
9088                 if (d + (t - s) > e)
9089                     Perl_croak(aTHX_ ident_too_long);
9090                 Copy(s, d, t - s, char);
9091                 d += t - s;
9092                 s = t;
9093             }
9094             else
9095                 break;
9096         }
9097     }
9098     *d = '\0';
9099     d = dest;
9100     if (*d) {
9101         if (PL_lex_state != LEX_NORMAL)
9102             PL_lex_state = LEX_INTERPENDMAYBE;
9103         return s;
9104     }
9105     if (*s == '$' && s[1] &&
9106         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9107     {
9108         return s;
9109     }
9110     if (*s == '{') {
9111         bracket = s;
9112         s++;
9113     }
9114     if (s < send) {
9115         if (UTF) {
9116             const STRLEN skip = UTF8SKIP(s);
9117             STRLEN i;
9118             d[skip] = '\0';
9119             for ( i = 0; i < skip; i++ )
9120                 d[i] = *s++;
9121         }
9122         else {
9123             *d = *s++;
9124             d[1] = '\0';
9125         }
9126     }
9127     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9128         *d = toCTRL(*s);
9129         s++;
9130     }
9131     else if (ck_uni && !bracket)
9132         check_uni();
9133     if (bracket) {
9134         if (isSPACE(s[-1])) {
9135             while (s < send) {
9136                 const char ch = *s++;
9137                 if (!SPACE_OR_TAB(ch)) {
9138                     *d = ch;
9139                     break;
9140                 }
9141             }
9142         }
9143         if (isIDFIRST_lazy_if(d,UTF)) {
9144             d += UTF8SKIP(d);
9145             if (UTF) {
9146                 char *end = s;
9147                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9148                     end += UTF8SKIP(end);
9149                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9150                         end += UTF8SKIP(end);
9151                 }
9152                 Copy(s, d, end - s, char);
9153                 d += end - s;
9154                 s = end;
9155             }
9156             else {
9157                 while ((isALNUM(*s) || *s == ':') && d < e)
9158                     *d++ = *s++;
9159                 if (d >= e)
9160                     Perl_croak(aTHX_ ident_too_long);
9161             }
9162             *d = '\0';
9163             while (s < send && SPACE_OR_TAB(*s))
9164                 s++;
9165             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9166                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9167                     const char * const brack =
9168                         (const char *)
9169                         ((*s == '[') ? "[...]" : "{...}");
9170    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9171                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9172                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9173                         funny, dest, brack, funny, dest, brack);
9174                 }
9175                 bracket++;
9176                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9177                 PL_lex_allbrackets++;
9178                 return s;
9179             }
9180         }
9181         /* Handle extended ${^Foo} variables
9182          * 1999-02-27 mjd-perl-patch@plover.com */
9183         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9184                  && isALNUM(*s))
9185         {
9186             d++;
9187             while (isALNUM(*s) && d < e) {
9188                 *d++ = *s++;
9189             }
9190             if (d >= e)
9191                 Perl_croak(aTHX_ ident_too_long);
9192             *d = '\0';
9193         }
9194         if (*s == '}') {
9195             s++;
9196             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9197                 PL_lex_state = LEX_INTERPEND;
9198                 PL_expect = XREF;
9199             }
9200             if (PL_lex_state == LEX_NORMAL) {
9201                 if (ckWARN(WARN_AMBIGUOUS) &&
9202                     (keyword(dest, d - dest, 0)
9203                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9204                 {
9205                     SV *tmp = newSVpvn_flags( dest, d - dest,
9206                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9207                     if (funny == '#')
9208                         funny = '@';
9209                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9210                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9211                         funny, tmp, funny, tmp);
9212                 }
9213             }
9214         }
9215         else {
9216             s = bracket;                /* let the parser handle it */
9217             *dest = '\0';
9218         }
9219     }
9220     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9221         PL_lex_state = LEX_INTERPEND;
9222     return s;
9223 }
9224
9225 static bool
9226 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9227
9228     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9229      * the parse starting at 's', based on the subset that are valid in this
9230      * context input to this routine in 'valid_flags'. Advances s.  Returns
9231      * TRUE if the input should be treated as a valid flag, so the next char
9232      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9233      * first call on the current regex.  This routine will set it to any
9234      * charset modifier found.  The caller shouldn't change it.  This way,
9235      * another charset modifier encountered in the parse can be detected as an
9236      * error, as we have decided to allow only one */
9237
9238     const char c = **s;
9239     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9240
9241     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9242         if (isALNUM_lazy_if(*s, UTF)) {
9243             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9244                        UTF ? SVf_UTF8 : 0);
9245             (*s) += charlen;
9246             /* Pretend that it worked, so will continue processing before
9247              * dieing */
9248             return TRUE;
9249         }
9250         return FALSE;
9251     }
9252
9253     switch (c) {
9254
9255         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9256         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9257         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9258         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9259         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9260         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9261         case LOCALE_PAT_MOD:
9262             if (*charset) {
9263                 goto multiple_charsets;
9264             }
9265             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9266             *charset = c;
9267             break;
9268         case UNICODE_PAT_MOD:
9269             if (*charset) {
9270                 goto multiple_charsets;
9271             }
9272             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9273             *charset = c;
9274             break;
9275         case ASCII_RESTRICT_PAT_MOD:
9276             if (! *charset) {
9277                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9278             }
9279             else {
9280
9281                 /* Error if previous modifier wasn't an 'a', but if it was, see
9282                  * if, and accept, a second occurrence (only) */
9283                 if (*charset != 'a'
9284                     || get_regex_charset(*pmfl)
9285                         != REGEX_ASCII_RESTRICTED_CHARSET)
9286                 {
9287                         goto multiple_charsets;
9288                 }
9289                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9290             }
9291             *charset = c;
9292             break;
9293         case DEPENDS_PAT_MOD:
9294             if (*charset) {
9295                 goto multiple_charsets;
9296             }
9297             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9298             *charset = c;
9299             break;
9300     }
9301
9302     (*s)++;
9303     return TRUE;
9304
9305     multiple_charsets:
9306         if (*charset != c) {
9307             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9308         }
9309         else if (c == 'a') {
9310             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9311         }
9312         else {
9313             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9314         }
9315
9316         /* Pretend that it worked, so will continue processing before dieing */
9317         (*s)++;
9318         return TRUE;
9319 }
9320
9321 STATIC char *
9322 S_scan_pat(pTHX_ char *start, I32 type)
9323 {
9324     dVAR;
9325     PMOP *pm;
9326     char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9327     const char * const valid_flags =
9328         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9329     char charset = '\0';    /* character set modifier */
9330 #ifdef PERL_MAD
9331     char *modstart;
9332 #endif
9333
9334     PERL_ARGS_ASSERT_SCAN_PAT;
9335
9336     /* this was only needed for the initial scan_str; set it to false
9337      * so that any (?{}) code blocks etc are parsed normally */
9338     PL_reg_state.re_reparsing = FALSE;
9339     if (!s) {
9340         const char * const delimiter = skipspace(start);
9341         Perl_croak(aTHX_
9342                    (const char *)
9343                    (*delimiter == '?'
9344                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9345                     : "Search pattern not terminated" ));
9346     }
9347
9348     pm = (PMOP*)newPMOP(type, 0);
9349     if (PL_multi_open == '?') {
9350         /* This is the only point in the code that sets PMf_ONCE:  */
9351         pm->op_pmflags |= PMf_ONCE;
9352
9353         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9354            allows us to restrict the list needed by reset to just the ??
9355            matches.  */
9356         assert(type != OP_TRANS);
9357         if (PL_curstash) {
9358             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9359             U32 elements;
9360             if (!mg) {
9361                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9362                                  0);
9363             }
9364             elements = mg->mg_len / sizeof(PMOP**);
9365             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9366             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9367             mg->mg_len = elements * sizeof(PMOP**);
9368             PmopSTASH_set(pm,PL_curstash);
9369         }
9370     }
9371 #ifdef PERL_MAD
9372     modstart = s;
9373 #endif
9374
9375     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9376      * anon CV. False positives like qr/[(?{]/ are harmless */
9377
9378     if (type == OP_QR) {
9379         STRLEN len;
9380         char *e, *p = SvPV(PL_lex_stuff, len);
9381         e = p + len;
9382         for (; p < e; p++) {
9383             if (p[0] == '(' && p[1] == '?'
9384                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9385             {
9386                 pm->op_pmflags |= PMf_HAS_CV;
9387                 break;
9388             }
9389         }
9390         pm->op_pmflags |= PMf_IS_QR;
9391     }
9392
9393     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9394 #ifdef PERL_MAD
9395     if (PL_madskills && modstart != s) {
9396         SV* tmptoken = newSVpvn(modstart, s - modstart);
9397         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9398     }
9399 #endif
9400     /* issue a warning if /c is specified,but /g is not */
9401     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9402     {
9403         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9404                        "Use of /c modifier is meaningless without /g" );
9405     }
9406
9407     PL_lex_op = (OP*)pm;
9408     pl_yylval.ival = OP_MATCH;
9409     return s;
9410 }
9411
9412 STATIC char *
9413 S_scan_subst(pTHX_ char *start)
9414 {
9415     dVAR;
9416     char *s;
9417     PMOP *pm;
9418     I32 first_start;
9419     I32 es = 0;
9420     char charset = '\0';    /* character set modifier */
9421 #ifdef PERL_MAD
9422     char *modstart;
9423 #endif
9424
9425     PERL_ARGS_ASSERT_SCAN_SUBST;
9426
9427     pl_yylval.ival = OP_NULL;
9428
9429     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9430
9431     if (!s)
9432         Perl_croak(aTHX_ "Substitution pattern not terminated");
9433
9434     if (s[-1] == PL_multi_open)
9435         s--;
9436 #ifdef PERL_MAD
9437     if (PL_madskills) {
9438         CURMAD('q', PL_thisopen);
9439         CURMAD('_', PL_thiswhite);
9440         CURMAD('E', PL_thisstuff);
9441         CURMAD('Q', PL_thisclose);
9442         PL_realtokenstart = s - SvPVX(PL_linestr);
9443     }
9444 #endif
9445
9446     first_start = PL_multi_start;
9447     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9448     if (!s) {
9449         if (PL_lex_stuff) {
9450             SvREFCNT_dec(PL_lex_stuff);
9451             PL_lex_stuff = NULL;
9452         }
9453         Perl_croak(aTHX_ "Substitution replacement not terminated");
9454     }
9455     PL_multi_start = first_start;       /* so whole substitution is taken together */
9456
9457     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9458
9459 #ifdef PERL_MAD
9460     if (PL_madskills) {
9461         CURMAD('z', PL_thisopen);
9462         CURMAD('R', PL_thisstuff);
9463         CURMAD('Z', PL_thisclose);
9464     }
9465     modstart = s;
9466 #endif
9467
9468     while (*s) {
9469         if (*s == EXEC_PAT_MOD) {
9470             s++;
9471             es++;
9472         }
9473         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9474         {
9475             break;
9476         }
9477     }
9478
9479 #ifdef PERL_MAD
9480     if (PL_madskills) {
9481         if (modstart != s)
9482             curmad('m', newSVpvn(modstart, s - modstart));
9483         append_madprops(PL_thismad, (OP*)pm, 0);
9484         PL_thismad = 0;
9485     }
9486 #endif
9487     if ((pm->op_pmflags & PMf_CONTINUE)) {
9488         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9489     }
9490
9491     if (es) {
9492         SV * const repl = newSVpvs("");
9493
9494         PL_multi_end = 0;
9495         pm->op_pmflags |= PMf_EVAL;
9496         while (es-- > 0) {
9497             if (es)
9498                 sv_catpvs(repl, "eval ");
9499             else
9500                 sv_catpvs(repl, "do ");
9501         }
9502         sv_catpvs(repl, "{");
9503         sv_catsv(repl, PL_sublex_info.repl);
9504         sv_catpvs(repl, "}");
9505         SvEVALED_on(repl);
9506         SvREFCNT_dec(PL_sublex_info.repl);
9507         PL_sublex_info.repl = repl;
9508     }
9509
9510     PL_lex_op = (OP*)pm;
9511     pl_yylval.ival = OP_SUBST;
9512     return s;
9513 }
9514
9515 STATIC char *
9516 S_scan_trans(pTHX_ char *start)
9517 {
9518     dVAR;
9519     char* s;
9520     OP *o;
9521     U8 squash;
9522     U8 del;
9523     U8 complement;
9524     bool nondestruct = 0;
9525 #ifdef PERL_MAD
9526     char *modstart;
9527 #endif
9528
9529     PERL_ARGS_ASSERT_SCAN_TRANS;
9530
9531     pl_yylval.ival = OP_NULL;
9532
9533     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9534     if (!s)
9535         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9536
9537     if (s[-1] == PL_multi_open)
9538         s--;
9539 #ifdef PERL_MAD
9540     if (PL_madskills) {
9541         CURMAD('q', PL_thisopen);
9542         CURMAD('_', PL_thiswhite);
9543         CURMAD('E', PL_thisstuff);
9544         CURMAD('Q', PL_thisclose);
9545         PL_realtokenstart = s - SvPVX(PL_linestr);
9546     }
9547 #endif
9548
9549     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9550     if (!s) {
9551         if (PL_lex_stuff) {
9552             SvREFCNT_dec(PL_lex_stuff);
9553             PL_lex_stuff = NULL;
9554         }
9555         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9556     }
9557     if (PL_madskills) {
9558         CURMAD('z', PL_thisopen);
9559         CURMAD('R', PL_thisstuff);
9560         CURMAD('Z', PL_thisclose);
9561     }
9562
9563     complement = del = squash = 0;
9564 #ifdef PERL_MAD
9565     modstart = s;
9566 #endif
9567     while (1) {
9568         switch (*s) {
9569         case 'c':
9570             complement = OPpTRANS_COMPLEMENT;
9571             break;
9572         case 'd':
9573             del = OPpTRANS_DELETE;
9574             break;
9575         case 's':
9576             squash = OPpTRANS_SQUASH;
9577             break;
9578         case 'r':
9579             nondestruct = 1;
9580             break;
9581         default:
9582             goto no_more;
9583         }
9584         s++;
9585     }
9586   no_more:
9587
9588     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9589     o->op_private &= ~OPpTRANS_ALL;
9590     o->op_private |= del|squash|complement|
9591       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9592       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9593
9594     PL_lex_op = o;
9595     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9596
9597 #ifdef PERL_MAD
9598     if (PL_madskills) {
9599         if (modstart != s)
9600             curmad('m', newSVpvn(modstart, s - modstart));
9601         append_madprops(PL_thismad, o, 0);
9602         PL_thismad = 0;
9603     }
9604 #endif
9605
9606     return s;
9607 }
9608
9609 /* scan_heredoc
9610    Takes a pointer to the first < in <<FOO.
9611    Returns a pointer to the byte following <<FOO.
9612
9613    This function scans a heredoc, which involves different methods
9614    depending on whether we are in a string eval, quoted construct, etc.
9615    This is because PL_linestr could contain a single line of input, or
9616    a whole string being evalled, or the contents of the current quote-
9617    like operator.
9618
9619    The two basic methods are:
9620     - Steal lines from the input stream
9621     - Scan the heredoc in PL_linestr and remove it therefrom
9622
9623    In a file scope or filtered eval, the first method is used; in a
9624    string eval, the second.
9625
9626    In a quote-like operator, we have to choose between the two,
9627    depending on where we can find a newline.  We peek into outer lex-
9628    ing scopes until we find one with a newline in it.  If we reach the
9629    outermost lexing scope and it is a file, we use the stream method.
9630    Otherwise it is treated as an eval.
9631 */
9632
9633 STATIC char *
9634 S_scan_heredoc(pTHX_ register char *s)
9635 {
9636     dVAR;
9637     I32 op_type = OP_SCALAR;    /* handed back through pl_yylval.ival */
9638     I32 len;                    /* length of the terminator built in PL_tokenbuf */
9639     SV *tmpstr;                 /* collects the heredoc body; ends up in PL_lex_stuff */
9640     char term;                  /* quote style of the tag; reused by the stream
                                        method as the first byte of the tag */
9641     char *d;
9642     char *e;
9643     char *peek;
9644     const bool infile = PL_rsfp || PL_parser->filtered;
9645     LEXSHARED *shared = PL_parser->lex_shared;
9646 #ifdef PERL_MAD
9647     I32 stuffstart = s - SvPVX(PL_linestr);
9648     char *tstart;
9649
9650     PL_realtokenstart = -1;
9651 #endif
9652
9653     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9654
         /* Step over the two '<' characters, then build the terminator in
            PL_tokenbuf as "\nTAG\n".  The leading newline is what the
            in-buffer search further down compares against, so the tag is
            only recognised when it follows a newline. */
9655     s += 2;
9656     d = PL_tokenbuf + 1;
9657     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9658     *PL_tokenbuf = '\n';
9659     peek = s;
9660     while (SPACE_OR_TAB(*peek))
9661         peek++;
         /* Quoted tag, optionally preceded by blanks: <<"TAG", <<'TAG', <<`TAG` */
9662     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9663         s = peek;
9664         term = *s++;
9665         s = delimcpy(d, e, s, PL_bufend, term, &len);
             /* ending up at PL_bufend is treated as "no closing quote" */
9666         if (s == PL_bufend)
9667             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9668         d += len;
9669         s++;
9670     }
9671     else {
9672         if (*s == '\\')
9673             /* <<\FOO is equivalent to <<'FOO' */
9674             s++, term = '\'';
9675         else
9676             term = '"';
9677         if (!isALNUM_lazy_if(s,UTF))
9678             deprecate("bare << to mean <<\"\"");
             /* Bytes that do not fit in PL_tokenbuf are skipped here; an
                overlong tag is reported by the length check that follows. */
9679         for (; isALNUM_lazy_if(s,UTF); s++) {
9680             if (d < e)
9681                 *d++ = *s;
9682         }
9683     }
9684     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9685         Perl_croak(aTHX_ "Delimiter for here document is too long");
9686     *d++ = '\n';
9687     *d = '\0';
9688     len = d - PL_tokenbuf;      /* counts the leading and the trailing newline */
9689
9690 #ifdef PERL_MAD
9691     if (PL_madskills) {
9692         tstart = PL_tokenbuf + 1;
9693         PL_thisclose = newSVpvn(tstart, len - 1);
9694         tstart = SvPVX(PL_linestr) + stuffstart;
9695         PL_thisopen = newSVpvn(tstart, s - tstart);
9696         stuffstart = s - SvPVX(PL_linestr);
9697     }
9698 #endif
9699 #ifndef PERL_STRICT_CR
         /* Rewrite line endings in the rest of the buffer in place, starting
            at the first '\r': "\r\n" and a lone "\r" become "\n", and the
            "\r" of a "\n\r" pair is dropped.  d is the write position, s the
            read position; s is put back to its old value afterwards. */
9700     d = strchr(s, '\r');
9701     if (d) {
9702         char * const olds = s;
9703         s = d;
9704         while (s < PL_bufend) {
9705             if (*s == '\r') {
9706                 *d++ = '\n';
9707                 if (*++s == '\n')
9708                     s++;
9709             }
9710             else if (*s == '\n' && s[1] == '\r') {      /* "\n\r" pair: keep the "\n", skip the "\r" */
9711                 *d++ = *s++;
9712                 s++;
9713             }
9714             else
9715                 *d++ = *s++;
9716         }
9717         *d = '\0';
9718         PL_bufend = d;
9719         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9720         s = olds;
9721     }
9722 #endif
9723 #ifdef PERL_MAD
9724     if (PL_madskills) {
9725         tstart = SvPVX(PL_linestr) + stuffstart;
9726         if (PL_thisstuff)
9727             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9728         else
9729             PL_thisstuff = newSVpvn(tstart, s - tstart);
9730     }
9731
9732     stuffstart = s - SvPVX(PL_linestr);
9733 #endif
9734
9735     tmpstr = newSV_type(SVt_PVIV);
9736     SvGROW(tmpstr, 80);
         /* <<"TAG" and bare <<TAG keep the default OP_SCALAR.
            NOTE(review): for the other two forms the IV slot of tmpstr is
            set to -1 or '\\'; the code that reads it is not visible here --
            confirm what these values mean to the consumer of PL_lex_stuff. */
9737     if (term == '\'') {
9738         op_type = OP_CONST;
9739         SvIV_set(tmpstr, -1);
9740     }
9741     else if (term == '`') {
9742         op_type = OP_BACKTICK;
9743         SvIV_set(tmpstr, '\\');
9744     }
9745
         /* PL_multi_start is the line following the current one. */
9746     PL_multi_start = CopLINE(PL_curcop) + 1;
9747     PL_multi_open = PL_multi_close = '<';
9748     /* inside a string eval or quote-like operator */
9749     if (!infile || PL_lex_inwhat) {
9750         SV *linestr;
9751         char *bufend;
9752         char * const olds = s;
9753         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9754         /* These two fields are not set until an inner lexing scope is
9755            entered.  But we need them set here. */
9756         shared->ls_bufptr  = s;
9757         shared->ls_linestr = PL_linestr;
9758         if (PL_lex_inwhat)
9759           /* Look for a newline.  If the current buffer does not have one,
9760              peek into the line buffer of the parent lexing scope, going
9761              up as many levels as necessary to find one with a newline
9762              after bufptr.
9763            */
9764           while (!(s = (char *)memchr(
9765                     (void *)shared->ls_bufptr, '\n',
9766                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9767                 ))) {
9768             shared = shared->ls_prev;
9769             /* shared is only null if we have gone beyond the outermost
9770                lexing scope.  In a file, we will have broken out of the
9771                loop in the previous iteration.  In an eval, the string buf-
9772                fer ends with "\n;", so the while condition below will have
9773                evaluated to false.  So shared can never be null. */
9774             assert(shared);
9775             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9776                most lexing scope.  In a file, shared->ls_linestr at that
9777                level is just one line, so there is no body to steal. */
9778             if (infile && !shared->ls_prev) {
9779                 s = olds;
9780                 goto streaming;
9781             }
9782           }
9783         else {  /* eval */
9784             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9785             assert(s);
9786         }
9787         linestr = shared->ls_linestr;
9788         bufend = SvEND(linestr);
9789         d = s;
             /* s sits on a newline.  Advance until the bytes at s are the
                whole "\nTAG\n" terminator, counting the newlines passed. */
9790         while (s < bufend &&
9791           (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9792             if (*s++ == '\n')
9793                 ++shared->herelines;
9794         }
9795         if (s >= bufend) {
9796             goto interminable;
9797         }
             /* Body = the bytes after the newline at d, up to and including
                the newline at s that opens the terminator. */
9798         sv_setpvn(tmpstr,d+1,s-d);
9799 #ifdef PERL_MAD
9800         if (PL_madskills) {
9801             if (PL_thisstuff)
9802                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9803             else
9804                 PL_thisstuff = newSVpvn(d + 1, s - d);
9805             stuffstart = s - SvPVX(PL_linestr);
9806         }
9807 #endif
9808         s += len - 1;
9809         /* the preceding stmt passes a newline */
9810         shared->herelines++;
9811
9812         /* s now points to the newline after the heredoc terminator.
9813            d points to the newline before the body of the heredoc.
9814          */
9815
9816         /* We are going to modify linestr in place here, so set
9817            aside copies of the string if necessary for re-evals or
9818            (caller $n)[6]. */
9819         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9820            check shared->re_eval_str. */
9821         if (shared->re_eval_start || shared->re_eval_str) {
9822             /* Set aside the rest of the regexp */
9823             if (!shared->re_eval_str)
9824                 shared->re_eval_str =
9825                        newSVpvn(shared->re_eval_start,
9826                                 bufend - shared->re_eval_start);
                 /* s-d bytes are about to be cut out of the buffer below */
9827             shared->re_eval_start -= s-d;
9828         }
9829         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9830             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9831             cx->blk_eval.cur_text == linestr)
9832         {
9833             cx->blk_eval.cur_text = newSVsv(linestr);
9834             SvSCREAM_on(cx->blk_eval.cur_text);
9835         }
9836         /* Copy everything from s onwards back to d. */
9837         Move(s,d,bufend-s + 1,char);
9838         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9839         /* Setting PL_bufend only applies when we have not dug deeper
9840            into other scopes, because sublex_done sets PL_bufend to
9841            SvEND(PL_linestr). */
9842         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
             /* resume at the position saved on entry to this branch */
9843         s = olds;
9844     }
9845     else
9846     {
9847       SV *linestr_save;
9848      streaming:
           /* Stream method: fetch input chunk by chunk into a scratch
              PL_linestr and append each chunk to tmpstr until the newly
              read text begins with "TAG\n". */
9849       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
           /* From here on the comparison skips the leading newline of the
              stored terminator: term becomes the first byte after it and
              len shrinks by one to match. */
9850       term = PL_tokenbuf[1];
9851       len--;
9852       linestr_save = PL_linestr; /* must restore this afterwards */
9853       d = s;                     /* and this */
9854       PL_linestr = newSVpvs("");
9855       PL_bufend = SvPVX(PL_linestr);
9856       while (1) {
9857 #ifdef PERL_MAD
9858         if (PL_madskills) {
9859             tstart = SvPVX(PL_linestr) + stuffstart;
9860             if (PL_thisstuff)
9861                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9862             else
9863                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9864         }
9865 #endif
9866         PL_bufptr = PL_bufend;
             /* While the next chunk is fetched PL_curcop carries the line
                number of that chunk; afterwards it is put back to the value
                it had when PL_multi_start was computed. */
9867         CopLINE_set(PL_curcop,
9868                     PL_multi_start + shared->herelines);
             /* Give up when nothing more could be read, unless the body
                collected so far is non-empty and ends in a newline. */
9869         if (!lex_next_chunk(LEX_NO_TERM)
9870          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9871             SvREFCNT_dec(linestr_save);
9872             goto interminable;
9873         }
9874         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
             /* Append a newline when the chunk does not end in one.
                NOTE(review): PL_bufend is not refreshed after the append
                although it is read again just below -- confirm that this
                is intended. */
9875         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9876             lex_grow_linestr(SvCUR(PL_linestr) + 2);
9877             sv_catpvs(PL_linestr, "\n\0");
9878         }
9879         s = PL_bufptr;
9880 #ifdef PERL_MAD
9881         stuffstart = s - SvPVX(PL_linestr);
9882 #endif
9883         shared->herelines++;
9884         PL_last_lop = PL_last_uni = NULL;
9885 #ifndef PERL_STRICT_CR
             /* Fix up only the end of the chunk: a trailing "\r\n" or "\n\r"
                collapses to "\n", a trailing lone "\r" becomes "\n". */
9886         if (PL_bufend - PL_linestart >= 2) {
9887             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9888                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9889             {
9890                 PL_bufend[-2] = '\n';
9891                 PL_bufend--;
9892                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9893             }
9894             else if (PL_bufend[-1] == '\r')
9895                 PL_bufend[-1] = '\n';
9896         }
9897         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9898             PL_bufend[-1] = '\n';
9899 #endif
9900         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
                 /* Terminator reached: drop the scratch buffer, put the
                    saved line buffer back and resume at the saved position. */
9901             SvREFCNT_dec(PL_linestr);
9902             PL_linestr = linestr_save;
9903             PL_linestart = SvPVX(linestr_save);
9904             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9905             s = d;
9906             break;
9907         }
9908         else {
9909             sv_catsv(tmpstr,PL_linestr);
9910         }
9911       }
9912     }
9913     PL_multi_end = CopLINE(PL_curcop);
         /* shrink the allocation when more than a few bytes are spare */
9914     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9915         SvPV_shrink_to_cur(tmpstr);
9916     }
9917     if (!IN_BYTES) {
9918         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9919             SvUTF8_on(tmpstr);
9920         else if (PL_encoding)
9921             sv_recode_to_utf8(tmpstr, PL_encoding);
9922     }
9923     PL_lex_stuff = tmpstr;
9924     pl_yylval.ival = op_type;
9925     return s;
9926
9927   interminable:
         /* No terminator was found: release the partial body, reset the
            line number on PL_curcop and report the missing terminator.
            NOTE(review): nothing follows missingterm(), so it is presumably
            a non-returning function -- confirm. */
9928     SvREFCNT_dec(tmpstr);
9929     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9930     missingterm(PL_tokenbuf + 1);
9931 }
9932
9933 /* scan_inputsymbol
9934    takes: current position in input buffer
9935    returns: new position in input buffer
9936    side-effects: pl_yylval and lex_op are set.
9937
9938    This code handles:
9939
9940    <>           read from ARGV
9941    <FH>         read from filehandle
9942    <pkg::FH>    read from package qualified filehandle
9943    <pkg'FH>     read from package qualified filehandle
9944    <$fh>        read from filehandle in $fh
9945    <*.h>        filename glob
9946
9947 */
9948
9949 STATIC char *
9950 S_scan_inputsymbol(pTHX_ char *start)
9951 {
9952     dVAR;
9953     char *s = start;            /* current position in buffer */
9954     char *end;
9955     I32 len;
9956     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9957     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9958
9959     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9960
9961     end = strchr(s, '\n');
9962     if (!end)
9963         end = PL_bufend;
9964     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9965
9966     /* die if we didn't have space for the contents of the <>,
9967        or if it didn't end, or if we see a newline
9968     */
9969
9970     if (len >= (I32)sizeof PL_tokenbuf)
9971         Perl_croak(aTHX_ "Excessively long <> operator");
9972     if (s >= end)
9973         Perl_croak(aTHX_ "Unterminated <> operator");
9974
9975     s++;
9976
9977     /* check for <$fh>
9978        Remember, only scalar variables are interpreted as filehandles by
9979        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9980        treated as a glob() call.
9981        This code makes use of the fact that except for the $ at the front,
9982        a scalar variable and a filehandle look the same.
9983     */
9984     if (*d == '$' && d[1]) d++;
9985
9986     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9987     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9988         d += UTF ? UTF8SKIP(d) : 1;
9989
9990     /* If we've tried to read what we allow filehandles to look like, and
9991        there's still text left, then it must be a glob() and not a getline.
9992        Use scan_str to pull out the stuff between the <> and treat it
9993        as nothing more than a string.
9994     */
9995
9996     if (d - PL_tokenbuf != len) {
9997         pl_yylval.ival = OP_GLOB;
9998         s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9999         if (!s)
10000            Perl_croak(aTHX_ "Glob not terminated");
10001         return s;
10002     }
10003     else {
10004         bool readline_overriden = FALSE;
10005         GV *gv_readline;
10006         GV **gvp;
10007         /* we're in a filehandle read situation */
10008         d = PL_tokenbuf;
10009
10010         /* turn <> into <ARGV> */
10011         if (!len)
10012             Copy("ARGV",d,5,char);
10013
10014         /* Check whether readline() is overriden */
10015         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10016         if ((gv_readline
10017                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10018                 ||
10019                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10020                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10021                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10022             readline_overriden = TRUE;
10023
10024         /* if <$fh>, create the ops to turn the variable into a
10025            filehandle
10026         */
10027         if (*d == '$') {
10028             /* try to find it in the pad for this block, otherwise find
10029                add symbol table ops
10030             */
10031             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10032             if (tmp != NOT_IN_PAD) {
10033                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10034                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10035                     HEK * const stashname = HvNAME_HEK(stash);
10036                     SV * const sym = sv_2mortal(newSVhek(stashname));
10037                     sv_catpvs(sym, "::");
10038                     sv_catpv(sym, d+1);
10039                     d = SvPVX(sym);
10040                     goto intro_sym;
10041                 }
10042                 else {
10043                     OP * const o = newOP(OP_PADSV, 0);
10044                     o->op_targ = tmp;
10045                     PL_lex_op = readline_overriden
10046                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10047                                 op_append_elem(OP_LIST, o,
10048                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10049                         : (OP*)newUNOP(OP_READLINE, 0, o);
10050                 }
10051             }
10052             else {
10053                 GV *gv;
10054                 ++d;
10055 intro_sym:
10056                 gv = gv_fetchpv(d,
10057                                 (PL_in_eval
10058                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10059                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10060                                 SVt_PV);
10061                 PL_lex_op = readline_overriden
10062                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10063                             op_append_elem(OP_LIST,
10064                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10065                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10066                     : (OP*)newUNOP(OP_READLINE, 0,
10067                             newUNOP(OP_RV2SV, 0,
10068                                 newGVOP(OP_GV, 0, gv)));
10069             }
10070             if (!readline_overriden)
10071                 PL_lex_op->op_flags |= OPf_SPECIAL;
10072             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10073             pl_yylval.ival = OP_NULL;
10074         }
10075
10076         /* If it's none of the above, it must be a literal filehandle
10077            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10078         else {
10079             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10080             PL_lex_op = readline_overriden
10081                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10082                         op_append_elem(OP_LIST,
10083                             newGVOP(OP_GV, 0, gv),
10084                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10085                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10086             pl_yylval.ival = OP_NULL;
10087         }
10088     }
10089
10090     return s;
10091 }
10092
10093
10094 /* scan_str
10095    takes: start position in buffer
10096           keep_quoted preserve \ on the embedded delimiter(s)
10097           keep_delims preserve the delimiters around the string
10098           re_reparse  compiling a run-time /(?{})/:
10099                         collapse // to /,  and skip encoding src
10100    returns: position to continue reading from buffer
10101    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10102         updates the read buffer.
10103
10104    This subroutine pulls a string out of the input.  It is called for:
10105         q               single quotes           q(literal text)
10106         '               single quotes           'literal text'
10107         qq              double quotes           qq(interpolate $here please)
10108         "               double quotes           "interpolate $here please"
10109         qx              backticks               qx(/bin/ls -l)
10110         `               backticks               `/bin/ls -l`
10111         qw              quote words             @EXPORT_OK = qw( func() $spam )
10112         m//             regexp match            m/this/
10113         s///            regexp substitute       s/this/that/
10114         tr///           string transliterate    tr/this/that/
10115         y///            string transliterate    y/this/that/
10116         ($*@)           sub prototypes          sub foo ($)
10117         (stuff)         sub attr parameters     sub foo : attr(stuff)
10118         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10119
10120    In most of these cases (all but <>, patterns and transliterate)
10121    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10122    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10123    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10124    calls scan_str().
10125
10126    It skips whitespace before the string starts, and treats the first
10127    character as the delimiter.  If the delimiter is one of ([{< then
10128    the corresponding "close" character )]}> is used as the closing
10129    delimiter.  It allows quoting of delimiters, and if the string has
10130    balanced delimiters ([{<>}]) it allows nesting.
10131
10132    On success, the SV with the resulting string is put into lex_stuff or,
10133    if that is already non-NULL, into lex_repl. The second case occurs only
10134    when parsing the RHS of the special constructs s/// and tr/// (y///).
10135    For convenience, the terminating delimiter character is stuffed into
10136    SvIVX of the SV.
10137 */
10138
10139 STATIC char *
10140 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10141 {
10142     dVAR;
10143     SV *sv;                             /* scalar value: string */
10144     const char *tmps;                   /* temp string, used for delimiter matching */
10145     char *s = start;            /* current position in the buffer */
10146     char term;                  /* terminating character */
10147     char *to;                   /* current position in the sv's data */
10148     I32 brackets = 1;                   /* bracket nesting level */
10149     bool has_utf8 = FALSE;              /* is there any utf8 content? */
10150     I32 termcode;                       /* terminating char. code */
10151     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
10152     STRLEN termlen;                     /* length of terminating string */
10153     int last_off = 0;                   /* last position for nesting bracket */
10154 #ifdef PERL_MAD
10155     int stuffstart;
10156     char *tstart;
10157 #endif
10158
10159     PERL_ARGS_ASSERT_SCAN_STR;
10160
10161     /* skip space before the delimiter */
10162     if (isSPACE(*s)) {
10163         s = PEEKSPACE(s);
10164     }
10165
10166 #ifdef PERL_MAD
10167     if (PL_realtokenstart >= 0) {
10168         stuffstart = PL_realtokenstart;
10169         PL_realtokenstart = -1;
10170     }
10171     else
10172         stuffstart = start - SvPVX(PL_linestr);
10173 #endif
10174     /* mark where we are, in case we need to report errors */
10175     CLINE;
10176
10177     /* after skipping whitespace, the next character is the terminator */
10178     term = *s;
10179     if (!UTF) {
10180         termcode = termstr[0] = term;
10181         termlen = 1;
10182     }
10183     else {
10184         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10185         Copy(s, termstr, termlen, U8);
10186         if (!UTF8_IS_INVARIANT(term))
10187             has_utf8 = TRUE;
10188     }
10189
10190     /* mark where we are */
10191     PL_multi_start = CopLINE(PL_curcop);
10192     PL_multi_open = term;
10193
10194     /* find corresponding closing delimiter */
10195     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10196         termcode = termstr[0] = term = tmps[5];
10197
10198     PL_multi_close = term;
10199
10200     /* create a new SV to hold the contents.  79 is the SV's initial length.
10201        What a random number. */
10202     sv = newSV_type(SVt_PVIV);
10203     SvGROW(sv, 80);
10204     SvIV_set(sv, termcode);
10205     (void)SvPOK_only(sv);               /* validate pointer */
10206
10207     /* move past delimiter and try to read a complete string */
10208     if (keep_delims)
10209         sv_catpvn(sv, s, termlen);
10210     s += termlen;
10211 #ifdef PERL_MAD
10212     tstart = SvPVX(PL_linestr) + stuffstart;
10213     if (!PL_thisopen && !keep_delims) {
10214         PL_thisopen = newSVpvn(tstart, s - tstart);
10215         stuffstart = s - SvPVX(PL_linestr);
10216     }
10217 #endif
10218     for (;;) {
10219         if (PL_encoding && !UTF && !re_reparse) {
10220             bool cont = TRUE;
10221
10222             while (cont) {
10223                 int offset = s - SvPVX_const(PL_linestr);
10224                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10225                                            &offset, (char*)termstr, termlen);
10226                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10227                 char * const svlast = SvEND(sv) - 1;
10228
10229                 for (; s < ns; s++) {
10230                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10231                         COPLINE_INC_WITH_HERELINES;
10232                 }
10233                 if (!found)
10234                     goto read_more_line;
10235                 else {
10236                     /* handle quoted delimiters */
10237                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10238                         const char *t;
10239                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10240                             t--;
10241                         if ((svlast-1 - t) % 2) {
10242                             if (!keep_quoted) {
10243                                 *(svlast-1) = term;
10244                                 *svlast = '\0';
10245                                 SvCUR_set(sv, SvCUR(sv) - 1);
10246                             }
10247                             continue;
10248                         }
10249                     }
10250                     if (PL_multi_open == PL_multi_close) {
10251                         cont = FALSE;
10252                     }
10253                     else {
10254                         const char *t;
10255                         char *w;
10256                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10257                             /* At here, all closes are "was quoted" one,
10258                                so we don't check PL_multi_close. */
10259                             if (*t == '\\') {
10260                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10261                                     t++;
10262                                 else
10263                                     *w++ = *t++;
10264                             }
10265                             else if (*t == PL_multi_open)
10266                                 brackets++;
10267
10268                             *w = *t;
10269                         }
10270                         if (w < t) {
10271                             *w++ = term;
10272                             *w = '\0';
10273                             SvCUR_set(sv, w - SvPVX_const(sv));
10274                         }
10275                         last_off = w - SvPVX(sv);
10276                         if (--brackets <= 0)
10277                             cont = FALSE;
10278                     }
10279                 }
10280             }
10281             if (!keep_delims) {
10282                 SvCUR_set(sv, SvCUR(sv) - 1);
10283                 *SvEND(sv) = '\0';
10284             }
10285             break;
10286         }
10287
10288         /* extend sv if need be */
10289         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10290         /* set 'to' to the next character in the sv's string */
10291         to = SvPVX(sv)+SvCUR(sv);
10292
10293         /* if open delimiter is the close delimiter read unbridle */
10294         if (PL_multi_open == PL_multi_close) {
10295             for (; s < PL_bufend; s++,to++) {
10296                 /* embedded newlines increment the current line number */
10297                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10298                     COPLINE_INC_WITH_HERELINES;
10299                 /* handle quoted delimiters */
10300                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10301                     if (!keep_quoted
10302                         && (s[1] == term
10303                             || (re_reparse && s[1] == '\\'))
10304                     )
10305                         s++;
10306                     /* any other quotes are simply copied straight through */
10307                     else
10308                         *to++ = *s++;
10309                 }
10310                 /* terminate when run out of buffer (the for() condition), or
10311                    have found the terminator */
10312                 else if (*s == term) {
10313                     if (termlen == 1)
10314                         break;
10315                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10316                         break;
10317                 }
10318                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10319                     has_utf8 = TRUE;
10320                 *to = *s;
10321             }
10322         }
10323
10324         /* if the terminator isn't the same as the start character (e.g.,
10325            matched brackets), we have to allow more in the quoting, and
10326            be prepared for nested brackets.
10327         */
10328         else {
10329             /* read until we run out of string, or we find the terminator */
10330             for (; s < PL_bufend; s++,to++) {
10331                 /* embedded newlines increment the line count */
10332                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10333                     COPLINE_INC_WITH_HERELINES;
10334                 /* backslashes can escape the open or closing characters */
10335                 if (*s == '\\' && s+1 < PL_bufend) {
10336                     if (!keep_quoted &&
10337                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10338                         s++;
10339                     else
10340                         *to++ = *s++;
10341                 }
10342                 /* allow nested opens and closes */
10343                 else if (*s == PL_multi_close && --brackets <= 0)
10344                     break;
10345                 else if (*s == PL_multi_open)
10346                     brackets++;
10347                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10348                     has_utf8 = TRUE;
10349                 *to = *s;
10350             }
10351         }
10352         /* terminate the copied string and update the sv's end-of-string */
10353         *to = '\0';
10354         SvCUR_set(sv, to - SvPVX_const(sv));
10355
10356         /*
10357          * this next chunk reads more into the buffer if we're not done yet
10358          */
10359
10360         if (s < PL_bufend)
10361             break;              /* handle case where we are done yet :-) */
10362
10363 #ifndef PERL_STRICT_CR
10364         if (to - SvPVX_const(sv) >= 2) {
10365             if ((to[-2] == '\r' && to[-1] == '\n') ||
10366                 (to[-2] == '\n' && to[-1] == '\r'))
10367             {
10368                 to[-2] = '\n';
10369                 to--;
10370                 SvCUR_set(sv, to - SvPVX_const(sv));
10371             }
10372             else if (to[-1] == '\r')
10373                 to[-1] = '\n';
10374         }
10375         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10376             to[-1] = '\n';
10377 #endif
10378
10379      read_more_line:
10380         /* if we're out of file, or a read fails, bail and reset the current
10381            line marker so we can report where the unterminated string began
10382         */
10383 #ifdef PERL_MAD
10384         if (PL_madskills) {
10385             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10386             if (PL_thisstuff)
10387                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10388             else
10389                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10390         }
10391 #endif
10392         COPLINE_INC_WITH_HERELINES;
10393         PL_bufptr = PL_bufend;
10394         if (!lex_next_chunk(0)) {
10395             sv_free(sv);
10396             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10397             return NULL;
10398         }
10399         s = PL_bufptr;
10400 #ifdef PERL_MAD
10401         stuffstart = 0;
10402 #endif
10403     }
10404
10405     /* at this point, we have successfully read the delimited string */
10406
10407     if (!PL_encoding || UTF || re_reparse) {
10408 #ifdef PERL_MAD
10409         if (PL_madskills) {
10410             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10411             const int len = s - tstart;
10412             if (PL_thisstuff)
10413                 sv_catpvn(PL_thisstuff, tstart, len);
10414             else
10415                 PL_thisstuff = newSVpvn(tstart, len);
10416             if (!PL_thisclose && !keep_delims)
10417                 PL_thisclose = newSVpvn(s,termlen);
10418         }
10419 #endif
10420
10421         if (keep_delims)
10422             sv_catpvn(sv, s, termlen);
10423         s += termlen;
10424     }
10425 #ifdef PERL_MAD
10426     else {
10427         if (PL_madskills) {
10428             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10429             const int len = s - tstart - termlen;
10430             if (PL_thisstuff)
10431                 sv_catpvn(PL_thisstuff, tstart, len);
10432             else
10433                 PL_thisstuff = newSVpvn(tstart, len);
10434             if (!PL_thisclose && !keep_delims)
10435                 PL_thisclose = newSVpvn(s - termlen,termlen);
10436         }
10437     }
10438 #endif
10439     if (has_utf8 || (PL_encoding && !re_reparse))
10440         SvUTF8_on(sv);
10441
10442     PL_multi_end = CopLINE(PL_curcop);
10443
10444     /* if we allocated too much space, give some back */
10445     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10446         SvLEN_set(sv, SvCUR(sv) + 1);
10447         SvPV_renew(sv, SvLEN(sv));
10448     }
10449
10450     /* decide whether this is the first or second quoted string we've read
10451        for this op
10452     */
10453
10454     if (PL_lex_stuff)
10455         PL_sublex_info.repl = sv;
10456     else
10457         PL_lex_stuff = sv;
10458     return s;
10459 }
10460
10461 /*
10462   scan_num
10463   takes: pointer to position in buffer, and the YYSTYPE to fill in
10464   returns: pointer to new position in buffer
10465   side-effects: builds an OP_CONST op for the constant in lvalp->opval
10466
10467   Read a number in any of the formats that Perl accepts:
10468
10469   \d(_?\d)*(\.(\d(_?\d)*)?)?([Ee][\+\-]?(\d(_?\d)*))?   12 12.34 12.
10470   \.\d(_?\d)*([Ee][\+\-]?(\d(_?\d)*))?                  .34
10471   0b[01](_?[01])*
10472   0[0-7](_?[0-7])*
10473   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10474
10475   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10476   thing it reads.
10477
10478   If it reads a number without a decimal point or an exponent, it will
10479   try converting the number to an integer and see if it can do so
10480   without loss of precision.
10481 */
10482
10483 char *
10484 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10485 {
10486     dVAR;
10487     const char *s = start;      /* current position in buffer */
10488     char *d;                    /* destination in temp buffer */
10489     char *e;                    /* end of temp buffer */
10490     NV nv;                              /* number read, as a double */
10491     SV *sv = NULL;                      /* place to put the converted number */
10492     bool floatit;                       /* boolean: int or float? */
10493     const char *lastub = NULL;          /* position of last underbar */
10494     static char const number_too_long[] = "Number too long";
10495
10496     PERL_ARGS_ASSERT_SCAN_NUM;
10497
10498     /* We use the first character to decide what type of number this is */
10499
10500     switch (*s) {
10501     default:
10502         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10503
10504     /* if it starts with a 0, it could be an octal number, a decimal in
10505        0.13 disguise, or a hexadecimal number, or a binary number. */
10506     case '0':
10507         {
10508           /* variables:
10509              u          holds the "number so far"
10510              shift      the power of 2 of the base
10511                         (hex == 4, octal == 3, binary == 1)
10512              overflowed was the number more than we can hold?
10513
10514              Shift is used when we add a digit.  It also serves as an "are
10515              we in octal/hex/binary?" indicator to disallow hex characters
10516              when in octal mode.
10517            */
10518             NV n = 0.0;
10519             UV u = 0;
10520             I32 shift;
10521             bool overflowed = FALSE;
10522             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10523             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10524             static const char* const bases[5] =
10525               { "", "binary", "", "octal", "hexadecimal" };
10526             static const char* const Bases[5] =
10527               { "", "Binary", "", "Octal", "Hexadecimal" };
10528             static const char* const maxima[5] =
10529               { "",
10530                 "0b11111111111111111111111111111111",
10531                 "",
10532                 "037777777777",
10533                 "0xffffffff" };
10534             const char *base, *Base, *max;
10535
10536             /* check for hex */
10537             if (s[1] == 'x' || s[1] == 'X') {
10538                 shift = 4;
10539                 s += 2;
10540                 just_zero = FALSE;
10541             } else if (s[1] == 'b' || s[1] == 'B') {
10542                 shift = 1;
10543                 s += 2;
10544                 just_zero = FALSE;
10545             }
10546             /* check for a decimal in disguise */
10547             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10548                 goto decimal;
10549             /* so it must be octal */
10550             else {
10551                 shift = 3;
10552                 s++;
10553             }
10554
10555             if (*s == '_') {
10556                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10557                                "Misplaced _ in number");
10558                lastub = s++;
10559             }
10560
10561             base = bases[shift];
10562             Base = Bases[shift];
10563             max  = maxima[shift];
10564
10565             /* read the rest of the number */
10566             for (;;) {
10567                 /* x is used in the overflow test,
10568                    b is the digit we're adding on. */
10569                 UV x, b;
10570
10571                 switch (*s) {
10572
10573                 /* if we don't mention it, we're done */
10574                 default:
10575                     goto out;
10576
10577                 /* _ are ignored -- but warned about if consecutive */
10578                 case '_':
10579                     if (lastub && s == lastub + 1)
10580                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10581                                        "Misplaced _ in number");
10582                     lastub = s++;
10583                     break;
10584
10585                 /* 8 and 9 are not octal */
10586                 case '8': case '9':
10587                     if (shift == 3)
10588                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10589                     /* FALL THROUGH */
10590
10591                 /* octal digits */
10592                 case '2': case '3': case '4':
10593                 case '5': case '6': case '7':
10594                     if (shift == 1)
10595                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10596                     /* FALL THROUGH */
10597
10598                 case '0': case '1':
10599                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10600                     goto digit;
10601
10602                 /* hex digits */
10603                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10604                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10605                     /* make sure they said 0x */
10606                     if (shift != 4)
10607                         goto out;
10608                     b = (*s++ & 7) + 9;
10609
10610                     /* Prepare to put the digit we have onto the end
10611                        of the number so far.  We check for overflows.
10612                     */
10613
10614                   digit:
10615                     just_zero = FALSE;
10616                     if (!overflowed) {
10617                         x = u << shift; /* make room for the digit */
10618
10619                         if ((x >> shift) != u
10620                             && !(PL_hints & HINT_NEW_BINARY)) {
10621                             overflowed = TRUE;
10622                             n = (NV) u;
10623                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10624                                              "Integer overflow in %s number",
10625                                              base);
10626                         } else
10627                             u = x | b;          /* add the digit to the end */
10628                     }
10629                     if (overflowed) {
10630                         n *= nvshift[shift];
10631                         /* If an NV has not enough bits in its
10632                          * mantissa to represent an UV this summing of
10633                          * small low-order numbers is a waste of time
10634                          * (because the NV cannot preserve the
10635                          * low-order bits anyway): we could just
10636                          * remember when did we overflow and in the
10637                          * end just multiply n by the right
10638                          * amount. */
10639                         n += (NV) b;
10640                     }
10641                     break;
10642                 }
10643             }
10644
10645           /* if we get here, we had success: make a scalar value from
10646              the number.
10647           */
10648           out:
10649
10650             /* final misplaced underbar check */
10651             if (s[-1] == '_') {
10652                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10653             }
10654
10655             if (overflowed) {
10656                 if (n > 4294967295.0)
10657                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10658                                    "%s number > %s non-portable",
10659                                    Base, max);
10660                 sv = newSVnv(n);
10661             }
10662             else {
10663 #if UVSIZE > 4
10664                 if (u > 0xffffffff)
10665                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10666                                    "%s number > %s non-portable",
10667                                    Base, max);
10668 #endif
10669                 sv = newSVuv(u);
10670             }
10671             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10672                 sv = new_constant(start, s - start, "integer",
10673                                   sv, NULL, NULL, 0);
10674             else if (PL_hints & HINT_NEW_BINARY)
10675                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10676         }
10677         break;
10678
10679     /*
10680       handle decimal numbers.
10681       we're also sent here when we read a 0 as the first digit
10682     */
10683     case '1': case '2': case '3': case '4': case '5':
10684     case '6': case '7': case '8': case '9': case '.':
10685       decimal:
10686         d = PL_tokenbuf;
10687         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10688         floatit = FALSE;
10689
10690         /* read next group of digits and _ and copy into d */
10691         while (isDIGIT(*s) || *s == '_') {
10692             /* skip underscores, checking for misplaced ones
10693                if -w is on
10694             */
10695             if (*s == '_') {
10696                 if (lastub && s == lastub + 1)
10697                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10698                                    "Misplaced _ in number");
10699                 lastub = s++;
10700             }
10701             else {
10702                 /* check for end of fixed-length buffer */
10703                 if (d >= e)
10704                     Perl_croak(aTHX_ number_too_long);
10705                 /* if we're ok, copy the character */
10706                 *d++ = *s++;
10707             }
10708         }
10709
10710         /* final misplaced underbar check */
10711         if (lastub && s == lastub + 1) {
10712             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10713         }
10714
10715         /* read a decimal portion if there is one.  avoid
10716            3..5 being interpreted as the number 3. followed
10717            by .5
10718         */
10719         if (*s == '.' && s[1] != '.') {
10720             floatit = TRUE;
10721             *d++ = *s++;
10722
10723             if (*s == '_') {
10724                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10725                                "Misplaced _ in number");
10726                 lastub = s;
10727             }
10728
10729             /* copy, ignoring underbars, until we run out of digits.
10730             */
10731             for (; isDIGIT(*s) || *s == '_'; s++) {
10732                 /* fixed length buffer check */
10733                 if (d >= e)
10734                     Perl_croak(aTHX_ number_too_long);
10735                 if (*s == '_') {
10736                    if (lastub && s == lastub + 1)
10737                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10738                                       "Misplaced _ in number");
10739                    lastub = s;
10740                 }
10741                 else
10742                     *d++ = *s;
10743             }
10744             /* fractional part ending in underbar? */
10745             if (s[-1] == '_') {
10746                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10747                                "Misplaced _ in number");
10748             }
10749             if (*s == '.' && isDIGIT(s[1])) {
10750                 /* oops, it's really a v-string, but without the "v" */
10751                 s = start;
10752                 goto vstring;
10753             }
10754         }
10755
10756         /* read exponent part, if present */
10757         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10758             floatit = TRUE;
10759             s++;
10760
10761             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10762             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10763
10764             /* stray preinitial _ */
10765             if (*s == '_') {
10766                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10767                                "Misplaced _ in number");
10768                 lastub = s++;
10769             }
10770
10771             /* allow positive or negative exponent */
10772             if (*s == '+' || *s == '-')
10773                 *d++ = *s++;
10774
10775             /* stray initial _ */
10776             if (*s == '_') {
10777                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10778                                "Misplaced _ in number");
10779                 lastub = s++;
10780             }
10781
10782             /* read digits of exponent */
10783             while (isDIGIT(*s) || *s == '_') {
10784                 if (isDIGIT(*s)) {
10785                     if (d >= e)
10786                         Perl_croak(aTHX_ number_too_long);
10787                     *d++ = *s++;
10788                 }
10789                 else {
10790                    if (((lastub && s == lastub + 1) ||
10791                         (!isDIGIT(s[1]) && s[1] != '_')))
10792                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10793                                       "Misplaced _ in number");
10794                    lastub = s++;
10795                 }
10796             }
10797         }
10798
10799
10800         /*
10801            We try to do an integer conversion first if no characters
10802            indicating "float" have been found.
10803          */
10804
10805         if (!floatit) {
10806             UV uv;
10807             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10808
10809             if (flags == IS_NUMBER_IN_UV) {
10810               if (uv <= IV_MAX)
10811                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10812               else
10813                 sv = newSVuv(uv);
10814             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10815               if (uv <= (UV) IV_MIN)
10816                 sv = newSViv(-(IV)uv);
10817               else
10818                 floatit = TRUE;
10819             } else
10820               floatit = TRUE;
10821         }
10822         if (floatit) {
10823             /* terminate the string */
10824             *d = '\0';
10825             nv = Atof(PL_tokenbuf);
10826             sv = newSVnv(nv);
10827         }
10828
10829         if ( floatit
10830              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10831             const char *const key = floatit ? "float" : "integer";
10832             const STRLEN keylen = floatit ? 5 : 7;
10833             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10834                                 key, keylen, sv, NULL, NULL, 0);
10835         }
10836         break;
10837
10838     /* if it starts with a v, it could be a v-string */
10839     case 'v':
10840 vstring:
10841                 sv = newSV(5); /* preallocate storage space */
10842                 s = scan_vstring(s, PL_bufend, sv);
10843         break;
10844     }
10845
10846     /* make the op for the constant and return */
10847
10848     if (sv)
10849         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10850     else
10851         lvalp->opval = NULL;
10852
10853     return (char *)s;
10854 }
10855
/*
  scan_formline
  takes: pointer to position in buffer
  returns: pointer to new position in buffer
  side-effects: may read more input with lex_next_chunk(); may queue
                FORMLBRACK and THING tokens with force_next(); may change
                PL_lex_state, PL_expect and PL_lex_formbrack

  Collects lines of format text into a fresh SV ("stuff").  Collection
  stops after the first line that contains '@' or '^', at a line that is
  just "." optionally followed by blanks, before a line containing "~~"
  when something has already been collected, or when no more text can be
  obtained.  Lines beginning with '#' are skipped rather than collected.
*/
10856 STATIC char *
10857 S_scan_formline(pTHX_ register char *s)
10858 {
10859     dVAR;
10860     char *eol;                  /* one past the newline ending the current line */
10861     char *t;                    /* scratch scanning pointer */
10862     SV * const stuff = newSVpvs("");    /* the text collected so far */
10863     bool needargs = FALSE;      /* a collected line contained '@' or '^' */
10864     bool eofmt = FALSE;         /* the terminating "." line was seen */
10865 #ifdef PERL_MAD
10866     char *tokenstart = s;
10867     SV* savewhite = NULL;
10868
10869     if (PL_madskills) {
10870         savewhite = PL_thiswhite;
10871         PL_thiswhite = 0;
10872     }
10873 #endif
10874
10875     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10876
10877     while (!needargs) {
              /* a '.' followed only by blanks (and CRs, unless
                 PERL_STRICT_CR) up to the newline or the end of the buffer
                 ends the format */
10878         if (*s == '.') {
10879             t = s+1;
10880 #ifdef PERL_STRICT_CR
10881             while (SPACE_OR_TAB(*t))
10882                 t++;
10883 #else
10884             while (SPACE_OR_TAB(*t) || *t == '\r')
10885                 t++;
10886 #endif
10887             if (*t == '\n' || t == PL_bufend) {
10888                 eofmt = TRUE;
10889                 break;
10890             }
10891         }
              /* find the end of this line; with no newline left the line
                 runs to the end of the buffer */
10892         eol = (char *) memchr(s,'\n',PL_bufend-s);
10893         if (!eol++)
10894                 eol = PL_bufend;
              /* lines starting with '#' are not added to stuff */
10895         if (*s != '#') {
10896             for (t = s; t < eol; t++) {
10897                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10898                     needargs = FALSE;
10899                     goto enough;        /* ~~ must be first line in formline */
10900                 }
10901                 if (*t == '@' || *t == '^')
10902                     needargs = TRUE;
10903             }
10904             if (eol > s) {
10905                 sv_catpvn(stuff, s, eol-s);
10906 #ifndef PERL_STRICT_CR
                      /* turn a trailing CR LF on the line just appended into
                         a single LF */
10907                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10908                     char *end = SvPVX(stuff) + SvCUR(stuff);
10909                     end[-2] = '\n';
10910                     end[-1] = '\0';
10911                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10912                 }
10913 #endif
10914             }
10915             else
10916               break;              /* nothing left in the buffer to collect */
10917         }
10918         s = (char*)eol;
              /* when reading from a file or a source filter in the normal
                 lexer state, fetch the next chunk of input */
10919         if ((PL_rsfp || PL_parser->filtered)
10920          && PL_parser->form_lex_state == LEX_NORMAL) {
10921             bool got_some;
10922 #ifdef PERL_MAD
10923             if (PL_madskills) {
10924                 if (PL_thistoken)
10925                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10926                 else
10927                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10928             }
10929 #endif
10930             PL_bufptr = PL_bufend;
                  /* the line count is bumped around the read and then put
                     back; NOTE(review): presumably so lex_next_chunk() sees
                     the right line while incline() below does the real
                     counting -- confirm */
10931             COPLINE_INC_WITH_HERELINES;
10932             got_some = lex_next_chunk(0);
10933             CopLINE_dec(PL_curcop);
10934             s = PL_bufptr;
10935 #ifdef PERL_MAD
10936             tokenstart = PL_bufptr;
10937 #endif
10938             if (!got_some)
10939                 break;
10940         }
10941         incline(s);
10942     }
10943   enough:
          /* switch to the saved lexer state when nothing was collected or
             when the collected text needs an argument line */
10944     if (!SvCUR(stuff) || needargs)
10945         PL_lex_state = PL_parser->form_lex_state;
10946     if (SvCUR(stuff)) {
10947         PL_expect = XSTATE;
10948         if (needargs) {
10949             start_force(PL_curforce);
10950             NEXTVAL_NEXTTOKE.ival = 0;
10951             force_next(FORMLBRACK);
10952         }
              /* outside "use bytes", flag the text as UTF-8 if it is valid
                 UTF-8 under UTF, else recode it when PL_encoding is set */
10953         if (!IN_BYTES) {
10954             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10955                 SvUTF8_on(stuff);
10956             else if (PL_encoding)
10957                 sv_recode_to_utf8(stuff, PL_encoding);
10958         }
              /* queue the collected text as an OP_CONST THING token */
10959         start_force(PL_curforce);
10960         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10961         force_next(THING);
10962     }
10963     else {
              /* nothing collected: release the SV, and clear
                 PL_lex_formbrack if the terminating "." was seen */
10964         SvREFCNT_dec(stuff);
10965         if (eofmt)
10966             PL_lex_formbrack = 0;
10967     }
10968 #ifdef PERL_MAD
10969     if (PL_madskills) {
10970         if (PL_thistoken)
10971             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10972         else
10973             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10974         PL_thiswhite = savewhite;
10975     }
10976 #endif
10977     return s;
10978 }
10979
/*
  start_subparse
  takes: is_format (true to create a format rather than a sub) and the
         flags to OR into the new CV's CvFLAGS
  returns: the savestack index as it was on entry, before anything was saved
  side-effects: saves PL_subline, PL_subname and PL_compcv, then replaces
                PL_compcv with a new CV that has a fresh padlist and the
                previous PL_compcv as its CvOUTSIDE
*/
10980 I32
10981 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10982 {
10983     dVAR;
10984     CV * const enclosing_cv = PL_compcv;
10985     const I32 savestack_mark = PL_savestack_ix;
10986
10987     SAVEI32(PL_subline);
10988     save_item(PL_subname);
10989     SAVESPTR(PL_compcv);
10990
          /* formats and ordinary subs use different SV body types */
10991     if (is_format)
10992         PL_compcv = MUTABLE_CV(newSV_type(SVt_PVFM));
10993     else
10994         PL_compcv = MUTABLE_CV(newSV_type(SVt_PVCV));
10995     CvFLAGS(PL_compcv) |= flags;
10996     PL_subline = CopLINE(PL_curcop);
10997     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE | padnew_SAVESUB);
          /* the new CV holds a reference to the CV that was being compiled */
10998     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(enclosing_cv));
10999     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
          /* record the enclosing padlist's names, when there is one */
11000     if (enclosing_cv && CvPADLIST(enclosing_cv))
11001         CvPADLIST(PL_compcv)->xpadl_outid = PadlistNAMES(CvPADLIST(enclosing_cv));
11002     return savestack_mark;
11003 }
11004
11005 #ifdef __SC__
11006 #pragma segment Perl_yylex
11007 #endif
/*
  yywarn
  takes: message text and the flags to hand on to yyerror_pv()
  returns: 0
  side-effects: sets EVAL_WARNONLY in PL_in_eval for the duration of the
                yyerror_pv() call and clears it afterwards
*/
11008 static int
11009 S_yywarn(pTHX_ const char *const s, U32 flags)
11010 {
11011     dVAR;
11012
11013     PERL_ARGS_ASSERT_YYWARN;
11014
          /* the bit is cleared afterwards whether or not it was set on entry */
11015     PL_in_eval = PL_in_eval | EVAL_WARNONLY;
11016     (void)yyerror_pv(s, flags);
11017     PL_in_eval = PL_in_eval & ~EVAL_WARNONLY;
11018     return 0;
11019 }
11020
11021 int
11022 Perl_yyerror(pTHX_ const char *const s)
11023 {
          /* Report the NUL-terminated message s through yyerror_pvn() with no
             flags, and return its result. */
11024     PERL_ARGS_ASSERT_YYERROR;
11025     {
              const STRLEN msglen = strlen(s);
              return yyerror_pvn(s, msglen, 0);
          }
11026 }
11027
11028 int
11029 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11030 {
          /* Report the NUL-terminated message s through yyerror_pvn(),
             passing flags on unchanged, and return its result. */
11031     PERL_ARGS_ASSERT_YYERROR_PV;
11032     {
              const STRLEN msglen = strlen(s);
              return yyerror_pvn(s, msglen, flags);
          }
11033 }
11034
/*
  yyerror_pvn
  takes: the error text s of len bytes, and flags, of which only the
         SVf_UTF8 bit is looked at
  returns: 0
  side-effects: reports the message with qerror(), or with
                Perl_ck_warner_d() in the WARN_SYNTAX category when
                EVAL_WARNONLY is set in PL_in_eval; croaks once
                PL_error_count has reached 10; clears PL_in_my and
                PL_in_my_stash; may advance PL_oldoldbufptr or PL_oldbufptr
                past leading whitespace; may reset PL_multi_end to 0

  The reported text is the caller's message followed by " at FILE line N, "
  and then either near "..." quoting recently lexed source text, or a
  short description of where the parser is.
*/
11035 int
11036 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11037 {
11038     dVAR;
11039     const char *context = NULL;         /* source text to quote, if any */
11040     int contlen = -1;                   /* length of context in bytes */
11041     SV *msg;                            /* the complete message */
11042     SV * const where_sv = newSVpvs_flags("", SVs_TEMP); /* used when there is no context */
11043     int yychar  = PL_parser->yychar;
11044     U32 is_utf8 = flags & SVf_UTF8;
11045
11046     PERL_ARGS_ASSERT_YYERROR_PVN;
11047
11048     if (!yychar || (yychar == ';' && !PL_rsfp))
11049         sv_catpvs(where_sv, "at EOF");
          /* prefer quoting from PL_oldoldbufptr up to PL_bufptr, provided the
             three buffer pointers are distinct and the span is under 200
             bytes */
11050     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11051       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11052       PL_oldbufptr != PL_bufptr) {
11053         /*
11054                 Only for NetWare:
11055                 The code below is removed for NetWare because it abends/crashes on NetWare
11056                 when the script has error such as not having the closing quotes like:
11057                     if ($var eq "value)
11058                 Checking of white spaces is anyway done in NetWare code.
11059         */
11060 #ifndef NETWARE
              /* note that this moves the global pointer itself */
11061         while (isSPACE(*PL_oldoldbufptr))
11062             PL_oldoldbufptr++;
11063 #endif
11064         context = PL_oldoldbufptr;
11065         contlen = PL_bufptr - PL_oldoldbufptr;
11066     }
          /* otherwise quote from PL_oldbufptr up to PL_bufptr, under the same
             200 byte limit */
11067     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11068       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11069         /*
11070                 Only for NetWare:
11071                 The code below is removed for NetWare because it abends/crashes on NetWare
11072                 when the script has error such as not having the closing quotes like:
11073                     if ($var eq "value)
11074                 Checking of white spaces is anyway done in NetWare code.
11075         */
11076 #ifndef NETWARE
              /* note that this moves the global pointer itself */
11077         while (isSPACE(*PL_oldbufptr))
11078             PL_oldbufptr++;
11079 #endif
11080         context = PL_oldbufptr;
11081         contlen = PL_bufptr - PL_oldbufptr;
11082     }
          /* nothing usable to quote: describe the position from yychar */
11083     else if (yychar > 255)
11084         sv_catpvs(where_sv, "next token ???");
11085     else if (yychar == -2) { /* YYEMPTY */
11086         if (PL_lex_state == LEX_NORMAL ||
11087            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11088             sv_catpvs(where_sv, "at end of line");
11089         else if (PL_lex_inpat)
11090             sv_catpvs(where_sv, "within pattern");
11091         else
11092             sv_catpvs(where_sv, "within string");
11093     }
11094     else {
              /* show the character itself: ^X form below 32, as is when
                 printable, otherwise as a three digit octal escape */
11095         sv_catpvs(where_sv, "next char ");
11096         if (yychar < 32)
11097             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11098         else if (isPRINT_LC(yychar)) {
11099             const char string = yychar;
11100             sv_catpvn(where_sv, &string, 1);
11101         }
11102         else
11103             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11104     }
          /* assemble: caller's text, file and line, then context or position */
11105     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
11106     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11107         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11108     if (context)
11109         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11110                             SVfARG(newSVpvn_flags(context, contlen,
11111                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11112     else
11113         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
          /* add a hint when a multi-line quoted construct ended on this line
             or the one before; PL_multi_end is then zeroed, which keeps the
             hint from being added again until it is set anew */
11114     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11115         Perl_sv_catpvf(aTHX_ msg,
11116         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11117                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11118         PL_multi_end = 0;
11119     }
11120     if (PL_in_eval & EVAL_WARNONLY) {
11121         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11122     }
11123     else
11124         qerror(msg);
          /* give up on the compilation once ten errors have accumulated */
11125     if (PL_error_count >= 10) {
11126         if (PL_in_eval && SvCUR(ERRSV))
11127             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11128                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
11129         else
11130             Perl_croak(aTHX_ "%s has too many errors.\n",
11131             OutCopFILE(PL_curcop));
11132     }
11133     PL_in_my = 0;
11134     PL_in_my_stash = NULL;
11135     return 0;
11136 }
11137 #ifdef __SC__
11138 #pragma segment Main
11139 #endif
11140
11141 STATIC char*
11142 S_swallow_bom(pTHX_ U8 *s)
11143 {
11144     dVAR;
11145     const STRLEN slen = SvCUR(PL_linestr);
11146
11147     PERL_ARGS_ASSERT_SWALLOW_BOM;
11148
11149     switch (s[0]) {
11150     case 0xFF:
11151         if (s[1] == 0xFE) {
11152             /* UTF-16 little-endian? (or UTF-32LE?) */
11153             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11154                 /* diag_listed_as: Unsupported script encoding %s */
11155                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11156 #ifndef PERL_NO_UTF16_FILTER
11157             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11158             s += 2;
11159             if (PL_bufend > (char*)s) {
11160                 s = add_utf16_textfilter(s, TRUE);
11161             }
11162 #else
11163             /* diag_listed_as: Unsupported script encoding %s */
11164             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11165 #endif
11166         }
11167         break;
11168     case 0xFE:
11169         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11170 #ifndef PERL_NO_UTF16_FILTER
11171             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11172             s += 2;
11173             if (PL_bufend > (char *)s) {
11174                 s = add_utf16_textfilter(s, FALSE);
11175             }
11176 #else
11177             /* diag_listed_as: Unsupported script encoding %s */
11178             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11179 #endif
11180         }
11181         break;
11182     case 0xEF:
11183         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11184             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11185             s += 3;                      /* UTF-8 */
11186         }
11187         break;
11188     case 0:
11189         if (slen > 3) {
11190              if (s[1] == 0) {
11191                   if (s[2] == 0xFE && s[3] == 0xFF) {
11192                        /* UTF-32 big-endian */
11193                        /* diag_listed_as: Unsupported script encoding %s */
11194                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11195                   }
11196              }
11197              else if (s[2] == 0 && s[3] != 0) {
11198                   /* Leading bytes
11199                    * 00 xx 00 xx
11200                    * are a good indicator of UTF-16BE. */
11201 #ifndef PERL_NO_UTF16_FILTER
11202                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11203                   s = add_utf16_textfilter(s, FALSE);
11204 #else
11205                   /* diag_listed_as: Unsupported script encoding %s */
11206                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11207 #endif
11208              }
11209         }
11210 #ifdef EBCDIC
11211     case 0xDD:
11212         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11213             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11214             s += 4;                      /* UTF-8 */
11215         }
11216         break;
11217 #endif
11218
11219     default:
11220          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11221                   /* Leading bytes
11222                    * xx 00 xx 00
11223                    * are a good indicator of UTF-16LE. */
11224 #ifndef PERL_NO_UTF16_FILTER
11225               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11226               s = add_utf16_textfilter(s, TRUE);
11227 #else
11228               /* diag_listed_as: Unsupported script encoding %s */
11229               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11230 #endif
11231          }
11232     }
11233     return (char*)s;
11234 }
11235
11236
11237 #ifndef PERL_NO_UTF16_FILTER
11238 static I32
11239 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11240 {
11241     dVAR;
11242     SV *const filter = FILTER_DATA(idx);
11243     /* We re-use this each time round, throwing the contents away before we
11244        return.  */
11245     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11246     SV *const utf8_buffer = filter;
11247     IV status = IoPAGE(filter);
11248     const bool reverse = cBOOL(IoLINES(filter));
11249     I32 retval;
11250
11251     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11252
11253     /* As we're automatically added, at the lowest level, and hence only called
11254        from this file, we can be sure that we're not called in block mode. Hence
11255        don't bother writing code to deal with block mode.  */
11256     if (maxlen) {
11257         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11258     }
11259     if (status < 0) {
11260         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11261     }
11262     DEBUG_P(PerlIO_printf(Perl_debug_log,
11263                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11264                           FPTR2DPTR(void *, S_utf16_textfilter),
11265                           reverse ? 'l' : 'b', idx, maxlen, status,
11266                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11267
11268     while (1) {
11269         STRLEN chars;
11270         STRLEN have;
11271         I32 newlen;
11272         U8 *end;
11273         /* First, look in our buffer of existing UTF-8 data:  */
11274         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11275
11276         if (nl) {
11277             ++nl;
11278         } else if (status == 0) {
11279             /* EOF */
11280             IoPAGE(filter) = 0;
11281             nl = SvEND(utf8_buffer);
11282         }
11283         if (nl) {
11284             STRLEN got = nl - SvPVX(utf8_buffer);
11285             /* Did we have anything to append?  */
11286             retval = got != 0;
11287             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11288             /* Everything else in this code works just fine if SVp_POK isn't
11289                set.  This, however, needs it, and we need it to work, else
11290                we loop infinitely because the buffer is never consumed.  */
11291             sv_chop(utf8_buffer, nl);
11292             break;
11293         }
11294
11295         /* OK, not a complete line there, so need to read some more UTF-16.
11296            Read an extra octect if the buffer currently has an odd number. */
11297         while (1) {
11298             if (status <= 0)
11299                 break;
11300             if (SvCUR(utf16_buffer) >= 2) {
11301                 /* Location of the high octet of the last complete code point.
11302                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11303                    *coupled* with all the benefits of partial reads and
11304                    endianness.  */
11305                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11306                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11307
11308                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11309                     break;
11310                 }
11311
11312                 /* We have the first half of a surrogate. Read more.  */
11313                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11314             }
11315
11316             status = FILTER_READ(idx + 1, utf16_buffer,
11317                                  160 + (SvCUR(utf16_buffer) & 1));
11318             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11319             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11320             if (status < 0) {
11321                 /* Error */
11322                 IoPAGE(filter) = status;
11323                 return status;
11324             }
11325         }
11326
11327         chars = SvCUR(utf16_buffer) >> 1;
11328         have = SvCUR(utf8_buffer);
11329         SvGROW(utf8_buffer, have + chars * 3 + 1);
11330
11331         if (reverse) {
11332             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11333                                          (U8*)SvPVX_const(utf8_buffer) + have,
11334                                          chars * 2, &newlen);
11335         } else {
11336             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11337                                 (U8*)SvPVX_const(utf8_buffer) + have,
11338                                 chars * 2, &newlen);
11339         }
11340         SvCUR_set(utf8_buffer, have + newlen);
11341         *end = '\0';
11342
11343         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11344            it's private to us, and utf16_to_utf8{,reversed} take a
11345            (pointer,length) pair, rather than a NUL-terminated string.  */
11346         if(SvCUR(utf16_buffer) & 1) {
11347             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11348             SvCUR_set(utf16_buffer, 1);
11349         } else {
11350             SvCUR_set(utf16_buffer, 0);
11351         }
11352     }
11353     DEBUG_P(PerlIO_printf(Perl_debug_log,
11354                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11355                           status,
11356                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11357     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11358     return retval;
11359 }
11360
11361 static U8 *
11362 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11363 {
11364     SV *filter = filter_add(S_utf16_textfilter, NULL);
11365
11366     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11367
11368     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11369     sv_setpvs(filter, "");
11370     IoLINES(filter) = reversed;
11371     IoPAGE(filter) = 1; /* Not EOF */
11372
11373     /* Sadly, we have to return a valid pointer, come what may, so we have to
11374        ignore any error return from this.  */
11375     SvCUR_set(PL_linestr, 0);
11376     if (FILTER_READ(0, PL_linestr, 0)) {
11377         SvUTF8_on(PL_linestr);
11378     } else {
11379         SvUTF8_on(PL_linestr);
11380     }
11381     PL_bufend = SvEND(PL_linestr);
11382     return (U8*)SvPVX(PL_linestr);
11383 }
11384 #endif
11385
11386 /*
11387 Returns a pointer to the next character after the parsed
11388 vstring, as well as updating the passed in sv.
11389
11390 Function must be called like
11391
11392         sv = newSV(5);
11393         s = scan_vstring(s,e,sv);
11394
11395 where s and e are the start and end of the string.
11396 The sv should already be large enough to store the vstring
11397 passed in, for performance reasons.
11398
11399 */
11400
11401 char *
11402 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11403 {
11404     dVAR;
11405     const char *pos = s;
11406     const char *start = s;
11407
11408     PERL_ARGS_ASSERT_SCAN_VSTRING;
11409
11410     if (*pos == 'v') pos++;  /* get past 'v' */
11411     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11412         pos++;
11413     if ( *pos != '.') {
11414         /* this may not be a v-string if followed by => */
11415         const char *next = pos;
11416         while (next < e && isSPACE(*next))
11417             ++next;
11418         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11419             /* return string not v-string */
11420             sv_setpvn(sv,(char *)s,pos-s);
11421             return (char *)pos;
11422         }
11423     }
11424
11425     if (!isALPHA(*pos)) {
11426         U8 tmpbuf[UTF8_MAXBYTES+1];
11427
11428         if (*s == 'v')
11429             s++;  /* get past 'v' */
11430
11431         sv_setpvs(sv, "");
11432
11433         for (;;) {
11434             /* this is atoi() that tolerates underscores */
11435             U8 *tmpend;
11436             UV rev = 0;
11437             const char *end = pos;
11438             UV mult = 1;
11439             while (--end >= s) {
11440                 if (*end != '_') {
11441                     const UV orev = rev;
11442                     rev += (*end - '0') * mult;
11443                     mult *= 10;
11444                     if (orev > rev)
11445                         /* diag_listed_as: Integer overflow in %s number */
11446                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11447                                          "Integer overflow in decimal number");
11448                 }
11449             }
11450 #ifdef EBCDIC
11451             if (rev > 0x7FFFFFFF)
11452                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11453 #endif
11454             /* Append native character for the rev point */
11455             tmpend = uvchr_to_utf8(tmpbuf, rev);
11456             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11457             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11458                  SvUTF8_on(sv);
11459             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11460                  s = ++pos;
11461             else {
11462                  s = pos;
11463                  break;
11464             }
11465             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11466                  pos++;
11467         }
11468         SvPOK_on(sv);
11469         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11470         SvRMAGICAL_on(sv);
11471     }
11472     return (char *)s;
11473 }
11474
11475 int
11476 Perl_keyword_plugin_standard(pTHX_
11477         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11478 {
11479     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11480     PERL_UNUSED_CONTEXT;
11481     PERL_UNUSED_ARG(keyword_ptr);
11482     PERL_UNUSED_ARG(keyword_len);
11483     PERL_UNUSED_ARG(op_ptr);
11484     return KEYWORD_PLUGIN_DECLINE;
11485 }
11486
11487 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11488 static void
11489 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11490 {
11491     SAVEI32(PL_lex_brackets);
11492     if (PL_lex_brackets > 100)
11493         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11494     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11495     SAVEI32(PL_lex_allbrackets);
11496     PL_lex_allbrackets = 0;
11497     SAVEI8(PL_lex_fakeeof);
11498     PL_lex_fakeeof = (U8)fakeeof;
11499     if(yyparse(gramtype) && !PL_parser->error_count)
11500         qerror(Perl_mess(aTHX_ "Parse error"));
11501 }
11502
11503 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11504 static OP *
11505 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11506 {
11507     OP *o;
11508     ENTER;
11509     SAVEVPTR(PL_eval_root);
11510     PL_eval_root = NULL;
11511     parse_recdescent(gramtype, fakeeof);
11512     o = PL_eval_root;
11513     LEAVE;
11514     return o;
11515 }
11516
11517 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11518 static OP *
11519 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11520 {
11521     OP *exprop;
11522     if (flags & ~PARSE_OPTIONAL)
11523         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11524     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11525     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11526         if (!PL_parser->error_count)
11527             qerror(Perl_mess(aTHX_ "Parse error"));
11528         exprop = newOP(OP_NULL, 0);
11529     }
11530     return exprop;
11531 }
11532
11533 /*
11534 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11535
11536 Parse a Perl arithmetic expression.  This may contain operators of precedence
11537 down to the bit shift operators.  The expression must be followed (and thus
11538 terminated) either by a comparison or lower-precedence operator or by
11539 something that would normally terminate an expression such as semicolon.
11540 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11541 otherwise it is mandatory.  It is up to the caller to ensure that the
11542 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11543 the source of the code to be parsed and the lexical context for the
11544 expression.
11545
11546 The op tree representing the expression is returned.  If an optional
11547 expression is absent, a null pointer is returned, otherwise the pointer
11548 will be non-null.
11549
11550 If an error occurs in parsing or compilation, in most cases a valid op
11551 tree is returned anyway.  The error is reflected in the parser state,
11552 normally resulting in a single exception at the top level of parsing
11553 which covers all the compilation errors that occurred.  Some compilation
11554 errors, however, will throw an exception immediately.
11555
11556 =cut
11557 */
11558
11559 OP *
11560 Perl_parse_arithexpr(pTHX_ U32 flags)
11561 {
11562     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11563 }
11564
11565 /*
11566 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11567
11568 Parse a Perl term expression.  This may contain operators of precedence
11569 down to the assignment operators.  The expression must be followed (and thus
11570 terminated) either by a comma or lower-precedence operator or by
11571 something that would normally terminate an expression such as semicolon.
11572 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11573 otherwise it is mandatory.  It is up to the caller to ensure that the
11574 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11575 the source of the code to be parsed and the lexical context for the
11576 expression.
11577
11578 The op tree representing the expression is returned.  If an optional
11579 expression is absent, a null pointer is returned, otherwise the pointer
11580 will be non-null.
11581
11582 If an error occurs in parsing or compilation, in most cases a valid op
11583 tree is returned anyway.  The error is reflected in the parser state,
11584 normally resulting in a single exception at the top level of parsing
11585 which covers all the compilation errors that occurred.  Some compilation
11586 errors, however, will throw an exception immediately.
11587
11588 =cut
11589 */
11590
11591 OP *
11592 Perl_parse_termexpr(pTHX_ U32 flags)
11593 {
11594     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11595 }
11596
11597 /*
11598 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11599
11600 Parse a Perl list expression.  This may contain operators of precedence
11601 down to the comma operator.  The expression must be followed (and thus
11602 terminated) either by a low-precedence logic operator such as C<or> or by
11603 something that would normally terminate an expression such as semicolon.
11604 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11605 otherwise it is mandatory.  It is up to the caller to ensure that the
11606 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11607 the source of the code to be parsed and the lexical context for the
11608 expression.
11609
11610 The op tree representing the expression is returned.  If an optional
11611 expression is absent, a null pointer is returned, otherwise the pointer
11612 will be non-null.
11613
11614 If an error occurs in parsing or compilation, in most cases a valid op
11615 tree is returned anyway.  The error is reflected in the parser state,
11616 normally resulting in a single exception at the top level of parsing
11617 which covers all the compilation errors that occurred.  Some compilation
11618 errors, however, will throw an exception immediately.
11619
11620 =cut
11621 */
11622
11623 OP *
11624 Perl_parse_listexpr(pTHX_ U32 flags)
11625 {
11626     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11627 }
11628
11629 /*
11630 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11631
11632 Parse a single complete Perl expression.  This allows the full
11633 expression grammar, including the lowest-precedence operators such
11634 as C<or>.  The expression must be followed (and thus terminated) by a
11635 token that an expression would normally be terminated by: end-of-file,
11636 closing bracketing punctuation, semicolon, or one of the keywords that
11637 signals a postfix expression-statement modifier.  If I<flags> includes
11638 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11639 mandatory.  It is up to the caller to ensure that the dynamic parser
11640 state (L</PL_parser> et al) is correctly set to reflect the source of
11641 the code to be parsed and the lexical context for the expression.
11642
11643 The op tree representing the expression is returned.  If an optional
11644 expression is absent, a null pointer is returned, otherwise the pointer
11645 will be non-null.
11646
11647 If an error occurs in parsing or compilation, in most cases a valid op
11648 tree is returned anyway.  The error is reflected in the parser state,
11649 normally resulting in a single exception at the top level of parsing
11650 which covers all the compilation errors that occurred.  Some compilation
11651 errors, however, will throw an exception immediately.
11652
11653 =cut
11654 */
11655
11656 OP *
11657 Perl_parse_fullexpr(pTHX_ U32 flags)
11658 {
11659     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11660 }
11661
11662 /*
11663 =for apidoc Amx|OP *|parse_block|U32 flags
11664
11665 Parse a single complete Perl code block.  This consists of an opening
11666 brace, a sequence of statements, and a closing brace.  The block
11667 constitutes a lexical scope, so C<my> variables and various compile-time
11668 effects can be contained within it.  It is up to the caller to ensure
11669 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11670 reflect the source of the code to be parsed and the lexical context for
11671 the statement.
11672
11673 The op tree representing the code block is returned.  This is always a
11674 real op, never a null pointer.  It will normally be a C<lineseq> list,
11675 including C<nextstate> or equivalent ops.  No ops to construct any kind
11676 of runtime scope are included by virtue of it being a block.
11677
11678 If an error occurs in parsing or compilation, in most cases a valid op
11679 tree (most likely null) is returned anyway.  The error is reflected in
11680 the parser state, normally resulting in a single exception at the top
11681 level of parsing which covers all the compilation errors that occurred.
11682 Some compilation errors, however, will throw an exception immediately.
11683
11684 The I<flags> parameter is reserved for future use, and must always
11685 be zero.
11686
11687 =cut
11688 */
11689
11690 OP *
11691 Perl_parse_block(pTHX_ U32 flags)
11692 {
11693     if (flags)
11694         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11695     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11696 }
11697
11698 /*
11699 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11700
11701 Parse a single unadorned Perl statement.  This may be a normal imperative
11702 statement or a declaration that has compile-time effect.  It does not
11703 include any label or other affixture.  It is up to the caller to ensure
11704 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11705 reflect the source of the code to be parsed and the lexical context for
11706 the statement.
11707
11708 The op tree representing the statement is returned.  This may be a
11709 null pointer if the statement is null, for example if it was actually
11710 a subroutine definition (which has compile-time side effects).  If not
11711 null, it will be ops directly implementing the statement, suitable to
11712 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11713 equivalent op (except for those embedded in a scope contained entirely
11714 within the statement).
11715
11716 If an error occurs in parsing or compilation, in most cases a valid op
11717 tree (most likely null) is returned anyway.  The error is reflected in
11718 the parser state, normally resulting in a single exception at the top
11719 level of parsing which covers all the compilation errors that occurred.
11720 Some compilation errors, however, will throw an exception immediately.
11721
11722 The I<flags> parameter is reserved for future use, and must always
11723 be zero.
11724
11725 =cut
11726 */
11727
11728 OP *
11729 Perl_parse_barestmt(pTHX_ U32 flags)
11730 {
11731     if (flags)
11732         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11733     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11734 }
11735
11736 /*
11737 =for apidoc Amx|SV *|parse_label|U32 flags
11738
11739 Parse a single label, possibly optional, of the type that may prefix a
11740 Perl statement.  It is up to the caller to ensure that the dynamic parser
11741 state (L</PL_parser> et al) is correctly set to reflect the source of
11742 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11743 label is optional, otherwise it is mandatory.
11744
11745 The name of the label is returned in the form of a fresh scalar.  If an
11746 optional label is absent, a null pointer is returned.
11747
11748 If an error occurs in parsing, which can only occur if the label is
11749 mandatory, a valid label is returned anyway.  The error is reflected in
11750 the parser state, normally resulting in a single exception at the top
11751 level of parsing which covers all the compilation errors that occurred.
11752
11753 =cut
11754 */
11755
11756 SV *
11757 Perl_parse_label(pTHX_ U32 flags)
11758 {
11759     if (flags & ~PARSE_OPTIONAL)
11760         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11761     if (PL_lex_state == LEX_KNOWNEXT) {
11762         PL_parser->yychar = yylex();
11763         if (PL_parser->yychar == LABEL) {
11764             SV *lsv;
11765             PL_parser->yychar = YYEMPTY;
11766             lsv = newSV_type(SVt_PV);
11767             sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11768             return lsv;
11769         } else {
11770             yyunlex();
11771             goto no_label;
11772         }
11773     } else {
11774         char *s, *t;
11775         STRLEN wlen, bufptr_pos;
11776         lex_read_space(0);
11777         t = s = PL_bufptr;
11778         if (!isIDFIRST_lazy_if(s, UTF))
11779             goto no_label;
11780         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11781         if (word_takes_any_delimeter(s, wlen))
11782             goto no_label;
11783         bufptr_pos = s - SvPVX(PL_linestr);
11784         PL_bufptr = t;
11785         lex_read_space(LEX_KEEP_PREVIOUS);
11786         t = PL_bufptr;
11787         s = SvPVX(PL_linestr) + bufptr_pos;
11788         if (t[0] == ':' && t[1] != ':') {
11789             PL_oldoldbufptr = PL_oldbufptr;
11790             PL_oldbufptr = s;
11791             PL_bufptr = t+1;
11792             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11793         } else {
11794             PL_bufptr = s;
11795             no_label:
11796             if (flags & PARSE_OPTIONAL) {
11797                 return NULL;
11798             } else {
11799                 qerror(Perl_mess(aTHX_ "Parse error"));
11800                 return newSVpvs("x");
11801             }
11802         }
11803     }
11804 }
11805
11806 /*
11807 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11808
11809 Parse a single complete Perl statement.  This may be a normal imperative
11810 statement or a declaration that has compile-time effect, and may include
11811 optional labels.  It is up to the caller to ensure that the dynamic
11812 parser state (L</PL_parser> et al) is correctly set to reflect the source
11813 of the code to be parsed and the lexical context for the statement.
11814
11815 The op tree representing the statement is returned.  This may be a
11816 null pointer if the statement is null, for example if it was actually
11817 a subroutine definition (which has compile-time side effects).  If not
11818 null, it will be the result of a L</newSTATEOP> call, normally including
11819 a C<nextstate> or equivalent op.
11820
11821 If an error occurs in parsing or compilation, in most cases a valid op
11822 tree (most likely null) is returned anyway.  The error is reflected in
11823 the parser state, normally resulting in a single exception at the top
11824 level of parsing which covers all the compilation errors that occurred.
11825 Some compilation errors, however, will throw an exception immediately.
11826
11827 The I<flags> parameter is reserved for future use, and must always
11828 be zero.
11829
11830 =cut
11831 */
11832
11833 OP *
11834 Perl_parse_fullstmt(pTHX_ U32 flags)
11835 {
11836     if (flags)
11837         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11838     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11839 }
11840
11841 /*
11842 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11843
11844 Parse a sequence of zero or more Perl statements.  These may be normal
11845 imperative statements, including optional labels, or declarations
11846 that have compile-time effect, or any mixture thereof.  The statement
11847 sequence ends when a closing brace or end-of-file is encountered in a
11848 place where a new statement could have validly started.  It is up to
11849 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11850 is correctly set to reflect the source of the code to be parsed and the
11851 lexical context for the statements.
11852
11853 The op tree representing the statement sequence is returned.  This may
11854 be a null pointer if the statements were all null, for example if there
11855 were no statements or if there were only subroutine definitions (which
11856 have compile-time side effects).  If not null, it will be a C<lineseq>
11857 list, normally including C<nextstate> or equivalent ops.
11858
11859 If an error occurs in parsing or compilation, in most cases a valid op
11860 tree is returned anyway.  The error is reflected in the parser state,
11861 normally resulting in a single exception at the top level of parsing
11862 which covers all the compilation errors that occurred.  Some compilation
11863 errors, however, will throw an exception immediately.
11864
11865 The I<flags> parameter is reserved for future use, and must always
11866 be zero.
11867
11868 =cut
11869 */
11870
11871 OP *
11872 Perl_parse_stmtseq(pTHX_ U32 flags)
11873 {
11874     OP *stmtseqop;
11875     I32 c;
11876     if (flags)
11877         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11878     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11879     c = lex_peek_unichar(0);
11880     if (c != -1 && c != /*{*/'}')
11881         qerror(Perl_mess(aTHX_ "Parse error"));
11882     return stmtseqop;
11883 }
11884
11885 /*
11886  * Local variables:
11887  * c-indentation-style: bsd
11888  * c-basic-offset: 4
11889  * indent-tabs-mode: nil
11890  * End:
11891  *
11892  * ex: set ts=8 sts=4 sw=4 et:
11893  */