toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 237  * Rop          : relational operator <= != gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
 242 #ifdef DEBUGGING /* Serve -DT. */
 243 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 244 #else
 245 #   define REPORT(retval) (retval)
 246 #endif
 247
 248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 256 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 257 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 258 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 259 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 260 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 261 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 262 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 263 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 264 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 265 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 266 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 267 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 268 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
 270 /* This bit of chicanery makes a unary function followed by
 271  * a parenthesis into a function with one argument, highest precedence.
 272  * The UNIDOR macro is for unary functions that can be followed by the //
 273  * operator (such as C<shift // 0>).
 274  */
 275 #define UNI2(f,x) { \
 276         pl_yylval.ival = f; \
 277         PL_expect = x; \
 278         PL_bufptr = s; \
 279         PL_last_uni = PL_oldbufptr; \
 280         PL_last_lop_op = f; \
 281         if (*s == '(') \
 282             return REPORT( (int)FUNC1 ); \
 283         s = PEEKSPACE(s); \
 284         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 285         }
 286 #define UNI(f)    UNI2(f,XTERM)
 287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 288
 289 #define UNIBRACK(f) { \
 290         pl_yylval.ival = f; \
 291         PL_bufptr = s; \
 292         PL_last_uni = PL_oldbufptr; \
 293         if (*s == '(') \
 294             return REPORT( (int)FUNC1 ); \
 295         s = PEEKSPACE(s); \
 296         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 297         }
 298
 299 /* grandfather return to old style */
 300 #define OLDLOP(f) \
 301         do { \
 302             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 303                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 304             pl_yylval.ival = (f); \
 305             PL_expect = XTERM; \
 306             PL_bufptr = s; \
 307             return (int)LSTOP; \
 308         } while(0)
 309
 310 #ifdef DEBUGGING
 311
 312 /* how to interpret the pl_yylval associated with the token */
 313 enum token_type {
 314     TOKENTYPE_NONE,
 315     TOKENTYPE_IVAL,
 316     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 317     TOKENTYPE_PVAL,
 318     TOKENTYPE_OPVAL,
 319     TOKENTYPE_GVVAL
 320 };
 321
 322 static struct debug_tokens {
 323     const int token;
 324     enum token_type type;
 325     const char *name;
 326 } const debug_tokens[] =
 327 {
 328     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 329     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 330     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 331     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 332     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 333     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 334     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 335     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 336     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 337     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 338     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 339     { DO,               TOKENTYPE_NONE,         "DO" },
 340     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 341     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 342     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 343     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 344     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 345     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 346     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 347     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 348     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 349     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 350     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 351     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 352     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 353     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 354     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 355     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 356     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 357     { IF,               TOKENTYPE_IVAL,         "IF" },
 358     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 359     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 360     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 361     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 362     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 363     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 364     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 365     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 366     { MY,               TOKENTYPE_IVAL,         "MY" },
 367     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 368     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 369     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 370     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 371     { OROR,             TOKENTYPE_NONE,         "OROR" },
 372     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 373     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 374     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 375     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 376     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 377     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 378     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 379     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 380     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 381     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 382     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 383     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 384     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 385     { SUB,              TOKENTYPE_NONE,         "SUB" },
 386     { THING,            TOKENTYPE_OPVAL,        "THING" },
 387     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 388     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 389     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 390     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 391     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 392     { USE,              TOKENTYPE_IVAL,         "USE" },
 393     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 394     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 395     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 396     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 397     { 0,                TOKENTYPE_NONE,         NULL }
 398 };
 399
 400 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 401
 402 STATIC int
 403 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 404 {
 405     dVAR;
 406
 407     PERL_ARGS_ASSERT_TOKEREPORT;
 408
 409     if (DEBUG_T_TEST) {
 410         const char *name = NULL;
 411         enum token_type type = TOKENTYPE_NONE;
 412         const struct debug_tokens *p;
 413         SV* const report = newSVpvs("<== ");
 414
 415         for (p = debug_tokens; p->token; p++) {
 416             if (p->token == (int)rv) {
 417                 name = p->name;
 418                 type = p->type;
 419                 break;
 420             }
 421         }
 422         if (name)
 423             Perl_sv_catpv(aTHX_ report, name);
 424         else if ((char)rv > ' ' && (char)rv < '~')
 425             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 426         else if (!rv)
 427             sv_catpvs(report, "EOF");
 428         else
 429             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 430         switch (type) {
 431         case TOKENTYPE_NONE:
 432         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 433             break;
 434         case TOKENTYPE_IVAL:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 436             break;
 437         case TOKENTYPE_OPNUM:
 438             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 439                                     PL_op_name[lvalp->ival]);
 440             break;
 441         case TOKENTYPE_PVAL:
 442             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 443             break;
 444         case TOKENTYPE_OPVAL:
 445             if (lvalp->opval) {
 446                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 447                                     PL_op_name[lvalp->opval->op_type]);
 448                 if (lvalp->opval->op_type == OP_CONST) {
 449                     Perl_sv_catpvf(aTHX_ report, " %s",
 450                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 451                 }
 452
 453             }
 454             else
 455                 sv_catpvs(report, "(opval=null)");
 456             break;
 457         }
 458         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 459     };
 460     return (int)rv;
 461 }
 462
 463
 464 /* print the buffer with suitable escapes */
 465
 466 STATIC void
 467 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 468 {
 469     SV* const tmp = newSVpvs("");
 470
 471     PERL_ARGS_ASSERT_PRINTBUF;
 472
 473     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 474     SvREFCNT_dec(tmp);
 475 }
 476
 477 #endif
 478
 479 static int
 480 S_deprecate_commaless_var_list(pTHX) {
 481     PL_expect = XTERM;
 482     deprecate("comma-less variable list");
 483     return REPORT(','); /* grandfather non-comma-format format */
 484 }
 485
 486 /*
 487  * S_ao
 488  *
 489  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 490  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 491  */
 492
 493 STATIC int
 494 S_ao(pTHX_ int toketype)
 495 {
 496     dVAR;
 497     if (*PL_bufptr == '=') {
 498         PL_bufptr++;
 499         if (toketype == ANDAND)
 500             pl_yylval.ival = OP_ANDASSIGN;
 501         else if (toketype == OROR)
 502             pl_yylval.ival = OP_ORASSIGN;
 503         else if (toketype == DORDOR)
 504             pl_yylval.ival = OP_DORASSIGN;
 505         toketype = ASSIGNOP;
 506     }
 507     return toketype;
 508 }
 509
 510 /*
 511  * S_no_op
 512  * When Perl expects an operator and finds something else, no_op
 513  * prints the warning.  It always prints "<something> found where
 514  * operator expected.  It prints "Missing semicolon on previous line?"
 515  * if the surprise occurs at the start of the line.  "do you need to
 516  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 517  * where the compiler doesn't know if foo is a method call or a function.
 518  * It prints "Missing operator before end of line" if there's nothing
 519  * after the missing operator, or "... before <...>" if there is something
 520  * after the missing operator.
 521  */
 522
 523 STATIC void
 524 S_no_op(pTHX_ const char *const what, char *s)
 525 {
 526     dVAR;
 527     char * const oldbp = PL_bufptr;
 528     const bool is_first = (PL_oldbufptr == PL_linestart);
 529
 530     PERL_ARGS_ASSERT_NO_OP;
 531
 532     if (!s)
 533         s = oldbp;
 534     else
 535         PL_bufptr = s;
 536     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 537     if (ckWARN_d(WARN_SYNTAX)) {
 538         if (is_first)
 539             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 540                     "\t(Missing semicolon on previous line?)\n");
 541         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 542             const char *t;
 543             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 544                 NOOP;
 545             if (t < PL_bufptr && isSPACE(*t))
 546                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 547                         "\t(Do you need to predeclare %.*s?)\n",
 548                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 549         }
 550         else {
 551             assert(s >= oldbp);
 552             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 553                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 554         }
 555     }
 556     PL_bufptr = oldbp;
 557 }
 558
 559 /*
 560  * S_missingterm
 561  * Complain about missing quote/regexp/heredoc terminator.
 562  * If it's called with NULL then it cauterizes the line buffer.
 563  * If we're in a delimited string and the delimiter is a control
 564  * character, it's reformatted into a two-char sequence like ^C.
 565  * This is fatal.
 566  */
 567
 568 STATIC void
 569 S_missingterm(pTHX_ char *s)
 570 {
 571     dVAR;
 572     char tmpbuf[3];
 573     char q;
 574     if (s) {
 575         char * const nl = strrchr(s,'\n');
 576         if (nl)
 577             *nl = '\0';
 578     }
 579     else if (isCNTRL(PL_multi_close)) {
 580         *tmpbuf = '^';
 581         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 582         tmpbuf[2] = '\0';
 583         s = tmpbuf;
 584     }
 585     else {
 586         *tmpbuf = (char)PL_multi_close;
 587         tmpbuf[1] = '\0';
 588         s = tmpbuf;
 589     }
 590     q = strchr(s,'"') ? '\'' : '"';
 591     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 592 }
 593
 594 /*
 595  * Check whether the named feature is enabled.
 596  */
 597 bool
 598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 599 {
 600     dVAR;
 601     HV * const hinthv = GvHV(PL_hintgv);
 602     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 603
 604     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 605
 606     if (namelen > MAX_FEATURE_LEN)
 607         return FALSE;
 608     memcpy(&he_name[8], name, namelen);
 609
 610     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 611 }
 612
 613 /*
 614  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 615  * utf16-to-utf8-reversed.
 616  */
 617
 618 #ifdef PERL_CR_FILTER
 619 static void
 620 strip_return(SV *sv)
 621 {
 622     register const char *s = SvPVX_const(sv);
 623     register const char * const e = s + SvCUR(sv);
 624
 625     PERL_ARGS_ASSERT_STRIP_RETURN;
 626
 627     /* outer loop optimized to do nothing if there are no CR-LFs */
 628     while (s < e) {
 629         if (*s++ == '\r' && *s == '\n') {
 630             /* hit a CR-LF, need to copy the rest */
 631             register char *d = s - 1;
 632             *d++ = *s++;
 633             while (s < e) {
 634                 if (*s == '\r' && s[1] == '\n')
 635                     s++;
 636                 *d++ = *s++;
 637             }
 638             SvCUR(sv) -= s - d;
 639             return;
 640         }
 641     }
 642 }
 643
 644 STATIC I32
 645 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 646 {
 647     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 648     if (count > 0 && !maxlen)
 649         strip_return(sv);
 650     return count;
 651 }
 652 #endif
 653
 654 /*
 655 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 656
 657 Creates and initialises a new lexer/parser state object, supplying
 658 a context in which to lex and parse from a new source of Perl code.
 659 A pointer to the new state object is placed in L</PL_parser>.  An entry
 660 is made on the save stack so that upon unwinding the new state object
 661 will be destroyed and the former value of L</PL_parser> will be restored.
 662 Nothing else need be done to clean up the parsing context.
 663
 664 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 665 non-null, provides a string (in SV form) containing code to be parsed.
 666 A copy of the string is made, so subsequent modification of I<line>
 667 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 668 from which code will be read to be parsed.  If both are non-null, the
 669 code in I<line> comes first and must consist of complete lines of input,
 670 and I<rsfp> supplies the remainder of the source.
 671
 672 The I<flags> parameter is reserved for future use, and must always
 673 be zero, except for one flag that is currently reserved for perl's internal
 674 use.
 675
 676 =cut
 677 */
 678
 679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 680    can share filters with the current parser. */
 681
 682 void
 683 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 684 {
 685     dVAR;
 686     const char *s = NULL;
 687     STRLEN len;
 688     yy_parser *parser, *oparser;
 689     if (flags && flags != LEX_START_SAME_FILTER)
 690         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 691
 692     /* create and initialise a parser */
 693
 694     Newxz(parser, 1, yy_parser);
 695     parser->old_parser = oparser = PL_parser;
 696     PL_parser = parser;
 697
 698     parser->stack = NULL;
 699     parser->ps = NULL;
 700     parser->stack_size = 0;
 701
 702     /* on scope exit, free this parser and restore any outer one */
 703     SAVEPARSER(parser);
 704     parser->saved_curcop = PL_curcop;
 705
 706     /* initialise lexer state */
 707
 708 #ifdef PERL_MAD
 709     parser->curforce = -1;
 710 #else
 711     parser->nexttoke = 0;
 712 #endif
 713     parser->error_count = oparser ? oparser->error_count : 0;
 714     parser->copline = NOLINE;
 715     parser->lex_state = LEX_NORMAL;
 716     parser->expect = XSTATE;
 717     parser->rsfp = rsfp;
 718     parser->rsfp_filters =
 719       !(flags & LEX_START_SAME_FILTER) || !oparser
 720         ? newAV()
 721         : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 722
 723     Newx(parser->lex_brackstack, 120, char);
 724     Newx(parser->lex_casestack, 12, char);
 725     *parser->lex_casestack = '\0';
 726
 727     if (line) {
 728         s = SvPV_const(line, len);
 729     } else {
 730         len = 0;
 731     }
 732
 733     if (!len) {
 734         parser->linestr = newSVpvs("\n;");
 735     } else {
 736         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 737         if (s[len-1] != ';')
 738             sv_catpvs(parser->linestr, "\n;");
 739     }
 740     parser->oldoldbufptr =
 741         parser->oldbufptr =
 742         parser->bufptr =
 743         parser->linestart = SvPVX(parser->linestr);
 744     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 745     parser->last_lop = parser->last_uni = NULL;
 746
 747     parser->in_pod = 0;
 748 }
 749
 750
 751 /* delete a parser object */
 752
 753 void
 754 Perl_parser_free(pTHX_  const yy_parser *parser)
 755 {
 756     PERL_ARGS_ASSERT_PARSER_FREE;
 757
 758     PL_curcop = parser->saved_curcop;
 759     SvREFCNT_dec(parser->linestr);
 760
 761     if (parser->rsfp == PerlIO_stdin())
 762         PerlIO_clearerr(parser->rsfp);
 763     else if (parser->rsfp && (!parser->old_parser ||
 764                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 765         PerlIO_close(parser->rsfp);
 766     SvREFCNT_dec(parser->rsfp_filters);
 767
 768     Safefree(parser->lex_brackstack);
 769     Safefree(parser->lex_casestack);
 770     PL_parser = parser->old_parser;
 771     Safefree(parser);
 772 }
 773
 774
 775 /*
 776 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 777
 778 Buffer scalar containing the chunk currently under consideration of the
 779 text currently being lexed.  This is always a plain string scalar (for
 780 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 781 normal scalar means; instead refer to the buffer directly by the pointer
 782 variables described below.
 783
 784 The lexer maintains various C<char*> pointers to things in the
 785 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 786 reallocated, all of these pointers must be updated.  Don't attempt to
 787 do this manually, but rather use L</lex_grow_linestr> if you need to
 788 reallocate the buffer.
 789
 790 The content of the text chunk in the buffer is commonly exactly one
 791 complete line of input, up to and including a newline terminator,
 792 but there are situations where it is otherwise.  The octets of the
 793 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 794 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 795 flag on this scalar, which may disagree with it.
 796
 797 For direct examination of the buffer, the variable
 798 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 799 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 800 of these pointers is usually preferable to examination of the scalar
 801 through normal scalar means.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 804
 805 Direct pointer to the end of the chunk of text currently being lexed, the
 806 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 807 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 808 always located at the end of the buffer, and does not count as part of
 809 the buffer's contents.
 810
 811 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 812
 813 Points to the current position of lexing inside the lexer buffer.
 814 Characters around this point may be freely examined, within
 815 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 816 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 817 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 818
 819 Lexing code (whether in the Perl core or not) moves this pointer past
 820 the characters that it consumes.  It is also expected to perform some
 821 bookkeeping whenever a newline character is consumed.  This movement
 822 can be more conveniently performed by the function L</lex_read_to>,
 823 which handles newlines appropriately.
 824
 825 Interpretation of the buffer's octets can be abstracted out by
 826 using the slightly higher-level functions L</lex_peek_unichar> and
 827 L</lex_read_unichar>.
 828
 829 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 830
 831 Points to the start of the current line inside the lexer buffer.
 832 This is useful for indicating at which column an error occurred, and
 833 not much else.  This must be updated by any lexing code that consumes
 834 a newline; the function L</lex_read_to> handles this detail.
 835
 836 =cut
 837 */
 838
 839 /*
 840 =for apidoc Amx|bool|lex_bufutf8
 841
 842 Indicates whether the octets in the lexer buffer
 843 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 844 of Unicode characters.  If not, they should be interpreted as Latin-1
 845 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 846
 847 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 848 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 849 encoding.
 850
 851 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 852 is significant, but not the whole story regarding the input character
 853 encoding.  Normally, when a file is being read, the scalar contains octets
 854 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 855 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 856 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 857 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 858 is in effect.  This logic may change in the future; use this function
 859 instead of implementing the logic yourself.
 860
 861 =cut
 862 */
 863
 864 bool
 865 Perl_lex_bufutf8(pTHX)
 866 {
 867     return UTF;
 868 }
 869
 870 /*
 871 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 872
 873 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 874 at least I<len> octets (including terminating NUL).  Returns a
 875 pointer to the reallocated buffer.  This is necessary before making
 876 any direct modification of the buffer that would increase its length.
 877 L</lex_stuff_pvn> provides a more convenient way to insert text into
 878 the buffer.
 879
 880 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 881 this function updates all of the lexer's variables that point directly
 882 into the buffer.
 883
 884 =cut
 885 */
 886
 887 char *
 888 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 889 {
 890     SV *linestr;
 891     char *buf;
 892     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 893     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 894     linestr = PL_parser->linestr;
 895     buf = SvPVX(linestr);
 896     if (len <= SvLEN(linestr))
 897         return buf;
 898     bufend_pos = PL_parser->bufend - buf;
 899     bufptr_pos = PL_parser->bufptr - buf;
 900     oldbufptr_pos = PL_parser->oldbufptr - buf;
 901     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 902     linestart_pos = PL_parser->linestart - buf;
 903     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 904     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 905     buf = sv_grow(linestr, len);
 906     PL_parser->bufend = buf + bufend_pos;
 907     PL_parser->bufptr = buf + bufptr_pos;
 908     PL_parser->oldbufptr = buf + oldbufptr_pos;
 909     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 910     PL_parser->linestart = buf + linestart_pos;
 911     if (PL_parser->last_uni)
 912         PL_parser->last_uni = buf + last_uni_pos;
 913     if (PL_parser->last_lop)
 914         PL_parser->last_lop = buf + last_lop_pos;
 915     return buf;
 916 }
 917
 918 /*
 919 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 920
 921 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 922 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 923 reallocating the buffer if necessary.  This means that lexing code that
 924 runs later will see the characters as if they had appeared in the input.
 925 It is not recommended to do this as part of normal parsing, and most
 926 uses of this facility run the risk of the inserted characters being
 927 interpreted in an unintended manner.
 928
 929 The string to be inserted is represented by I<len> octets starting
 930 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 931 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 932 The characters are recoded for the lexer buffer, according to how the
 933 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 934 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 935 function is more convenient.
 936
 937 =cut
 938 */
 939
 940 void
 941 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 942 {
 943     dVAR;
 944     char *bufptr;
 945     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 946     if (flags & ~(LEX_STUFF_UTF8))
 947         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 948     if (UTF) {
 949         if (flags & LEX_STUFF_UTF8) {
 950             goto plain_copy;
 951         } else {
 952             STRLEN highhalf = 0;
 953             const char *p, *e = pv+len;
 954             for (p = pv; p != e; p++)
 955                 highhalf += !!(((U8)*p) & 0x80);
 956             if (!highhalf)
 957                 goto plain_copy;
 958             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 959             bufptr = PL_parser->bufptr;
 960             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 961             SvCUR_set(PL_parser->linestr,
 962                 SvCUR(PL_parser->linestr) + len+highhalf);
 963             PL_parser->bufend += len+highhalf;
 964             for (p = pv; p != e; p++) {
 965                 U8 c = (U8)*p;
 966                 if (c & 0x80) {
 967                     *bufptr++ = (char)(0xc0 | (c >> 6));
 968                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 969                 } else {
 970                     *bufptr++ = (char)c;
 971                 }
 972             }
 973         }
 974     } else {
 975         if (flags & LEX_STUFF_UTF8) {
 976             STRLEN highhalf = 0;
 977             const char *p, *e = pv+len;
 978             for (p = pv; p != e; p++) {
 979                 U8 c = (U8)*p;
 980                 if (c >= 0xc4) {
 981                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 982                                 "non-Latin-1 character into Latin-1 input");
 983                 } else if (c >= 0xc2 && p+1 != e &&
 984                             (((U8)p[1]) & 0xc0) == 0x80) {
 985                     p++;
 986                     highhalf++;
 987                 } else if (c >= 0x80) {
 988                     /* malformed UTF-8 */
 989                     ENTER;
 990                     SAVESPTR(PL_warnhook);
 991                     PL_warnhook = PERL_WARNHOOK_FATAL;
 992                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 993                     LEAVE;
 994                 }
 995             }
 996             if (!highhalf)
 997                 goto plain_copy;
 998             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 999             bufptr = PL_parser->bufptr;
1000             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1001             SvCUR_set(PL_parser->linestr,
1002                 SvCUR(PL_parser->linestr) + len-highhalf);
1003             PL_parser->bufend += len-highhalf;
1004             for (p = pv; p != e; p++) {
1005                 U8 c = (U8)*p;
1006                 if (c & 0x80) {
1007                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008                     p++;
1009                 } else {
1010                     *bufptr++ = (char)c;
1011                 }
1012             }
1013         } else {
1014             plain_copy:
1015             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016             bufptr = PL_parser->bufptr;
1017             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1018             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1019             PL_parser->bufend += len;
1020             Copy(pv, bufptr, len, char);
1021         }
1022     }
1023 }
1024
1025 /*
1026 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030 reallocating the buffer if necessary.  This means that lexing code that
1031 runs later will see the characters as if they had appeared in the input.
1032 It is not recommended to do this as part of normal parsing, and most
1033 uses of this facility run the risk of the inserted characters being
1034 interpreted in an unintended manner.
1035
1036 The string to be inserted is represented by octets starting at I<pv>
1037 and continuing to the first nul.  These octets are interpreted as either
1038 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039 in I<flags>.  The characters are recoded for the lexer buffer, according
1040 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041 If it is not convenient to nul-terminate a string to be inserted, the
1042 L</lex_stuff_pvn> function is more appropriate.
1043
1044 =cut
1045 */
1046
1047 void
1048 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049 {
1050     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051     lex_stuff_pvn(pv, strlen(pv), flags);
1052 }
1053
1054 /*
1055 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059 reallocating the buffer if necessary.  This means that lexing code that
1060 runs later will see the characters as if they had appeared in the input.
1061 It is not recommended to do this as part of normal parsing, and most
1062 uses of this facility run the risk of the inserted characters being
1063 interpreted in an unintended manner.
1064
1065 The string to be inserted is the string value of I<sv>.  The characters
1066 are recoded for the lexer buffer, according to how the buffer is currently
1067 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1068 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069 need to construct a scalar.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076 {
1077     char *pv;
1078     STRLEN len;
1079     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080     if (flags)
1081         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082     pv = SvPV(sv, len);
1083     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084 }
1085
1086 /*
1087 =for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1091 This hides the discarded text from any lexing code that runs later,
1092 as if the text had never appeared.
1093
1094 This is not the normal way to consume lexed text.  For that, use
1095 L</lex_read_to>.
1096
1097 =cut
1098 */
1099
1100 void
1101 Perl_lex_unstuff(pTHX_ char *ptr)
1102 {
1103     char *buf, *bufend;
1104     STRLEN unstuff_len;
1105     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106     buf = PL_parser->bufptr;
1107     if (ptr < buf)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109     if (ptr == buf)
1110         return;
1111     bufend = PL_parser->bufend;
1112     if (ptr > bufend)
1113         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114     unstuff_len = ptr - buf;
1115     Move(ptr, buf, bufend+1-ptr, char);
1116     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117     PL_parser->bufend = bufend - unstuff_len;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_read_to|char *ptr
1122
1123 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125 performing the correct bookkeeping whenever a newline character is passed.
1126 This is the normal way to consume lexed text.
1127
1128 Interpretation of the buffer's octets can be abstracted out by
1129 using the slightly higher-level functions L</lex_peek_unichar> and
1130 L</lex_read_unichar>.
1131
1132 =cut
1133 */
1134
1135 void
1136 Perl_lex_read_to(pTHX_ char *ptr)
1137 {
1138     char *s;
1139     PERL_ARGS_ASSERT_LEX_READ_TO;
1140     s = PL_parser->bufptr;
1141     if (ptr < s || ptr > PL_parser->bufend)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143     for (; s != ptr; s++)
1144         if (*s == '\n') {
1145             CopLINE_inc(PL_curcop);
1146             PL_parser->linestart = s+1;
1147         }
1148     PL_parser->bufptr = ptr;
1149 }
1150
1151 /*
1152 =for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155 up to I<ptr>.  The remaining content of the buffer will be moved, and
1156 all pointers into the buffer updated appropriately.  I<ptr> must not
1157 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158 it is not permitted to discard text that has yet to be lexed.
1159
1160 Normally it is not necessarily to do this directly, because it suffices to
1161 use the implicit discarding behaviour of L</lex_next_chunk> and things
1162 based on it.  However, if a token stretches across multiple lines,
1163 and the lexing code has kept multiple lines of text in the buffer for
1164 that purpose, then after completion of the token it would be wise to
1165 explicitly discard the now-unneeded earlier lines, to avoid future
1166 multi-line tokens growing the buffer without bound.
1167
1168 =cut
1169 */
1170
1171 void
1172 Perl_lex_discard_to(pTHX_ char *ptr)
1173 {
1174     char *buf;
1175     STRLEN discard_len;
1176     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177     buf = SvPVX(PL_parser->linestr);
1178     if (ptr < buf)
1179         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180     if (ptr == buf)
1181         return;
1182     if (ptr > PL_parser->bufptr)
1183         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184     discard_len = ptr - buf;
1185     if (PL_parser->oldbufptr < ptr)
1186         PL_parser->oldbufptr = ptr;
1187     if (PL_parser->oldoldbufptr < ptr)
1188         PL_parser->oldoldbufptr = ptr;
1189     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190         PL_parser->last_uni = NULL;
1191     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192         PL_parser->last_lop = NULL;
1193     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195     PL_parser->bufend -= discard_len;
1196     PL_parser->bufptr -= discard_len;
1197     PL_parser->oldbufptr -= discard_len;
1198     PL_parser->oldoldbufptr -= discard_len;
1199     if (PL_parser->last_uni)
1200         PL_parser->last_uni -= discard_len;
1201     if (PL_parser->last_lop)
1202         PL_parser->last_lop -= discard_len;
1203 }
1204
1205 /*
1206 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208 Reads in the next chunk of text to be lexed, appending it to
1209 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1210 looked to the end of the current chunk and wants to know more.  It is
1211 usual, but not necessary, for lexing to have consumed the entirety of
1212 the current chunk at this time.
1213
1214 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215 chunk (i.e., the current chunk has been entirely consumed), normally the
1216 current chunk will be discarded at the same time that the new chunk is
1217 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218 will not be discarded.  If the current chunk has not been entirely
1219 consumed, then it will not be discarded regardless of the flag.
1220
1221 Returns true if some new text was added to the buffer, or false if the
1222 buffer has reached the end of the input text.
1223
1224 =cut
1225 */
1226
1227 #define LEX_FAKE_EOF 0x80000000
1228
1229 bool
1230 Perl_lex_next_chunk(pTHX_ U32 flags)
1231 {
1232     SV *linestr;
1233     char *buf;
1234     STRLEN old_bufend_pos, new_bufend_pos;
1235     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1237     bool got_some_for_debugger = 0;
1238     bool got_some;
1239     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1241     linestr = PL_parser->linestr;
1242     buf = SvPVX(linestr);
1243     if (!(flags & LEX_KEEP_PREVIOUS) &&
1244             PL_parser->bufptr == PL_parser->bufend) {
1245         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246         linestart_pos = 0;
1247         if (PL_parser->last_uni != PL_parser->bufend)
1248             PL_parser->last_uni = NULL;
1249         if (PL_parser->last_lop != PL_parser->bufend)
1250             PL_parser->last_lop = NULL;
1251         last_uni_pos = last_lop_pos = 0;
1252         *buf = 0;
1253         SvCUR(linestr) = 0;
1254     } else {
1255         old_bufend_pos = PL_parser->bufend - buf;
1256         bufptr_pos = PL_parser->bufptr - buf;
1257         oldbufptr_pos = PL_parser->oldbufptr - buf;
1258         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259         linestart_pos = PL_parser->linestart - buf;
1260         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262     }
1263     if (flags & LEX_FAKE_EOF) {
1264         goto eof;
1265     } else if (!PL_parser->rsfp) {
1266         got_some = 0;
1267     } else if (filter_gets(linestr, old_bufend_pos)) {
1268         got_some = 1;
1269         got_some_for_debugger = 1;
1270     } else {
1271         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1272             sv_setpvs(linestr, "");
1273         eof:
1274         /* End of real input.  Close filehandle (unless it was STDIN),
1275          * then add implicit termination.
1276          */
1277         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278             PerlIO_clearerr(PL_parser->rsfp);
1279         else if (PL_parser->rsfp)
1280             (void)PerlIO_close(PL_parser->rsfp);
1281         PL_parser->rsfp = NULL;
1282         PL_parser->in_pod = 0;
1283 #ifdef PERL_MAD
1284         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285             PL_faketokens = 1;
1286 #endif
1287         if (!PL_in_eval && PL_minus_p) {
1288             sv_catpvs(linestr,
1289                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290             PL_minus_n = PL_minus_p = 0;
1291         } else if (!PL_in_eval && PL_minus_n) {
1292             sv_catpvs(linestr, /*{*/";}");
1293             PL_minus_n = 0;
1294         } else
1295             sv_catpvs(linestr, ";");
1296         got_some = 1;
1297     }
1298     buf = SvPVX(linestr);
1299     new_bufend_pos = SvCUR(linestr);
1300     PL_parser->bufend = buf + new_bufend_pos;
1301     PL_parser->bufptr = buf + bufptr_pos;
1302     PL_parser->oldbufptr = buf + oldbufptr_pos;
1303     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304     PL_parser->linestart = buf + linestart_pos;
1305     if (PL_parser->last_uni)
1306         PL_parser->last_uni = buf + last_uni_pos;
1307     if (PL_parser->last_lop)
1308         PL_parser->last_lop = buf + last_lop_pos;
1309     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1310             PL_curstash != PL_debstash) {
1311         /* debugger active and we're not compiling the debugger code,
1312          * so store the line into the debugger's array of lines
1313          */
1314         update_debugger_info(NULL, buf+old_bufend_pos,
1315             new_bufend_pos-old_bufend_pos);
1316     }
1317     return got_some;
1318 }
1319
1320 /*
1321 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323 Looks ahead one (Unicode) character in the text currently being lexed.
1324 Returns the codepoint (unsigned integer value) of the next character,
1325 or -1 if lexing has reached the end of the input text.  To consume the
1326 peeked character, use L</lex_read_unichar>.
1327
1328 If the next character is in (or extends into) the next chunk of input
1329 text, the next chunk will be read in.  Normally the current chunk will be
1330 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331 then the current chunk will not be discarded.
1332
1333 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334 is encountered, an exception is generated.
1335
1336 =cut
1337 */
1338
1339 I32
1340 Perl_lex_peek_unichar(pTHX_ U32 flags)
1341 {
1342     dVAR;
1343     char *s, *bufend;
1344     if (flags & ~(LEX_KEEP_PREVIOUS))
1345         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346     s = PL_parser->bufptr;
1347     bufend = PL_parser->bufend;
1348     if (UTF) {
1349         U8 head;
1350         I32 unichar;
1351         STRLEN len, retlen;
1352         if (s == bufend) {
1353             if (!lex_next_chunk(flags))
1354                 return -1;
1355             s = PL_parser->bufptr;
1356             bufend = PL_parser->bufend;
1357         }
1358         head = (U8)*s;
1359         if (!(head & 0x80))
1360             return head;
1361         if (head & 0x40) {
1362             len = PL_utf8skip[head];
1363             while ((STRLEN)(bufend-s) < len) {
1364                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365                     break;
1366                 s = PL_parser->bufptr;
1367                 bufend = PL_parser->bufend;
1368             }
1369         }
1370         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371         if (retlen == (STRLEN)-1) {
1372             /* malformed UTF-8 */
1373             ENTER;
1374             SAVESPTR(PL_warnhook);
1375             PL_warnhook = PERL_WARNHOOK_FATAL;
1376             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377             LEAVE;
1378         }
1379         return unichar;
1380     } else {
1381         if (s == bufend) {
1382             if (!lex_next_chunk(flags))
1383                 return -1;
1384             s = PL_parser->bufptr;
1385         }
1386         return (U8)*s;
1387     }
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393 Reads the next (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the character read,
1395 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396 if lexing has reached the end of the input text.  To non-destructively
1397 examine the next character, use L</lex_peek_unichar> instead.
1398
1399 If the next character is in (or extends into) the next chunk of input
1400 text, the next chunk will be read in.  Normally the current chunk will be
1401 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402 then the current chunk will not be discarded.
1403
1404 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405 is encountered, an exception is generated.
1406
1407 =cut
1408 */
1409
1410 I32
1411 Perl_lex_read_unichar(pTHX_ U32 flags)
1412 {
1413     I32 c;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416     c = lex_peek_unichar(flags);
1417     if (c != -1) {
1418         if (c == '\n')
1419             CopLINE_inc(PL_curcop);
1420         if (UTF)
1421             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422         else
1423             ++(PL_parser->bufptr);
1424     }
1425     return c;
1426 }
1427
1428 /*
1429 =for apidoc Amx|void|lex_read_space|U32 flags
1430
1431 Reads optional spaces, in Perl style, in the text currently being
1432 lexed.  The spaces may include ordinary whitespace characters and
1433 Perl-style comments.  C<#line> directives are processed if encountered.
1434 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1435 at a non-space character (or the end of the input text).
1436
1437 If spaces extend into the next chunk of input text, the next chunk will
1438 be read in.  Normally the current chunk will be discarded at the same
1439 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1440 chunk will not be discarded.
1441
1442 =cut
1443 */
1444
1445 #define LEX_NO_NEXT_CHUNK 0x80000000
1446
1447 void
1448 Perl_lex_read_space(pTHX_ U32 flags)
1449 {
1450     char *s, *bufend;
1451     bool need_incline = 0;
1452     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1453         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1454 #ifdef PERL_MAD
1455     if (PL_skipwhite) {
1456         sv_free(PL_skipwhite);
1457         PL_skipwhite = NULL;
1458     }
1459     if (PL_madskills)
1460         PL_skipwhite = newSVpvs("");
1461 #endif /* PERL_MAD */
1462     s = PL_parser->bufptr;
1463     bufend = PL_parser->bufend;
1464     while (1) {
1465         char c = *s;
1466         if (c == '#') {
1467             do {
1468                 c = *++s;
1469             } while (!(c == '\n' || (c == 0 && s == bufend)));
1470         } else if (c == '\n') {
1471             s++;
1472             PL_parser->linestart = s;
1473             if (s == bufend)
1474                 need_incline = 1;
1475             else
1476                 incline(s);
1477         } else if (isSPACE(c)) {
1478             s++;
1479         } else if (c == 0 && s == bufend) {
1480             bool got_more;
1481 #ifdef PERL_MAD
1482             if (PL_madskills)
1483                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1484 #endif /* PERL_MAD */
1485             if (flags & LEX_NO_NEXT_CHUNK)
1486                 break;
1487             PL_parser->bufptr = s;
1488             CopLINE_inc(PL_curcop);
1489             got_more = lex_next_chunk(flags);
1490             CopLINE_dec(PL_curcop);
1491             s = PL_parser->bufptr;
1492             bufend = PL_parser->bufend;
1493             if (!got_more)
1494                 break;
1495             if (need_incline && PL_parser->rsfp) {
1496                 incline(s);
1497                 need_incline = 0;
1498             }
1499         } else {
1500             break;
1501         }
1502     }
1503 #ifdef PERL_MAD
1504     if (PL_madskills)
1505         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1506 #endif /* PERL_MAD */
1507     PL_parser->bufptr = s;
1508 }
1509
1510 /*
1511  * S_incline
1512  * This subroutine has nothing to do with tilting, whether at windmills
1513  * or pinball tables.  Its name is short for "increment line".  It
1514  * increments the current line number in CopLINE(PL_curcop) and checks
1515  * to see whether the line starts with a comment of the form
1516  *    # line 500 "foo.pm"
1517  * If so, it sets the current line number and file to the values in the comment.
1518  */
1519
1520 STATIC void
1521 S_incline(pTHX_ const char *s)
1522 {
1523     dVAR;
1524     const char *t;
1525     const char *n;
1526     const char *e;
1527     line_t line_num;
1528
1529     PERL_ARGS_ASSERT_INCLINE;
1530
1531     CopLINE_inc(PL_curcop);
1532     if (*s++ != '#')
1533         return;
1534     while (SPACE_OR_TAB(*s))
1535         s++;
1536     if (strnEQ(s, "line", 4))
1537         s += 4;
1538     else
1539         return;
1540     if (SPACE_OR_TAB(*s))
1541         s++;
1542     else
1543         return;
1544     while (SPACE_OR_TAB(*s))
1545         s++;
1546     if (!isDIGIT(*s))
1547         return;
1548
1549     n = s;
1550     while (isDIGIT(*s))
1551         s++;
1552     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1553         return;
1554     while (SPACE_OR_TAB(*s))
1555         s++;
1556     if (*s == '"' && (t = strchr(s+1, '"'))) {
1557         s++;
1558         e = t + 1;
1559     }
1560     else {
1561         t = s;
1562         while (!isSPACE(*t))
1563             t++;
1564         e = t;
1565     }
1566     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1567         e++;
1568     if (*e != '\n' && *e != '\0')
1569         return;         /* false alarm */
1570
1571     line_num = atoi(n)-1;
1572
1573     if (t - s > 0) {
1574         const STRLEN len = t - s;
1575         SV *const temp_sv = CopFILESV(PL_curcop);
1576         const char *cf;
1577         STRLEN tmplen;
1578
1579         if (temp_sv) {
1580             cf = SvPVX(temp_sv);
1581             tmplen = SvCUR(temp_sv);
1582         } else {
1583             cf = NULL;
1584             tmplen = 0;
1585         }
1586
1587         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1588             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1589              * to *{"::_<newfilename"} */
1590             /* However, the long form of evals is only turned on by the
1591                debugger - usually they're "(eval %lu)" */
1592             char smallbuf[128];
1593             char *tmpbuf;
1594             GV **gvp;
1595             STRLEN tmplen2 = len;
1596             if (tmplen + 2 <= sizeof smallbuf)
1597                 tmpbuf = smallbuf;
1598             else
1599                 Newx(tmpbuf, tmplen + 2, char);
1600             tmpbuf[0] = '_';
1601             tmpbuf[1] = '<';
1602             memcpy(tmpbuf + 2, cf, tmplen);
1603             tmplen += 2;
1604             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1605             if (gvp) {
1606                 char *tmpbuf2;
1607                 GV *gv2;
1608
1609                 if (tmplen2 + 2 <= sizeof smallbuf)
1610                     tmpbuf2 = smallbuf;
1611                 else
1612                     Newx(tmpbuf2, tmplen2 + 2, char);
1613
1614                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1615                     /* Either they malloc'd it, or we malloc'd it,
1616                        so no prefix is present in ours.  */
1617                     tmpbuf2[0] = '_';
1618                     tmpbuf2[1] = '<';
1619                 }
1620
1621                 memcpy(tmpbuf2 + 2, s, tmplen2);
1622                 tmplen2 += 2;
1623
1624                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1625                 if (!isGV(gv2)) {
1626                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1627                     /* adjust ${"::_<newfilename"} to store the new file name */
1628                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1629                     /* The line number may differ. If that is the case,
1630                        alias the saved lines that are in the array.
1631                        Otherwise alias the whole array. */
1632                     if (CopLINE(PL_curcop) == line_num) {
1633                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1634                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635                     }
1636                     else if (GvAV(*gvp)) {
1637                         AV * const av = GvAV(*gvp);
1638                         const I32 start = CopLINE(PL_curcop)+1;
1639                         I32 items = AvFILLp(av) - start;
1640                         if (items > 0) {
1641                             AV * const av2 = GvAVn(gv2);
1642                             SV **svp = AvARRAY(av) + start;
1643                             I32 l = (I32)line_num+1;
1644                             while (items--)
1645                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1646                         }
1647                     }
1648                 }
1649
1650                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1651             }
1652             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1653         }
1654         CopFILE_free(PL_curcop);
1655         CopFILE_setn(PL_curcop, s, len);
1656     }
1657     CopLINE_set(PL_curcop, line_num);
1658 }
1659
1660 #ifdef PERL_MAD
1661 /* skip space before PL_thistoken */
1662
1663 STATIC char *
1664 S_skipspace0(pTHX_ register char *s)
1665 {
1666     PERL_ARGS_ASSERT_SKIPSPACE0;
1667
1668     s = skipspace(s);
1669     if (!PL_madskills)
1670         return s;
1671     if (PL_skipwhite) {
1672         if (!PL_thiswhite)
1673             PL_thiswhite = newSVpvs("");
1674         sv_catsv(PL_thiswhite, PL_skipwhite);
1675         sv_free(PL_skipwhite);
1676         PL_skipwhite = 0;
1677     }
1678     PL_realtokenstart = s - SvPVX(PL_linestr);
1679     return s;
1680 }
1681
1682 /* skip space after PL_thistoken */
1683
1684 STATIC char *
1685 S_skipspace1(pTHX_ register char *s)
1686 {
1687     const char *start = s;
1688     I32 startoff = start - SvPVX(PL_linestr);
1689
1690     PERL_ARGS_ASSERT_SKIPSPACE1;
1691
1692     s = skipspace(s);
1693     if (!PL_madskills)
1694         return s;
1695     start = SvPVX(PL_linestr) + startoff;
1696     if (!PL_thistoken && PL_realtokenstart >= 0) {
1697         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698         PL_thistoken = newSVpvn(tstart, start - tstart);
1699     }
1700     PL_realtokenstart = -1;
1701     if (PL_skipwhite) {
1702         if (!PL_nextwhite)
1703             PL_nextwhite = newSVpvs("");
1704         sv_catsv(PL_nextwhite, PL_skipwhite);
1705         sv_free(PL_skipwhite);
1706         PL_skipwhite = 0;
1707     }
1708     return s;
1709 }
1710
1711 STATIC char *
1712 S_skipspace2(pTHX_ register char *s, SV **svp)
1713 {
1714     char *start;
1715     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1716     const I32 startoff = s - SvPVX(PL_linestr);
1717
1718     PERL_ARGS_ASSERT_SKIPSPACE2;
1719
1720     s = skipspace(s);
1721     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1722     if (!PL_madskills || !svp)
1723         return s;
1724     start = SvPVX(PL_linestr) + startoff;
1725     if (!PL_thistoken && PL_realtokenstart >= 0) {
1726         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1727         PL_thistoken = newSVpvn(tstart, start - tstart);
1728         PL_realtokenstart = -1;
1729     }
1730     if (PL_skipwhite) {
1731         if (!*svp)
1732             *svp = newSVpvs("");
1733         sv_setsv(*svp, PL_skipwhite);
1734         sv_free(PL_skipwhite);
1735         PL_skipwhite = 0;
1736     }
1737
1738     return s;
1739 }
1740 #endif
1741
1742 STATIC void
1743 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1744 {
1745     AV *av = CopFILEAVx(PL_curcop);
1746     if (av) {
1747         SV * const sv = newSV_type(SVt_PVMG);
1748         if (orig_sv)
1749             sv_setsv(sv, orig_sv);
1750         else
1751             sv_setpvn(sv, buf, len);
1752         (void)SvIOK_on(sv);
1753         SvIV_set(sv, 0);
1754         av_store(av, (I32)CopLINE(PL_curcop), sv);
1755     }
1756 }
1757
1758 /*
1759  * S_skipspace
1760  * Called to gobble the appropriate amount and type of whitespace.
1761  * Skips comments as well.
1762  */
1763
1764 STATIC char *
1765 S_skipspace(pTHX_ register char *s)
1766 {
1767 #ifdef PERL_MAD
1768     char *start = s;
1769 #endif /* PERL_MAD */
1770     PERL_ARGS_ASSERT_SKIPSPACE;
1771 #ifdef PERL_MAD
1772     if (PL_skipwhite) {
1773         sv_free(PL_skipwhite);
1774         PL_skipwhite = NULL;
1775     }
1776 #endif /* PERL_MAD */
1777     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1778         while (s < PL_bufend && SPACE_OR_TAB(*s))
1779             s++;
1780     } else {
1781         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782         PL_bufptr = s;
1783         lex_read_space(LEX_KEEP_PREVIOUS |
1784                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1785                     LEX_NO_NEXT_CHUNK : 0));
1786         s = PL_bufptr;
1787         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1788         if (PL_linestart > PL_bufptr)
1789             PL_bufptr = PL_linestart;
1790         return s;
1791     }
1792 #ifdef PERL_MAD
1793     if (PL_madskills)
1794         PL_skipwhite = newSVpvn(start, s-start);
1795 #endif /* PERL_MAD */
1796     return s;
1797 }
1798
1799 /*
1800  * S_check_uni
1801  * Check the unary operators to ensure there's no ambiguity in how they're
1802  * used.  An ambiguous piece of code would be:
1803  *     rand + 5
1804  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1805  * the +5 is its argument.
1806  */
1807
1808 STATIC void
1809 S_check_uni(pTHX)
1810 {
1811     dVAR;
1812     const char *s;
1813     const char *t;
1814
1815     if (PL_oldoldbufptr != PL_last_uni)
1816         return;
1817     while (isSPACE(*PL_last_uni))
1818         PL_last_uni++;
1819     s = PL_last_uni;
1820     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821         s++;
1822     if ((t = strchr(s, '(')) && t < PL_bufptr)
1823         return;
1824
1825     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1826                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1827                      (int)(s - PL_last_uni), PL_last_uni);
1828 }
1829
1830 /*
1831  * LOP : macro to build a list operator.  Its behaviour has been replaced
1832  * with a subroutine, S_lop() for which LOP is just another name.
1833  */
1834
1835 #define LOP(f,x) return lop(f,x,s)
1836
1837 /*
1838  * S_lop
1839  * Build a list operator (or something that might be one).  The rules:
1840  *  - if we have a next token, then it's a list operator [why?]
1841  *  - if the next thing is an opening paren, then it's a function
1842  *  - else it's a list operator
1843  */
1844
1845 STATIC I32
1846 S_lop(pTHX_ I32 f, int x, char *s)
1847 {
1848     dVAR;
1849
1850     PERL_ARGS_ASSERT_LOP;
1851
1852     pl_yylval.ival = f;
1853     CLINE;
1854     PL_expect = x;
1855     PL_bufptr = s;
1856     PL_last_lop = PL_oldbufptr;
1857     PL_last_lop_op = (OPCODE)f;
1858 #ifdef PERL_MAD
1859     if (PL_lasttoke)
1860         goto lstop;
1861 #else
1862     if (PL_nexttoke)
1863         goto lstop;
1864 #endif
1865     if (*s == '(')
1866         return REPORT(FUNC);
1867     s = PEEKSPACE(s);
1868     if (*s == '(')
1869         return REPORT(FUNC);
1870     else {
1871         lstop:
1872         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1873             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1874         return REPORT(LSTOP);
1875     }
1876 }
1877
1878 #ifdef PERL_MAD
1879  /*
1880  * S_start_force
1881  * Sets up for an eventual force_next().  start_force(0) basically does
1882  * an unshift, while start_force(-1) does a push.  yylex removes items
1883  * on the "pop" end.
1884  */
1885
1886 STATIC void
1887 S_start_force(pTHX_ int where)
1888 {
1889     int i;
1890
1891     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1892         where = PL_lasttoke;
1893     assert(PL_curforce < 0 || PL_curforce == where);
1894     if (PL_curforce != where) {
1895         for (i = PL_lasttoke; i > where; --i) {
1896             PL_nexttoke[i] = PL_nexttoke[i-1];
1897         }
1898         PL_lasttoke++;
1899     }
1900     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1901         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1902     PL_curforce = where;
1903     if (PL_nextwhite) {
1904         if (PL_madskills)
1905             curmad('^', newSVpvs(""));
1906         CURMAD('_', PL_nextwhite);
1907     }
1908 }
1909
1910 STATIC void
1911 S_curmad(pTHX_ char slot, SV *sv)
1912 {
1913     MADPROP **where;
1914
1915     if (!sv)
1916         return;
1917     if (PL_curforce < 0)
1918         where = &PL_thismad;
1919     else
1920         where = &PL_nexttoke[PL_curforce].next_mad;
1921
1922     if (PL_faketokens)
1923         sv_setpvs(sv, "");
1924     else {
1925         if (!IN_BYTES) {
1926             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927                 SvUTF8_on(sv);
1928             else if (PL_encoding) {
1929                 sv_recode_to_utf8(sv, PL_encoding);
1930             }
1931         }
1932     }
1933
1934     /* keep a slot open for the head of the list? */
1935     if (slot != '_' && *where && (*where)->mad_key == '^') {
1936         (*where)->mad_key = slot;
1937         sv_free(MUTABLE_SV(((*where)->mad_val)));
1938         (*where)->mad_val = (void*)sv;
1939     }
1940     else
1941         addmad(newMADsv(slot, sv), where, 0);
1942 }
1943 #else
1944 #  define start_force(where)    NOOP
1945 #  define curmad(slot, sv)      NOOP
1946 #endif
1947
1948 /*
1949  * S_force_next
1950  * When the lexer realizes it knows the next token (for instance,
1951  * it is reordering tokens for the parser) then it can call S_force_next
1952  * to know what token to return the next time the lexer is called.  Caller
1953  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1954  * and possibly PL_expect to ensure the lexer handles the token correctly.
1955  */
1956
1957 STATIC void
1958 S_force_next(pTHX_ I32 type)
1959 {
1960     dVAR;
1961 #ifdef DEBUGGING
1962     if (DEBUG_T_TEST) {
1963         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1964         tokereport(type, &NEXTVAL_NEXTTOKE);
1965     }
1966 #endif
1967 #ifdef PERL_MAD
1968     if (PL_curforce < 0)
1969         start_force(PL_lasttoke);
1970     PL_nexttoke[PL_curforce].next_type = type;
1971     if (PL_lex_state != LEX_KNOWNEXT)
1972         PL_lex_defer = PL_lex_state;
1973     PL_lex_state = LEX_KNOWNEXT;
1974     PL_lex_expect = PL_expect;
1975     PL_curforce = -1;
1976 #else
1977     PL_nexttype[PL_nexttoke] = type;
1978     PL_nexttoke++;
1979     if (PL_lex_state != LEX_KNOWNEXT) {
1980         PL_lex_defer = PL_lex_state;
1981         PL_lex_expect = PL_expect;
1982         PL_lex_state = LEX_KNOWNEXT;
1983     }
1984 #endif
1985 }
1986
1987 void
1988 Perl_yyunlex(pTHX)
1989 {
1990     int yyc = PL_parser->yychar;
1991     if (yyc != YYEMPTY) {
1992         if (yyc) {
1993             start_force(-1);
1994             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1996                 PL_lex_allbrackets--;
1997                 PL_lex_brackets--;
1998                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999             } else if (yyc == '('/*)*/) {
2000                 PL_lex_allbrackets--;
2001                 yyc |= (2<<24);
2002             }
2003             force_next(yyc);
2004         }
2005         PL_parser->yychar = YYEMPTY;
2006     }
2007 }
2008
2009 STATIC SV *
2010 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2011 {
2012     dVAR;
2013     SV * const sv = newSVpvn_utf8(start, len,
2014                                   !IN_BYTES
2015                                   && UTF
2016                                   && !is_ascii_string((const U8*)start, len)
2017                                   && is_utf8_string((const U8*)start, len));
2018     return sv;
2019 }
2020
2021 /*
2022  * S_force_word
2023  * When the lexer knows the next thing is a word (for instance, it has
2024  * just seen -> and it knows that the next char is a word char, then
2025  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2026  * lookahead.
2027  *
2028  * Arguments:
2029  *   char *start : buffer position (must be within PL_linestr)
2030  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2031  *   int check_keyword : if true, Perl checks to make sure the word isn't
2032  *       a keyword (do this if the word is a label, e.g. goto FOO)
2033  *   int allow_pack : if true, : characters will also be allowed (require,
2034  *       use, etc. do this)
2035  *   int allow_initial_tick : used by the "sub" lexer only.
2036  */
2037
2038 STATIC char *
2039 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2040 {
2041     dVAR;
2042     register char *s;
2043     STRLEN len;
2044
2045     PERL_ARGS_ASSERT_FORCE_WORD;
2046
2047     start = SKIPSPACE1(start);
2048     s = start;
2049     if (isIDFIRST_lazy_if(s,UTF) ||
2050         (allow_pack && *s == ':') ||
2051         (allow_initial_tick && *s == '\'') )
2052     {
2053         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2054         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2055             return start;
2056         start_force(PL_curforce);
2057         if (PL_madskills)
2058             curmad('X', newSVpvn(start,s-start));
2059         if (token == METHOD) {
2060             s = SKIPSPACE1(s);
2061             if (*s == '(')
2062                 PL_expect = XTERM;
2063             else {
2064                 PL_expect = XOPERATOR;
2065             }
2066         }
2067         if (PL_madskills)
2068             curmad('g', newSVpvs( "forced" ));
2069         NEXTVAL_NEXTTOKE.opval
2070             = (OP*)newSVOP(OP_CONST,0,
2071                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2072         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2073         force_next(token);
2074     }
2075     return s;
2076 }
2077
2078 /*
2079  * S_force_ident
2080  * Called when the lexer wants $foo *foo &foo etc, but the program
2081  * text only contains the "foo" portion.  The first argument is a pointer
2082  * to the "foo", and the second argument is the type symbol to prefix.
2083  * Forces the next token to be a "WORD".
2084  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2085  */
2086
2087 STATIC void
2088 S_force_ident(pTHX_ register const char *s, int kind)
2089 {
2090     dVAR;
2091
2092     PERL_ARGS_ASSERT_FORCE_IDENT;
2093
2094     if (*s) {
2095         const STRLEN len = strlen(s);
2096         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2097                                                                 UTF ? SVf_UTF8 : 0));
2098         start_force(PL_curforce);
2099         NEXTVAL_NEXTTOKE.opval = o;
2100         force_next(WORD);
2101         if (kind) {
2102             o->op_private = OPpCONST_ENTERED;
2103             /* XXX see note in pp_entereval() for why we forgo typo
2104                warnings if the symbol must be introduced in an eval.
2105                GSAR 96-10-12 */
2106             gv_fetchpvn_flags(s, len,
2107                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2108                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2109                               kind == '$' ? SVt_PV :
2110                               kind == '@' ? SVt_PVAV :
2111                               kind == '%' ? SVt_PVHV :
2112                               SVt_PVGV
2113                               );
2114         }
2115     }
2116 }
2117
2118 NV
2119 Perl_str_to_version(pTHX_ SV *sv)
2120 {
2121     NV retval = 0.0;
2122     NV nshift = 1.0;
2123     STRLEN len;
2124     const char *start = SvPV_const(sv,len);
2125     const char * const end = start + len;
2126     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2127
2128     PERL_ARGS_ASSERT_STR_TO_VERSION;
2129
2130     while (start < end) {
2131         STRLEN skip;
2132         UV n;
2133         if (utf)
2134             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2135         else {
2136             n = *(U8*)start;
2137             skip = 1;
2138         }
2139         retval += ((NV)n)/nshift;
2140         start += skip;
2141         nshift *= 1000;
2142     }
2143     return retval;
2144 }
2145
2146 /*
2147  * S_force_version
2148  * Forces the next token to be a version number.
2149  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2150  * and if "guessing" is TRUE, then no new token is created (and the caller
2151  * must use an alternative parsing method).
2152  */
2153
2154 STATIC char *
2155 S_force_version(pTHX_ char *s, int guessing)
2156 {
2157     dVAR;
2158     OP *version = NULL;
2159     char *d;
2160 #ifdef PERL_MAD
2161     I32 startoff = s - SvPVX(PL_linestr);
2162 #endif
2163
2164     PERL_ARGS_ASSERT_FORCE_VERSION;
2165
2166     s = SKIPSPACE1(s);
2167
2168     d = s;
2169     if (*d == 'v')
2170         d++;
2171     if (isDIGIT(*d)) {
2172         while (isDIGIT(*d) || *d == '_' || *d == '.')
2173             d++;
2174 #ifdef PERL_MAD
2175         if (PL_madskills) {
2176             start_force(PL_curforce);
2177             curmad('X', newSVpvn(s,d-s));
2178         }
2179 #endif
2180         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2181             SV *ver;
2182 #ifdef USE_LOCALE_NUMERIC
2183             char *loc = setlocale(LC_NUMERIC, "C");
2184 #endif
2185             s = scan_num(s, &pl_yylval);
2186 #ifdef USE_LOCALE_NUMERIC
2187             setlocale(LC_NUMERIC, loc);
2188 #endif
2189             version = pl_yylval.opval;
2190             ver = cSVOPx(version)->op_sv;
2191             if (SvPOK(ver) && !SvNIOK(ver)) {
2192                 SvUPGRADE(ver, SVt_PVNV);
2193                 SvNV_set(ver, str_to_version(ver));
2194                 SvNOK_on(ver);          /* hint that it is a version */
2195             }
2196         }
2197         else if (guessing) {
2198 #ifdef PERL_MAD
2199             if (PL_madskills) {
2200                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2201                 PL_nextwhite = 0;
2202                 s = SvPVX(PL_linestr) + startoff;
2203             }
2204 #endif
2205             return s;
2206         }
2207     }
2208
2209 #ifdef PERL_MAD
2210     if (PL_madskills && !version) {
2211         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2212         PL_nextwhite = 0;
2213         s = SvPVX(PL_linestr) + startoff;
2214     }
2215 #endif
2216     /* NOTE: The parser sees the package name and the VERSION swapped */
2217     start_force(PL_curforce);
2218     NEXTVAL_NEXTTOKE.opval = version;
2219     force_next(WORD);
2220
2221     return s;
2222 }
2223
2224 /*
2225  * S_force_strict_version
2226  * Forces the next token to be a version number using strict syntax rules.
2227  */
2228
2229 STATIC char *
2230 S_force_strict_version(pTHX_ char *s)
2231 {
2232     dVAR;
2233     OP *version = NULL;
2234 #ifdef PERL_MAD
2235     I32 startoff = s - SvPVX(PL_linestr);
2236 #endif
2237     const char *errstr = NULL;
2238
2239     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2240
2241     while (isSPACE(*s)) /* leading whitespace */
2242         s++;
2243
2244     if (is_STRICT_VERSION(s,&errstr)) {
2245         SV *ver = newSV(0);
2246         s = (char *)scan_version(s, ver, 0);
2247         version = newSVOP(OP_CONST, 0, ver);
2248     }
2249     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2250             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2251     {
2252         PL_bufptr = s;
2253         if (errstr)
2254             yyerror(errstr); /* version required */
2255         return s;
2256     }
2257
2258 #ifdef PERL_MAD
2259     if (PL_madskills && !version) {
2260         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2261         PL_nextwhite = 0;
2262         s = SvPVX(PL_linestr) + startoff;
2263     }
2264 #endif
2265     /* NOTE: The parser sees the package name and the VERSION swapped */
2266     start_force(PL_curforce);
2267     NEXTVAL_NEXTTOKE.opval = version;
2268     force_next(WORD);
2269
2270     return s;
2271 }
2272
2273 /*
2274  * S_tokeq
2275  * Tokenize a quoted string passed in as an SV.  It finds the next
2276  * chunk, up to end of string or a backslash.  It may make a new
2277  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2278  * turns \\ into \.
2279  */
2280
2281 STATIC SV *
2282 S_tokeq(pTHX_ SV *sv)
2283 {
2284     dVAR;
2285     register char *s;
2286     register char *send;
2287     register char *d;
2288     STRLEN len = 0;
2289     SV *pv = sv;
2290
2291     PERL_ARGS_ASSERT_TOKEQ;
2292
2293     if (!SvLEN(sv))
2294         goto finish;
2295
2296     s = SvPV_force(sv, len);
2297     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2298         goto finish;
2299     send = s + len;
2300     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2301     while (s < send && !(*s == '\\' && s[1] == '\\'))
2302         s++;
2303     if (s == send)
2304         goto finish;
2305     d = s;
2306     if ( PL_hints & HINT_NEW_STRING ) {
2307         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2308     }
2309     while (s < send) {
2310         if (*s == '\\') {
2311             if (s + 1 < send && (s[1] == '\\'))
2312                 s++;            /* all that, just for this */
2313         }
2314         *d++ = *s++;
2315     }
2316     *d = '\0';
2317     SvCUR_set(sv, d - SvPVX_const(sv));
2318   finish:
2319     if ( PL_hints & HINT_NEW_STRING )
2320        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2321     return sv;
2322 }
2323
2324 /*
2325  * Now come three functions related to double-quote context,
2326  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2327  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2328  * interact with PL_lex_state, and create fake ( ... ) argument lists
2329  * to handle functions and concatenation.
2330  * They assume that whoever calls them will be setting up a fake
2331  * join call, because each subthing puts a ',' after it.  This lets
2332  *   "lower \luPpEr"
2333  * become
2334  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2335  *
2336  * (I'm not sure whether the spurious commas at the end of lcfirst's
2337  * arguments and join's arguments are created or not).
2338  */
2339
2340 /*
2341  * S_sublex_start
2342  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2343  *
2344  * Pattern matching will set PL_lex_op to the pattern-matching op to
2345  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2346  *
2347  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2348  *
2349  * Everything else becomes a FUNC.
2350  *
2351  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2352  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2353  * call to S_sublex_push().
2354  */
2355
2356 STATIC I32
2357 S_sublex_start(pTHX)
2358 {
2359     dVAR;
2360     register const I32 op_type = pl_yylval.ival;
2361
2362     if (op_type == OP_NULL) {
2363         pl_yylval.opval = PL_lex_op;
2364         PL_lex_op = NULL;
2365         return THING;
2366     }
2367     if (op_type == OP_CONST || op_type == OP_READLINE) {
2368         SV *sv = tokeq(PL_lex_stuff);
2369
2370         if (SvTYPE(sv) == SVt_PVIV) {
2371             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2372             STRLEN len;
2373             const char * const p = SvPV_const(sv, len);
2374             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2375             SvREFCNT_dec(sv);
2376             sv = nsv;
2377         }
2378         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2379         PL_lex_stuff = NULL;
2380         /* Allow <FH> // "foo" */
2381         if (op_type == OP_READLINE)
2382             PL_expect = XTERMORDORDOR;
2383         return THING;
2384     }
2385     else if (op_type == OP_BACKTICK && PL_lex_op) {
2386         /* readpipe() vas overriden */
2387         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2388         pl_yylval.opval = PL_lex_op;
2389         PL_lex_op = NULL;
2390         PL_lex_stuff = NULL;
2391         return THING;
2392     }
2393
2394     PL_sublex_info.super_state = PL_lex_state;
2395     PL_sublex_info.sub_inwhat = (U16)op_type;
2396     PL_sublex_info.sub_op = PL_lex_op;
2397     PL_lex_state = LEX_INTERPPUSH;
2398
2399     PL_expect = XTERM;
2400     if (PL_lex_op) {
2401         pl_yylval.opval = PL_lex_op;
2402         PL_lex_op = NULL;
2403         return PMFUNC;
2404     }
2405     else
2406         return FUNC;
2407 }
2408
2409 /*
2410  * S_sublex_push
2411  * Create a new scope to save the lexing state.  The scope will be
2412  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2413  * to the uc, lc, etc. found before.
2414  * Sets PL_lex_state to LEX_INTERPCONCAT.
2415  */
2416
2417 STATIC I32
2418 S_sublex_push(pTHX)
2419 {
2420     dVAR;
2421     ENTER;
2422
2423     PL_lex_state = PL_sublex_info.super_state;
2424     SAVEBOOL(PL_lex_dojoin);
2425     SAVEI32(PL_lex_brackets);
2426     SAVEI32(PL_lex_allbrackets);
2427     SAVEI8(PL_lex_fakeeof);
2428     SAVEI32(PL_lex_casemods);
2429     SAVEI32(PL_lex_starts);
2430     SAVEI8(PL_lex_state);
2431     SAVEVPTR(PL_lex_inpat);
2432     SAVEI16(PL_lex_inwhat);
2433     SAVECOPLINE(PL_curcop);
2434     SAVEPPTR(PL_bufptr);
2435     SAVEPPTR(PL_bufend);
2436     SAVEPPTR(PL_oldbufptr);
2437     SAVEPPTR(PL_oldoldbufptr);
2438     SAVEPPTR(PL_last_lop);
2439     SAVEPPTR(PL_last_uni);
2440     SAVEPPTR(PL_linestart);
2441     SAVESPTR(PL_linestr);
2442     SAVEGENERICPV(PL_lex_brackstack);
2443     SAVEGENERICPV(PL_lex_casestack);
2444
2445     PL_linestr = PL_lex_stuff;
2446     PL_lex_stuff = NULL;
2447
2448     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2449         = SvPVX(PL_linestr);
2450     PL_bufend += SvCUR(PL_linestr);
2451     PL_last_lop = PL_last_uni = NULL;
2452     SAVEFREESV(PL_linestr);
2453
2454     PL_lex_dojoin = FALSE;
2455     PL_lex_brackets = 0;
2456     PL_lex_allbrackets = 0;
2457     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2458     Newx(PL_lex_brackstack, 120, char);
2459     Newx(PL_lex_casestack, 12, char);
2460     PL_lex_casemods = 0;
2461     *PL_lex_casestack = '\0';
2462     PL_lex_starts = 0;
2463     PL_lex_state = LEX_INTERPCONCAT;
2464     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2465
2466     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2467     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2468     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2469         PL_lex_inpat = PL_sublex_info.sub_op;
2470     else
2471         PL_lex_inpat = NULL;
2472
2473     return '(';
2474 }
2475
2476 /*
2477  * S_sublex_done
2478  * Restores lexer state after a S_sublex_push.
2479  */
2480
2481 STATIC I32
2482 S_sublex_done(pTHX)
2483 {
2484     dVAR;
2485     if (!PL_lex_starts++) {
2486         SV * const sv = newSVpvs("");
2487         if (SvUTF8(PL_linestr))
2488             SvUTF8_on(sv);
2489         PL_expect = XOPERATOR;
2490         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2491         return THING;
2492     }
2493
2494     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2495         PL_lex_state = LEX_INTERPCASEMOD;
2496         return yylex();
2497     }
2498
2499     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2500     assert(PL_lex_inwhat != OP_TRANSR);
2501     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2502         PL_linestr = PL_lex_repl;
2503         PL_lex_inpat = 0;
2504         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2505         PL_bufend += SvCUR(PL_linestr);
2506         PL_last_lop = PL_last_uni = NULL;
2507         SAVEFREESV(PL_linestr);
2508         PL_lex_dojoin = FALSE;
2509         PL_lex_brackets = 0;
2510         PL_lex_allbrackets = 0;
2511         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2512         PL_lex_casemods = 0;
2513         *PL_lex_casestack = '\0';
2514         PL_lex_starts = 0;
2515         if (SvEVALED(PL_lex_repl)) {
2516             PL_lex_state = LEX_INTERPNORMAL;
2517             PL_lex_starts++;
2518             /*  we don't clear PL_lex_repl here, so that we can check later
2519                 whether this is an evalled subst; that means we rely on the
2520                 logic to ensure sublex_done() is called again only via the
2521                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2522         }
2523         else {
2524             PL_lex_state = LEX_INTERPCONCAT;
2525             PL_lex_repl = NULL;
2526         }
2527         return ',';
2528     }
2529     else {
2530 #ifdef PERL_MAD
2531         if (PL_madskills) {
2532             if (PL_thiswhite) {
2533                 if (!PL_endwhite)
2534                     PL_endwhite = newSVpvs("");
2535                 sv_catsv(PL_endwhite, PL_thiswhite);
2536                 PL_thiswhite = 0;
2537             }
2538             if (PL_thistoken)
2539                 sv_setpvs(PL_thistoken,"");
2540             else
2541                 PL_realtokenstart = -1;
2542         }
2543 #endif
2544         LEAVE;
2545         PL_bufend = SvPVX(PL_linestr);
2546         PL_bufend += SvCUR(PL_linestr);
2547         PL_expect = XOPERATOR;
2548         PL_sublex_info.sub_inwhat = 0;
2549         return ')';
2550     }
2551 }
2552
2553 /*
2554   scan_const
2555
2556   Extracts a pattern, double-quoted string, or transliteration.  This
2557   is terrifying code.
2558
2559   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2560   processing a pattern (PL_lex_inpat is true), a transliteration
2561   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2562
2563   Returns a pointer to the character scanned up to. If this is
2564   advanced from the start pointer supplied (i.e. if anything was
2565   successfully parsed), will leave an OP for the substring scanned
2566   in pl_yylval. Caller must intuit reason for not parsing further
2567   by looking at the next characters herself.
2568
2569   In patterns:
2570     backslashes:
2571       constants: \N{NAME} only
2572       case and quoting: \U \Q \E
2573     stops on @ and $, but not for $ as tail anchor
2574
2575   In transliterations:
2576     characters are VERY literal, except for - not at the start or end
2577     of the string, which indicates a range. If the range is in bytes,
2578     scan_const expands the range to the full set of intermediate
2579     characters. If the range is in utf8, the hyphen is replaced with
2580     a certain range mark which will be handled by pmtrans() in op.c.
2581
2582   In double-quoted strings:
2583     backslashes:
2584       double-quoted style: \r and \n
2585       constants: \x31, etc.
2586       deprecated backrefs: \1 (in substitution replacements)
2587       case and quoting: \U \Q \E
2588     stops on @ and $
2589
2590   scan_const does *not* construct ops to handle interpolated strings.
2591   It stops processing as soon as it finds an embedded $ or @ variable
2592   and leaves it to the caller to work out what's going on.
2593
2594   embedded arrays (whether in pattern or not) could be:
2595       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2596
2597   $ in double-quoted strings must be the symbol of an embedded scalar.
2598
2599   $ in pattern could be $foo or could be tail anchor.  Assumption:
2600   it's a tail anchor if $ is the last thing in the string, or if it's
2601   followed by one of "()| \r\n\t"
2602
2603   \1 (backreferences) are turned into $1
2604
2605   The structure of the code is
2606       while (there's a character to process) {
2607           handle transliteration ranges
2608           skip regexp comments /(?#comment)/ and codes /(?{code})/
2609           skip #-initiated comments in //x patterns
2610           check for embedded arrays
2611           check for embedded scalars
2612           if (backslash) {
2613               deprecate \1 in substitution replacements
2614               handle string-changing backslashes \l \U \Q \E, etc.
2615               switch (what was escaped) {
2616                   handle \- in a transliteration (becomes a literal -)
2617                   if a pattern and not \N{, go treat as regular character
2618                   handle \132 (octal characters)
2619                   handle \x15 and \x{1234} (hex characters)
2620                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2621                   handle \cV (control characters)
2622                   handle printf-style backslashes (\f, \r, \n, etc)
2623               } (end switch)
2624               continue
2625           } (end if backslash)
2626           handle regular character
2627     } (end while character to read)
2628
2629 */
2630
2631 STATIC char *
2632 S_scan_const(pTHX_ char *start)
2633 {
2634     dVAR;
2635     register char *send = PL_bufend;            /* end of the constant */
2636     SV *sv = newSV(send - start);               /* sv for the constant.  See
2637                                                    note below on sizing. */
2638     register char *s = start;                   /* start of the constant */
2639     register char *d = SvPVX(sv);               /* destination for copies */
2640     bool dorange = FALSE;                       /* are we in a translit range? */
2641     bool didrange = FALSE;                      /* did we just finish a range? */
2642     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2643     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2644                                                    to be UTF8?  But, this can
2645                                                    show as true when the source
2646                                                    isn't utf8, as for example
2647                                                    when it is entirely composed
2648                                                    of hex constants */
2649
2650     /* Note on sizing:  The scanned constant is placed into sv, which is
2651      * initialized by newSV() assuming one byte of output for every byte of
2652      * input.  This routine expects newSV() to allocate an extra byte for a
2653      * trailing NUL, which this routine will append if it gets to the end of
2654      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2655      * CAPITAL LETTER A}), or more output than input if the constant ends up
2656      * recoded to utf8, but each time a construct is found that might increase
2657      * the needed size, SvGROW() is called.  Its size parameter each time is
2658      * based on the best guess estimate at the time, namely the length used so
2659      * far, plus the length the current construct will occupy, plus room for
2660      * the trailing NUL, plus one byte for every input byte still unscanned */
2661
2662     UV uv;
2663 #ifdef EBCDIC
2664     UV literal_endpoint = 0;
2665     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2666 #endif
2667
2668     PERL_ARGS_ASSERT_SCAN_CONST;
2669
2670     assert(PL_lex_inwhat != OP_TRANSR);
2671     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2672         /* If we are doing a trans and we know we want UTF8 set expectation */
2673         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2674         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2675     }
2676
2677
2678     while (s < send || dorange) {
2679
2680         /* get transliterations out of the way (they're most literal) */
2681         if (PL_lex_inwhat == OP_TRANS) {
2682             /* expand a range A-Z to the full set of characters.  AIE! */
2683             if (dorange) {
2684                 I32 i;                          /* current expanded character */
2685                 I32 min;                        /* first character in range */
2686                 I32 max;                        /* last character in range */
2687
2688 #ifdef EBCDIC
2689                 UV uvmax = 0;
2690 #endif
2691
2692                 if (has_utf8
2693 #ifdef EBCDIC
2694                     && !native_range
2695 #endif
2696                     ) {
2697                     char * const c = (char*)utf8_hop((U8*)d, -1);
2698                     char *e = d++;
2699                     while (e-- > c)
2700                         *(e + 1) = *e;
2701                     *c = (char)UTF_TO_NATIVE(0xff);
2702                     /* mark the range as done, and continue */
2703                     dorange = FALSE;
2704                     didrange = TRUE;
2705                     continue;
2706                 }
2707
2708                 i = d - SvPVX_const(sv);                /* remember current offset */
2709 #ifdef EBCDIC
2710                 SvGROW(sv,
2711                        SvLEN(sv) + (has_utf8 ?
2712                                     (512 - UTF_CONTINUATION_MARK +
2713                                      UNISKIP(0x100))
2714                                     : 256));
2715                 /* How many two-byte within 0..255: 128 in UTF-8,
2716                  * 96 in UTF-8-mod. */
2717 #else
2718                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2719 #endif
2720                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2721 #ifdef EBCDIC
2722                 if (has_utf8) {
2723                     int j;
2724                     for (j = 0; j <= 1; j++) {
2725                         char * const c = (char*)utf8_hop((U8*)d, -1);
2726                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2727                         if (j)
2728                             min = (U8)uv;
2729                         else if (uv < 256)
2730                             max = (U8)uv;
2731                         else {
2732                             max = (U8)0xff; /* only to \xff */
2733                             uvmax = uv; /* \x{100} to uvmax */
2734                         }
2735                         d = c; /* eat endpoint chars */
2736                      }
2737                 }
2738                else {
2739 #endif
2740                    d -= 2;              /* eat the first char and the - */
2741                    min = (U8)*d;        /* first char in range */
2742                    max = (U8)d[1];      /* last char in range  */
2743 #ifdef EBCDIC
2744                }
2745 #endif
2746
2747                 if (min > max) {
2748                     Perl_croak(aTHX_
2749                                "Invalid range \"%c-%c\" in transliteration operator",
2750                                (char)min, (char)max);
2751                 }
2752
2753 #ifdef EBCDIC
2754                 if (literal_endpoint == 2 &&
2755                     ((isLOWER(min) && isLOWER(max)) ||
2756                      (isUPPER(min) && isUPPER(max)))) {
2757                     if (isLOWER(min)) {
2758                         for (i = min; i <= max; i++)
2759                             if (isLOWER(i))
2760                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2761                     } else {
2762                         for (i = min; i <= max; i++)
2763                             if (isUPPER(i))
2764                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2765                     }
2766                 }
2767                 else
2768 #endif
2769                     for (i = min; i <= max; i++)
2770 #ifdef EBCDIC
2771                         if (has_utf8) {
2772                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2773                             if (UNI_IS_INVARIANT(ch))
2774                                 *d++ = (U8)i;
2775                             else {
2776                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2777                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2778                             }
2779                         }
2780                         else
2781 #endif
2782                             *d++ = (char)i;
2783
2784 #ifdef EBCDIC
2785                 if (uvmax) {
2786                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2787                     if (uvmax > 0x101)
2788                         *d++ = (char)UTF_TO_NATIVE(0xff);
2789                     if (uvmax > 0x100)
2790                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2791                 }
2792 #endif
2793
2794                 /* mark the range as done, and continue */
2795                 dorange = FALSE;
2796                 didrange = TRUE;
2797 #ifdef EBCDIC
2798                 literal_endpoint = 0;
2799 #endif
2800                 continue;
2801             }
2802
2803             /* range begins (ignore - as first or last char) */
2804             else if (*s == '-' && s+1 < send  && s != start) {
2805                 if (didrange) {
2806                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2807                 }
2808                 if (has_utf8
2809 #ifdef EBCDIC
2810                     && !native_range
2811 #endif
2812                     ) {
2813                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2814                     s++;
2815                     continue;
2816                 }
2817                 dorange = TRUE;
2818                 s++;
2819             }
2820             else {
2821                 didrange = FALSE;
2822 #ifdef EBCDIC
2823                 literal_endpoint = 0;
2824                 native_range = TRUE;
2825 #endif
2826             }
2827         }
2828
2829         /* if we get here, we're not doing a transliteration */
2830
2831         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2832            except for the last char, which will be done separately. */
2833         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2834             if (s[2] == '#') {
2835                 while (s+1 < send && *s != ')')
2836                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2837             }
2838             else if (s[2] == '{' /* This should match regcomp.c */
2839                     || (s[2] == '?' && s[3] == '{'))
2840             {
2841                 I32 count = 1;
2842                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2843                 char c;
2844
2845                 while (count && (c = *regparse)) {
2846                     if (c == '\\' && regparse[1])
2847                         regparse++;
2848                     else if (c == '{')
2849                         count++;
2850                     else if (c == '}')
2851                         count--;
2852                     regparse++;
2853                 }
2854                 if (*regparse != ')')
2855                     regparse--;         /* Leave one char for continuation. */
2856                 while (s < regparse)
2857                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2858             }
2859         }
2860
2861         /* likewise skip #-initiated comments in //x patterns */
2862         else if (*s == '#' && PL_lex_inpat &&
2863           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2864             while (s+1 < send && *s != '\n')
2865                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2866         }
2867
2868         /* check for embedded arrays
2869            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2870            */
2871         else if (*s == '@' && s[1]) {
2872             if (isALNUM_lazy_if(s+1,UTF))
2873                 break;
2874             if (strchr(":'{$", s[1]))
2875                 break;
2876             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2877                 break; /* in regexp, neither @+ nor @- are interpolated */
2878         }
2879
2880         /* check for embedded scalars.  only stop if we're sure it's a
2881            variable.
2882         */
2883         else if (*s == '$') {
2884             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2885                 break;
2886             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2887                 if (s[1] == '\\') {
2888                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2889                                    "Possible unintended interpolation of $\\ in regex");
2890                 }
2891                 break;          /* in regexp, $ might be tail anchor */
2892             }
2893         }
2894
2895         /* End of else if chain - OP_TRANS rejoin rest */
2896
2897         /* backslashes */
2898         if (*s == '\\' && s+1 < send) {
2899             char* e;    /* Can be used for ending '}', etc. */
2900
2901             s++;
2902
2903             /* warn on \1 - \9 in substitution replacements, but note that \11
2904              * is an octal; and \19 is \1 followed by '9' */
2905             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2906                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2907             {
2908                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2909                 *--s = '$';
2910                 break;
2911             }
2912
2913             /* string-change backslash escapes */
2914             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2915                 --s;
2916                 break;
2917             }
2918             /* In a pattern, process \N, but skip any other backslash escapes.
2919              * This is because we don't want to translate an escape sequence
2920              * into a meta symbol and have the regex compiler use the meta
2921              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2922              * in spite of this, we do have to process \N here while the proper
2923              * charnames handler is in scope.  See bugs #56444 and #62056.
2924              * There is a complication because \N in a pattern may also stand
2925              * for 'match a non-nl', and not mean a charname, in which case its
2926              * processing should be deferred to the regex compiler.  To be a
2927              * charname it must be followed immediately by a '{', and not look
2928              * like \N followed by a curly quantifier, i.e., not something like
2929              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2930              * quantifier */
2931             else if (PL_lex_inpat
2932                     && (*s != 'N'
2933                         || s[1] != '{'
2934                         || regcurly(s + 1)))
2935             {
2936                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2937                 goto default_action;
2938             }
2939
2940             switch (*s) {
2941
2942             /* quoted - in transliterations */
2943             case '-':
2944                 if (PL_lex_inwhat == OP_TRANS) {
2945                     *d++ = *s++;
2946                     continue;
2947                 }
2948                 /* FALL THROUGH */
2949             default:
2950                 {
2951                     if ((isALPHA(*s) || isDIGIT(*s)))
2952                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2953                                        "Unrecognized escape \\%c passed through",
2954                                        *s);
2955                     /* default action is to copy the quoted character */
2956                     goto default_action;
2957                 }
2958
2959             /* eg. \132 indicates the octal constant 0132 */
2960             case '0': case '1': case '2': case '3':
2961             case '4': case '5': case '6': case '7':
2962                 {
2963                     I32 flags = 0;
2964                     STRLEN len = 3;
2965                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2966                     s += len;
2967                 }
2968                 goto NUM_ESCAPE_INSERT;
2969
2970             /* eg. \o{24} indicates the octal constant \024 */
2971             case 'o':
2972                 {
2973                     STRLEN len;
2974                     const char* error;
2975
2976                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2977                     s += len;
2978                     if (! valid) {
2979                         yyerror(error);
2980                         continue;
2981                     }
2982                     goto NUM_ESCAPE_INSERT;
2983                 }
2984
2985             /* eg. \x24 indicates the hex constant 0x24 */
2986             case 'x':
2987                 ++s;
2988                 if (*s == '{') {
2989                     char* const e = strchr(s, '}');
2990                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2991                       PERL_SCAN_DISALLOW_PREFIX;
2992                     STRLEN len;
2993
2994                     ++s;
2995                     if (!e) {
2996                         yyerror("Missing right brace on \\x{}");
2997                         continue;
2998                     }
2999                     len = e - s;
3000                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3001                     s = e + 1;
3002                 }
3003                 else {
3004                     {
3005                         STRLEN len = 2;
3006                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3007                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3008                         s += len;
3009                     }
3010                 }
3011
3012               NUM_ESCAPE_INSERT:
3013                 /* Insert oct or hex escaped character.  There will always be
3014                  * enough room in sv since such escapes will be longer than any
3015                  * UTF-8 sequence they can end up as, except if they force us
3016                  * to recode the rest of the string into utf8 */
3017
3018                 /* Here uv is the ordinal of the next character being added in
3019                  * unicode (converted from native). */
3020                 if (!UNI_IS_INVARIANT(uv)) {
3021                     if (!has_utf8 && uv > 255) {
3022                         /* Might need to recode whatever we have accumulated so
3023                          * far if it contains any chars variant in utf8 or
3024                          * utf-ebcdic. */
3025
3026                         SvCUR_set(sv, d - SvPVX_const(sv));
3027                         SvPOK_on(sv);
3028                         *d = '\0';
3029                         /* See Note on sizing above.  */
3030                         sv_utf8_upgrade_flags_grow(sv,
3031                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3032                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3033                         d = SvPVX(sv) + SvCUR(sv);
3034                         has_utf8 = TRUE;
3035                     }
3036
3037                     if (has_utf8) {
3038                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3039                         if (PL_lex_inwhat == OP_TRANS &&
3040                             PL_sublex_info.sub_op) {
3041                             PL_sublex_info.sub_op->op_private |=
3042                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3043                                              : OPpTRANS_TO_UTF);
3044                         }
3045 #ifdef EBCDIC
3046                         if (uv > 255 && !dorange)
3047                             native_range = FALSE;
3048 #endif
3049                     }
3050                     else {
3051                         *d++ = (char)uv;
3052                     }
3053                 }
3054                 else {
3055                     *d++ = (char) uv;
3056                 }
3057                 continue;
3058
3059             case 'N':
3060                 /* In a non-pattern \N must be a named character, like \N{LATIN
3061                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3062                  * mean to match a non-newline.  For non-patterns, named
3063                  * characters are converted to their string equivalents. In
3064                  * patterns, named characters are not converted to their
3065                  * ultimate forms for the same reasons that other escapes
3066                  * aren't.  Instead, they are converted to the \N{U+...} form
3067                  * to get the value from the charnames that is in effect right
3068                  * now, while preserving the fact that it was a named character
3069                  * so that the regex compiler knows this */
3070
3071                 /* This section of code doesn't generally use the
3072                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3073                  * a close examination of this macro and determined it is a
3074                  * no-op except on utfebcdic variant characters.  Every
3075                  * character generated by this that would normally need to be
3076                  * enclosed by this macro is invariant, so the macro is not
3077                  * needed, and would complicate use of copy().  XXX There are
3078                  * other parts of this file where the macro is used
3079                  * inconsistently, but are saved by it being a no-op */
3080
3081                 /* The structure of this section of code (besides checking for
3082                  * errors and upgrading to utf8) is:
3083                  *  Further disambiguate between the two meanings of \N, and if
3084                  *      not a charname, go process it elsewhere
3085                  *  If of form \N{U+...}, pass it through if a pattern;
3086                  *      otherwise convert to utf8
3087                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3088                  *  pattern; otherwise convert to utf8 */
3089
3090                 /* Here, s points to the 'N'; the test below is guaranteed to
3091                  * succeed if we are being called on a pattern as we already
3092                  * know from a test above that the next character is a '{'.
3093                  * On a non-pattern \N must mean 'named sequence, which
3094                  * requires braces */
3095                 s++;
3096                 if (*s != '{') {
3097                     yyerror("Missing braces on \\N{}");
3098                     continue;
3099                 }
3100                 s++;
3101
3102                 /* If there is no matching '}', it is an error. */
3103                 if (! (e = strchr(s, '}'))) {
3104                     if (! PL_lex_inpat) {
3105                         yyerror("Missing right brace on \\N{}");
3106                     } else {
3107                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3108                     }
3109                     continue;
3110                 }
3111
3112                 /* Here it looks like a named character */
3113
3114                 if (PL_lex_inpat) {
3115
3116                     /* XXX This block is temporary code.  \N{} implies that the
3117                      * pattern is to have Unicode semantics, and therefore
3118                      * currently has to be encoded in utf8.  By putting it in
3119                      * utf8 now, we save a whole pass in the regular expression
3120                      * compiler.  Once that code is changed so Unicode
3121                      * semantics doesn't necessarily have to be in utf8, this
3122                      * block should be removed.  However, the code that parses
3123                      * the output of this would have to be changed to not
3124                      * necessarily expect utf8 */
3125                     if (!has_utf8) {
3126                         SvCUR_set(sv, d - SvPVX_const(sv));
3127                         SvPOK_on(sv);
3128                         *d = '\0';
3129                         /* See Note on sizing above.  */
3130                         sv_utf8_upgrade_flags_grow(sv,
3131                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3132                                         /* 5 = '\N{' + cur char + NUL */
3133                                         (STRLEN)(send - s) + 5);
3134                         d = SvPVX(sv) + SvCUR(sv);
3135                         has_utf8 = TRUE;
3136                     }
3137                 }
3138
3139                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3140                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3141                                 | PERL_SCAN_DISALLOW_PREFIX;
3142                     STRLEN len;
3143
3144                     /* For \N{U+...}, the '...' is a unicode value even on
3145                      * EBCDIC machines */
3146                     s += 2;         /* Skip to next char after the 'U+' */
3147                     len = e - s;
3148                     uv = grok_hex(s, &len, &flags, NULL);
3149                     if (len == 0 || len != (STRLEN)(e - s)) {
3150                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3151                         s = e + 1;
3152                         continue;
3153                     }
3154
3155                     if (PL_lex_inpat) {
3156
3157                         /* On non-EBCDIC platforms, pass through to the regex
3158                          * compiler unchanged.  The reason we evaluated the
3159                          * number above is to make sure there wasn't a syntax
3160                          * error.  But on EBCDIC we convert to native so
3161                          * downstream code can continue to assume it's native
3162                          */
3163                         s -= 5;     /* Include the '\N{U+' */
3164 #ifdef EBCDIC
3165                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3166                                                                and the \0 */
3167                                     "\\N{U+%X}",
3168                                     (unsigned int) UNI_TO_NATIVE(uv));
3169 #else
3170                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3171                         d += e - s + 1;
3172 #endif
3173                     }
3174                     else {  /* Not a pattern: convert the hex to string */
3175
3176                          /* If destination is not in utf8, unconditionally
3177                           * recode it to be so.  This is because \N{} implies
3178                           * Unicode semantics, and scalars have to be in utf8
3179                           * to guarantee those semantics */
3180                         if (! has_utf8) {
3181                             SvCUR_set(sv, d - SvPVX_const(sv));
3182                             SvPOK_on(sv);
3183                             *d = '\0';
3184                             /* See Note on sizing above.  */
3185                             sv_utf8_upgrade_flags_grow(
3186                                         sv,
3187                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3188                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3189                             d = SvPVX(sv) + SvCUR(sv);
3190                             has_utf8 = TRUE;
3191                         }
3192
3193                         /* Add the string to the output */
3194                         if (UNI_IS_INVARIANT(uv)) {
3195                             *d++ = (char) uv;
3196                         }
3197                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3198                     }
3199                 }
3200                 else { /* Here is \N{NAME} but not \N{U+...}. */
3201
3202                     SV *res;            /* result from charnames */
3203                     const char *str;    /* the string in 'res' */
3204                     STRLEN len;         /* its length */
3205
3206                     /* Get the value for NAME */
3207                     res = newSVpvn(s, e - s);
3208                     res = new_constant( NULL, 0, "charnames",
3209                                         /* includes all of: \N{...} */
3210                                         res, NULL, s - 3, e - s + 4 );
3211
3212                     /* Most likely res will be in utf8 already since the
3213                      * standard charnames uses pack U, but a custom translator
3214                      * can leave it otherwise, so make sure.  XXX This can be
3215                      * revisited to not have charnames use utf8 for characters
3216                      * that don't need it when regexes don't have to be in utf8
3217                      * for Unicode semantics.  If doing so, remember EBCDIC */
3218                     sv_utf8_upgrade(res);
3219                     str = SvPV_const(res, len);
3220
3221                     /* Don't accept malformed input */
3222                     if (! is_utf8_string((U8 *) str, len)) {
3223                         yyerror("Malformed UTF-8 returned by \\N");
3224                     }
3225                     else if (PL_lex_inpat) {
3226
3227                         if (! len) { /* The name resolved to an empty string */
3228                             Copy("\\N{}", d, 4, char);
3229                             d += 4;
3230                         }
3231                         else {
3232                             /* In order to not lose information for the regex
3233                             * compiler, pass the result in the specially made
3234                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3235                             * the code points in hex of each character
3236                             * returned by charnames */
3237
3238                             const char *str_end = str + len;
3239                             STRLEN char_length;     /* cur char's byte length */
3240                             STRLEN output_length;   /* and the number of bytes
3241                                                        after this is translated
3242                                                        into hex digits */
3243                             const STRLEN off = d - SvPVX_const(sv);
3244
3245                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3246                              * max('U+', '.'); and 1 for NUL */
3247                             char hex_string[2 * UTF8_MAXBYTES + 5];
3248
3249                             /* Get the first character of the result. */
3250                             U32 uv = utf8n_to_uvuni((U8 *) str,
3251                                                     len,
3252                                                     &char_length,
3253                                                     UTF8_ALLOW_ANYUV);
3254
3255                             /* The call to is_utf8_string() above hopefully
3256                              * guarantees that there won't be an error.  But
3257                              * it's easy here to make sure.  The function just
3258                              * above warns and returns 0 if invalid utf8, but
3259                              * it can also return 0 if the input is validly a
3260                              * NUL. Disambiguate */
3261                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3262                                 uv = UNICODE_REPLACEMENT;
3263                             }
3264
3265                             /* Convert first code point to hex, including the
3266                              * boiler plate before it.  For all these, we
3267                              * convert to native format so that downstream code
3268                              * can continue to assume the input is native */
3269                             output_length =
3270                                 my_snprintf(hex_string, sizeof(hex_string),
3271                                             "\\N{U+%X",
3272                                             (unsigned int) UNI_TO_NATIVE(uv));
3273
3274                             /* Make sure there is enough space to hold it */
3275                             d = off + SvGROW(sv, off
3276                                                  + output_length
3277                                                  + (STRLEN)(send - e)
3278                                                  + 2);  /* '}' + NUL */
3279                             /* And output it */
3280                             Copy(hex_string, d, output_length, char);
3281                             d += output_length;
3282
3283                             /* For each subsequent character, append dot and
3284                              * its ordinal in hex */
3285                             while ((str += char_length) < str_end) {
3286                                 const STRLEN off = d - SvPVX_const(sv);
3287                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3288                                                         str_end - str,
3289                                                         &char_length,
3290                                                         UTF8_ALLOW_ANYUV);
3291                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3292                                     uv = UNICODE_REPLACEMENT;
3293                                 }
3294
3295                                 output_length =
3296                                     my_snprintf(hex_string, sizeof(hex_string),
3297                                             ".%X",
3298                                             (unsigned int) UNI_TO_NATIVE(uv));
3299
3300                                 d = off + SvGROW(sv, off
3301                                                      + output_length
3302                                                      + (STRLEN)(send - e)
3303                                                      + 2);      /* '}' +  NUL */
3304                                 Copy(hex_string, d, output_length, char);
3305                                 d += output_length;
3306                             }
3307
3308                             *d++ = '}'; /* Done.  Add the trailing brace */
3309                         }
3310                     }
3311                     else { /* Here, not in a pattern.  Convert the name to a
3312                             * string. */
3313
3314                          /* If destination is not in utf8, unconditionally
3315                           * recode it to be so.  This is because \N{} implies
3316                           * Unicode semantics, and scalars have to be in utf8
3317                           * to guarantee those semantics */
3318                         if (! has_utf8) {
3319                             SvCUR_set(sv, d - SvPVX_const(sv));
3320                             SvPOK_on(sv);
3321                             *d = '\0';
3322                             /* See Note on sizing above.  */
3323                             sv_utf8_upgrade_flags_grow(sv,
3324                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3325                                                 len + (STRLEN)(send - s) + 1);
3326                             d = SvPVX(sv) + SvCUR(sv);
3327                             has_utf8 = TRUE;
3328                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3329
3330                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3331                              * set correctly here). */
3332                             const STRLEN off = d - SvPVX_const(sv);
3333                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3334                         }
3335                         Copy(str, d, len, char);
3336                         d += len;
3337                     }
3338                     SvREFCNT_dec(res);
3339
3340                     /* Deprecate non-approved name syntax */
3341                     if (ckWARN_d(WARN_DEPRECATED)) {
3342                         bool problematic = FALSE;
3343                         char* i = s;
3344
3345                         /* For non-ut8 input, look to see that the first
3346                          * character is an alpha, then loop through the rest
3347                          * checking that each is a continuation */
3348                         if (! this_utf8) {
3349                             if (! isALPHAU(*i)) problematic = TRUE;
3350                             else for (i = s + 1; i < e; i++) {
3351                                 if (isCHARNAME_CONT(*i)) continue;
3352                                 problematic = TRUE;
3353                                 break;
3354                             }
3355                         }
3356                         else {
3357                             /* Similarly for utf8.  For invariants can check
3358                              * directly.  We accept anything above the latin1
3359                              * range because it is immaterial to Perl if it is
3360                              * correct or not, and is expensive to check.  But
3361                              * it is fairly easy in the latin1 range to convert
3362                              * the variants into a single character and check
3363                              * those */
3364                             if (UTF8_IS_INVARIANT(*i)) {
3365                                 if (! isALPHAU(*i)) problematic = TRUE;
3366                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3367                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3368                                                                             *(i+1)))))
3369                                 {
3370                                     problematic = TRUE;
3371                                 }
3372                             }
3373                             if (! problematic) for (i = s + UTF8SKIP(s);
3374                                                     i < e;
3375                                                     i+= UTF8SKIP(i))
3376                             {
3377                                 if (UTF8_IS_INVARIANT(*i)) {
3378                                     if (isCHARNAME_CONT(*i)) continue;
3379                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3380                                     continue;
3381                                 } else if (isCHARNAME_CONT(
3382                                             UNI_TO_NATIVE(
3383                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3384                                 {
3385                                     continue;
3386                                 }
3387                                 problematic = TRUE;
3388                                 break;
3389                             }
3390                         }
3391                         if (problematic) {
3392                             /* The e-i passed to the final %.*s makes sure that
3393                              * should the trailing NUL be missing that this
3394                              * print won't run off the end of the string */
3395                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3396                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3397                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3398                         }
3399                     }
3400                 } /* End \N{NAME} */
3401 #ifdef EBCDIC
3402                 if (!dorange)
3403                     native_range = FALSE; /* \N{} is defined to be Unicode */
3404 #endif
3405                 s = e + 1;  /* Point to just after the '}' */
3406                 continue;
3407
3408             /* \c is a control character */
3409             case 'c':
3410                 s++;
3411                 if (s < send) {
3412                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3413                 }
3414                 else {
3415                     yyerror("Missing control char name in \\c");
3416                 }
3417                 continue;
3418
3419             /* printf-style backslashes, formfeeds, newlines, etc */
3420             case 'b':
3421                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3422                 break;
3423             case 'n':
3424                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3425                 break;
3426             case 'r':
3427                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3428                 break;
3429             case 'f':
3430                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3431                 break;
3432             case 't':
3433                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3434                 break;
3435             case 'e':
3436                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3437                 break;
3438             case 'a':
3439                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3440                 break;
3441             } /* end switch */
3442
3443             s++;
3444             continue;
3445         } /* end if (backslash) */
3446 #ifdef EBCDIC
3447         else
3448             literal_endpoint++;
3449 #endif
3450
3451     default_action:
3452         /* If we started with encoded form, or already know we want it,
3453            then encode the next character */
3454         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3455             STRLEN len  = 1;
3456
3457
3458             /* One might think that it is wasted effort in the case of the
3459              * source being utf8 (this_utf8 == TRUE) to take the next character
3460              * in the source, convert it to an unsigned value, and then convert
3461              * it back again.  But the source has not been validated here.  The
3462              * routine that does the conversion checks for errors like
3463              * malformed utf8 */
3464
3465             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3466             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3467             if (!has_utf8) {
3468                 SvCUR_set(sv, d - SvPVX_const(sv));
3469                 SvPOK_on(sv);
3470                 *d = '\0';
3471                 /* See Note on sizing above.  */
3472                 sv_utf8_upgrade_flags_grow(sv,
3473                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3474                                         need + (STRLEN)(send - s) + 1);
3475                 d = SvPVX(sv) + SvCUR(sv);
3476                 has_utf8 = TRUE;
3477             } else if (need > len) {
3478                 /* encoded value larger than old, may need extra space (NOTE:
3479                  * SvCUR() is not set correctly here).   See Note on sizing
3480                  * above.  */
3481                 const STRLEN off = d - SvPVX_const(sv);
3482                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3483             }
3484             s += len;
3485
3486             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3487 #ifdef EBCDIC
3488             if (uv > 255 && !dorange)
3489                 native_range = FALSE;
3490 #endif
3491         }
3492         else {
3493             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3494         }
3495     } /* while loop to process each character */
3496
3497     /* terminate the string and set up the sv */
3498     *d = '\0';
3499     SvCUR_set(sv, d - SvPVX_const(sv));
3500     if (SvCUR(sv) >= SvLEN(sv))
3501         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3502
3503     SvPOK_on(sv);
3504     if (PL_encoding && !has_utf8) {
3505         sv_recode_to_utf8(sv, PL_encoding);
3506         if (SvUTF8(sv))
3507             has_utf8 = TRUE;
3508     }
3509     if (has_utf8) {
3510         SvUTF8_on(sv);
3511         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3512             PL_sublex_info.sub_op->op_private |=
3513                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3514         }
3515     }
3516
3517     /* shrink the sv if we allocated more than we used */
3518     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3519         SvPV_shrink_to_cur(sv);
3520     }
3521
3522     /* return the substring (via pl_yylval) only if we parsed anything */
3523     if (s > PL_bufptr) {
3524         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3525             const char *const key = PL_lex_inpat ? "qr" : "q";
3526             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3527             const char *type;
3528             STRLEN typelen;
3529
3530             if (PL_lex_inwhat == OP_TRANS) {
3531                 type = "tr";
3532                 typelen = 2;
3533             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3534                 type = "s";
3535                 typelen = 1;
3536             } else  {
3537                 type = "qq";
3538                 typelen = 2;
3539             }
3540
3541             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3542                                 type, typelen);
3543         }
3544         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3545     } else
3546         SvREFCNT_dec(sv);
3547     return s;
3548 }
3549
3550 /* S_intuit_more
3551  * Returns TRUE if there's more to the expression (e.g., a subscript),
3552  * FALSE otherwise.
3553  *
3554  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3555  *
3556  * ->[ and ->{ return TRUE
3557  * { and [ outside a pattern are always subscripts, so return TRUE
3558  * if we're outside a pattern and it's not { or [, then return FALSE
3559  * if we're in a pattern and the first char is a {
3560  *   {4,5} (any digits around the comma) returns FALSE
3561  * if we're in a pattern and the first char is a [
3562  *   [] returns FALSE
3563  *   [SOMETHING] has a funky algorithm to decide whether it's a
3564  *      character class or not.  It has to deal with things like
3565  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3566  * anything else returns TRUE
3567  */
3568
3569 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3570
3571 STATIC int
3572 S_intuit_more(pTHX_ register char *s)
3573 {
3574     dVAR;
3575
3576     PERL_ARGS_ASSERT_INTUIT_MORE;
3577
3578     if (PL_lex_brackets)
3579         return TRUE;
3580     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3581         return TRUE;
3582     if (*s != '{' && *s != '[')
3583         return FALSE;
3584     if (!PL_lex_inpat)
3585         return TRUE;
3586
3587     /* In a pattern, so maybe we have {n,m}. */
3588     if (*s == '{') {
3589         if (regcurly(s)) {
3590             return FALSE;
3591         }
3592         return TRUE;
3593     }
3594
3595     /* On the other hand, maybe we have a character class */
3596
3597     s++;
3598     if (*s == ']' || *s == '^')
3599         return FALSE;
3600     else {
3601         /* this is terrifying, and it works */
3602         int weight = 2;         /* let's weigh the evidence */
3603         char seen[256];
3604         unsigned char un_char = 255, last_un_char;
3605         const char * const send = strchr(s,']');
3606         char tmpbuf[sizeof PL_tokenbuf * 4];
3607
3608         if (!send)              /* has to be an expression */
3609             return TRUE;
3610
3611         Zero(seen,256,char);
3612         if (*s == '$')
3613             weight -= 3;
3614         else if (isDIGIT(*s)) {
3615             if (s[1] != ']') {
3616                 if (isDIGIT(s[1]) && s[2] == ']')
3617                     weight -= 10;
3618             }
3619             else
3620                 weight -= 100;
3621         }
3622         for (; s < send; s++) {
3623             last_un_char = un_char;
3624             un_char = (unsigned char)*s;
3625             switch (*s) {
3626             case '@':
3627             case '&':
3628             case '$':
3629                 weight -= seen[un_char] * 10;
3630                 if (isALNUM_lazy_if(s+1,UTF)) {
3631                     int len;
3632                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3633                     len = (int)strlen(tmpbuf);
3634                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3635                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3636                         weight -= 100;
3637                     else
3638                         weight -= 10;
3639                 }
3640                 else if (*s == '$' && s[1] &&
3641                   strchr("[#!%*<>()-=",s[1])) {
3642                     if (/*{*/ strchr("])} =",s[2]))
3643                         weight -= 10;
3644                     else
3645                         weight -= 1;
3646                 }
3647                 break;
3648             case '\\':
3649                 un_char = 254;
3650                 if (s[1]) {
3651                     if (strchr("wds]",s[1]))
3652                         weight += 100;
3653                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3654                         weight += 1;
3655                     else if (strchr("rnftbxcav",s[1]))
3656                         weight += 40;
3657                     else if (isDIGIT(s[1])) {
3658                         weight += 40;
3659                         while (s[1] && isDIGIT(s[1]))
3660                             s++;
3661                     }
3662                 }
3663                 else
3664                     weight += 100;
3665                 break;
3666             case '-':
3667                 if (s[1] == '\\')
3668                     weight += 50;
3669                 if (strchr("aA01! ",last_un_char))
3670                     weight += 30;
3671                 if (strchr("zZ79~",s[1]))
3672                     weight += 30;
3673                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3674                     weight -= 5;        /* cope with negative subscript */
3675                 break;
3676             default:
3677                 if (!isALNUM(last_un_char)
3678                     && !(last_un_char == '$' || last_un_char == '@'
3679                          || last_un_char == '&')
3680                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3681                     char *d = tmpbuf;
3682                     while (isALPHA(*s))
3683                         *d++ = *s++;
3684                     *d = '\0';
3685                     if (keyword(tmpbuf, d - tmpbuf, 0))
3686                         weight -= 150;
3687                 }
3688                 if (un_char == last_un_char + 1)
3689                     weight += 5;
3690                 weight -= seen[un_char];
3691                 break;
3692             }
3693             seen[un_char]++;
3694         }
3695         if (weight >= 0)        /* probably a character class */
3696             return FALSE;
3697     }
3698
3699     return TRUE;
3700 }
3701
3702 /*
3703  * S_intuit_method
3704  *
3705  * Does all the checking to disambiguate
3706  *   foo bar
3707  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3708  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3709  *
3710  * First argument is the stuff after the first token, e.g. "bar".
3711  *
3712  * Not a method if bar is a filehandle.
3713  * Not a method if foo is a subroutine prototyped to take a filehandle.
3714  * Not a method if it's really "Foo $bar"
3715  * Method if it's "foo $bar"
3716  * Not a method if it's really "print foo $bar"
3717  * Method if it's really "foo package::" (interpreted as package->foo)
3718  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3719  * Not a method if bar is a filehandle or package, but is quoted with
3720  *   =>
3721  */
3722
3723 STATIC int
3724 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3725 {
3726     dVAR;
3727     char *s = start + (*start == '$');
3728     char tmpbuf[sizeof PL_tokenbuf];
3729     STRLEN len;
3730     GV* indirgv;
3731 #ifdef PERL_MAD
3732     int soff;
3733 #endif
3734
3735     PERL_ARGS_ASSERT_INTUIT_METHOD;
3736
3737     if (gv) {
3738         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3739             return 0;
3740         if (cv) {
3741             if (SvPOK(cv)) {
3742                 const char *proto = SvPVX_const(cv);
3743                 if (proto) {
3744                     if (*proto == ';')
3745                         proto++;
3746                     if (*proto == '*')
3747                         return 0;
3748                 }
3749             }
3750         } else
3751             gv = NULL;
3752     }
3753     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3754     /* start is the beginning of the possible filehandle/object,
3755      * and s is the end of it
3756      * tmpbuf is a copy of it
3757      */
3758
3759     if (*start == '$') {
3760         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3761                 isUPPER(*PL_tokenbuf))
3762             return 0;
3763 #ifdef PERL_MAD
3764         len = start - SvPVX(PL_linestr);
3765 #endif
3766         s = PEEKSPACE(s);
3767 #ifdef PERL_MAD
3768         start = SvPVX(PL_linestr) + len;
3769 #endif
3770         PL_bufptr = start;
3771         PL_expect = XREF;
3772         return *s == '(' ? FUNCMETH : METHOD;
3773     }
3774     if (!keyword(tmpbuf, len, 0)) {
3775         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3776             len -= 2;
3777             tmpbuf[len] = '\0';
3778 #ifdef PERL_MAD
3779             soff = s - SvPVX(PL_linestr);
3780 #endif
3781             goto bare_package;
3782         }
3783         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3784         if (indirgv && GvCVu(indirgv))
3785             return 0;
3786         /* filehandle or package name makes it a method */
3787         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3788 #ifdef PERL_MAD
3789             soff = s - SvPVX(PL_linestr);
3790 #endif
3791             s = PEEKSPACE(s);
3792             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3793                 return 0;       /* no assumptions -- "=>" quotes bareword */
3794       bare_package:
3795             start_force(PL_curforce);
3796             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3797                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3798             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3799             if (PL_madskills)
3800                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3801             PL_expect = XTERM;
3802             force_next(WORD);
3803             PL_bufptr = s;
3804 #ifdef PERL_MAD
3805             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3806 #endif
3807             return *s == '(' ? FUNCMETH : METHOD;
3808         }
3809     }
3810     return 0;
3811 }
3812
3813 /* Encoded script support. filter_add() effectively inserts a
3814  * 'pre-processing' function into the current source input stream.
3815  * Note that the filter function only applies to the current source file
3816  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3817  *
3818  * The datasv parameter (which may be NULL) can be used to pass
3819  * private data to this instance of the filter. The filter function
3820  * can recover the SV using the FILTER_DATA macro and use it to
3821  * store private buffers and state information.
3822  *
3823  * The supplied datasv parameter is upgraded to a PVIO type
3824  * and the IoDIRP/IoANY field is used to store the function pointer,
3825  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3826  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3827  * private use must be set using malloc'd pointers.
3828  */
3829
3830 SV *
3831 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3832 {
3833     dVAR;
3834     if (!funcp)
3835         return NULL;
3836
3837     if (!PL_parser)
3838         return NULL;
3839
3840     if (!PL_rsfp_filters)
3841         PL_rsfp_filters = newAV();
3842     if (!datasv)
3843         datasv = newSV(0);
3844     SvUPGRADE(datasv, SVt_PVIO);
3845     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3846     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3847     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3848                           FPTR2DPTR(void *, IoANY(datasv)),
3849                           SvPV_nolen(datasv)));
3850     av_unshift(PL_rsfp_filters, 1);
3851     av_store(PL_rsfp_filters, 0, datasv) ;
3852     return(datasv);
3853 }
3854
3855
3856 /* Delete most recently added instance of this filter function. */
3857 void
3858 Perl_filter_del(pTHX_ filter_t funcp)
3859 {
3860     dVAR;
3861     SV *datasv;
3862
3863     PERL_ARGS_ASSERT_FILTER_DEL;
3864
3865 #ifdef DEBUGGING
3866     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3867                           FPTR2DPTR(void*, funcp)));
3868 #endif
3869     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3870         return;
3871     /* if filter is on top of stack (usual case) just pop it off */
3872     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3873     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3874         sv_free(av_pop(PL_rsfp_filters));
3875
3876         return;
3877     }
3878     /* we need to search for the correct entry and clear it     */
3879     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3880 }
3881
3882
3883 /* Invoke the idxth filter function for the current rsfp.        */
3884 /* maxlen 0 = read one text line */
3885 I32
3886 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3887 {
3888     dVAR;
3889     filter_t funcp;
3890     SV *datasv = NULL;
3891     /* This API is bad. It should have been using unsigned int for maxlen.
3892        Not sure if we want to change the API, but if not we should sanity
3893        check the value here.  */
3894     const unsigned int correct_length
3895         = maxlen < 0 ?
3896 #ifdef PERL_MICRO
3897         0x7FFFFFFF
3898 #else
3899         INT_MAX
3900 #endif
3901         : maxlen;
3902
3903     PERL_ARGS_ASSERT_FILTER_READ;
3904
3905     if (!PL_parser || !PL_rsfp_filters)
3906         return -1;
3907     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3908         /* Provide a default input filter to make life easy.    */
3909         /* Note that we append to the line. This is handy.      */
3910         DEBUG_P(PerlIO_printf(Perl_debug_log,
3911                               "filter_read %d: from rsfp\n", idx));
3912         if (correct_length) {
3913             /* Want a block */
3914             int len ;
3915             const int old_len = SvCUR(buf_sv);
3916
3917             /* ensure buf_sv is large enough */
3918             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3919             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3920                                    correct_length)) <= 0) {
3921                 if (PerlIO_error(PL_rsfp))
3922                     return -1;          /* error */
3923                 else
3924                     return 0 ;          /* end of file */
3925             }
3926             SvCUR_set(buf_sv, old_len + len) ;
3927             SvPVX(buf_sv)[old_len + len] = '\0';
3928         } else {
3929             /* Want a line */
3930             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3931                 if (PerlIO_error(PL_rsfp))
3932                     return -1;          /* error */
3933                 else
3934                     return 0 ;          /* end of file */
3935             }
3936         }
3937         return SvCUR(buf_sv);
3938     }
3939     /* Skip this filter slot if filter has been deleted */
3940     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3941         DEBUG_P(PerlIO_printf(Perl_debug_log,
3942                               "filter_read %d: skipped (filter deleted)\n",
3943                               idx));
3944         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3945     }
3946     /* Get function pointer hidden within datasv        */
3947     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3948     DEBUG_P(PerlIO_printf(Perl_debug_log,
3949                           "filter_read %d: via function %p (%s)\n",
3950                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3951     /* Call function. The function is expected to       */
3952     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3953     /* Return: <0:error, =0:eof, >0:not eof             */
3954     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3955 }
3956
3957 STATIC char *
3958 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3959 {
3960     dVAR;
3961
3962     PERL_ARGS_ASSERT_FILTER_GETS;
3963
3964 #ifdef PERL_CR_FILTER
3965     if (!PL_rsfp_filters) {
3966         filter_add(S_cr_textfilter,NULL);
3967     }
3968 #endif
3969     if (PL_rsfp_filters) {
3970         if (!append)
3971             SvCUR_set(sv, 0);   /* start with empty line        */
3972         if (FILTER_READ(0, sv, 0) > 0)
3973             return ( SvPVX(sv) ) ;
3974         else
3975             return NULL ;
3976     }
3977     else
3978         return (sv_gets(sv, PL_rsfp, append));
3979 }
3980
3981 STATIC HV *
3982 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3983 {
3984     dVAR;
3985     GV *gv;
3986
3987     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3988
3989     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3990         return PL_curstash;
3991
3992     if (len > 2 &&
3993         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3994         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3995     {
3996         return GvHV(gv);                        /* Foo:: */
3997     }
3998
3999     /* use constant CLASS => 'MyClass' */
4000     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
4001     if (gv && GvCV(gv)) {
4002         SV * const sv = cv_const_sv(GvCV(gv));
4003         if (sv)
4004             pkgname = SvPV_const(sv, len);
4005     }
4006
4007     return gv_stashpvn(pkgname, len, 0);
4008 }
4009
4010 /*
4011  * S_readpipe_override
4012  * Check whether readpipe() is overridden, and generates the appropriate
4013  * optree, provided sublex_start() is called afterwards.
4014  */
4015 STATIC void
4016 S_readpipe_override(pTHX)
4017 {
4018     GV **gvp;
4019     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4020     pl_yylval.ival = OP_BACKTICK;
4021     if ((gv_readpipe
4022                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4023             ||
4024             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4025              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4026              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4027     {
4028         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4029             op_append_elem(OP_LIST,
4030                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4031                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4032     }
4033 }
4034
4035 #ifdef PERL_MAD
4036  /*
4037  * Perl_madlex
4038  * The intent of this yylex wrapper is to minimize the changes to the
4039  * tokener when we aren't interested in collecting madprops.  It remains
4040  * to be seen how successful this strategy will be...
4041  */
4042
4043 int
4044 Perl_madlex(pTHX)
4045 {
4046     int optype;
4047     char *s = PL_bufptr;
4048
4049     /* make sure PL_thiswhite is initialized */
4050     PL_thiswhite = 0;
4051     PL_thismad = 0;
4052
4053     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4054     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4055         return S_pending_ident(aTHX);
4056
4057     /* previous token ate up our whitespace? */
4058     if (!PL_lasttoke && PL_nextwhite) {
4059         PL_thiswhite = PL_nextwhite;
4060         PL_nextwhite = 0;
4061     }
4062
4063     /* isolate the token, and figure out where it is without whitespace */
4064     PL_realtokenstart = -1;
4065     PL_thistoken = 0;
4066     optype = yylex();
4067     s = PL_bufptr;
4068     assert(PL_curforce < 0);
4069
4070     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4071         if (!PL_thistoken) {
4072             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4073                 PL_thistoken = newSVpvs("");
4074             else {
4075                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4076                 PL_thistoken = newSVpvn(tstart, s - tstart);
4077             }
4078         }
4079         if (PL_thismad) /* install head */
4080             CURMAD('X', PL_thistoken);
4081     }
4082
4083     /* last whitespace of a sublex? */
4084     if (optype == ')' && PL_endwhite) {
4085         CURMAD('X', PL_endwhite);
4086     }
4087
4088     if (!PL_thismad) {
4089
4090         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4091         if (!PL_thiswhite && !PL_endwhite && !optype) {
4092             sv_free(PL_thistoken);
4093             PL_thistoken = 0;
4094             return 0;
4095         }
4096
4097         /* put off final whitespace till peg */
4098         if (optype == ';' && !PL_rsfp) {
4099             PL_nextwhite = PL_thiswhite;
4100             PL_thiswhite = 0;
4101         }
4102         else if (PL_thisopen) {
4103             CURMAD('q', PL_thisopen);
4104             if (PL_thistoken)
4105                 sv_free(PL_thistoken);
4106             PL_thistoken = 0;
4107         }
4108         else {
4109             /* Store actual token text as madprop X */
4110             CURMAD('X', PL_thistoken);
4111         }
4112
4113         if (PL_thiswhite) {
4114             /* add preceding whitespace as madprop _ */
4115             CURMAD('_', PL_thiswhite);
4116         }
4117
4118         if (PL_thisstuff) {
4119             /* add quoted material as madprop = */
4120             CURMAD('=', PL_thisstuff);
4121         }
4122
4123         if (PL_thisclose) {
4124             /* add terminating quote as madprop Q */
4125             CURMAD('Q', PL_thisclose);
4126         }
4127     }
4128
4129     /* special processing based on optype */
4130
4131     switch (optype) {
4132
4133     /* opval doesn't need a TOKEN since it can already store mp */
4134     case WORD:
4135     case METHOD:
4136     case FUNCMETH:
4137     case THING:
4138     case PMFUNC:
4139     case PRIVATEREF:
4140     case FUNC0SUB:
4141     case UNIOPSUB:
4142     case LSTOPSUB:
4143         if (pl_yylval.opval)
4144             append_madprops(PL_thismad, pl_yylval.opval, 0);
4145         PL_thismad = 0;
4146         return optype;
4147
4148     /* fake EOF */
4149     case 0:
4150         optype = PEG;
4151         if (PL_endwhite) {
4152             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4153             PL_endwhite = 0;
4154         }
4155         break;
4156
4157     case ']':
4158     case '}':
4159         if (PL_faketokens)
4160             break;
4161         /* remember any fake bracket that lexer is about to discard */
4162         if (PL_lex_brackets == 1 &&
4163             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4164         {
4165             s = PL_bufptr;
4166             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4167                 s++;
4168             if (*s == '}') {
4169                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4170                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4171                 PL_thiswhite = 0;
4172                 PL_bufptr = s - 1;
4173                 break;  /* don't bother looking for trailing comment */
4174             }
4175             else
4176                 s = PL_bufptr;
4177         }
4178         if (optype == ']')
4179             break;
4180         /* FALLTHROUGH */
4181
4182     /* attach a trailing comment to its statement instead of next token */
4183     case ';':
4184         if (PL_faketokens)
4185             break;
4186         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4187             s = PL_bufptr;
4188             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4189                 s++;
4190             if (*s == '\n' || *s == '#') {
4191                 while (s < PL_bufend && *s != '\n')
4192                     s++;
4193                 if (s < PL_bufend)
4194                     s++;
4195                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4196                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4197                 PL_thiswhite = 0;
4198                 PL_bufptr = s;
4199             }
4200         }
4201         break;
4202
4203     /* pval */
4204     case LABEL:
4205         break;
4206
4207     /* ival */
4208     default:
4209         break;
4210
4211     }
4212
4213     /* Create new token struct.  Note: opvals return early above. */
4214     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4215     PL_thismad = 0;
4216     return optype;
4217 }
4218 #endif
4219
4220 STATIC char *
4221 S_tokenize_use(pTHX_ int is_use, char *s) {
4222     dVAR;
4223
4224     PERL_ARGS_ASSERT_TOKENIZE_USE;
4225
4226     if (PL_expect != XSTATE)
4227         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4228                     is_use ? "use" : "no"));
4229     s = SKIPSPACE1(s);
4230     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4231         s = force_version(s, TRUE);
4232         if (*s == ';' || *s == '}'
4233                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4234             start_force(PL_curforce);
4235             NEXTVAL_NEXTTOKE.opval = NULL;
4236             force_next(WORD);
4237         }
4238         else if (*s == 'v') {
4239             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4240             s = force_version(s, FALSE);
4241         }
4242     }
4243     else {
4244         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4245         s = force_version(s, FALSE);
4246     }
4247     pl_yylval.ival = is_use;
4248     return s;
4249 }
4250 #ifdef DEBUGGING
4251     static const char* const exp_name[] =
4252         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4253           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4254         };
4255 #endif
4256
4257 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4258 STATIC bool
4259 S_word_takes_any_delimeter(char *p, STRLEN len)
4260 {
4261     return (len == 1 && strchr("msyq", p[0])) ||
4262            (len == 2 && (
4263             (p[0] == 't' && p[1] == 'r') ||
4264             (p[0] == 'q' && strchr("qwxr", p[1]))));
4265 }
4266
4267 /*
4268   yylex
4269
4270   Works out what to call the token just pulled out of the input
4271   stream.  The yacc parser takes care of taking the ops we return and
4272   stitching them into a tree.
4273
4274   Returns:
4275     PRIVATEREF
4276
4277   Structure:
4278       if read an identifier
4279           if we're in a my declaration
4280               croak if they tried to say my($foo::bar)
4281               build the ops for a my() declaration
4282           if it's an access to a my() variable
4283               are we in a sort block?
4284                   croak if my($a); $a <=> $b
4285               build ops for access to a my() variable
4286           if in a dq string, and they've said @foo and we can't find @foo
4287               croak
4288           build ops for a bareword
4289       if we already built the token before, use it.
4290 */
4291
4292
4293 #ifdef __SC__
4294 #pragma segment Perl_yylex
4295 #endif
4296 int
4297 Perl_yylex(pTHX)
4298 {
4299     dVAR;
4300     register char *s = PL_bufptr;
4301     register char *d;
4302     STRLEN len;
4303     bool bof = FALSE;
4304     U32 fake_eof = 0;
4305
4306     /* orig_keyword, gvp, and gv are initialized here because
4307      * jump to the label just_a_word_zero can bypass their
4308      * initialization later. */
4309     I32 orig_keyword = 0;
4310     GV *gv = NULL;
4311     GV **gvp = NULL;
4312
4313     DEBUG_T( {
4314         SV* tmp = newSVpvs("");
4315         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4316             (IV)CopLINE(PL_curcop),
4317             lex_state_names[PL_lex_state],
4318             exp_name[PL_expect],
4319             pv_display(tmp, s, strlen(s), 0, 60));
4320         SvREFCNT_dec(tmp);
4321     } );
4322     /* check if there's an identifier for us to look at */
4323     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4324         return REPORT(S_pending_ident(aTHX));
4325
4326     /* no identifier pending identification */
4327
4328     switch (PL_lex_state) {
4329 #ifdef COMMENTARY
4330     case LEX_NORMAL:            /* Some compilers will produce faster */
4331     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4332         break;
4333 #endif
4334
4335     /* when we've already built the next token, just pull it out of the queue */
4336     case LEX_KNOWNEXT:
4337 #ifdef PERL_MAD
4338         PL_lasttoke--;
4339         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4340         if (PL_madskills) {
4341             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4342             PL_nexttoke[PL_lasttoke].next_mad = 0;
4343             if (PL_thismad && PL_thismad->mad_key == '_') {
4344                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4345                 PL_thismad->mad_val = 0;
4346                 mad_free(PL_thismad);
4347                 PL_thismad = 0;
4348             }
4349         }
4350         if (!PL_lasttoke) {
4351             PL_lex_state = PL_lex_defer;
4352             PL_expect = PL_lex_expect;
4353             PL_lex_defer = LEX_NORMAL;
4354             if (!PL_nexttoke[PL_lasttoke].next_type)
4355                 return yylex();
4356         }
4357 #else
4358         PL_nexttoke--;
4359         pl_yylval = PL_nextval[PL_nexttoke];
4360         if (!PL_nexttoke) {
4361             PL_lex_state = PL_lex_defer;
4362             PL_expect = PL_lex_expect;
4363             PL_lex_defer = LEX_NORMAL;
4364         }
4365 #endif
4366         {
4367             I32 next_type;
4368 #ifdef PERL_MAD
4369             next_type = PL_nexttoke[PL_lasttoke].next_type;
4370 #else
4371             next_type = PL_nexttype[PL_nexttoke];
4372 #endif
4373             if (next_type & (7<<24)) {
4374                 if (next_type & (1<<24)) {
4375                     if (PL_lex_brackets > 100)
4376                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4377                     PL_lex_brackstack[PL_lex_brackets++] =
4378                         (char) ((next_type >> 16) & 0xff);
4379                 }
4380                 if (next_type & (2<<24))
4381                     PL_lex_allbrackets++;
4382                 if (next_type & (4<<24))
4383                     PL_lex_allbrackets--;
4384                 next_type &= 0xffff;
4385             }
4386 #ifdef PERL_MAD
4387             /* FIXME - can these be merged?  */
4388             return next_type;
4389 #else
4390             return REPORT(next_type);
4391 #endif
4392         }
4393
4394     /* interpolated case modifiers like \L \U, including \Q and \E.
4395        when we get here, PL_bufptr is at the \
4396     */
4397     case LEX_INTERPCASEMOD:
4398 #ifdef DEBUGGING
4399         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4400             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4401 #endif
4402         /* handle \E or end of string */
4403         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4404             /* if at a \E */
4405             if (PL_lex_casemods) {
4406                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4407                 PL_lex_casestack[PL_lex_casemods] = '\0';
4408
4409                 if (PL_bufptr != PL_bufend
4410                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4411                     PL_bufptr += 2;
4412                     PL_lex_state = LEX_INTERPCONCAT;
4413 #ifdef PERL_MAD
4414                     if (PL_madskills)
4415                         PL_thistoken = newSVpvs("\\E");
4416 #endif
4417                 }
4418                 PL_lex_allbrackets--;
4419                 return REPORT(')');
4420             }
4421 #ifdef PERL_MAD
4422             while (PL_bufptr != PL_bufend &&
4423               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4424                 if (!PL_thiswhite)
4425                     PL_thiswhite = newSVpvs("");
4426                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4427                 PL_bufptr += 2;
4428             }
4429 #else
4430             if (PL_bufptr != PL_bufend)
4431                 PL_bufptr += 2;
4432 #endif
4433             PL_lex_state = LEX_INTERPCONCAT;
4434             return yylex();
4435         }
4436         else {
4437             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4438               "### Saw case modifier\n"); });
4439             s = PL_bufptr + 1;
4440             if (s[1] == '\\' && s[2] == 'E') {
4441 #ifdef PERL_MAD
4442                 if (!PL_thiswhite)
4443                     PL_thiswhite = newSVpvs("");
4444                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4445 #endif
4446                 PL_bufptr = s + 3;
4447                 PL_lex_state = LEX_INTERPCONCAT;
4448                 return yylex();
4449             }
4450             else {
4451                 I32 tmp;
4452                 if (!PL_madskills) /* when just compiling don't need correct */
4453                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4454                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4455                 if ((*s == 'L' || *s == 'U') &&
4456                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4457                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4458                     PL_lex_allbrackets--;
4459                     return REPORT(')');
4460                 }
4461                 if (PL_lex_casemods > 10)
4462                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4463                 PL_lex_casestack[PL_lex_casemods++] = *s;
4464                 PL_lex_casestack[PL_lex_casemods] = '\0';
4465                 PL_lex_state = LEX_INTERPCONCAT;
4466                 start_force(PL_curforce);
4467                 NEXTVAL_NEXTTOKE.ival = 0;
4468                 force_next((2<<24)|'(');
4469                 start_force(PL_curforce);
4470                 if (*s == 'l')
4471                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4472                 else if (*s == 'u')
4473                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4474                 else if (*s == 'L')
4475                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4476                 else if (*s == 'U')
4477                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4478                 else if (*s == 'Q')
4479                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4480                 else
4481                     Perl_croak(aTHX_ "panic: yylex");
4482                 if (PL_madskills) {
4483                     SV* const tmpsv = newSVpvs("\\ ");
4484                     /* replace the space with the character we want to escape
4485                      */
4486                     SvPVX(tmpsv)[1] = *s;
4487                     curmad('_', tmpsv);
4488                 }
4489                 PL_bufptr = s + 1;
4490             }
4491             force_next(FUNC);
4492             if (PL_lex_starts) {
4493                 s = PL_bufptr;
4494                 PL_lex_starts = 0;
4495 #ifdef PERL_MAD
4496                 if (PL_madskills) {
4497                     if (PL_thistoken)
4498                         sv_free(PL_thistoken);
4499                     PL_thistoken = newSVpvs("");
4500                 }
4501 #endif
4502                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4503                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4504                     OPERATOR(',');
4505                 else
4506                     Aop(OP_CONCAT);
4507             }
4508             else
4509                 return yylex();
4510         }
4511
4512     case LEX_INTERPPUSH:
4513         return REPORT(sublex_push());
4514
4515     case LEX_INTERPSTART:
4516         if (PL_bufptr == PL_bufend)
4517             return REPORT(sublex_done());
4518         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4519               "### Interpolated variable\n"); });
4520         PL_expect = XTERM;
4521         PL_lex_dojoin = (*PL_bufptr == '@');
4522         PL_lex_state = LEX_INTERPNORMAL;
4523         if (PL_lex_dojoin) {
4524             start_force(PL_curforce);
4525             NEXTVAL_NEXTTOKE.ival = 0;
4526             force_next(',');
4527             start_force(PL_curforce);
4528             force_ident("\"", '$');
4529             start_force(PL_curforce);
4530             NEXTVAL_NEXTTOKE.ival = 0;
4531             force_next('$');
4532             start_force(PL_curforce);
4533             NEXTVAL_NEXTTOKE.ival = 0;
4534             force_next((2<<24)|'(');
4535             start_force(PL_curforce);
4536             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4537             force_next(FUNC);
4538         }
4539         if (PL_lex_starts++) {
4540             s = PL_bufptr;
4541 #ifdef PERL_MAD
4542             if (PL_madskills) {
4543                 if (PL_thistoken)
4544                     sv_free(PL_thistoken);
4545                 PL_thistoken = newSVpvs("");
4546             }
4547 #endif
4548             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4549             if (!PL_lex_casemods && PL_lex_inpat)
4550                 OPERATOR(',');
4551             else
4552                 Aop(OP_CONCAT);
4553         }
4554         return yylex();
4555
4556     case LEX_INTERPENDMAYBE:
4557         if (intuit_more(PL_bufptr)) {
4558             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4559             break;
4560         }
4561         /* FALL THROUGH */
4562
4563     case LEX_INTERPEND:
4564         if (PL_lex_dojoin) {
4565             PL_lex_dojoin = FALSE;
4566             PL_lex_state = LEX_INTERPCONCAT;
4567 #ifdef PERL_MAD
4568             if (PL_madskills) {
4569                 if (PL_thistoken)
4570                     sv_free(PL_thistoken);
4571                 PL_thistoken = newSVpvs("");
4572             }
4573 #endif
4574             PL_lex_allbrackets--;
4575             return REPORT(')');
4576         }
4577         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4578             && SvEVALED(PL_lex_repl))
4579         {
4580             if (PL_bufptr != PL_bufend)
4581                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4582             PL_lex_repl = NULL;
4583         }
4584         /* FALLTHROUGH */
4585     case LEX_INTERPCONCAT:
4586 #ifdef DEBUGGING
4587         if (PL_lex_brackets)
4588             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4589 #endif
4590         if (PL_bufptr == PL_bufend)
4591             return REPORT(sublex_done());
4592
4593         if (SvIVX(PL_linestr) == '\'') {
4594             SV *sv = newSVsv(PL_linestr);
4595             if (!PL_lex_inpat)
4596                 sv = tokeq(sv);
4597             else if ( PL_hints & HINT_NEW_RE )
4598                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4599             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4600             s = PL_bufend;
4601         }
4602         else {
4603             s = scan_const(PL_bufptr);
4604             if (*s == '\\')
4605                 PL_lex_state = LEX_INTERPCASEMOD;
4606             else
4607                 PL_lex_state = LEX_INTERPSTART;
4608         }
4609
4610         if (s != PL_bufptr) {
4611             start_force(PL_curforce);
4612             if (PL_madskills) {
4613                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4614             }
4615             NEXTVAL_NEXTTOKE = pl_yylval;
4616             PL_expect = XTERM;
4617             force_next(THING);
4618             if (PL_lex_starts++) {
4619 #ifdef PERL_MAD
4620                 if (PL_madskills) {
4621                     if (PL_thistoken)
4622                         sv_free(PL_thistoken);
4623                     PL_thistoken = newSVpvs("");
4624                 }
4625 #endif
4626                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4627                 if (!PL_lex_casemods && PL_lex_inpat)
4628                     OPERATOR(',');
4629                 else
4630                     Aop(OP_CONCAT);
4631             }
4632             else {
4633                 PL_bufptr = s;
4634                 return yylex();
4635             }
4636         }
4637
4638         return yylex();
4639     case LEX_FORMLINE:
4640         PL_lex_state = LEX_NORMAL;
4641         s = scan_formline(PL_bufptr);
4642         if (!PL_lex_formbrack)
4643             goto rightbracket;
4644         OPERATOR(';');
4645     }
4646
4647     s = PL_bufptr;
4648     PL_oldoldbufptr = PL_oldbufptr;
4649     PL_oldbufptr = s;
4650
4651   retry:
4652 #ifdef PERL_MAD
4653     if (PL_thistoken) {
4654         sv_free(PL_thistoken);
4655         PL_thistoken = 0;
4656     }
4657     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4658 #endif
4659     switch (*s) {
4660     default:
4661         if (isIDFIRST_lazy_if(s,UTF))
4662             goto keylookup;
4663         {
4664         unsigned char c = *s;
4665         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4666         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4667             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4668         } else {
4669             d = PL_linestart;
4670         }
4671         *s = '\0';
4672         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4673     }
4674     case 4:
4675     case 26:
4676         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4677     case 0:
4678 #ifdef PERL_MAD
4679         if (PL_madskills)
4680             PL_faketokens = 0;
4681 #endif
4682         if (!PL_rsfp) {
4683             PL_last_uni = 0;
4684             PL_last_lop = 0;
4685             if (PL_lex_brackets &&
4686                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4687                 yyerror((const char *)
4688                         (PL_lex_formbrack
4689                          ? "Format not terminated"
4690                          : "Missing right curly or square bracket"));
4691             }
4692             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4693                         "### Tokener got EOF\n");
4694             } );
4695             TOKEN(0);
4696         }
4697         if (s++ < PL_bufend)
4698             goto retry;                 /* ignore stray nulls */
4699         PL_last_uni = 0;
4700         PL_last_lop = 0;
4701         if (!PL_in_eval && !PL_preambled) {
4702             PL_preambled = TRUE;
4703 #ifdef PERL_MAD
4704             if (PL_madskills)
4705                 PL_faketokens = 1;
4706 #endif
4707             if (PL_perldb) {
4708                 /* Generate a string of Perl code to load the debugger.
4709                  * If PERL5DB is set, it will return the contents of that,
4710                  * otherwise a compile-time require of perl5db.pl.  */
4711
4712                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4713
4714                 if (pdb) {
4715                     sv_setpv(PL_linestr, pdb);
4716                     sv_catpvs(PL_linestr,";");
4717                 } else {
4718                     SETERRNO(0,SS_NORMAL);
4719                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4720                 }
4721             } else
4722                 sv_setpvs(PL_linestr,"");
4723             if (PL_preambleav) {
4724                 SV **svp = AvARRAY(PL_preambleav);
4725                 SV **const end = svp + AvFILLp(PL_preambleav);
4726                 while(svp <= end) {
4727                     sv_catsv(PL_linestr, *svp);
4728                     ++svp;
4729                     sv_catpvs(PL_linestr, ";");
4730                 }
4731                 sv_free(MUTABLE_SV(PL_preambleav));
4732                 PL_preambleav = NULL;
4733             }
4734             if (PL_minus_E)
4735                 sv_catpvs(PL_linestr,
4736                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4737             if (PL_minus_n || PL_minus_p) {
4738                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4739                 if (PL_minus_l)
4740                     sv_catpvs(PL_linestr,"chomp;");
4741                 if (PL_minus_a) {
4742                     if (PL_minus_F) {
4743                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4744                              || *PL_splitstr == '"')
4745                               && strchr(PL_splitstr + 1, *PL_splitstr))
4746                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4747                         else {
4748                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4749                                bytes can be used as quoting characters.  :-) */
4750                             const char *splits = PL_splitstr;
4751                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4752                             do {
4753                                 /* Need to \ \s  */
4754                                 if (*splits == '\\')
4755                                     sv_catpvn(PL_linestr, splits, 1);
4756                                 sv_catpvn(PL_linestr, splits, 1);
4757                             } while (*splits++);
4758                             /* This loop will embed the trailing NUL of
4759                                PL_linestr as the last thing it does before
4760                                terminating.  */
4761                             sv_catpvs(PL_linestr, ");");
4762                         }
4763                     }
4764                     else
4765                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4766                 }
4767             }
4768             sv_catpvs(PL_linestr, "\n");
4769             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4770             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4771             PL_last_lop = PL_last_uni = NULL;
4772             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4773                 update_debugger_info(PL_linestr, NULL, 0);
4774             goto retry;
4775         }
4776         do {
4777             fake_eof = 0;
4778             bof = PL_rsfp ? TRUE : FALSE;
4779             if (0) {
4780               fake_eof:
4781                 fake_eof = LEX_FAKE_EOF;
4782             }
4783             PL_bufptr = PL_bufend;
4784             CopLINE_inc(PL_curcop);
4785             if (!lex_next_chunk(fake_eof)) {
4786                 CopLINE_dec(PL_curcop);
4787                 s = PL_bufptr;
4788                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4789             }
4790             CopLINE_dec(PL_curcop);
4791 #ifdef PERL_MAD
4792             if (!PL_rsfp)
4793                 PL_realtokenstart = -1;
4794 #endif
4795             s = PL_bufptr;
4796             /* If it looks like the start of a BOM or raw UTF-16,
4797              * check if it in fact is. */
4798             if (bof && PL_rsfp &&
4799                      (*s == 0 ||
4800                       *(U8*)s == 0xEF ||
4801                       *(U8*)s >= 0xFE ||
4802                       s[1] == 0)) {
4803                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4804                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4805 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4806                 /* offset may include swallowed CR */
4807                 if (!bof)
4808                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4809 #endif
4810                 if (bof) {
4811                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4812                     s = swallow_bom((U8*)s);
4813                 }
4814             }
4815             if (PL_parser->in_pod) {
4816                 /* Incest with pod. */
4817 #ifdef PERL_MAD
4818                 if (PL_madskills)
4819                     sv_catsv(PL_thiswhite, PL_linestr);
4820 #endif
4821                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4822                     sv_setpvs(PL_linestr, "");
4823                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4824                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4825                     PL_last_lop = PL_last_uni = NULL;
4826                     PL_parser->in_pod = 0;
4827                 }
4828             }
4829             if (PL_rsfp)
4830                 incline(s);
4831         } while (PL_parser->in_pod);
4832         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4833         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4834         PL_last_lop = PL_last_uni = NULL;
4835         if (CopLINE(PL_curcop) == 1) {
4836             while (s < PL_bufend && isSPACE(*s))
4837                 s++;
4838             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4839                 s++;
4840 #ifdef PERL_MAD
4841             if (PL_madskills)
4842                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4843 #endif
4844             d = NULL;
4845             if (!PL_in_eval) {
4846                 if (*s == '#' && *(s+1) == '!')
4847                     d = s + 2;
4848 #ifdef ALTERNATE_SHEBANG
4849                 else {
4850                     static char const as[] = ALTERNATE_SHEBANG;
4851                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4852                         d = s + (sizeof(as) - 1);
4853                 }
4854 #endif /* ALTERNATE_SHEBANG */
4855             }
4856             if (d) {
4857                 char *ipath;
4858                 char *ipathend;
4859
4860                 while (isSPACE(*d))
4861                     d++;
4862                 ipath = d;
4863                 while (*d && !isSPACE(*d))
4864                     d++;
4865                 ipathend = d;
4866
4867 #ifdef ARG_ZERO_IS_SCRIPT
4868                 if (ipathend > ipath) {
4869                     /*
4870                      * HP-UX (at least) sets argv[0] to the script name,
4871                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4872                      * at least, set argv[0] to the basename of the Perl
4873                      * interpreter. So, having found "#!", we'll set it right.
4874                      */
4875                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4876                                                     SVt_PV)); /* $^X */
4877                     assert(SvPOK(x) || SvGMAGICAL(x));
4878                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4879                         sv_setpvn(x, ipath, ipathend - ipath);
4880                         SvSETMAGIC(x);
4881                     }
4882                     else {
4883                         STRLEN blen;
4884                         STRLEN llen;
4885                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4886                         const char * const lstart = SvPV_const(x,llen);
4887                         if (llen < blen) {
4888                             bstart += blen - llen;
4889                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4890                                 sv_setpvn(x, ipath, ipathend - ipath);
4891                                 SvSETMAGIC(x);
4892                             }
4893                         }
4894                     }
4895                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4896                 }
4897 #endif /* ARG_ZERO_IS_SCRIPT */
4898
4899                 /*
4900                  * Look for options.
4901                  */
4902                 d = instr(s,"perl -");
4903                 if (!d) {
4904                     d = instr(s,"perl");
4905 #if defined(DOSISH)
4906                     /* avoid getting into infinite loops when shebang
4907                      * line contains "Perl" rather than "perl" */
4908                     if (!d) {
4909                         for (d = ipathend-4; d >= ipath; --d) {
4910                             if ((*d == 'p' || *d == 'P')
4911                                 && !ibcmp(d, "perl", 4))
4912                             {
4913                                 break;
4914                             }
4915                         }
4916                         if (d < ipath)
4917                             d = NULL;
4918                     }
4919 #endif
4920                 }
4921 #ifdef ALTERNATE_SHEBANG
4922                 /*
4923                  * If the ALTERNATE_SHEBANG on this system starts with a
4924                  * character that can be part of a Perl expression, then if
4925                  * we see it but not "perl", we're probably looking at the
4926                  * start of Perl code, not a request to hand off to some
4927                  * other interpreter.  Similarly, if "perl" is there, but
4928                  * not in the first 'word' of the line, we assume the line
4929                  * contains the start of the Perl program.
4930                  */
4931                 if (d && *s != '#') {
4932                     const char *c = ipath;
4933                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4934                         c++;
4935                     if (c < d)
4936                         d = NULL;       /* "perl" not in first word; ignore */
4937                     else
4938                         *s = '#';       /* Don't try to parse shebang line */
4939                 }
4940 #endif /* ALTERNATE_SHEBANG */
4941                 if (!d &&
4942                     *s == '#' &&
4943                     ipathend > ipath &&
4944                     !PL_minus_c &&
4945                     !instr(s,"indir") &&
4946                     instr(PL_origargv[0],"perl"))
4947                 {
4948                     dVAR;
4949                     char **newargv;
4950
4951                     *ipathend = '\0';
4952                     s = ipathend + 1;
4953                     while (s < PL_bufend && isSPACE(*s))
4954                         s++;
4955                     if (s < PL_bufend) {
4956                         Newx(newargv,PL_origargc+3,char*);
4957                         newargv[1] = s;
4958                         while (s < PL_bufend && !isSPACE(*s))
4959                             s++;
4960                         *s = '\0';
4961                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4962                     }
4963                     else
4964                         newargv = PL_origargv;
4965                     newargv[0] = ipath;
4966                     PERL_FPU_PRE_EXEC
4967                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4968                     PERL_FPU_POST_EXEC
4969                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4970                 }
4971                 if (d) {
4972                     while (*d && !isSPACE(*d))
4973                         d++;
4974                     while (SPACE_OR_TAB(*d))
4975                         d++;
4976
4977                     if (*d++ == '-') {
4978                         const bool switches_done = PL_doswitches;
4979                         const U32 oldpdb = PL_perldb;
4980                         const bool oldn = PL_minus_n;
4981                         const bool oldp = PL_minus_p;
4982                         const char *d1 = d;
4983
4984                         do {
4985                             bool baduni = FALSE;
4986                             if (*d1 == 'C') {
4987                                 const char *d2 = d1 + 1;
4988                                 if (parse_unicode_opts((const char **)&d2)
4989                                     != PL_unicode)
4990                                     baduni = TRUE;
4991                             }
4992                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4993                                 const char * const m = d1;
4994                                 while (*d1 && !isSPACE(*d1))
4995                                     d1++;
4996                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4997                                       (int)(d1 - m), m);
4998                             }
4999                             d1 = moreswitches(d1);
5000                         } while (d1);
5001                         if (PL_doswitches && !switches_done) {
5002                             int argc = PL_origargc;
5003                             char **argv = PL_origargv;
5004                             do {
5005                                 argc--,argv++;
5006                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5007                             init_argv_symbols(argc,argv);
5008                         }
5009                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5010                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5011                               /* if we have already added "LINE: while (<>) {",
5012                                  we must not do it again */
5013                         {
5014                             sv_setpvs(PL_linestr, "");
5015                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5016                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5017                             PL_last_lop = PL_last_uni = NULL;
5018                             PL_preambled = FALSE;
5019                             if (PERLDB_LINE || PERLDB_SAVESRC)
5020                                 (void)gv_fetchfile(PL_origfilename);
5021                             goto retry;
5022                         }
5023                     }
5024                 }
5025             }
5026         }
5027         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5028             PL_bufptr = s;
5029             PL_lex_state = LEX_FORMLINE;
5030             return yylex();
5031         }
5032         goto retry;
5033     case '\r':
5034 #ifdef PERL_STRICT_CR
5035         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5036         Perl_croak(aTHX_
5037       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5038 #endif
5039     case ' ': case '\t': case '\f': case 013:
5040 #ifdef PERL_MAD
5041         PL_realtokenstart = -1;
5042         if (!PL_thiswhite)
5043             PL_thiswhite = newSVpvs("");
5044         sv_catpvn(PL_thiswhite, s, 1);
5045 #endif
5046         s++;
5047         goto retry;
5048     case '#':
5049     case '\n':
5050 #ifdef PERL_MAD
5051         PL_realtokenstart = -1;
5052         if (PL_madskills)
5053             PL_faketokens = 0;
5054 #endif
5055         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5056             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5057                 /* handle eval qq[#line 1 "foo"\n ...] */
5058                 CopLINE_dec(PL_curcop);
5059                 incline(s);
5060             }
5061             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5062                 s = SKIPSPACE0(s);
5063                 if (!PL_in_eval || PL_rsfp)
5064                     incline(s);
5065             }
5066             else {
5067                 d = s;
5068                 while (d < PL_bufend && *d != '\n')
5069                     d++;
5070                 if (d < PL_bufend)
5071                     d++;
5072                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5073                   Perl_croak(aTHX_ "panic: input overflow");
5074 #ifdef PERL_MAD
5075                 if (PL_madskills)
5076                     PL_thiswhite = newSVpvn(s, d - s);
5077 #endif
5078                 s = d;
5079                 incline(s);
5080             }
5081             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5082                 PL_bufptr = s;
5083                 PL_lex_state = LEX_FORMLINE;
5084                 return yylex();
5085             }
5086         }
5087         else {
5088 #ifdef PERL_MAD
5089             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5090                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5091                     PL_faketokens = 0;
5092                     s = SKIPSPACE0(s);
5093                     TOKEN(PEG); /* make sure any #! line is accessible */
5094                 }
5095                 s = SKIPSPACE0(s);
5096             }
5097             else {
5098 /*              if (PL_madskills && PL_lex_formbrack) { */
5099                     d = s;
5100                     while (d < PL_bufend && *d != '\n')
5101                         d++;
5102                     if (d < PL_bufend)
5103                         d++;
5104                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5105                       Perl_croak(aTHX_ "panic: input overflow");
5106                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5107                         if (!PL_thiswhite)
5108                             PL_thiswhite = newSVpvs("");
5109                         if (CopLINE(PL_curcop) == 1) {
5110                             sv_setpvs(PL_thiswhite, "");
5111                             PL_faketokens = 0;
5112                         }
5113                         sv_catpvn(PL_thiswhite, s, d - s);
5114                     }
5115                     s = d;
5116 /*              }
5117                 *s = '\0';
5118                 PL_bufend = s; */
5119             }
5120 #else
5121             *s = '\0';
5122             PL_bufend = s;
5123 #endif
5124         }
5125         goto retry;
5126     case '-':
5127         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5128             I32 ftst = 0;
5129             char tmp;
5130
5131             s++;
5132             PL_bufptr = s;
5133             tmp = *s++;
5134
5135             while (s < PL_bufend && SPACE_OR_TAB(*s))
5136                 s++;
5137
5138             if (strnEQ(s,"=>",2)) {
5139                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5140                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5141                 OPERATOR('-');          /* unary minus */
5142             }
5143             PL_last_uni = PL_oldbufptr;
5144             switch (tmp) {
5145             case 'r': ftst = OP_FTEREAD;        break;
5146             case 'w': ftst = OP_FTEWRITE;       break;
5147             case 'x': ftst = OP_FTEEXEC;        break;
5148             case 'o': ftst = OP_FTEOWNED;       break;
5149             case 'R': ftst = OP_FTRREAD;        break;
5150             case 'W': ftst = OP_FTRWRITE;       break;
5151             case 'X': ftst = OP_FTREXEC;        break;
5152             case 'O': ftst = OP_FTROWNED;       break;
5153             case 'e': ftst = OP_FTIS;           break;
5154             case 'z': ftst = OP_FTZERO;         break;
5155             case 's': ftst = OP_FTSIZE;         break;
5156             case 'f': ftst = OP_FTFILE;         break;
5157             case 'd': ftst = OP_FTDIR;          break;
5158             case 'l': ftst = OP_FTLINK;         break;
5159             case 'p': ftst = OP_FTPIPE;         break;
5160             case 'S': ftst = OP_FTSOCK;         break;
5161             case 'u': ftst = OP_FTSUID;         break;
5162             case 'g': ftst = OP_FTSGID;         break;
5163             case 'k': ftst = OP_FTSVTX;         break;
5164             case 'b': ftst = OP_FTBLK;          break;
5165             case 'c': ftst = OP_FTCHR;          break;
5166             case 't': ftst = OP_FTTTY;          break;
5167             case 'T': ftst = OP_FTTEXT;         break;
5168             case 'B': ftst = OP_FTBINARY;       break;
5169             case 'M': case 'A': case 'C':
5170                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5171                 switch (tmp) {
5172                 case 'M': ftst = OP_FTMTIME;    break;
5173                 case 'A': ftst = OP_FTATIME;    break;
5174                 case 'C': ftst = OP_FTCTIME;    break;
5175                 default:                        break;
5176                 }
5177                 break;
5178             default:
5179                 break;
5180             }
5181             if (ftst) {
5182                 PL_last_lop_op = (OPCODE)ftst;
5183                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5184                         "### Saw file test %c\n", (int)tmp);
5185                 } );
5186                 FTST(ftst);
5187             }
5188             else {
5189                 /* Assume it was a minus followed by a one-letter named
5190                  * subroutine call (or a -bareword), then. */
5191                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5192                         "### '-%c' looked like a file test but was not\n",
5193                         (int) tmp);
5194                 } );
5195                 s = --PL_bufptr;
5196             }
5197         }
5198         {
5199             const char tmp = *s++;
5200             if (*s == tmp) {
5201                 s++;
5202                 if (PL_expect == XOPERATOR)
5203                     TERM(POSTDEC);
5204                 else
5205                     OPERATOR(PREDEC);
5206             }
5207             else if (*s == '>') {
5208                 s++;
5209                 s = SKIPSPACE1(s);
5210                 if (isIDFIRST_lazy_if(s,UTF)) {
5211                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5212                     TOKEN(ARROW);
5213                 }
5214                 else if (*s == '$')
5215                     OPERATOR(ARROW);
5216                 else
5217                     TERM(ARROW);
5218             }
5219             if (PL_expect == XOPERATOR) {
5220                 if (*s == '=' && !PL_lex_allbrackets &&
5221                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5222                     s--;
5223                     TOKEN(0);
5224                 }
5225                 Aop(OP_SUBTRACT);
5226             }
5227             else {
5228                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5229                     check_uni();
5230                 OPERATOR('-');          /* unary minus */
5231             }
5232         }
5233
5234     case '+':
5235         {
5236             const char tmp = *s++;
5237             if (*s == tmp) {
5238                 s++;
5239                 if (PL_expect == XOPERATOR)
5240                     TERM(POSTINC);
5241                 else
5242                     OPERATOR(PREINC);
5243             }
5244             if (PL_expect == XOPERATOR) {
5245                 if (*s == '=' && !PL_lex_allbrackets &&
5246                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5247                     s--;
5248                     TOKEN(0);
5249                 }
5250                 Aop(OP_ADD);
5251             }
5252             else {
5253                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5254                     check_uni();
5255                 OPERATOR('+');
5256             }
5257         }
5258
5259     case '*':
5260         if (PL_expect != XOPERATOR) {
5261             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5262             PL_expect = XOPERATOR;
5263             force_ident(PL_tokenbuf, '*');
5264             if (!*PL_tokenbuf)
5265                 PREREF('*');
5266             TERM('*');
5267         }
5268         s++;
5269         if (*s == '*') {
5270             s++;
5271             if (*s == '=' && !PL_lex_allbrackets &&
5272                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5273                 s -= 2;
5274                 TOKEN(0);
5275             }
5276             PWop(OP_POW);
5277         }
5278         if (*s == '=' && !PL_lex_allbrackets &&
5279                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5280             s--;
5281             TOKEN(0);
5282         }
5283         Mop(OP_MULTIPLY);
5284
5285     case '%':
5286         if (PL_expect == XOPERATOR) {
5287             if (s[1] == '=' && !PL_lex_allbrackets &&
5288                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5289                 TOKEN(0);
5290             ++s;
5291             Mop(OP_MODULO);
5292         }
5293         PL_tokenbuf[0] = '%';
5294         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5295                 sizeof PL_tokenbuf - 1, FALSE);
5296         if (!PL_tokenbuf[1]) {
5297             PREREF('%');
5298         }
5299         PL_pending_ident = '%';
5300         TERM('%');
5301
5302     case '^':
5303         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5304                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5305             TOKEN(0);
5306         s++;
5307         BOop(OP_BIT_XOR);
5308     case '[':
5309         if (PL_lex_brackets > 100)
5310             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5311         PL_lex_brackstack[PL_lex_brackets++] = 0;
5312         PL_lex_allbrackets++;
5313         {
5314             const char tmp = *s++;
5315             OPERATOR(tmp);
5316         }
5317     case '~':
5318         if (s[1] == '~'
5319             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5320         {
5321             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5322                 TOKEN(0);
5323             s += 2;
5324             Eop(OP_SMARTMATCH);
5325         }
5326         s++;
5327         OPERATOR('~');
5328     case ',':
5329         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5330             TOKEN(0);
5331         s++;
5332         OPERATOR(',');
5333     case ':':
5334         if (s[1] == ':') {
5335             len = 0;
5336             goto just_a_word_zero_gv;
5337         }
5338         s++;
5339         switch (PL_expect) {
5340             OP *attrs;
5341 #ifdef PERL_MAD
5342             I32 stuffstart;
5343 #endif
5344         case XOPERATOR:
5345             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5346                 break;
5347             PL_bufptr = s;      /* update in case we back off */
5348             if (*s == '=') {
5349                 Perl_croak(aTHX_
5350                            "Use of := for an empty attribute list is not allowed");
5351             }
5352             goto grabattrs;
5353         case XATTRBLOCK:
5354             PL_expect = XBLOCK;
5355             goto grabattrs;
5356         case XATTRTERM:
5357             PL_expect = XTERMBLOCK;
5358          grabattrs:
5359 #ifdef PERL_MAD
5360             stuffstart = s - SvPVX(PL_linestr) - 1;
5361 #endif
5362             s = PEEKSPACE(s);
5363             attrs = NULL;
5364             while (isIDFIRST_lazy_if(s,UTF)) {
5365                 I32 tmp;
5366                 SV *sv;
5367                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5368                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5369                     if (tmp < 0) tmp = -tmp;
5370                     switch (tmp) {
5371                     case KEY_or:
5372                     case KEY_and:
5373                     case KEY_for:
5374                     case KEY_foreach:
5375                     case KEY_unless:
5376                     case KEY_if:
5377                     case KEY_while:
5378                     case KEY_until:
5379                         goto got_attrs;
5380                     default:
5381                         break;
5382                     }
5383                 }
5384                 sv = newSVpvn(s, len);
5385                 if (*d == '(') {
5386                     d = scan_str(d,TRUE,TRUE);
5387                     if (!d) {
5388                         /* MUST advance bufptr here to avoid bogus
5389                            "at end of line" context messages from yyerror().
5390                          */
5391                         PL_bufptr = s + len;
5392                         yyerror("Unterminated attribute parameter in attribute list");
5393                         if (attrs)
5394                             op_free(attrs);
5395                         sv_free(sv);
5396                         return REPORT(0);       /* EOF indicator */
5397                     }
5398                 }
5399                 if (PL_lex_stuff) {
5400                     sv_catsv(sv, PL_lex_stuff);
5401                     attrs = op_append_elem(OP_LIST, attrs,
5402                                         newSVOP(OP_CONST, 0, sv));
5403                     SvREFCNT_dec(PL_lex_stuff);
5404                     PL_lex_stuff = NULL;
5405                 }
5406                 else {
5407                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5408                         sv_free(sv);
5409                         if (PL_in_my == KEY_our) {
5410                             deprecate(":unique");
5411                         }
5412                         else
5413                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5414                     }
5415
5416                     /* NOTE: any CV attrs applied here need to be part of
5417                        the CVf_BUILTIN_ATTRS define in cv.h! */
5418                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5419                         sv_free(sv);
5420                         CvLVALUE_on(PL_compcv);
5421                     }
5422                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5423                         sv_free(sv);
5424                         deprecate(":locked");
5425                     }
5426                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5427                         sv_free(sv);
5428                         CvMETHOD_on(PL_compcv);
5429                     }
5430                     /* After we've set the flags, it could be argued that
5431                        we don't need to do the attributes.pm-based setting
5432                        process, and shouldn't bother appending recognized
5433                        flags.  To experiment with that, uncomment the
5434                        following "else".  (Note that's already been
5435                        uncommented.  That keeps the above-applied built-in
5436                        attributes from being intercepted (and possibly
5437                        rejected) by a package's attribute routines, but is
5438                        justified by the performance win for the common case
5439                        of applying only built-in attributes.) */
5440                     else
5441                         attrs = op_append_elem(OP_LIST, attrs,
5442                                             newSVOP(OP_CONST, 0,
5443                                                     sv));
5444                 }
5445                 s = PEEKSPACE(d);
5446                 if (*s == ':' && s[1] != ':')
5447                     s = PEEKSPACE(s+1);
5448                 else if (s == d)
5449                     break;      /* require real whitespace or :'s */
5450                 /* XXX losing whitespace on sequential attributes here */
5451             }
5452             {
5453                 const char tmp
5454                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5455                 if (*s != ';' && *s != '}' && *s != tmp
5456                     && (tmp != '=' || *s != ')')) {
5457                     const char q = ((*s == '\'') ? '"' : '\'');
5458                     /* If here for an expression, and parsed no attrs, back
5459                        off. */
5460                     if (tmp == '=' && !attrs) {
5461                         s = PL_bufptr;
5462                         break;
5463                     }
5464                     /* MUST advance bufptr here to avoid bogus "at end of line"
5465                        context messages from yyerror().
5466                     */
5467                     PL_bufptr = s;
5468                     yyerror( (const char *)
5469                              (*s
5470                               ? Perl_form(aTHX_ "Invalid separator character "
5471                                           "%c%c%c in attribute list", q, *s, q)
5472                               : "Unterminated attribute list" ) );
5473                     if (attrs)
5474                         op_free(attrs);
5475                     OPERATOR(':');
5476                 }
5477             }
5478         got_attrs:
5479             if (attrs) {
5480                 start_force(PL_curforce);
5481                 NEXTVAL_NEXTTOKE.opval = attrs;
5482                 CURMAD('_', PL_nextwhite);
5483                 force_next(THING);
5484             }
5485 #ifdef PERL_MAD
5486             if (PL_madskills) {
5487                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5488                                      (s - SvPVX(PL_linestr)) - stuffstart);
5489             }
5490 #endif
5491             TOKEN(COLONATTR);
5492         }
5493         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5494             s--;
5495             TOKEN(0);
5496         }
5497         PL_lex_allbrackets--;
5498         OPERATOR(':');
5499     case '(':
5500         s++;
5501         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5502             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5503         else
5504             PL_expect = XTERM;
5505         s = SKIPSPACE1(s);
5506         PL_lex_allbrackets++;
5507         TOKEN('(');
5508     case ';':
5509         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5510             TOKEN(0);
5511         CLINE;
5512         s++;
5513         OPERATOR(';');
5514     case ')':
5515         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5516             TOKEN(0);
5517         s++;
5518         PL_lex_allbrackets--;
5519         s = SKIPSPACE1(s);
5520         if (*s == '{')
5521             PREBLOCK(')');
5522         TERM(')');
5523     case ']':
5524         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5525             TOKEN(0);
5526         s++;
5527         if (PL_lex_brackets <= 0)
5528             yyerror("Unmatched right square bracket");
5529         else
5530             --PL_lex_brackets;
5531         PL_lex_allbrackets--;
5532         if (PL_lex_state == LEX_INTERPNORMAL) {
5533             if (PL_lex_brackets == 0) {
5534                 if (*s == '-' && s[1] == '>')
5535                     PL_lex_state = LEX_INTERPENDMAYBE;
5536                 else if (*s != '[' && *s != '{')
5537                     PL_lex_state = LEX_INTERPEND;
5538             }
5539         }
5540         TERM(']');
5541     case '{':
5542       leftbracket:
5543         s++;
5544         if (PL_lex_brackets > 100) {
5545             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5546         }
5547         switch (PL_expect) {
5548         case XTERM:
5549             if (PL_lex_formbrack) {
5550                 s--;
5551                 PRETERMBLOCK(DO);
5552             }
5553             if (PL_oldoldbufptr == PL_last_lop)
5554                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5555             else
5556                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5557             PL_lex_allbrackets++;
5558             OPERATOR(HASHBRACK);
5559         case XOPERATOR:
5560             while (s < PL_bufend && SPACE_OR_TAB(*s))
5561                 s++;
5562             d = s;
5563             PL_tokenbuf[0] = '\0';
5564             if (d < PL_bufend && *d == '-') {
5565                 PL_tokenbuf[0] = '-';
5566                 d++;
5567                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5568                     d++;
5569             }
5570             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5571                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5572                               FALSE, &len);
5573                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5574                     d++;
5575                 if (*d == '}') {
5576                     const char minus = (PL_tokenbuf[0] == '-');
5577                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5578                     if (minus)
5579                         force_next('-');
5580                 }
5581             }
5582             /* FALL THROUGH */
5583         case XATTRBLOCK:
5584         case XBLOCK:
5585             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5586             PL_lex_allbrackets++;
5587             PL_expect = XSTATE;
5588             break;
5589         case XATTRTERM:
5590         case XTERMBLOCK:
5591             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5592             PL_lex_allbrackets++;
5593             PL_expect = XSTATE;
5594             break;
5595         default: {
5596                 const char *t;
5597                 if (PL_oldoldbufptr == PL_last_lop)
5598                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5599                 else
5600                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5601                 PL_lex_allbrackets++;
5602                 s = SKIPSPACE1(s);
5603                 if (*s == '}') {
5604                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5605                         PL_expect = XTERM;
5606                         /* This hack is to get the ${} in the message. */
5607                         PL_bufptr = s+1;
5608                         yyerror("syntax error");
5609                         break;
5610                     }
5611                     OPERATOR(HASHBRACK);
5612                 }
5613                 /* This hack serves to disambiguate a pair of curlies
5614                  * as being a block or an anon hash.  Normally, expectation
5615                  * determines that, but in cases where we're not in a
5616                  * position to expect anything in particular (like inside
5617                  * eval"") we have to resolve the ambiguity.  This code
5618                  * covers the case where the first term in the curlies is a
5619                  * quoted string.  Most other cases need to be explicitly
5620                  * disambiguated by prepending a "+" before the opening
5621                  * curly in order to force resolution as an anon hash.
5622                  *
5623                  * XXX should probably propagate the outer expectation
5624                  * into eval"" to rely less on this hack, but that could
5625                  * potentially break current behavior of eval"".
5626                  * GSAR 97-07-21
5627                  */
5628                 t = s;
5629                 if (*s == '\'' || *s == '"' || *s == '`') {
5630                     /* common case: get past first string, handling escapes */
5631                     for (t++; t < PL_bufend && *t != *s;)
5632                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5633                             t++;
5634                     t++;
5635                 }
5636                 else if (*s == 'q') {
5637                     if (++t < PL_bufend
5638                         && (!isALNUM(*t)
5639                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5640                                 && !isALNUM(*t))))
5641                     {
5642                         /* skip q//-like construct */
5643                         const char *tmps;
5644                         char open, close, term;
5645                         I32 brackets = 1;
5646
5647                         while (t < PL_bufend && isSPACE(*t))
5648                             t++;
5649                         /* check for q => */
5650                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5651                             OPERATOR(HASHBRACK);
5652                         }
5653                         term = *t;
5654                         open = term;
5655                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5656                             term = tmps[5];
5657                         close = term;
5658                         if (open == close)
5659                             for (t++; t < PL_bufend; t++) {
5660                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5661                                     t++;
5662                                 else if (*t == open)
5663                                     break;
5664                             }
5665                         else {
5666                             for (t++; t < PL_bufend; t++) {
5667                                 if (*t == '\\' && t+1 < PL_bufend)
5668                                     t++;
5669                                 else if (*t == close && --brackets <= 0)
5670                                     break;
5671                                 else if (*t == open)
5672                                     brackets++;
5673                             }
5674                         }
5675                         t++;
5676                     }
5677                     else
5678                         /* skip plain q word */
5679                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5680                              t += UTF8SKIP(t);
5681                 }
5682                 else if (isALNUM_lazy_if(t,UTF)) {
5683                     t += UTF8SKIP(t);
5684                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5685                          t += UTF8SKIP(t);
5686                 }
5687                 while (t < PL_bufend && isSPACE(*t))
5688                     t++;
5689                 /* if comma follows first term, call it an anon hash */
5690                 /* XXX it could be a comma expression with loop modifiers */
5691                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5692                                    || (*t == '=' && t[1] == '>')))
5693                     OPERATOR(HASHBRACK);
5694                 if (PL_expect == XREF)
5695                     PL_expect = XTERM;
5696                 else {
5697                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5698                     PL_expect = XSTATE;
5699                 }
5700             }
5701             break;
5702         }
5703         pl_yylval.ival = CopLINE(PL_curcop);
5704         if (isSPACE(*s) || *s == '#')
5705             PL_copline = NOLINE;   /* invalidate current command line number */
5706         TOKEN('{');
5707     case '}':
5708         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5709             TOKEN(0);
5710       rightbracket:
5711         s++;
5712         if (PL_lex_brackets <= 0)
5713             yyerror("Unmatched right curly bracket");
5714         else
5715             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5716         PL_lex_allbrackets--;
5717         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5718             PL_lex_formbrack = 0;
5719         if (PL_lex_state == LEX_INTERPNORMAL) {
5720             if (PL_lex_brackets == 0) {
5721                 if (PL_expect & XFAKEBRACK) {
5722                     PL_expect &= XENUMMASK;
5723                     PL_lex_state = LEX_INTERPEND;
5724                     PL_bufptr = s;
5725 #if 0
5726                     if (PL_madskills) {
5727                         if (!PL_thiswhite)
5728                             PL_thiswhite = newSVpvs("");
5729                         sv_catpvs(PL_thiswhite,"}");
5730                     }
5731 #endif
5732                     return yylex();     /* ignore fake brackets */
5733                 }
5734                 if (*s == '-' && s[1] == '>')
5735                     PL_lex_state = LEX_INTERPENDMAYBE;
5736                 else if (*s != '[' && *s != '{')
5737                     PL_lex_state = LEX_INTERPEND;
5738             }
5739         }
5740         if (PL_expect & XFAKEBRACK) {
5741             PL_expect &= XENUMMASK;
5742             PL_bufptr = s;
5743             return yylex();             /* ignore fake brackets */
5744         }
5745         start_force(PL_curforce);
5746         if (PL_madskills) {
5747             curmad('X', newSVpvn(s-1,1));
5748             CURMAD('_', PL_thiswhite);
5749         }
5750         force_next('}');
5751 #ifdef PERL_MAD
5752         if (!PL_thistoken)
5753             PL_thistoken = newSVpvs("");
5754 #endif
5755         TOKEN(';');
5756     case '&':
5757         s++;
5758         if (*s++ == '&') {
5759             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5760                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5761                 s -= 2;
5762                 TOKEN(0);
5763             }
5764             AOPERATOR(ANDAND);
5765         }
5766         s--;
5767         if (PL_expect == XOPERATOR) {
5768             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5769                 && isIDFIRST_lazy_if(s,UTF))
5770             {
5771                 CopLINE_dec(PL_curcop);
5772                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5773                 CopLINE_inc(PL_curcop);
5774             }
5775             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5776                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5777                 s--;
5778                 TOKEN(0);
5779             }
5780             BAop(OP_BIT_AND);
5781         }
5782
5783         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5784         if (*PL_tokenbuf) {
5785             PL_expect = XOPERATOR;
5786             force_ident(PL_tokenbuf, '&');
5787         }
5788         else
5789             PREREF('&');
5790         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5791         TERM('&');
5792
5793     case '|':
5794         s++;
5795         if (*s++ == '|') {
5796             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5797                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5798                 s -= 2;
5799                 TOKEN(0);
5800             }
5801             AOPERATOR(OROR);
5802         }
5803         s--;
5804         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5805                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5806             s--;
5807             TOKEN(0);
5808         }
5809         BOop(OP_BIT_OR);
5810     case '=':
5811         s++;
5812         {
5813             const char tmp = *s++;
5814             if (tmp == '=') {
5815                 if (!PL_lex_allbrackets &&
5816                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5817                     s -= 2;
5818                     TOKEN(0);
5819                 }
5820                 Eop(OP_EQ);
5821             }
5822             if (tmp == '>') {
5823                 if (!PL_lex_allbrackets &&
5824                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5825                     s -= 2;
5826                     TOKEN(0);
5827                 }
5828                 OPERATOR(',');
5829             }
5830             if (tmp == '~')
5831                 PMop(OP_MATCH);
5832             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5833                 && strchr("+-*/%.^&|<",tmp))
5834                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5835                             "Reversed %c= operator",(int)tmp);
5836             s--;
5837             if (PL_expect == XSTATE && isALPHA(tmp) &&
5838                 (s == PL_linestart+1 || s[-2] == '\n') )
5839                 {
5840                     if (PL_in_eval && !PL_rsfp) {
5841                         d = PL_bufend;
5842                         while (s < d) {
5843                             if (*s++ == '\n') {
5844                                 incline(s);
5845                                 if (strnEQ(s,"=cut",4)) {
5846                                     s = strchr(s,'\n');
5847                                     if (s)
5848                                         s++;
5849                                     else
5850                                         s = d;
5851                                     incline(s);
5852                                     goto retry;
5853                                 }
5854                             }
5855                         }
5856                         goto retry;
5857                     }
5858 #ifdef PERL_MAD
5859                     if (PL_madskills) {
5860                         if (!PL_thiswhite)
5861                             PL_thiswhite = newSVpvs("");
5862                         sv_catpvn(PL_thiswhite, PL_linestart,
5863                                   PL_bufend - PL_linestart);
5864                     }
5865 #endif
5866                     s = PL_bufend;
5867                     PL_parser->in_pod = 1;
5868                     goto retry;
5869                 }
5870         }
5871         if (PL_lex_brackets < PL_lex_formbrack) {
5872             const char *t = s;
5873 #ifdef PERL_STRICT_CR
5874             while (SPACE_OR_TAB(*t))
5875 #else
5876             while (SPACE_OR_TAB(*t) || *t == '\r')
5877 #endif
5878                 t++;
5879             if (*t == '\n' || *t == '#') {
5880                 s--;
5881                 PL_expect = XBLOCK;
5882                 goto leftbracket;
5883             }
5884         }
5885         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5886             s--;
5887             TOKEN(0);
5888         }
5889         pl_yylval.ival = 0;
5890         OPERATOR(ASSIGNOP);
5891     case '!':
5892         s++;
5893         {
5894             const char tmp = *s++;
5895             if (tmp == '=') {
5896                 /* was this !=~ where !~ was meant?
5897                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5898
5899                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5900                     const char *t = s+1;
5901
5902                     while (t < PL_bufend && isSPACE(*t))
5903                         ++t;
5904
5905                     if (*t == '/' || *t == '?' ||
5906                         ((*t == 'm' || *t == 's' || *t == 'y')
5907                          && !isALNUM(t[1])) ||
5908                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5909                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5910                                     "!=~ should be !~");
5911                 }
5912                 if (!PL_lex_allbrackets &&
5913                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5914                     s -= 2;
5915                     TOKEN(0);
5916                 }
5917                 Eop(OP_NE);
5918             }
5919             if (tmp == '~')
5920                 PMop(OP_NOT);
5921         }
5922         s--;
5923         OPERATOR('!');
5924     case '<':
5925         if (PL_expect != XOPERATOR) {
5926             if (s[1] != '<' && !strchr(s,'>'))
5927                 check_uni();
5928             if (s[1] == '<')
5929                 s = scan_heredoc(s);
5930             else
5931                 s = scan_inputsymbol(s);
5932             TERM(sublex_start());
5933         }
5934         s++;
5935         {
5936             char tmp = *s++;
5937             if (tmp == '<') {
5938                 if (*s == '=' && !PL_lex_allbrackets &&
5939                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5940                     s -= 2;
5941                     TOKEN(0);
5942                 }
5943                 SHop(OP_LEFT_SHIFT);
5944             }
5945             if (tmp == '=') {
5946                 tmp = *s++;
5947                 if (tmp == '>') {
5948                     if (!PL_lex_allbrackets &&
5949                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5950                         s -= 3;
5951                         TOKEN(0);
5952                     }
5953                     Eop(OP_NCMP);
5954                 }
5955                 s--;
5956                 if (!PL_lex_allbrackets &&
5957                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5958                     s -= 2;
5959                     TOKEN(0);
5960                 }
5961                 Rop(OP_LE);
5962             }
5963         }
5964         s--;
5965         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5966             s--;
5967             TOKEN(0);
5968         }
5969         Rop(OP_LT);
5970     case '>':
5971         s++;
5972         {
5973             const char tmp = *s++;
5974             if (tmp == '>') {
5975                 if (*s == '=' && !PL_lex_allbrackets &&
5976                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5977                     s -= 2;
5978                     TOKEN(0);
5979                 }
5980                 SHop(OP_RIGHT_SHIFT);
5981             }
5982             else if (tmp == '=') {
5983                 if (!PL_lex_allbrackets &&
5984                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5985                     s -= 2;
5986                     TOKEN(0);
5987                 }
5988                 Rop(OP_GE);
5989             }
5990         }
5991         s--;
5992         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5993             s--;
5994             TOKEN(0);
5995         }
5996         Rop(OP_GT);
5997
5998     case '$':
5999         CLINE;
6000
6001         if (PL_expect == XOPERATOR) {
6002             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6003                 return deprecate_commaless_var_list();
6004             }
6005         }
6006
6007         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6008             PL_tokenbuf[0] = '@';
6009             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6010                            sizeof PL_tokenbuf - 1, FALSE);
6011             if (PL_expect == XOPERATOR)
6012                 no_op("Array length", s);
6013             if (!PL_tokenbuf[1])
6014                 PREREF(DOLSHARP);
6015             PL_expect = XOPERATOR;
6016             PL_pending_ident = '#';
6017             TOKEN(DOLSHARP);
6018         }
6019
6020         PL_tokenbuf[0] = '$';
6021         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6022                        sizeof PL_tokenbuf - 1, FALSE);
6023         if (PL_expect == XOPERATOR)
6024             no_op("Scalar", s);
6025         if (!PL_tokenbuf[1]) {
6026             if (s == PL_bufend)
6027                 yyerror("Final $ should be \\$ or $name");
6028             PREREF('$');
6029         }
6030
6031         d = s;
6032         {
6033             const char tmp = *s;
6034             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6035                 s = SKIPSPACE1(s);
6036
6037             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6038                 && intuit_more(s)) {
6039                 if (*s == '[') {
6040                     PL_tokenbuf[0] = '@';
6041                     if (ckWARN(WARN_SYNTAX)) {
6042                         char *t = s+1;
6043
6044                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6045                             t++;
6046                         if (*t++ == ',') {
6047                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6048                             while (t < PL_bufend && *t != ']')
6049                                 t++;
6050                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6051                                         "Multidimensional syntax %.*s not supported",
6052                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6053                         }
6054                     }
6055                 }
6056                 else if (*s == '{') {
6057                     char *t;
6058                     PL_tokenbuf[0] = '%';
6059                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6060                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6061                         {
6062                             char tmpbuf[sizeof PL_tokenbuf];
6063                             do {
6064                                 t++;
6065                             } while (isSPACE(*t));
6066                             if (isIDFIRST_lazy_if(t,UTF)) {
6067                                 STRLEN len;
6068                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6069                                               &len);
6070                                 while (isSPACE(*t))
6071                                     t++;
6072                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6073                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6074                                                 "You need to quote \"%s\"",
6075                                                 tmpbuf);
6076                             }
6077                         }
6078                 }
6079             }
6080
6081             PL_expect = XOPERATOR;
6082             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6083                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6084                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6085                     PL_expect = XOPERATOR;
6086                 else if (strchr("$@\"'`q", *s))
6087                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6088                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6089                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6090                 else if (isIDFIRST_lazy_if(s,UTF)) {
6091                     char tmpbuf[sizeof PL_tokenbuf];
6092                     int t2;
6093                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6094                     if ((t2 = keyword(tmpbuf, len, 0))) {
6095                         /* binary operators exclude handle interpretations */
6096                         switch (t2) {
6097                         case -KEY_x:
6098                         case -KEY_eq:
6099                         case -KEY_ne:
6100                         case -KEY_gt:
6101                         case -KEY_lt:
6102                         case -KEY_ge:
6103                         case -KEY_le:
6104                         case -KEY_cmp:
6105                             break;
6106                         default:
6107                             PL_expect = XTERM;  /* e.g. print $fh length() */
6108                             break;
6109                         }
6110                     }
6111                     else {
6112                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6113                     }
6114                 }
6115                 else if (isDIGIT(*s))
6116                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6117                 else if (*s == '.' && isDIGIT(s[1]))
6118                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6119                 else if ((*s == '?' || *s == '-' || *s == '+')
6120                          && !isSPACE(s[1]) && s[1] != '=')
6121                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6122                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6123                          && s[1] != '/')
6124                     PL_expect = XTERM;          /* e.g. print $fh /.../
6125                                                    XXX except DORDOR operator
6126                                                 */
6127                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6128                          && s[2] != '=')
6129                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6130             }
6131         }
6132         PL_pending_ident = '$';
6133         TOKEN('$');
6134
6135     case '@':
6136         if (PL_expect == XOPERATOR)
6137             no_op("Array", s);
6138         PL_tokenbuf[0] = '@';
6139         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6140         if (!PL_tokenbuf[1]) {
6141             PREREF('@');
6142         }
6143         if (PL_lex_state == LEX_NORMAL)
6144             s = SKIPSPACE1(s);
6145         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6146             if (*s == '{')
6147                 PL_tokenbuf[0] = '%';
6148
6149             /* Warn about @ where they meant $. */
6150             if (*s == '[' || *s == '{') {
6151                 if (ckWARN(WARN_SYNTAX)) {
6152                     const char *t = s + 1;
6153                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6154                         t++;
6155                     if (*t == '}' || *t == ']') {
6156                         t++;
6157                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6158                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6159                             "Scalar value %.*s better written as $%.*s",
6160                             (int)(t-PL_bufptr), PL_bufptr,
6161                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6162                     }
6163                 }
6164             }
6165         }
6166         PL_pending_ident = '@';
6167         TERM('@');
6168
6169      case '/':                  /* may be division, defined-or, or pattern */
6170         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6171             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6172                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6173                 TOKEN(0);
6174             s += 2;
6175             AOPERATOR(DORDOR);
6176         }
6177      case '?':                  /* may either be conditional or pattern */
6178         if (PL_expect == XOPERATOR) {
6179              char tmp = *s++;
6180              if(tmp == '?') {
6181                 if (!PL_lex_allbrackets &&
6182                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6183                     s--;
6184                     TOKEN(0);
6185                 }
6186                 PL_lex_allbrackets++;
6187                 OPERATOR('?');
6188              }
6189              else {
6190                  tmp = *s++;
6191                  if(tmp == '/') {
6192                      /* A // operator. */
6193                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6194                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6195                                             LEX_FAKEEOF_LOGIC)) {
6196                         s -= 2;
6197                         TOKEN(0);
6198                     }
6199                     AOPERATOR(DORDOR);
6200                  }
6201                  else {
6202                      s--;
6203                      if (*s == '=' && !PL_lex_allbrackets &&
6204                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6205                          s--;
6206                          TOKEN(0);
6207                      }
6208                      Mop(OP_DIVIDE);
6209                  }
6210              }
6211          }
6212          else {
6213              /* Disable warning on "study /blah/" */
6214              if (PL_oldoldbufptr == PL_last_uni
6215               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6216                   || memNE(PL_last_uni, "study", 5)
6217                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6218               ))
6219                  check_uni();
6220              if (*s == '?')
6221                  deprecate("?PATTERN? without explicit operator");
6222              s = scan_pat(s,OP_MATCH);
6223              TERM(sublex_start());
6224          }
6225
6226     case '.':
6227         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6228 #ifdef PERL_STRICT_CR
6229             && s[1] == '\n'
6230 #else
6231             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6232 #endif
6233             && (s == PL_linestart || s[-1] == '\n') )
6234         {
6235             PL_lex_formbrack = 0;
6236             PL_expect = XSTATE;
6237             goto rightbracket;
6238         }
6239         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6240             s += 3;
6241             OPERATOR(YADAYADA);
6242         }
6243         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6244             char tmp = *s++;
6245             if (*s == tmp) {
6246                 if (!PL_lex_allbrackets &&
6247                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6248                     s--;
6249                     TOKEN(0);
6250                 }
6251                 s++;
6252                 if (*s == tmp) {
6253                     s++;
6254                     pl_yylval.ival = OPf_SPECIAL;
6255                 }
6256                 else
6257                     pl_yylval.ival = 0;
6258                 OPERATOR(DOTDOT);
6259             }
6260             if (*s == '=' && !PL_lex_allbrackets &&
6261                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6262                 s--;
6263                 TOKEN(0);
6264             }
6265             Aop(OP_CONCAT);
6266         }
6267         /* FALL THROUGH */
6268     case '0': case '1': case '2': case '3': case '4':
6269     case '5': case '6': case '7': case '8': case '9':
6270         s = scan_num(s, &pl_yylval);
6271         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6272         if (PL_expect == XOPERATOR)
6273             no_op("Number",s);
6274         TERM(THING);
6275
6276     case '\'':
6277         s = scan_str(s,!!PL_madskills,FALSE);
6278         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6279         if (PL_expect == XOPERATOR) {
6280             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6281                 return deprecate_commaless_var_list();
6282             }
6283             else
6284                 no_op("String",s);
6285         }
6286         if (!s)
6287             missingterm(NULL);
6288         pl_yylval.ival = OP_CONST;
6289         TERM(sublex_start());
6290
6291     case '"':
6292         s = scan_str(s,!!PL_madskills,FALSE);
6293         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6294         if (PL_expect == XOPERATOR) {
6295             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6296                 return deprecate_commaless_var_list();
6297             }
6298             else
6299                 no_op("String",s);
6300         }
6301         if (!s)
6302             missingterm(NULL);
6303         pl_yylval.ival = OP_CONST;
6304         /* FIXME. I think that this can be const if char *d is replaced by
6305            more localised variables.  */
6306         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6307             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6308                 pl_yylval.ival = OP_STRINGIFY;
6309                 break;
6310             }
6311         }
6312         TERM(sublex_start());
6313
6314     case '`':
6315         s = scan_str(s,!!PL_madskills,FALSE);
6316         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6317         if (PL_expect == XOPERATOR)
6318             no_op("Backticks",s);
6319         if (!s)
6320             missingterm(NULL);
6321         readpipe_override();
6322         TERM(sublex_start());
6323
6324     case '\\':
6325         s++;
6326         if (PL_lex_inwhat && isDIGIT(*s))
6327             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6328                            *s, *s);
6329         if (PL_expect == XOPERATOR)
6330             no_op("Backslash",s);
6331         OPERATOR(REFGEN);
6332
6333     case 'v':
6334         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6335             char *start = s + 2;
6336             while (isDIGIT(*start) || *start == '_')
6337                 start++;
6338             if (*start == '.' && isDIGIT(start[1])) {
6339                 s = scan_num(s, &pl_yylval);
6340                 TERM(THING);
6341             }
6342             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6343             else if (!isALPHA(*start) && (PL_expect == XTERM
6344                         || PL_expect == XREF || PL_expect == XSTATE
6345                         || PL_expect == XTERMORDORDOR)) {
6346                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6347                 if (!gv) {
6348                     s = scan_num(s, &pl_yylval);
6349                     TERM(THING);
6350                 }
6351             }
6352         }
6353         goto keylookup;
6354     case 'x':
6355         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6356             s++;
6357             Mop(OP_REPEAT);
6358         }
6359         goto keylookup;
6360
6361     case '_':
6362     case 'a': case 'A':
6363     case 'b': case 'B':
6364     case 'c': case 'C':
6365     case 'd': case 'D':
6366     case 'e': case 'E':
6367     case 'f': case 'F':
6368     case 'g': case 'G':
6369     case 'h': case 'H':
6370     case 'i': case 'I':
6371     case 'j': case 'J':
6372     case 'k': case 'K':
6373     case 'l': case 'L':
6374     case 'm': case 'M':
6375     case 'n': case 'N':
6376     case 'o': case 'O':
6377     case 'p': case 'P':
6378     case 'q': case 'Q':
6379     case 'r': case 'R':
6380     case 's': case 'S':
6381     case 't': case 'T':
6382     case 'u': case 'U':
6383               case 'V':
6384     case 'w': case 'W':
6385               case 'X':
6386     case 'y': case 'Y':
6387     case 'z': case 'Z':
6388
6389       keylookup: {
6390         bool anydelim;
6391         I32 tmp;
6392
6393         orig_keyword = 0;
6394         gv = NULL;
6395         gvp = NULL;
6396
6397         PL_bufptr = s;
6398         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6399
6400         /* Some keywords can be followed by any delimiter, including ':' */
6401         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6402
6403         /* x::* is just a word, unless x is "CORE" */
6404         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6405             goto just_a_word;
6406
6407         d = s;
6408         while (d < PL_bufend && isSPACE(*d))
6409                 d++;    /* no comments skipped here, or s### is misparsed */
6410
6411         /* Is this a word before a => operator? */
6412         if (*d == '=' && d[1] == '>') {
6413             CLINE;
6414             pl_yylval.opval
6415                 = (OP*)newSVOP(OP_CONST, 0,
6416                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6417             pl_yylval.opval->op_private = OPpCONST_BARE;
6418             TERM(WORD);
6419         }
6420
6421         /* Check for plugged-in keyword */
6422         {
6423             OP *o;
6424             int result;
6425             char *saved_bufptr = PL_bufptr;
6426             PL_bufptr = s;
6427             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6428             s = PL_bufptr;
6429             if (result == KEYWORD_PLUGIN_DECLINE) {
6430                 /* not a plugged-in keyword */
6431                 PL_bufptr = saved_bufptr;
6432             } else if (result == KEYWORD_PLUGIN_STMT) {
6433                 pl_yylval.opval = o;
6434                 CLINE;
6435                 PL_expect = XSTATE;
6436                 return REPORT(PLUGSTMT);
6437             } else if (result == KEYWORD_PLUGIN_EXPR) {
6438                 pl_yylval.opval = o;
6439                 CLINE;
6440                 PL_expect = XOPERATOR;
6441                 return REPORT(PLUGEXPR);
6442             } else {
6443                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6444                                         PL_tokenbuf);
6445             }
6446         }
6447
6448         /* Check for built-in keyword */
6449         tmp = keyword(PL_tokenbuf, len, 0);
6450
6451         /* Is this a label? */
6452         if (!anydelim && PL_expect == XSTATE
6453               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6454             s = d + 1;
6455             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6456             CLINE;
6457             TOKEN(LABEL);
6458         }
6459
6460         if (tmp < 0) {                  /* second-class keyword? */
6461             GV *ogv = NULL;     /* override (winner) */
6462             GV *hgv = NULL;     /* hidden (loser) */
6463             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6464                 CV *cv;
6465                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6466                     (cv = GvCVu(gv)))
6467                 {
6468                     if (GvIMPORTED_CV(gv))
6469                         ogv = gv;
6470                     else if (! CvMETHOD(cv))
6471                         hgv = gv;
6472                 }
6473                 if (!ogv &&
6474                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6475                     (gv = *gvp) && isGV_with_GP(gv) &&
6476                     GvCVu(gv) && GvIMPORTED_CV(gv))
6477                 {
6478                     ogv = gv;
6479                 }
6480             }
6481             if (ogv) {
6482                 orig_keyword = tmp;
6483                 tmp = 0;                /* overridden by import or by GLOBAL */
6484             }
6485             else if (gv && !gvp
6486                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6487                      && GvCVu(gv))
6488             {
6489                 tmp = 0;                /* any sub overrides "weak" keyword */
6490             }
6491             else {                      /* no override */
6492                 tmp = -tmp;
6493                 if (tmp == KEY_dump) {
6494                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6495                                    "dump() better written as CORE::dump()");
6496                 }
6497                 gv = NULL;
6498                 gvp = 0;
6499                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6500                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6501                                    "Ambiguous call resolved as CORE::%s(), "
6502                                    "qualify as such or use &",
6503                                    GvENAME(hgv));
6504             }
6505         }
6506
6507       reserved_word:
6508         switch (tmp) {
6509
6510         default:                        /* not a keyword */
6511             /* Trade off - by using this evil construction we can pull the
6512                variable gv into the block labelled keylookup. If not, then
6513                we have to give it function scope so that the goto from the
6514                earlier ':' case doesn't bypass the initialisation.  */
6515             if (0) {
6516             just_a_word_zero_gv:
6517                 gv = NULL;
6518                 gvp = NULL;
6519                 orig_keyword = 0;
6520             }
6521           just_a_word: {
6522                 SV *sv;
6523                 int pkgname = 0;
6524                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6525                 OP *rv2cv_op;
6526                 CV *cv;
6527 #ifdef PERL_MAD
6528                 SV *nextPL_nextwhite = 0;
6529 #endif
6530
6531
6532                 /* Get the rest if it looks like a package qualifier */
6533
6534                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6535                     STRLEN morelen;
6536                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6537                                   TRUE, &morelen);
6538                     if (!morelen)
6539                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6540                                 *s == '\'' ? "'" : "::");
6541                     len += morelen;
6542                     pkgname = 1;
6543                 }
6544
6545                 if (PL_expect == XOPERATOR) {
6546                     if (PL_bufptr == PL_linestart) {
6547                         CopLINE_dec(PL_curcop);
6548                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6549                         CopLINE_inc(PL_curcop);
6550                     }
6551                     else
6552                         no_op("Bareword",s);
6553                 }
6554
6555                 /* Look for a subroutine with this name in current package,
6556                    unless name is "Foo::", in which case Foo is a bareword
6557                    (and a package name). */
6558
6559                 if (len > 2 && !PL_madskills &&
6560                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6561                 {
6562                     if (ckWARN(WARN_BAREWORD)
6563                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6564                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6565                             "Bareword \"%s\" refers to nonexistent package",
6566                              PL_tokenbuf);
6567                     len -= 2;
6568                     PL_tokenbuf[len] = '\0';
6569                     gv = NULL;
6570                     gvp = 0;
6571                 }
6572                 else {
6573                     if (!gv) {
6574                         /* Mustn't actually add anything to a symbol table.
6575                            But also don't want to "initialise" any placeholder
6576                            constants that might already be there into full
6577                            blown PVGVs with attached PVCV.  */
6578                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6579                                                GV_NOADD_NOINIT, SVt_PVCV);
6580                     }
6581                     len = 0;
6582                 }
6583
6584                 /* if we saw a global override before, get the right name */
6585
6586                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6587                     len ? len : strlen(PL_tokenbuf));
6588                 if (gvp) {
6589                     SV * const tmp_sv = sv;
6590                     sv = newSVpvs("CORE::GLOBAL::");
6591                     sv_catsv(sv, tmp_sv);
6592                     SvREFCNT_dec(tmp_sv);
6593                 }
6594
6595 #ifdef PERL_MAD
6596                 if (PL_madskills && !PL_thistoken) {
6597                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6598                     PL_thistoken = newSVpvn(start,s - start);
6599                     PL_realtokenstart = s - SvPVX(PL_linestr);
6600                 }
6601 #endif
6602
6603                 /* Presume this is going to be a bareword of some sort. */
6604                 CLINE;
6605                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6606                 pl_yylval.opval->op_private = OPpCONST_BARE;
6607
6608                 /* And if "Foo::", then that's what it certainly is. */
6609                 if (len)
6610                     goto safe_bareword;
6611
6612                 {
6613                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6614                     const_op->op_private = OPpCONST_BARE;
6615                     rv2cv_op = newCVREF(0, const_op);
6616                 }
6617                 cv = rv2cv_op_cv(rv2cv_op, 0);
6618
6619                 /* See if it's the indirect object for a list operator. */
6620
6621                 if (PL_oldoldbufptr &&
6622                     PL_oldoldbufptr < PL_bufptr &&
6623                     (PL_oldoldbufptr == PL_last_lop
6624                      || PL_oldoldbufptr == PL_last_uni) &&
6625                     /* NO SKIPSPACE BEFORE HERE! */
6626                     (PL_expect == XREF ||
6627                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6628                 {
6629                     bool immediate_paren = *s == '(';
6630
6631                     /* (Now we can afford to cross potential line boundary.) */
6632                     s = SKIPSPACE2(s,nextPL_nextwhite);
6633 #ifdef PERL_MAD
6634                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6635 #endif
6636
6637                     /* Two barewords in a row may indicate method call. */
6638
6639                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6640                         (tmp = intuit_method(s, gv, cv))) {
6641                         op_free(rv2cv_op);
6642                         if (tmp == METHOD && !PL_lex_allbrackets &&
6643                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6644                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6645                         return REPORT(tmp);
6646                     }
6647
6648                     /* If not a declared subroutine, it's an indirect object. */
6649                     /* (But it's an indir obj regardless for sort.) */
6650                     /* Also, if "_" follows a filetest operator, it's a bareword */
6651
6652                     if (
6653                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6654                          (!cv &&
6655                         (PL_last_lop_op != OP_MAPSTART &&
6656                          PL_last_lop_op != OP_GREPSTART))))
6657                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6658                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6659                        )
6660                     {
6661                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6662                         goto bareword;
6663                     }
6664                 }
6665
6666                 PL_expect = XOPERATOR;
6667 #ifdef PERL_MAD
6668                 if (isSPACE(*s))
6669                     s = SKIPSPACE2(s,nextPL_nextwhite);
6670                 PL_nextwhite = nextPL_nextwhite;
6671 #else
6672                 s = skipspace(s);
6673 #endif
6674
6675                 /* Is this a word before a => operator? */
6676                 if (*s == '=' && s[1] == '>' && !pkgname) {
6677                     op_free(rv2cv_op);
6678                     CLINE;
6679                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6680                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6681                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6682                     TERM(WORD);
6683                 }
6684
6685                 /* If followed by a paren, it's certainly a subroutine. */
6686                 if (*s == '(') {
6687                     CLINE;
6688                     if (cv) {
6689                         d = s + 1;
6690                         while (SPACE_OR_TAB(*d))
6691                             d++;
6692                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6693                             s = d + 1;
6694                             goto its_constant;
6695                         }
6696                     }
6697 #ifdef PERL_MAD
6698                     if (PL_madskills) {
6699                         PL_nextwhite = PL_thiswhite;
6700                         PL_thiswhite = 0;
6701                     }
6702                     start_force(PL_curforce);
6703 #endif
6704                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6705                     PL_expect = XOPERATOR;
6706 #ifdef PERL_MAD
6707                     if (PL_madskills) {
6708                         PL_nextwhite = nextPL_nextwhite;
6709                         curmad('X', PL_thistoken);
6710                         PL_thistoken = newSVpvs("");
6711                     }
6712 #endif
6713                     op_free(rv2cv_op);
6714                     force_next(WORD);
6715                     pl_yylval.ival = 0;
6716                     TOKEN('&');
6717                 }
6718
6719                 /* If followed by var or block, call it a method (unless sub) */
6720
6721                 if ((*s == '$' || *s == '{') && !cv) {
6722                     op_free(rv2cv_op);
6723                     PL_last_lop = PL_oldbufptr;
6724                     PL_last_lop_op = OP_METHOD;
6725                     if (!PL_lex_allbrackets &&
6726                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6727                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6728                     PREBLOCK(METHOD);
6729                 }
6730
6731                 /* If followed by a bareword, see if it looks like indir obj. */
6732
6733                 if (!orig_keyword
6734                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6735                         && (tmp = intuit_method(s, gv, cv))) {
6736                     op_free(rv2cv_op);
6737                     if (tmp == METHOD && !PL_lex_allbrackets &&
6738                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6739                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6740                     return REPORT(tmp);
6741                 }
6742
6743                 /* Not a method, so call it a subroutine (if defined) */
6744
6745                 if (cv) {
6746                     if (lastchar == '-')
6747                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6748                                          "Ambiguous use of -%s resolved as -&%s()",
6749                                          PL_tokenbuf, PL_tokenbuf);
6750                     /* Check for a constant sub */
6751                     if ((sv = cv_const_sv(cv))) {
6752                   its_constant:
6753                         op_free(rv2cv_op);
6754                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6755                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6756                         pl_yylval.opval->op_private = 0;
6757                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6758                         TOKEN(WORD);
6759                     }
6760
6761                     op_free(pl_yylval.opval);
6762                     pl_yylval.opval = rv2cv_op;
6763                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6764                     PL_last_lop = PL_oldbufptr;
6765                     PL_last_lop_op = OP_ENTERSUB;
6766                     /* Is there a prototype? */
6767                     if (
6768 #ifdef PERL_MAD
6769                         cv &&
6770 #endif
6771                         SvPOK(cv))
6772                     {
6773                         STRLEN protolen;
6774                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6775                         if (!protolen)
6776                             TERM(FUNC0SUB);
6777                         while (*proto == ';')
6778                             proto++;
6779                         if (
6780                             (
6781                                 (
6782                                     *proto == '$' || *proto == '_'
6783                                  || *proto == '*' || *proto == '+'
6784                                 )
6785                              && proto[1] == '\0'
6786                             )
6787                          || (
6788                              *proto == '\\' && proto[1] && proto[2] == '\0'
6789                             )
6790                         )
6791                             OPERATOR(UNIOPSUB);
6792                         if (*proto == '\\' && proto[1] == '[') {
6793                             const char *p = proto + 2;
6794                             while(*p && *p != ']')
6795                                 ++p;
6796                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6797                         }
6798                         if (*proto == '&' && *s == '{') {
6799                             if (PL_curstash)
6800                                 sv_setpvs(PL_subname, "__ANON__");
6801                             else
6802                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6803                             if (!PL_lex_allbrackets &&
6804                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6805                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6806                             PREBLOCK(LSTOPSUB);
6807                         }
6808                     }
6809 #ifdef PERL_MAD
6810                     {
6811                         if (PL_madskills) {
6812                             PL_nextwhite = PL_thiswhite;
6813                             PL_thiswhite = 0;
6814                         }
6815                         start_force(PL_curforce);
6816                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6817                         PL_expect = XTERM;
6818                         if (PL_madskills) {
6819                             PL_nextwhite = nextPL_nextwhite;
6820                             curmad('X', PL_thistoken);
6821                             PL_thistoken = newSVpvs("");
6822                         }
6823                         force_next(WORD);
6824                         if (!PL_lex_allbrackets &&
6825                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6826                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6827                         TOKEN(NOAMP);
6828                     }
6829                 }
6830
6831                 /* Guess harder when madskills require "best effort". */
6832                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6833                     int probable_sub = 0;
6834                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6835                         probable_sub = 1;
6836                     else if (isALPHA(*s)) {
6837                         char tmpbuf[1024];
6838                         STRLEN tmplen;
6839                         d = s;
6840                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6841                         if (!keyword(tmpbuf, tmplen, 0))
6842                             probable_sub = 1;
6843                         else {
6844                             while (d < PL_bufend && isSPACE(*d))
6845                                 d++;
6846                             if (*d == '=' && d[1] == '>')
6847                                 probable_sub = 1;
6848                         }
6849                     }
6850                     if (probable_sub) {
6851                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6852                         op_free(pl_yylval.opval);
6853                         pl_yylval.opval = rv2cv_op;
6854                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6855                         PL_last_lop = PL_oldbufptr;
6856                         PL_last_lop_op = OP_ENTERSUB;
6857                         PL_nextwhite = PL_thiswhite;
6858                         PL_thiswhite = 0;
6859                         start_force(PL_curforce);
6860                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6861                         PL_expect = XTERM;
6862                         PL_nextwhite = nextPL_nextwhite;
6863                         curmad('X', PL_thistoken);
6864                         PL_thistoken = newSVpvs("");
6865                         force_next(WORD);
6866                         if (!PL_lex_allbrackets &&
6867                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6868                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6869                         TOKEN(NOAMP);
6870                     }
6871 #else
6872                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6873                     PL_expect = XTERM;
6874                     force_next(WORD);
6875                     if (!PL_lex_allbrackets &&
6876                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6877                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6878                     TOKEN(NOAMP);
6879 #endif
6880                 }
6881
6882                 /* Call it a bare word */
6883
6884                 if (PL_hints & HINT_STRICT_SUBS)
6885                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6886                 else {
6887                 bareword:
6888                     /* after "print" and similar functions (corresponding to
6889                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6890                      * a filehandle should be subject to "strict subs".
6891                      * Likewise for the optional indirect-object argument to system
6892                      * or exec, which can't be a bareword */
6893                     if ((PL_last_lop_op == OP_PRINT
6894                             || PL_last_lop_op == OP_PRTF
6895                             || PL_last_lop_op == OP_SAY
6896                             || PL_last_lop_op == OP_SYSTEM
6897                             || PL_last_lop_op == OP_EXEC)
6898                             && (PL_hints & HINT_STRICT_SUBS))
6899                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6900                     if (lastchar != '-') {
6901                         if (ckWARN(WARN_RESERVED)) {
6902                             d = PL_tokenbuf;
6903                             while (isLOWER(*d))
6904                                 d++;
6905                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6906                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6907                                        PL_tokenbuf);
6908                         }
6909                     }
6910                 }
6911                 op_free(rv2cv_op);
6912
6913             safe_bareword:
6914                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6915                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6916                                      "Operator or semicolon missing before %c%s",
6917                                      lastchar, PL_tokenbuf);
6918                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6919                                      "Ambiguous use of %c resolved as operator %c",
6920                                      lastchar, lastchar);
6921                 }
6922                 TOKEN(WORD);
6923             }
6924
6925         case KEY___FILE__:
6926             FUN0OP(
6927                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6928             );
6929
6930         case KEY___LINE__:
6931             FUN0OP(
6932                 (OP*)newSVOP(OP_CONST, 0,
6933                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6934             );
6935
6936         case KEY___PACKAGE__:
6937             FUN0OP(
6938                 (OP*)newSVOP(OP_CONST, 0,
6939                                         (PL_curstash
6940                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6941                                          : &PL_sv_undef))
6942             );
6943
6944         case KEY___DATA__:
6945         case KEY___END__: {
6946             GV *gv;
6947             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6948                 const char *pname = "main";
6949                 if (PL_tokenbuf[2] == 'D')
6950                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6951                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6952                                 SVt_PVIO);
6953                 GvMULTI_on(gv);
6954                 if (!GvIO(gv))
6955                     GvIOp(gv) = newIO();
6956                 IoIFP(GvIOp(gv)) = PL_rsfp;
6957 #if defined(HAS_FCNTL) && defined(F_SETFD)
6958                 {
6959                     const int fd = PerlIO_fileno(PL_rsfp);
6960                     fcntl(fd,F_SETFD,fd >= 3);
6961                 }
6962 #endif
6963                 /* Mark this internal pseudo-handle as clean */
6964                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6965                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6966                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6967                 else
6968                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6969 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6970                 /* if the script was opened in binmode, we need to revert
6971                  * it to text mode for compatibility; but only iff it has CRs
6972                  * XXX this is a questionable hack at best. */
6973                 if (PL_bufend-PL_bufptr > 2
6974                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6975                 {
6976                     Off_t loc = 0;
6977                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6978                         loc = PerlIO_tell(PL_rsfp);
6979                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6980                     }
6981 #ifdef NETWARE
6982                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6983 #else
6984                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6985 #endif  /* NETWARE */
6986                         if (loc > 0)
6987                             PerlIO_seek(PL_rsfp, loc, 0);
6988                     }
6989                 }
6990 #endif
6991 #ifdef PERLIO_LAYERS
6992                 if (!IN_BYTES) {
6993                     if (UTF)
6994                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6995                     else if (PL_encoding) {
6996                         SV *name;
6997                         dSP;
6998                         ENTER;
6999                         SAVETMPS;
7000                         PUSHMARK(sp);
7001                         EXTEND(SP, 1);
7002                         XPUSHs(PL_encoding);
7003                         PUTBACK;
7004                         call_method("name", G_SCALAR);
7005                         SPAGAIN;
7006                         name = POPs;
7007                         PUTBACK;
7008                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7009                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7010                                                       SVfARG(name)));
7011                         FREETMPS;
7012                         LEAVE;
7013                     }
7014                 }
7015 #endif
7016 #ifdef PERL_MAD
7017                 if (PL_madskills) {
7018                     if (PL_realtokenstart >= 0) {
7019                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7020                         if (!PL_endwhite)
7021                             PL_endwhite = newSVpvs("");
7022                         sv_catsv(PL_endwhite, PL_thiswhite);
7023                         PL_thiswhite = 0;
7024                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7025                         PL_realtokenstart = -1;
7026                     }
7027                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7028                            != NULL) ;
7029                 }
7030 #endif
7031                 PL_rsfp = NULL;
7032             }
7033             goto fake_eof;
7034         }
7035
7036         case KEY_AUTOLOAD:
7037         case KEY_DESTROY:
7038         case KEY_BEGIN:
7039         case KEY_UNITCHECK:
7040         case KEY_CHECK:
7041         case KEY_INIT:
7042         case KEY_END:
7043             if (PL_expect == XSTATE) {
7044                 s = PL_bufptr;
7045                 goto really_sub;
7046             }
7047             goto just_a_word;
7048
7049         case KEY_CORE:
7050             if (*s == ':' && s[1] == ':') {
7051                 s += 2;
7052                 d = s;
7053                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7054                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7055                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7056                 if (tmp < 0)
7057                     tmp = -tmp;
7058                 else if (tmp == KEY_require || tmp == KEY_do)
7059                     /* that's a way to remember we saw "CORE::" */
7060                     orig_keyword = tmp;
7061                 goto reserved_word;
7062             }
7063             goto just_a_word;
7064
7065         case KEY_abs:
7066             UNI(OP_ABS);
7067
7068         case KEY_alarm:
7069             UNI(OP_ALARM);
7070
7071         case KEY_accept:
7072             LOP(OP_ACCEPT,XTERM);
7073
7074         case KEY_and:
7075             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7076                 return REPORT(0);
7077             OPERATOR(ANDOP);
7078
7079         case KEY_atan2:
7080             LOP(OP_ATAN2,XTERM);
7081
7082         case KEY_bind:
7083             LOP(OP_BIND,XTERM);
7084
7085         case KEY_binmode:
7086             LOP(OP_BINMODE,XTERM);
7087
7088         case KEY_bless:
7089             LOP(OP_BLESS,XTERM);
7090
7091         case KEY_break:
7092             FUN0(OP_BREAK);
7093
7094         case KEY_chop:
7095             UNI(OP_CHOP);
7096
7097         case KEY_continue:
7098                     /* We have to disambiguate the two senses of
7099                       "continue". If the next token is a '{' then
7100                       treat it as the start of a continue block;
7101                       otherwise treat it as a control operator.
7102                      */
7103                     s = skipspace(s);
7104                     if (*s == '{')
7105             PREBLOCK(CONTINUE);
7106                     else
7107                         FUN0(OP_CONTINUE);
7108
7109         case KEY_chdir:
7110             /* may use HOME */
7111             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7112             UNI(OP_CHDIR);
7113
7114         case KEY_close:
7115             UNI(OP_CLOSE);
7116
7117         case KEY_closedir:
7118             UNI(OP_CLOSEDIR);
7119
7120         case KEY_cmp:
7121             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7122                 return REPORT(0);
7123             Eop(OP_SCMP);
7124
7125         case KEY_caller:
7126             UNI(OP_CALLER);
7127
7128         case KEY_crypt:
7129 #ifdef FCRYPT
7130             if (!PL_cryptseen) {
7131                 PL_cryptseen = TRUE;
7132                 init_des();
7133             }
7134 #endif
7135             LOP(OP_CRYPT,XTERM);
7136
7137         case KEY_chmod:
7138             LOP(OP_CHMOD,XTERM);
7139
7140         case KEY_chown:
7141             LOP(OP_CHOWN,XTERM);
7142
7143         case KEY_connect:
7144             LOP(OP_CONNECT,XTERM);
7145
7146         case KEY_chr:
7147             UNI(OP_CHR);
7148
7149         case KEY_cos:
7150             UNI(OP_COS);
7151
7152         case KEY_chroot:
7153             UNI(OP_CHROOT);
7154
7155         case KEY_default:
7156             PREBLOCK(DEFAULT);
7157
7158         case KEY_do:
7159             s = SKIPSPACE1(s);
7160             if (*s == '{')
7161                 PRETERMBLOCK(DO);
7162             if (*s != '\'')
7163                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7164             if (orig_keyword == KEY_do) {
7165                 orig_keyword = 0;
7166                 pl_yylval.ival = 1;
7167             }
7168             else
7169                 pl_yylval.ival = 0;
7170             OPERATOR(DO);
7171
7172         case KEY_die:
7173             PL_hints |= HINT_BLOCK_SCOPE;
7174             LOP(OP_DIE,XTERM);
7175
7176         case KEY_defined:
7177             UNI(OP_DEFINED);
7178
7179         case KEY_delete:
7180             UNI(OP_DELETE);
7181
7182         case KEY_dbmopen:
7183             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7184                               STR_WITH_LEN("NDBM_File::"),
7185                               STR_WITH_LEN("DB_File::"),
7186                               STR_WITH_LEN("GDBM_File::"),
7187                               STR_WITH_LEN("SDBM_File::"),
7188                               STR_WITH_LEN("ODBM_File::"),
7189                               NULL);
7190             LOP(OP_DBMOPEN,XTERM);
7191
7192         case KEY_dbmclose:
7193             UNI(OP_DBMCLOSE);
7194
7195         case KEY_dump:
7196             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7197             LOOPX(OP_DUMP);
7198
7199         case KEY_else:
7200             PREBLOCK(ELSE);
7201
7202         case KEY_elsif:
7203             pl_yylval.ival = CopLINE(PL_curcop);
7204             OPERATOR(ELSIF);
7205
7206         case KEY_eq:
7207             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7208                 return REPORT(0);
7209             Eop(OP_SEQ);
7210
7211         case KEY_exists:
7212             UNI(OP_EXISTS);
7213
7214         case KEY_exit:
7215             if (PL_madskills)
7216                 UNI(OP_INT);
7217             UNI(OP_EXIT);
7218
7219         case KEY_eval:
7220             s = SKIPSPACE1(s);
7221             if (*s == '{') { /* block eval */
7222                 PL_expect = XTERMBLOCK;
7223                 UNIBRACK(OP_ENTERTRY);
7224             }
7225             else { /* string eval */
7226                 PL_expect = XTERM;
7227                 UNIBRACK(OP_ENTEREVAL);
7228             }
7229
7230         case KEY_eof:
7231             UNI(OP_EOF);
7232
7233         case KEY_exp:
7234             UNI(OP_EXP);
7235
7236         case KEY_each:
7237             UNI(OP_EACH);
7238
7239         case KEY_exec:
7240             LOP(OP_EXEC,XREF);
7241
7242         case KEY_endhostent:
7243             FUN0(OP_EHOSTENT);
7244
7245         case KEY_endnetent:
7246             FUN0(OP_ENETENT);
7247
7248         case KEY_endservent:
7249             FUN0(OP_ESERVENT);
7250
7251         case KEY_endprotoent:
7252             FUN0(OP_EPROTOENT);
7253
7254         case KEY_endpwent:
7255             FUN0(OP_EPWENT);
7256
7257         case KEY_endgrent:
7258             FUN0(OP_EGRENT);
7259
7260         case KEY_for:
7261         case KEY_foreach:
7262             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7263                 return REPORT(0);
7264             pl_yylval.ival = CopLINE(PL_curcop);
7265             s = SKIPSPACE1(s);
7266             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7267                 char *p = s;
7268 #ifdef PERL_MAD
7269                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7270 #endif
7271
7272                 if ((PL_bufend - p) >= 3 &&
7273                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7274                     p += 2;
7275                 else if ((PL_bufend - p) >= 4 &&
7276                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7277                     p += 3;
7278                 p = PEEKSPACE(p);
7279                 if (isIDFIRST_lazy_if(p,UTF)) {
7280                     p = scan_ident(p, PL_bufend,
7281                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7282                     p = PEEKSPACE(p);
7283                 }
7284                 if (*p != '$')
7285                     Perl_croak(aTHX_ "Missing $ on loop variable");
7286 #ifdef PERL_MAD
7287                 s = SvPVX(PL_linestr) + soff;
7288 #endif
7289             }
7290             OPERATOR(FOR);
7291
7292         case KEY_formline:
7293             LOP(OP_FORMLINE,XTERM);
7294
7295         case KEY_fork:
7296             FUN0(OP_FORK);
7297
7298         case KEY_fcntl:
7299             LOP(OP_FCNTL,XTERM);
7300
7301         case KEY_fileno:
7302             UNI(OP_FILENO);
7303
7304         case KEY_flock:
7305             LOP(OP_FLOCK,XTERM);
7306
7307         case KEY_gt:
7308             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7309                 return REPORT(0);
7310             Rop(OP_SGT);
7311
7312         case KEY_ge:
7313             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7314                 return REPORT(0);
7315             Rop(OP_SGE);
7316
7317         case KEY_grep:
7318             LOP(OP_GREPSTART, XREF);
7319
7320         case KEY_goto:
7321             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7322             LOOPX(OP_GOTO);
7323
7324         case KEY_gmtime:
7325             UNI(OP_GMTIME);
7326
7327         case KEY_getc:
7328             UNIDOR(OP_GETC);
7329
7330         case KEY_getppid:
7331             FUN0(OP_GETPPID);
7332
7333         case KEY_getpgrp:
7334             UNI(OP_GETPGRP);
7335
7336         case KEY_getpriority:
7337             LOP(OP_GETPRIORITY,XTERM);
7338
7339         case KEY_getprotobyname:
7340             UNI(OP_GPBYNAME);
7341
7342         case KEY_getprotobynumber:
7343             LOP(OP_GPBYNUMBER,XTERM);
7344
7345         case KEY_getprotoent:
7346             FUN0(OP_GPROTOENT);
7347
7348         case KEY_getpwent:
7349             FUN0(OP_GPWENT);
7350
7351         case KEY_getpwnam:
7352             UNI(OP_GPWNAM);
7353
7354         case KEY_getpwuid:
7355             UNI(OP_GPWUID);
7356
7357         case KEY_getpeername:
7358             UNI(OP_GETPEERNAME);
7359
7360         case KEY_gethostbyname:
7361             UNI(OP_GHBYNAME);
7362
7363         case KEY_gethostbyaddr:
7364             LOP(OP_GHBYADDR,XTERM);
7365
7366         case KEY_gethostent:
7367             FUN0(OP_GHOSTENT);
7368
7369         case KEY_getnetbyname:
7370             UNI(OP_GNBYNAME);
7371
7372         case KEY_getnetbyaddr:
7373             LOP(OP_GNBYADDR,XTERM);
7374
7375         case KEY_getnetent:
7376             FUN0(OP_GNETENT);
7377
7378         case KEY_getservbyname:
7379             LOP(OP_GSBYNAME,XTERM);
7380
7381         case KEY_getservbyport:
7382             LOP(OP_GSBYPORT,XTERM);
7383
7384         case KEY_getservent:
7385             FUN0(OP_GSERVENT);
7386
7387         case KEY_getsockname:
7388             UNI(OP_GETSOCKNAME);
7389
7390         case KEY_getsockopt:
7391             LOP(OP_GSOCKOPT,XTERM);
7392
7393         case KEY_getgrent:
7394             FUN0(OP_GGRENT);
7395
7396         case KEY_getgrnam:
7397             UNI(OP_GGRNAM);
7398
7399         case KEY_getgrgid:
7400             UNI(OP_GGRGID);
7401
7402         case KEY_getlogin:
7403             FUN0(OP_GETLOGIN);
7404
7405         case KEY_given:
7406             pl_yylval.ival = CopLINE(PL_curcop);
7407             OPERATOR(GIVEN);
7408
7409         case KEY_glob:
7410             LOP(OP_GLOB,XTERM);
7411
7412         case KEY_hex:
7413             UNI(OP_HEX);
7414
7415         case KEY_if:
7416             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7417                 return REPORT(0);
7418             pl_yylval.ival = CopLINE(PL_curcop);
7419             OPERATOR(IF);
7420
7421         case KEY_index:
7422             LOP(OP_INDEX,XTERM);
7423
7424         case KEY_int:
7425             UNI(OP_INT);
7426
7427         case KEY_ioctl:
7428             LOP(OP_IOCTL,XTERM);
7429
7430         case KEY_join:
7431             LOP(OP_JOIN,XTERM);
7432
7433         case KEY_keys:
7434             UNI(OP_KEYS);
7435
7436         case KEY_kill:
7437             LOP(OP_KILL,XTERM);
7438
7439         case KEY_last:
7440             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7441             LOOPX(OP_LAST);
7442
7443         case KEY_lc:
7444             UNI(OP_LC);
7445
7446         case KEY_lcfirst:
7447             UNI(OP_LCFIRST);
7448
7449         case KEY_local:
7450             pl_yylval.ival = 0;
7451             OPERATOR(LOCAL);
7452
7453         case KEY_length:
7454             UNI(OP_LENGTH);
7455
7456         case KEY_lt:
7457             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7458                 return REPORT(0);
7459             Rop(OP_SLT);
7460
7461         case KEY_le:
7462             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7463                 return REPORT(0);
7464             Rop(OP_SLE);
7465
7466         case KEY_localtime:
7467             UNI(OP_LOCALTIME);
7468
7469         case KEY_log:
7470             UNI(OP_LOG);
7471
7472         case KEY_link:
7473             LOP(OP_LINK,XTERM);
7474
7475         case KEY_listen:
7476             LOP(OP_LISTEN,XTERM);
7477
7478         case KEY_lock:
7479             UNI(OP_LOCK);
7480
7481         case KEY_lstat:
7482             UNI(OP_LSTAT);
7483
7484         case KEY_m:
7485             s = scan_pat(s,OP_MATCH);
7486             TERM(sublex_start());
7487
7488         case KEY_map:
7489             LOP(OP_MAPSTART, XREF);
7490
7491         case KEY_mkdir:
7492             LOP(OP_MKDIR,XTERM);
7493
7494         case KEY_msgctl:
7495             LOP(OP_MSGCTL,XTERM);
7496
7497         case KEY_msgget:
7498             LOP(OP_MSGGET,XTERM);
7499
7500         case KEY_msgrcv:
7501             LOP(OP_MSGRCV,XTERM);
7502
7503         case KEY_msgsnd:
7504             LOP(OP_MSGSND,XTERM);
7505
7506         case KEY_our:
7507         case KEY_my:
7508         case KEY_state:
7509             PL_in_my = (U16)tmp;
7510             s = SKIPSPACE1(s);
7511             if (isIDFIRST_lazy_if(s,UTF)) {
7512 #ifdef PERL_MAD
7513                 char* start = s;
7514 #endif
7515                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7516                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7517                     goto really_sub;
7518                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7519                 if (!PL_in_my_stash) {
7520                     char tmpbuf[1024];
7521                     PL_bufptr = s;
7522                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7523                     yyerror(tmpbuf);
7524                 }
7525 #ifdef PERL_MAD
7526                 if (PL_madskills) {     /* just add type to declarator token */
7527                     sv_catsv(PL_thistoken, PL_nextwhite);
7528                     PL_nextwhite = 0;
7529                     sv_catpvn(PL_thistoken, start, s - start);
7530                 }
7531 #endif
7532             }
7533             pl_yylval.ival = 1;
7534             OPERATOR(MY);
7535
7536         case KEY_next:
7537             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7538             LOOPX(OP_NEXT);
7539
7540         case KEY_ne:
7541             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7542                 return REPORT(0);
7543             Eop(OP_SNE);
7544
7545         case KEY_no:
7546             s = tokenize_use(0, s);
7547             OPERATOR(USE);
7548
7549         case KEY_not:
7550             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7551                 FUN1(OP_NOT);
7552             else {
7553                 if (!PL_lex_allbrackets &&
7554                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7555                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7556                 OPERATOR(NOTOP);
7557             }
7558
7559         case KEY_open:
7560             s = SKIPSPACE1(s);
7561             if (isIDFIRST_lazy_if(s,UTF)) {
7562                 const char *t;
7563                 for (d = s; isALNUM_lazy_if(d,UTF);)
7564                     d++;
7565                 for (t=d; isSPACE(*t);)
7566                     t++;
7567                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7568                     /* [perl #16184] */
7569                     && !(t[0] == '=' && t[1] == '>')
7570                 ) {
7571                     int parms_len = (int)(d-s);
7572                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7573                            "Precedence problem: open %.*s should be open(%.*s)",
7574                             parms_len, s, parms_len, s);
7575                 }
7576             }
7577             LOP(OP_OPEN,XTERM);
7578
7579         case KEY_or:
7580             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7581                 return REPORT(0);
7582             pl_yylval.ival = OP_OR;
7583             OPERATOR(OROP);
7584
7585         case KEY_ord:
7586             UNI(OP_ORD);
7587
7588         case KEY_oct:
7589             UNI(OP_OCT);
7590
7591         case KEY_opendir:
7592             LOP(OP_OPEN_DIR,XTERM);
7593
7594         case KEY_print:
7595             checkcomma(s,PL_tokenbuf,"filehandle");
7596             LOP(OP_PRINT,XREF);
7597
7598         case KEY_printf:
7599             checkcomma(s,PL_tokenbuf,"filehandle");
7600             LOP(OP_PRTF,XREF);
7601
7602         case KEY_prototype:
7603             UNI(OP_PROTOTYPE);
7604
7605         case KEY_push:
7606             LOP(OP_PUSH,XTERM);
7607
7608         case KEY_pop:
7609             UNIDOR(OP_POP);
7610
7611         case KEY_pos:
7612             UNIDOR(OP_POS);
7613
7614         case KEY_pack:
7615             LOP(OP_PACK,XTERM);
7616
7617         case KEY_package:
7618             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7619             s = SKIPSPACE1(s);
7620             s = force_strict_version(s);
7621             PL_lex_expect = XBLOCK;
7622             OPERATOR(PACKAGE);
7623
7624         case KEY_pipe:
7625             LOP(OP_PIPE_OP,XTERM);
7626
7627         case KEY_q:
7628             s = scan_str(s,!!PL_madskills,FALSE);
7629             if (!s)
7630                 missingterm(NULL);
7631             pl_yylval.ival = OP_CONST;
7632             TERM(sublex_start());
7633
7634         case KEY_quotemeta:
7635             UNI(OP_QUOTEMETA);
7636
7637         case KEY_qw: {
7638             OP *words = NULL;
7639             s = scan_str(s,!!PL_madskills,FALSE);
7640             if (!s)
7641                 missingterm(NULL);
7642             PL_expect = XOPERATOR;
7643             if (SvCUR(PL_lex_stuff)) {
7644                 int warned_comma = !ckWARN(WARN_QW);
7645                 int warned_comment = warned_comma;
7646                 d = SvPV_force(PL_lex_stuff, len);
7647                 while (len) {
7648                     for (; isSPACE(*d) && len; --len, ++d)
7649                         /**/;
7650                     if (len) {
7651                         SV *sv;
7652                         const char *b = d;
7653                         if (!warned_comma || !warned_comment) {
7654                             for (; !isSPACE(*d) && len; --len, ++d) {
7655                                 if (!warned_comma && *d == ',') {
7656                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7657                                         "Possible attempt to separate words with commas");
7658                                     ++warned_comma;
7659                                 }
7660                                 else if (!warned_comment && *d == '#') {
7661                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7662                                         "Possible attempt to put comments in qw() list");
7663                                     ++warned_comment;
7664                                 }
7665                             }
7666                         }
7667                         else {
7668                             for (; !isSPACE(*d) && len; --len, ++d)
7669                                 /**/;
7670                         }
7671                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7672                         words = op_append_elem(OP_LIST, words,
7673                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7674                     }
7675                 }
7676             }
7677             if (!words)
7678                 words = newNULLLIST();
7679             if (PL_lex_stuff) {
7680                 SvREFCNT_dec(PL_lex_stuff);
7681                 PL_lex_stuff = NULL;
7682             }
7683             PL_expect = XOPERATOR;
7684             pl_yylval.opval = sawparens(words);
7685             TOKEN(QWLIST);
7686         }
7687
7688         case KEY_qq:
7689             s = scan_str(s,!!PL_madskills,FALSE);
7690             if (!s)
7691                 missingterm(NULL);
7692             pl_yylval.ival = OP_STRINGIFY;
7693             if (SvIVX(PL_lex_stuff) == '\'')
7694                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7695             TERM(sublex_start());
7696
7697         case KEY_qr:
7698             s = scan_pat(s,OP_QR);
7699             TERM(sublex_start());
7700
7701         case KEY_qx:
7702             s = scan_str(s,!!PL_madskills,FALSE);
7703             if (!s)
7704                 missingterm(NULL);
7705             readpipe_override();
7706             TERM(sublex_start());
7707
7708         case KEY_return:
7709             OLDLOP(OP_RETURN);
7710
7711         case KEY_require:
7712             s = SKIPSPACE1(s);
7713             if (isDIGIT(*s)) {
7714                 s = force_version(s, FALSE);
7715             }
7716             else if (*s != 'v' || !isDIGIT(s[1])
7717                     || (s = force_version(s, TRUE), *s == 'v'))
7718             {
7719                 *PL_tokenbuf = '\0';
7720                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7721                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7722                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7723                 else if (*s == '<')
7724                     yyerror("<> should be quotes");
7725             }
7726             if (orig_keyword == KEY_require) {
7727                 orig_keyword = 0;
7728                 pl_yylval.ival = 1;
7729             }
7730             else
7731                 pl_yylval.ival = 0;
7732             PL_expect = XTERM;
7733             PL_bufptr = s;
7734             PL_last_uni = PL_oldbufptr;
7735             PL_last_lop_op = OP_REQUIRE;
7736             s = skipspace(s);
7737             return REPORT( (int)REQUIRE );
7738
7739         case KEY_reset:
7740             UNI(OP_RESET);
7741
7742         case KEY_redo:
7743             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7744             LOOPX(OP_REDO);
7745
7746         case KEY_rename:
7747             LOP(OP_RENAME,XTERM);
7748
7749         case KEY_rand:
7750             UNI(OP_RAND);
7751
7752         case KEY_rmdir:
7753             UNI(OP_RMDIR);
7754
7755         case KEY_rindex:
7756             LOP(OP_RINDEX,XTERM);
7757
7758         case KEY_read:
7759             LOP(OP_READ,XTERM);
7760
7761         case KEY_readdir:
7762             UNI(OP_READDIR);
7763
7764         case KEY_readline:
7765             UNIDOR(OP_READLINE);
7766
7767         case KEY_readpipe:
7768             UNIDOR(OP_BACKTICK);
7769
7770         case KEY_rewinddir:
7771             UNI(OP_REWINDDIR);
7772
7773         case KEY_recv:
7774             LOP(OP_RECV,XTERM);
7775
7776         case KEY_reverse:
7777             LOP(OP_REVERSE,XTERM);
7778
7779         case KEY_readlink:
7780             UNIDOR(OP_READLINK);
7781
7782         case KEY_ref:
7783             UNI(OP_REF);
7784
7785         case KEY_s:
7786             s = scan_subst(s);
7787             if (pl_yylval.opval)
7788                 TERM(sublex_start());
7789             else
7790                 TOKEN(1);       /* force error */
7791
7792         case KEY_say:
7793             checkcomma(s,PL_tokenbuf,"filehandle");
7794             LOP(OP_SAY,XREF);
7795
7796         case KEY_chomp:
7797             UNI(OP_CHOMP);
7798
7799         case KEY_scalar:
7800             UNI(OP_SCALAR);
7801
7802         case KEY_select:
7803             LOP(OP_SELECT,XTERM);
7804
7805         case KEY_seek:
7806             LOP(OP_SEEK,XTERM);
7807
7808         case KEY_semctl:
7809             LOP(OP_SEMCTL,XTERM);
7810
7811         case KEY_semget:
7812             LOP(OP_SEMGET,XTERM);
7813
7814         case KEY_semop:
7815             LOP(OP_SEMOP,XTERM);
7816
7817         case KEY_send:
7818             LOP(OP_SEND,XTERM);
7819
7820         case KEY_setpgrp:
7821             LOP(OP_SETPGRP,XTERM);
7822
7823         case KEY_setpriority:
7824             LOP(OP_SETPRIORITY,XTERM);
7825
7826         case KEY_sethostent:
7827             UNI(OP_SHOSTENT);
7828
7829         case KEY_setnetent:
7830             UNI(OP_SNETENT);
7831
7832         case KEY_setservent:
7833             UNI(OP_SSERVENT);
7834
7835         case KEY_setprotoent:
7836             UNI(OP_SPROTOENT);
7837
7838         case KEY_setpwent:
7839             FUN0(OP_SPWENT);
7840
7841         case KEY_setgrent:
7842             FUN0(OP_SGRENT);
7843
7844         case KEY_seekdir:
7845             LOP(OP_SEEKDIR,XTERM);
7846
7847         case KEY_setsockopt:
7848             LOP(OP_SSOCKOPT,XTERM);
7849
7850         case KEY_shift:
7851             UNIDOR(OP_SHIFT);
7852
7853         case KEY_shmctl:
7854             LOP(OP_SHMCTL,XTERM);
7855
7856         case KEY_shmget:
7857             LOP(OP_SHMGET,XTERM);
7858
7859         case KEY_shmread:
7860             LOP(OP_SHMREAD,XTERM);
7861
7862         case KEY_shmwrite:
7863             LOP(OP_SHMWRITE,XTERM);
7864
7865         case KEY_shutdown:
7866             LOP(OP_SHUTDOWN,XTERM);
7867
7868         case KEY_sin:
7869             UNI(OP_SIN);
7870
7871         case KEY_sleep:
7872             UNI(OP_SLEEP);
7873
7874         case KEY_socket:
7875             LOP(OP_SOCKET,XTERM);
7876
7877         case KEY_socketpair:
7878             LOP(OP_SOCKPAIR,XTERM);
7879
7880         case KEY_sort:
7881             checkcomma(s,PL_tokenbuf,"subroutine name");
7882             s = SKIPSPACE1(s);
7883             if (*s == ';' || *s == ')')         /* probably a close */
7884                 Perl_croak(aTHX_ "sort is now a reserved word");
7885             PL_expect = XTERM;
7886             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7887             LOP(OP_SORT,XREF);
7888
7889         case KEY_split:
7890             LOP(OP_SPLIT,XTERM);
7891
7892         case KEY_sprintf:
7893             LOP(OP_SPRINTF,XTERM);
7894
7895         case KEY_splice:
7896             LOP(OP_SPLICE,XTERM);
7897
7898         case KEY_sqrt:
7899             UNI(OP_SQRT);
7900
7901         case KEY_srand:
7902             UNI(OP_SRAND);
7903
7904         case KEY_stat:
7905             UNI(OP_STAT);
7906
7907         case KEY_study:
7908             UNI(OP_STUDY);
7909
7910         case KEY_substr:
7911             LOP(OP_SUBSTR,XTERM);
7912
7913         case KEY_format:
7914         case KEY_sub:
7915           really_sub:
7916             {
7917                 char tmpbuf[sizeof PL_tokenbuf];
7918                 SSize_t tboffset = 0;
7919                 expectation attrful;
7920                 bool have_name, have_proto;
7921                 const int key = tmp;
7922
7923 #ifdef PERL_MAD
7924                 SV *tmpwhite = 0;
7925
7926                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7927                 SV *subtoken = newSVpvn(tstart, s - tstart);
7928                 PL_thistoken = 0;
7929
7930                 d = s;
7931                 s = SKIPSPACE2(s,tmpwhite);
7932 #else
7933                 s = skipspace(s);
7934 #endif
7935
7936                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7937                     (*s == ':' && s[1] == ':'))
7938                 {
7939 #ifdef PERL_MAD
7940                     SV *nametoke = NULL;
7941 #endif
7942
7943                     PL_expect = XBLOCK;
7944                     attrful = XATTRBLOCK;
7945                     /* remember buffer pos'n for later force_word */
7946                     tboffset = s - PL_oldbufptr;
7947                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7948 #ifdef PERL_MAD
7949                     if (PL_madskills)
7950                         nametoke = newSVpvn(s, d - s);
7951 #endif
7952                     if (memchr(tmpbuf, ':', len))
7953                         sv_setpvn(PL_subname, tmpbuf, len);
7954                     else {
7955                         sv_setsv(PL_subname,PL_curstname);
7956                         sv_catpvs(PL_subname,"::");
7957                         sv_catpvn(PL_subname,tmpbuf,len);
7958                     }
7959                     have_name = TRUE;
7960
7961 #ifdef PERL_MAD
7962
7963                     start_force(0);
7964                     CURMAD('X', nametoke);
7965                     CURMAD('_', tmpwhite);
7966                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7967                                       FALSE, TRUE, TRUE);
7968
7969                     s = SKIPSPACE2(d,tmpwhite);
7970 #else
7971                     s = skipspace(d);
7972 #endif
7973                 }
7974                 else {
7975                     if (key == KEY_my)
7976                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7977                     PL_expect = XTERMBLOCK;
7978                     attrful = XATTRTERM;
7979                     sv_setpvs(PL_subname,"?");
7980                     have_name = FALSE;
7981                 }
7982
7983                 if (key == KEY_format) {
7984                     if (*s == '=')
7985                         PL_lex_formbrack = PL_lex_brackets + 1;
7986 #ifdef PERL_MAD
7987                     PL_thistoken = subtoken;
7988                     s = d;
7989 #else
7990                     if (have_name)
7991                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7992                                           FALSE, TRUE, TRUE);
7993 #endif
7994                     OPERATOR(FORMAT);
7995                 }
7996
7997                 /* Look for a prototype */
7998                 if (*s == '(') {
7999                     char *p;
8000                     bool bad_proto = FALSE;
8001                     bool in_brackets = FALSE;
8002                     char greedy_proto = ' ';
8003                     bool proto_after_greedy_proto = FALSE;
8004                     bool must_be_last = FALSE;
8005                     bool underscore = FALSE;
8006                     bool seen_underscore = FALSE;
8007                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8008
8009                     s = scan_str(s,!!PL_madskills,FALSE);
8010                     if (!s)
8011                         Perl_croak(aTHX_ "Prototype not terminated");
8012                     /* strip spaces and check for bad characters */
8013                     d = SvPVX(PL_lex_stuff);
8014                     tmp = 0;
8015                     for (p = d; *p; ++p) {
8016                         if (!isSPACE(*p)) {
8017                             d[tmp++] = *p;
8018
8019                             if (warnillegalproto) {
8020                                 if (must_be_last)
8021                                     proto_after_greedy_proto = TRUE;
8022                                 if (!strchr("$@%*;[]&\\_+", *p)) {
8023                                     bad_proto = TRUE;
8024                                 }
8025                                 else {
8026                                     if ( underscore ) {
8027                                         if ( *p != ';' )
8028                                             bad_proto = TRUE;
8029                                         underscore = FALSE;
8030                                     }
8031                                     if ( *p == '[' ) {
8032                                         in_brackets = TRUE;
8033                                     }
8034                                     else if ( *p == ']' ) {
8035                                         in_brackets = FALSE;
8036                                     }
8037                                     else if ( (*p == '@' || *p == '%') &&
8038                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8039                                          !in_brackets ) {
8040                                         must_be_last = TRUE;
8041                                         greedy_proto = *p;
8042                                     }
8043                                     else if ( *p == '_' ) {
8044                                         underscore = seen_underscore = TRUE;
8045                                     }
8046                                 }
8047                             }
8048                         }
8049                     }
8050                     d[tmp] = '\0';
8051                     if (proto_after_greedy_proto)
8052                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8053                                     "Prototype after '%c' for %"SVf" : %s",
8054                                     greedy_proto, SVfARG(PL_subname), d);
8055                     if (bad_proto)
8056                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8057                                     "Illegal character %sin prototype for %"SVf" : %s",
8058                                     seen_underscore ? "after '_' " : "",
8059                                     SVfARG(PL_subname), d);
8060                     SvCUR_set(PL_lex_stuff, tmp);
8061                     have_proto = TRUE;
8062
8063 #ifdef PERL_MAD
8064                     start_force(0);
8065                     CURMAD('q', PL_thisopen);
8066                     CURMAD('_', tmpwhite);
8067                     CURMAD('=', PL_thisstuff);
8068                     CURMAD('Q', PL_thisclose);
8069                     NEXTVAL_NEXTTOKE.opval =
8070                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8071                     PL_lex_stuff = NULL;
8072                     force_next(THING);
8073
8074                     s = SKIPSPACE2(s,tmpwhite);
8075 #else
8076                     s = skipspace(s);
8077 #endif
8078                 }
8079                 else
8080                     have_proto = FALSE;
8081
8082                 if (*s == ':' && s[1] != ':')
8083                     PL_expect = attrful;
8084                 else if (*s != '{' && key == KEY_sub) {
8085                     if (!have_name)
8086                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8087                     else if (*s != ';' && *s != '}')
8088                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8089                 }
8090
8091 #ifdef PERL_MAD
8092                 start_force(0);
8093                 if (tmpwhite) {
8094                     if (PL_madskills)
8095                         curmad('^', newSVpvs(""));
8096                     CURMAD('_', tmpwhite);
8097                 }
8098                 force_next(0);
8099
8100                 PL_thistoken = subtoken;
8101 #else
8102                 if (have_proto) {
8103                     NEXTVAL_NEXTTOKE.opval =
8104                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8105                     PL_lex_stuff = NULL;
8106                     force_next(THING);
8107                 }
8108 #endif
8109                 if (!have_name) {
8110                     if (PL_curstash)
8111                         sv_setpvs(PL_subname, "__ANON__");
8112                     else
8113                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8114                     TOKEN(ANONSUB);
8115                 }
8116 #ifndef PERL_MAD
8117                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8118                                   FALSE, TRUE, TRUE);
8119 #endif
8120                 if (key == KEY_my)
8121                     TOKEN(MYSUB);
8122                 TOKEN(SUB);
8123             }
8124
8125         case KEY_system:
8126             LOP(OP_SYSTEM,XREF);
8127
8128         case KEY_symlink:
8129             LOP(OP_SYMLINK,XTERM);
8130
8131         case KEY_syscall:
8132             LOP(OP_SYSCALL,XTERM);
8133
8134         case KEY_sysopen:
8135             LOP(OP_SYSOPEN,XTERM);
8136
8137         case KEY_sysseek:
8138             LOP(OP_SYSSEEK,XTERM);
8139
8140         case KEY_sysread:
8141             LOP(OP_SYSREAD,XTERM);
8142
8143         case KEY_syswrite:
8144             LOP(OP_SYSWRITE,XTERM);
8145
8146         case KEY_tr:
8147             s = scan_trans(s);
8148             TERM(sublex_start());
8149
8150         case KEY_tell:
8151             UNI(OP_TELL);
8152
8153         case KEY_telldir:
8154             UNI(OP_TELLDIR);
8155
8156         case KEY_tie:
8157             LOP(OP_TIE,XTERM);
8158
8159         case KEY_tied:
8160             UNI(OP_TIED);
8161
8162         case KEY_time:
8163             FUN0(OP_TIME);
8164
8165         case KEY_times:
8166             FUN0(OP_TMS);
8167
8168         case KEY_truncate:
8169             LOP(OP_TRUNCATE,XTERM);
8170
8171         case KEY_uc:
8172             UNI(OP_UC);
8173
8174         case KEY_ucfirst:
8175             UNI(OP_UCFIRST);
8176
8177         case KEY_untie:
8178             UNI(OP_UNTIE);
8179
8180         case KEY_until:
8181             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8182                 return REPORT(0);
8183             pl_yylval.ival = CopLINE(PL_curcop);
8184             OPERATOR(UNTIL);
8185
8186         case KEY_unless:
8187             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8188                 return REPORT(0);
8189             pl_yylval.ival = CopLINE(PL_curcop);
8190             OPERATOR(UNLESS);
8191
8192         case KEY_unlink:
8193             LOP(OP_UNLINK,XTERM);
8194
8195         case KEY_undef:
8196             UNIDOR(OP_UNDEF);
8197
8198         case KEY_unpack:
8199             LOP(OP_UNPACK,XTERM);
8200
8201         case KEY_utime:
8202             LOP(OP_UTIME,XTERM);
8203
8204         case KEY_umask:
8205             UNIDOR(OP_UMASK);
8206
8207         case KEY_unshift:
8208             LOP(OP_UNSHIFT,XTERM);
8209
8210         case KEY_use:
8211             s = tokenize_use(1, s);
8212             OPERATOR(USE);
8213
8214         case KEY_values:
8215             UNI(OP_VALUES);
8216
8217         case KEY_vec:
8218             LOP(OP_VEC,XTERM);
8219
8220         case KEY_when:
8221             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8222                 return REPORT(0);
8223             pl_yylval.ival = CopLINE(PL_curcop);
8224             OPERATOR(WHEN);
8225
8226         case KEY_while:
8227             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8228                 return REPORT(0);
8229             pl_yylval.ival = CopLINE(PL_curcop);
8230             OPERATOR(WHILE);
8231
8232         case KEY_warn:
8233             PL_hints |= HINT_BLOCK_SCOPE;
8234             LOP(OP_WARN,XTERM);
8235
8236         case KEY_wait:
8237             FUN0(OP_WAIT);
8238
8239         case KEY_waitpid:
8240             LOP(OP_WAITPID,XTERM);
8241
8242         case KEY_wantarray:
8243             FUN0(OP_WANTARRAY);
8244
8245         case KEY_write:
8246 #ifdef EBCDIC
8247         {
8248             char ctl_l[2];
8249             ctl_l[0] = toCTRL('L');
8250             ctl_l[1] = '\0';
8251             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8252         }
8253 #else
8254             /* Make sure $^L is defined */
8255             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8256 #endif
8257             UNI(OP_ENTERWRITE);
8258
8259         case KEY_x:
8260             if (PL_expect == XOPERATOR) {
8261                 if (*s == '=' && !PL_lex_allbrackets &&
8262                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8263                     return REPORT(0);
8264                 Mop(OP_REPEAT);
8265             }
8266             check_uni();
8267             goto just_a_word;
8268
8269         case KEY_xor:
8270             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8271                 return REPORT(0);
8272             pl_yylval.ival = OP_XOR;
8273             OPERATOR(OROP);
8274
8275         case KEY_y:
8276             s = scan_trans(s);
8277             TERM(sublex_start());
8278         }
8279     }}
8280 }
8281 #ifdef __SC__
8282 #pragma segment Main
8283 #endif
8284
8285 static int
8286 S_pending_ident(pTHX)
8287 {
8288     dVAR;
8289     register char *d;
8290     PADOFFSET tmp = 0;
8291     /* pit holds the identifier we read and pending_ident is reset */
8292     char pit = PL_pending_ident;
8293     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8294     /* All routes through this function want to know if there is a colon.  */
8295     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8296     PL_pending_ident = 0;
8297
8298     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8299     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8300           "### Pending identifier '%s'\n", PL_tokenbuf); });
8301
8302     /* if we're in a my(), we can't allow dynamics here.
8303        $foo'bar has already been turned into $foo::bar, so
8304        just check for colons.
8305
8306        if it's a legal name, the OP is a PADANY.
8307     */
8308     if (PL_in_my) {
8309         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8310             if (has_colon)
8311                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8312                                   "variable %s in \"our\"",
8313                                   PL_tokenbuf));
8314             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8315         }
8316         else {
8317             if (has_colon)
8318                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8319                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8320
8321             pl_yylval.opval = newOP(OP_PADANY, 0);
8322             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8323                                                         UTF ? SVf_UTF8 : 0);
8324             return PRIVATEREF;
8325         }
8326     }
8327
8328     /*
8329        build the ops for accesses to a my() variable.
8330
8331        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8332        then used in a comparison.  This catches most, but not
8333        all cases.  For instance, it catches
8334            sort { my($a); $a <=> $b }
8335        but not
8336            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8337        (although why you'd do that is anyone's guess).
8338     */
8339
8340     if (!has_colon) {
8341         if (!PL_in_my)
8342             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8343                                     UTF ? SVf_UTF8 : 0);
8344         if (tmp != NOT_IN_PAD) {
8345             /* might be an "our" variable" */
8346             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8347                 /* build ops for a bareword */
8348                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8349                 HEK * const stashname = HvNAME_HEK(stash);
8350                 SV *  const sym = newSVhek(stashname);
8351                 sv_catpvs(sym, "::");
8352                 sv_catsv(sym, newSVpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, SVs_TEMP | (UTF ? SVf_UTF8 : 0 )));
8353                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8354                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8355                 gv_fetchsv(sym,
8356                     (PL_in_eval
8357                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8358                         : GV_ADDMULTI
8359                     ),
8360                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8361                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8362                      : SVt_PVHV));
8363                 return WORD;
8364             }
8365
8366             /* if it's a sort block and they're naming $a or $b */
8367             if (PL_last_lop_op == OP_SORT &&
8368                 PL_tokenbuf[0] == '$' &&
8369                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8370                 && !PL_tokenbuf[2])
8371             {
8372                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8373                      d < PL_bufend && *d != '\n';
8374                      d++)
8375                 {
8376                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8377                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8378                               PL_tokenbuf);
8379                     }
8380                 }
8381             }
8382
8383             pl_yylval.opval = newOP(OP_PADANY, 0);
8384             pl_yylval.opval->op_targ = tmp;
8385             return PRIVATEREF;
8386         }
8387     }
8388
8389     /*
8390        Whine if they've said @foo in a doublequoted string,
8391        and @foo isn't a variable we can find in the symbol
8392        table.
8393     */
8394     if (ckWARN(WARN_AMBIGUOUS) &&
8395         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8396         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8397                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8398         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8399                 /* DO NOT warn for @- and @+ */
8400                 && !( PL_tokenbuf[2] == '\0' &&
8401                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8402            )
8403         {
8404             /* Downgraded from fatal to warning 20000522 mjd */
8405             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8406                         "Possible unintended interpolation of %s in string",
8407                         PL_tokenbuf);
8408         }
8409     }
8410
8411     /* build ops for a bareword */
8412     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8413                                                       tokenbuf_len - 1,
8414                                                       UTF ? SVf_UTF8 : 0 ));
8415     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8416     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8417                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8418                      | ( UTF ? SVf_UTF8 : 0 ),
8419                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8420                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8421                       : SVt_PVHV));
8422     return WORD;
8423 }
8424
8425 STATIC void
8426 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8427 {
8428     dVAR;
8429
8430     PERL_ARGS_ASSERT_CHECKCOMMA;
8431
8432     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8433         if (ckWARN(WARN_SYNTAX)) {
8434             int level = 1;
8435             const char *w;
8436             for (w = s+2; *w && level; w++) {
8437                 if (*w == '(')
8438                     ++level;
8439                 else if (*w == ')')
8440                     --level;
8441             }
8442             while (isSPACE(*w))
8443                 ++w;
8444             /* the list of chars below is for end of statements or
8445              * block / parens, boolean operators (&&, ||, //) and branch
8446              * constructs (or, and, if, until, unless, while, err, for).
8447              * Not a very solid hack... */
8448             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8449                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8450                             "%s (...) interpreted as function",name);
8451         }
8452     }
8453     while (s < PL_bufend && isSPACE(*s))
8454         s++;
8455     if (*s == '(')
8456         s++;
8457     while (s < PL_bufend && isSPACE(*s))
8458         s++;
8459     if (isIDFIRST_lazy_if(s,UTF)) {
8460         const char * const w = s++;
8461         while (isALNUM_lazy_if(s,UTF))
8462             s++;
8463         while (s < PL_bufend && isSPACE(*s))
8464             s++;
8465         if (*s == ',') {
8466             GV* gv;
8467             if (keyword(w, s - w, 0))
8468                 return;
8469
8470             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8471             if (gv && GvCVu(gv))
8472                 return;
8473             Perl_croak(aTHX_ "No comma allowed after %s", what);
8474         }
8475     }
8476 }
8477
8478 /* Either returns sv, or mortalizes sv and returns a new SV*.
8479    Best used as sv=new_constant(..., sv, ...).
8480    If s, pv are NULL, calls subroutine with one argument,
8481    and type is used with error messages only. */
8482
8483 STATIC SV *
8484 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8485                SV *sv, SV *pv, const char *type, STRLEN typelen)
8486 {
8487     dVAR; dSP;
8488     HV * const table = GvHV(PL_hintgv);          /* ^H */
8489     SV *res;
8490     SV **cvp;
8491     SV *cv, *typesv;
8492     const char *why1 = "", *why2 = "", *why3 = "";
8493
8494     PERL_ARGS_ASSERT_NEW_CONSTANT;
8495
8496     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8497         SV *msg;
8498
8499         why2 = (const char *)
8500             (strEQ(key,"charnames")
8501              ? "(possibly a missing \"use charnames ...\")"
8502              : "");
8503         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8504                             (type ? type: "undef"), why2);
8505
8506         /* This is convoluted and evil ("goto considered harmful")
8507          * but I do not understand the intricacies of all the different
8508          * failure modes of %^H in here.  The goal here is to make
8509          * the most probable error message user-friendly. --jhi */
8510
8511         goto msgdone;
8512
8513     report:
8514         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8515                             (type ? type: "undef"), why1, why2, why3);
8516     msgdone:
8517         yyerror(SvPVX_const(msg));
8518         SvREFCNT_dec(msg);
8519         return sv;
8520     }
8521
8522     /* charnames doesn't work well if there have been errors found */
8523     if (PL_error_count > 0 && strEQ(key,"charnames"))
8524         return &PL_sv_undef;
8525
8526     cvp = hv_fetch(table, key, keylen, FALSE);
8527     if (!cvp || !SvOK(*cvp)) {
8528         why1 = "$^H{";
8529         why2 = key;
8530         why3 = "} is not defined";
8531         goto report;
8532     }
8533     sv_2mortal(sv);                     /* Parent created it permanently */
8534     cv = *cvp;
8535     if (!pv && s)
8536         pv = newSVpvn_flags(s, len, SVs_TEMP);
8537     if (type && pv)
8538         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8539     else
8540         typesv = &PL_sv_undef;
8541
8542     PUSHSTACKi(PERLSI_OVERLOAD);
8543     ENTER ;
8544     SAVETMPS;
8545
8546     PUSHMARK(SP) ;
8547     EXTEND(sp, 3);
8548     if (pv)
8549         PUSHs(pv);
8550     PUSHs(sv);
8551     if (pv)
8552         PUSHs(typesv);
8553     PUTBACK;
8554     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8555
8556     SPAGAIN ;
8557
8558     /* Check the eval first */
8559     if (!PL_in_eval && SvTRUE(ERRSV)) {
8560         sv_catpvs(ERRSV, "Propagated");
8561         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8562         (void)POPs;
8563         res = SvREFCNT_inc_simple(sv);
8564     }
8565     else {
8566         res = POPs;
8567         SvREFCNT_inc_simple_void(res);
8568     }
8569
8570     PUTBACK ;
8571     FREETMPS ;
8572     LEAVE ;
8573     POPSTACK;
8574
8575     if (!SvOK(res)) {
8576         why1 = "Call to &{$^H{";
8577         why2 = key;
8578         why3 = "}} did not return a defined value";
8579         sv = res;
8580         goto report;
8581     }
8582
8583     return res;
8584 }
8585
8586 /* Returns a NUL terminated string, with the length of the string written to
8587    *slp
8588    */
8589 STATIC char *
8590 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8591 {
8592     dVAR;
8593     register char *d = dest;
8594     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8595
8596     PERL_ARGS_ASSERT_SCAN_WORD;
8597
8598     for (;;) {
8599         if (d >= e)
8600             Perl_croak(aTHX_ ident_too_long);
8601         if (isALNUM(*s))        /* UTF handled below */
8602             *d++ = *s++;
8603         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8604             *d++ = ':';
8605             *d++ = ':';
8606             s++;
8607         }
8608         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8609             *d++ = *s++;
8610             *d++ = *s++;
8611         }
8612         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8613             char *t = s + UTF8SKIP(s);
8614             size_t len;
8615             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8616                 t += UTF8SKIP(t);
8617             len = t - s;
8618             if (d + len > e)
8619                 Perl_croak(aTHX_ ident_too_long);
8620             Copy(s, d, len, char);
8621             d += len;
8622             s = t;
8623         }
8624         else {
8625             *d = '\0';
8626             *slp = d - dest;
8627             return s;
8628         }
8629     }
8630 }
8631
8632 STATIC char *
8633 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8634 {
8635     dVAR;
8636     char *bracket = NULL;
8637     char funny = *s++;
8638     register char *d = dest;
8639     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8640
8641     PERL_ARGS_ASSERT_SCAN_IDENT;
8642
8643     if (isSPACE(*s))
8644         s = PEEKSPACE(s);
8645     if (isDIGIT(*s)) {
8646         while (isDIGIT(*s)) {
8647             if (d >= e)
8648                 Perl_croak(aTHX_ ident_too_long);
8649             *d++ = *s++;
8650         }
8651     }
8652     else {
8653         for (;;) {
8654             if (d >= e)
8655                 Perl_croak(aTHX_ ident_too_long);
8656             if (isALNUM(*s))    /* UTF handled below */
8657                 *d++ = *s++;
8658             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8659                 *d++ = ':';
8660                 *d++ = ':';
8661                 s++;
8662             }
8663             else if (*s == ':' && s[1] == ':') {
8664                 *d++ = *s++;
8665                 *d++ = *s++;
8666             }
8667             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8668                 char *t = s + UTF8SKIP(s);
8669                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8670                     t += UTF8SKIP(t);
8671                 if (d + (t - s) > e)
8672                     Perl_croak(aTHX_ ident_too_long);
8673                 Copy(s, d, t - s, char);
8674                 d += t - s;
8675                 s = t;
8676             }
8677             else
8678                 break;
8679         }
8680     }
8681     *d = '\0';
8682     d = dest;
8683     if (*d) {
8684         if (PL_lex_state != LEX_NORMAL)
8685             PL_lex_state = LEX_INTERPENDMAYBE;
8686         return s;
8687     }
8688     if (*s == '$' && s[1] &&
8689         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8690     {
8691         return s;
8692     }
8693     if (*s == '{') {
8694         bracket = s;
8695         s++;
8696     }
8697     else if (ck_uni)
8698         check_uni();
8699     if (s < send)
8700         *d = *s++;
8701     d[1] = '\0';
8702     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8703         *d = toCTRL(*s);
8704         s++;
8705     }
8706     if (bracket) {
8707         if (isSPACE(s[-1])) {
8708             while (s < send) {
8709                 const char ch = *s++;
8710                 if (!SPACE_OR_TAB(ch)) {
8711                     *d = ch;
8712                     break;
8713                 }
8714             }
8715         }
8716         if (isIDFIRST_lazy_if(d,UTF)) {
8717             d++;
8718             if (UTF) {
8719                 char *end = s;
8720                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8721                     end += UTF8SKIP(end);
8722                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8723                         end += UTF8SKIP(end);
8724                 }
8725                 Copy(s, d, end - s, char);
8726                 d += end - s;
8727                 s = end;
8728             }
8729             else {
8730                 while ((isALNUM(*s) || *s == ':') && d < e)
8731                     *d++ = *s++;
8732                 if (d >= e)
8733                     Perl_croak(aTHX_ ident_too_long);
8734             }
8735             *d = '\0';
8736             while (s < send && SPACE_OR_TAB(*s))
8737                 s++;
8738             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8739                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8740                     const char * const brack =
8741                         (const char *)
8742                         ((*s == '[') ? "[...]" : "{...}");
8743    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8744                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8745                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8746                         funny, dest, brack, funny, dest, brack);
8747                 }
8748                 bracket++;
8749                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8750                 PL_lex_allbrackets++;
8751                 return s;
8752             }
8753         }
8754         /* Handle extended ${^Foo} variables
8755          * 1999-02-27 mjd-perl-patch@plover.com */
8756         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8757                  && isALNUM(*s))
8758         {
8759             d++;
8760             while (isALNUM(*s) && d < e) {
8761                 *d++ = *s++;
8762             }
8763             if (d >= e)
8764                 Perl_croak(aTHX_ ident_too_long);
8765             *d = '\0';
8766         }
8767         if (*s == '}') {
8768             s++;
8769             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8770                 PL_lex_state = LEX_INTERPEND;
8771                 PL_expect = XREF;
8772             }
8773             if (PL_lex_state == LEX_NORMAL) {
8774                 if (ckWARN(WARN_AMBIGUOUS) &&
8775                     (keyword(dest, d - dest, 0)
8776                      || get_cvn_flags(dest, d - dest, 0)))
8777                 {
8778                     if (funny == '#')
8779                         funny = '@';
8780                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8781                         "Ambiguous use of %c{%s} resolved to %c%s",
8782                         funny, dest, funny, dest);
8783                 }
8784             }
8785         }
8786         else {
8787             s = bracket;                /* let the parser handle it */
8788             *dest = '\0';
8789         }
8790     }
8791     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8792         PL_lex_state = LEX_INTERPEND;
8793     return s;
8794 }
8795
8796 static bool
8797 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8798
8799     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8800      * the parse starting at 's', based on the subset that are valid in this
8801      * context input to this routine in 'valid_flags'. Advances s.  Returns
8802      * TRUE if the input was a valid flag, so the next char may be as well;
8803      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8804      * current regex.  This routine will set it to any charset modifier found.
8805      * The caller shouldn't change it.  This way, another charset modifier
8806      * encountered in the parse can be detected as an error, as we have decided
8807      * allow only one */
8808
8809     const char c = **s;
8810
8811     if (! strchr(valid_flags, c)) {
8812         if (isALNUM(c)) {
8813             goto deprecate;
8814         }
8815         return FALSE;
8816     }
8817
8818     switch (c) {
8819
8820         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8821         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8822         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8823         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8824         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8825         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8826         case LOCALE_PAT_MOD:
8827
8828             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8829              * can't be regex modifiers.
8830              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8831              * much as easily done.  s///lei, for example, has to mean regex
8832              * modifiers if it's not an error (as does any word character
8833              * following the 'e').  Otherwise, we resolve to the backwards-
8834              * compatible, but less likely 's/// le ...', i.e. as meaning
8835              * less-than-or-equal.  The reason it's not likely is that s//
8836              * returns a number for code in the field (/r returns a string, but
8837              * that wasn't added until the 5.13 series), and so '<=' should be
8838              * used for comparing, not 'le'. */
8839             if (*((*s) + 1) == 't') {
8840                 goto deprecate;
8841             }
8842             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8843
8844                 /* 'e' is valid only for substitutes, s///e.  If it is not
8845                  * valid in the current context, then 'm//le' must mean the
8846                  * comparison operator, so use the regular deprecation message.
8847                  */
8848                 if (! strchr(valid_flags, 'e')) {
8849                     goto deprecate;
8850                 }
8851                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8852                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8853                 return FALSE;
8854             }
8855             if (*charset) {
8856                 goto multiple_charsets;
8857             }
8858             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8859             *charset = c;
8860             break;
8861         case UNICODE_PAT_MOD:
8862             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8863              * 'n' means they can't be regex modifiers */
8864             if (*((*s) + 1) == 'n') {
8865                 goto deprecate;
8866             }
8867             if (*charset) {
8868                 goto multiple_charsets;
8869             }
8870             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8871             *charset = c;
8872             break;
8873         case ASCII_RESTRICT_PAT_MOD:
8874             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8875              * can't be regex modifiers */
8876             if (*((*s) + 1) == 'n') {
8877                 goto deprecate;
8878             }
8879
8880             if (! *charset) {
8881                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8882             }
8883             else {
8884
8885                 /* Error if previous modifier wasn't an 'a', but if it was, see
8886                  * if, and accept, a second occurrence (only) */
8887                 if (*charset != 'a'
8888                     || get_regex_charset(*pmfl)
8889                         != REGEX_ASCII_RESTRICTED_CHARSET)
8890                 {
8891                         goto multiple_charsets;
8892                 }
8893                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8894             }
8895             *charset = c;
8896             break;
8897         case DEPENDS_PAT_MOD:
8898             if (*charset) {
8899                 goto multiple_charsets;
8900             }
8901             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8902             *charset = c;
8903             break;
8904     }
8905
8906     (*s)++;
8907     return TRUE;
8908
8909     deprecate:
8910         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8911             "Having no space between pattern and following word is deprecated");
8912         return FALSE;
8913
8914     multiple_charsets:
8915         if (*charset != c) {
8916             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8917         }
8918         else if (c == 'a') {
8919             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8920         }
8921         else {
8922             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8923         }
8924
8925         /* Pretend that it worked, so will continue processing before dieing */
8926         (*s)++;
8927         return TRUE;
8928 }
8929
8930 STATIC char *
8931 S_scan_pat(pTHX_ char *start, I32 type)
8932 {
8933     dVAR;
8934     PMOP *pm;
8935     char *s = scan_str(start,!!PL_madskills,FALSE);
8936     const char * const valid_flags =
8937         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8938     char charset = '\0';    /* character set modifier */
8939 #ifdef PERL_MAD
8940     char *modstart;
8941 #endif
8942
8943     PERL_ARGS_ASSERT_SCAN_PAT;
8944
8945     if (!s) {
8946         const char * const delimiter = skipspace(start);
8947         Perl_croak(aTHX_
8948                    (const char *)
8949                    (*delimiter == '?'
8950                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8951                     : "Search pattern not terminated" ));
8952     }
8953
8954     pm = (PMOP*)newPMOP(type, 0);
8955     if (PL_multi_open == '?') {
8956         /* This is the only point in the code that sets PMf_ONCE:  */
8957         pm->op_pmflags |= PMf_ONCE;
8958
8959         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8960            allows us to restrict the list needed by reset to just the ??
8961            matches.  */
8962         assert(type != OP_TRANS);
8963         if (PL_curstash) {
8964             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8965             U32 elements;
8966             if (!mg) {
8967                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8968                                  0);
8969             }
8970             elements = mg->mg_len / sizeof(PMOP**);
8971             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8972             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8973             mg->mg_len = elements * sizeof(PMOP**);
8974             PmopSTASH_set(pm,PL_curstash);
8975         }
8976     }
8977 #ifdef PERL_MAD
8978     modstart = s;
8979 #endif
8980     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8981 #ifdef PERL_MAD
8982     if (PL_madskills && modstart != s) {
8983         SV* tmptoken = newSVpvn(modstart, s - modstart);
8984         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8985     }
8986 #endif
8987     /* issue a warning if /c is specified,but /g is not */
8988     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8989     {
8990         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8991                        "Use of /c modifier is meaningless without /g" );
8992     }
8993
8994     PL_lex_op = (OP*)pm;
8995     pl_yylval.ival = OP_MATCH;
8996     return s;
8997 }
8998
8999 STATIC char *
9000 S_scan_subst(pTHX_ char *start)
9001 {
9002     dVAR;
9003     char *s;
9004     register PMOP *pm;
9005     I32 first_start;
9006     I32 es = 0;
9007     char charset = '\0';    /* character set modifier */
9008 #ifdef PERL_MAD
9009     char *modstart;
9010 #endif
9011
9012     PERL_ARGS_ASSERT_SCAN_SUBST;
9013
9014     pl_yylval.ival = OP_NULL;
9015
9016     s = scan_str(start,!!PL_madskills,FALSE);
9017
9018     if (!s)
9019         Perl_croak(aTHX_ "Substitution pattern not terminated");
9020
9021     if (s[-1] == PL_multi_open)
9022         s--;
9023 #ifdef PERL_MAD
9024     if (PL_madskills) {
9025         CURMAD('q', PL_thisopen);
9026         CURMAD('_', PL_thiswhite);
9027         CURMAD('E', PL_thisstuff);
9028         CURMAD('Q', PL_thisclose);
9029         PL_realtokenstart = s - SvPVX(PL_linestr);
9030     }
9031 #endif
9032
9033     first_start = PL_multi_start;
9034     s = scan_str(s,!!PL_madskills,FALSE);
9035     if (!s) {
9036         if (PL_lex_stuff) {
9037             SvREFCNT_dec(PL_lex_stuff);
9038             PL_lex_stuff = NULL;
9039         }
9040         Perl_croak(aTHX_ "Substitution replacement not terminated");
9041     }
9042     PL_multi_start = first_start;       /* so whole substitution is taken together */
9043
9044     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9045
9046 #ifdef PERL_MAD
9047     if (PL_madskills) {
9048         CURMAD('z', PL_thisopen);
9049         CURMAD('R', PL_thisstuff);
9050         CURMAD('Z', PL_thisclose);
9051     }
9052     modstart = s;
9053 #endif
9054
9055     while (*s) {
9056         if (*s == EXEC_PAT_MOD) {
9057             s++;
9058             es++;
9059         }
9060         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9061         {
9062             break;
9063         }
9064     }
9065
9066 #ifdef PERL_MAD
9067     if (PL_madskills) {
9068         if (modstart != s)
9069             curmad('m', newSVpvn(modstart, s - modstart));
9070         append_madprops(PL_thismad, (OP*)pm, 0);
9071         PL_thismad = 0;
9072     }
9073 #endif
9074     if ((pm->op_pmflags & PMf_CONTINUE)) {
9075         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9076     }
9077
9078     if (es) {
9079         SV * const repl = newSVpvs("");
9080
9081         PL_sublex_info.super_bufptr = s;
9082         PL_sublex_info.super_bufend = PL_bufend;
9083         PL_multi_end = 0;
9084         pm->op_pmflags |= PMf_EVAL;
9085         while (es-- > 0) {
9086             if (es)
9087                 sv_catpvs(repl, "eval ");
9088             else
9089                 sv_catpvs(repl, "do ");
9090         }
9091         sv_catpvs(repl, "{");
9092         sv_catsv(repl, PL_lex_repl);
9093         if (strchr(SvPVX(PL_lex_repl), '#'))
9094             sv_catpvs(repl, "\n");
9095         sv_catpvs(repl, "}");
9096         SvEVALED_on(repl);
9097         SvREFCNT_dec(PL_lex_repl);
9098         PL_lex_repl = repl;
9099     }
9100
9101     PL_lex_op = (OP*)pm;
9102     pl_yylval.ival = OP_SUBST;
9103     return s;
9104 }
9105
9106 STATIC char *
9107 S_scan_trans(pTHX_ char *start)
9108 {
9109     dVAR;
9110     register char* s;
9111     OP *o;
9112     short *tbl;
9113     U8 squash;
9114     U8 del;
9115     U8 complement;
9116     bool nondestruct = 0;
9117 #ifdef PERL_MAD
9118     char *modstart;
9119 #endif
9120
9121     PERL_ARGS_ASSERT_SCAN_TRANS;
9122
9123     pl_yylval.ival = OP_NULL;
9124
9125     s = scan_str(start,!!PL_madskills,FALSE);
9126     if (!s)
9127         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9128
9129     if (s[-1] == PL_multi_open)
9130         s--;
9131 #ifdef PERL_MAD
9132     if (PL_madskills) {
9133         CURMAD('q', PL_thisopen);
9134         CURMAD('_', PL_thiswhite);
9135         CURMAD('E', PL_thisstuff);
9136         CURMAD('Q', PL_thisclose);
9137         PL_realtokenstart = s - SvPVX(PL_linestr);
9138     }
9139 #endif
9140
9141     s = scan_str(s,!!PL_madskills,FALSE);
9142     if (!s) {
9143         if (PL_lex_stuff) {
9144             SvREFCNT_dec(PL_lex_stuff);
9145             PL_lex_stuff = NULL;
9146         }
9147         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9148     }
9149     if (PL_madskills) {
9150         CURMAD('z', PL_thisopen);
9151         CURMAD('R', PL_thisstuff);
9152         CURMAD('Z', PL_thisclose);
9153     }
9154
9155     complement = del = squash = 0;
9156 #ifdef PERL_MAD
9157     modstart = s;
9158 #endif
9159     while (1) {
9160         switch (*s) {
9161         case 'c':
9162             complement = OPpTRANS_COMPLEMENT;
9163             break;
9164         case 'd':
9165             del = OPpTRANS_DELETE;
9166             break;
9167         case 's':
9168             squash = OPpTRANS_SQUASH;
9169             break;
9170         case 'r':
9171             nondestruct = 1;
9172             break;
9173         default:
9174             goto no_more;
9175         }
9176         s++;
9177     }
9178   no_more:
9179
9180     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9181     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9182     o->op_private &= ~OPpTRANS_ALL;
9183     o->op_private |= del|squash|complement|
9184       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9185       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9186
9187     PL_lex_op = o;
9188     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9189
9190 #ifdef PERL_MAD
9191     if (PL_madskills) {
9192         if (modstart != s)
9193             curmad('m', newSVpvn(modstart, s - modstart));
9194         append_madprops(PL_thismad, o, 0);
9195         PL_thismad = 0;
9196     }
9197 #endif
9198
9199     return s;
9200 }
9201
9202 STATIC char *
9203 S_scan_heredoc(pTHX_ register char *s)
9204 {
9205     dVAR;
9206     SV *herewas;
9207     I32 op_type = OP_SCALAR;
9208     I32 len;
9209     SV *tmpstr;
9210     char term;
9211     const char *found_newline;
9212     register char *d;
9213     register char *e;
9214     char *peek;
9215     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9216 #ifdef PERL_MAD
9217     I32 stuffstart = s - SvPVX(PL_linestr);
9218     char *tstart;
9219
9220     PL_realtokenstart = -1;
9221 #endif
9222
9223     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9224
9225     s += 2;
9226     d = PL_tokenbuf;
9227     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9228     if (!outer)
9229         *d++ = '\n';
9230     peek = s;
9231     while (SPACE_OR_TAB(*peek))
9232         peek++;
9233     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9234         s = peek;
9235         term = *s++;
9236         s = delimcpy(d, e, s, PL_bufend, term, &len);
9237         d += len;
9238         if (s < PL_bufend)
9239             s++;
9240     }
9241     else {
9242         if (*s == '\\')
9243             s++, term = '\'';
9244         else
9245             term = '"';
9246         if (!isALNUM_lazy_if(s,UTF))
9247             deprecate("bare << to mean <<\"\"");
9248         for (; isALNUM_lazy_if(s,UTF); s++) {
9249             if (d < e)
9250                 *d++ = *s;
9251         }
9252     }
9253     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9254         Perl_croak(aTHX_ "Delimiter for here document is too long");
9255     *d++ = '\n';
9256     *d = '\0';
9257     len = d - PL_tokenbuf;
9258
9259 #ifdef PERL_MAD
9260     if (PL_madskills) {
9261         tstart = PL_tokenbuf + !outer;
9262         PL_thisclose = newSVpvn(tstart, len - !outer);
9263         tstart = SvPVX(PL_linestr) + stuffstart;
9264         PL_thisopen = newSVpvn(tstart, s - tstart);
9265         stuffstart = s - SvPVX(PL_linestr);
9266     }
9267 #endif
9268 #ifndef PERL_STRICT_CR
9269     d = strchr(s, '\r');
9270     if (d) {
9271         char * const olds = s;
9272         s = d;
9273         while (s < PL_bufend) {
9274             if (*s == '\r') {
9275                 *d++ = '\n';
9276                 if (*++s == '\n')
9277                     s++;
9278             }
9279             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9280                 *d++ = *s++;
9281                 s++;
9282             }
9283             else
9284                 *d++ = *s++;
9285         }
9286         *d = '\0';
9287         PL_bufend = d;
9288         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9289         s = olds;
9290     }
9291 #endif
9292 #ifdef PERL_MAD
9293     found_newline = 0;
9294 #endif
9295     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9296         herewas = newSVpvn(s,PL_bufend-s);
9297     }
9298     else {
9299 #ifdef PERL_MAD
9300         herewas = newSVpvn(s-1,found_newline-s+1);
9301 #else
9302         s--;
9303         herewas = newSVpvn(s,found_newline-s);
9304 #endif
9305     }
9306 #ifdef PERL_MAD
9307     if (PL_madskills) {
9308         tstart = SvPVX(PL_linestr) + stuffstart;
9309         if (PL_thisstuff)
9310             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9311         else
9312             PL_thisstuff = newSVpvn(tstart, s - tstart);
9313     }
9314 #endif
9315     s += SvCUR(herewas);
9316
9317 #ifdef PERL_MAD
9318     stuffstart = s - SvPVX(PL_linestr);
9319
9320     if (found_newline)
9321         s--;
9322 #endif
9323
9324     tmpstr = newSV_type(SVt_PVIV);
9325     SvGROW(tmpstr, 80);
9326     if (term == '\'') {
9327         op_type = OP_CONST;
9328         SvIV_set(tmpstr, -1);
9329     }
9330     else if (term == '`') {
9331         op_type = OP_BACKTICK;
9332         SvIV_set(tmpstr, '\\');
9333     }
9334
9335     CLINE;
9336     PL_multi_start = CopLINE(PL_curcop);
9337     PL_multi_open = PL_multi_close = '<';
9338     term = *PL_tokenbuf;
9339     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9340         char * const bufptr = PL_sublex_info.super_bufptr;
9341         char * const bufend = PL_sublex_info.super_bufend;
9342         char * const olds = s - SvCUR(herewas);
9343         s = strchr(bufptr, '\n');
9344         if (!s)
9345             s = bufend;
9346         d = s;
9347         while (s < bufend &&
9348           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9349             if (*s++ == '\n')
9350                 CopLINE_inc(PL_curcop);
9351         }
9352         if (s >= bufend) {
9353             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9354             missingterm(PL_tokenbuf);
9355         }
9356         sv_setpvn(herewas,bufptr,d-bufptr+1);
9357         sv_setpvn(tmpstr,d+1,s-d);
9358         s += len - 1;
9359         sv_catpvn(herewas,s,bufend-s);
9360         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9361
9362         s = olds;
9363         goto retval;
9364     }
9365     else if (!outer) {
9366         d = s;
9367         while (s < PL_bufend &&
9368           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9369             if (*s++ == '\n')
9370                 CopLINE_inc(PL_curcop);
9371         }
9372         if (s >= PL_bufend) {
9373             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9374             missingterm(PL_tokenbuf);
9375         }
9376         sv_setpvn(tmpstr,d+1,s-d);
9377 #ifdef PERL_MAD
9378         if (PL_madskills) {
9379             if (PL_thisstuff)
9380                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9381             else
9382                 PL_thisstuff = newSVpvn(d + 1, s - d);
9383             stuffstart = s - SvPVX(PL_linestr);
9384         }
9385 #endif
9386         s += len - 1;
9387         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9388
9389         sv_catpvn(herewas,s,PL_bufend-s);
9390         sv_setsv(PL_linestr,herewas);
9391         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9392         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9393         PL_last_lop = PL_last_uni = NULL;
9394     }
9395     else
9396         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9397     while (s >= PL_bufend) {    /* multiple line string? */
9398 #ifdef PERL_MAD
9399         if (PL_madskills) {
9400             tstart = SvPVX(PL_linestr) + stuffstart;
9401             if (PL_thisstuff)
9402                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9403             else
9404                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9405         }
9406 #endif
9407         PL_bufptr = s;
9408         CopLINE_inc(PL_curcop);
9409         if (!outer || !lex_next_chunk(0)) {
9410             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9411             missingterm(PL_tokenbuf);
9412         }
9413         CopLINE_dec(PL_curcop);
9414         s = PL_bufptr;
9415 #ifdef PERL_MAD
9416         stuffstart = s - SvPVX(PL_linestr);
9417 #endif
9418         CopLINE_inc(PL_curcop);
9419         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9420         PL_last_lop = PL_last_uni = NULL;
9421 #ifndef PERL_STRICT_CR
9422         if (PL_bufend - PL_linestart >= 2) {
9423             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9424                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9425             {
9426                 PL_bufend[-2] = '\n';
9427                 PL_bufend--;
9428                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9429             }
9430             else if (PL_bufend[-1] == '\r')
9431                 PL_bufend[-1] = '\n';
9432         }
9433         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9434             PL_bufend[-1] = '\n';
9435 #endif
9436         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9437             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9438             *(SvPVX(PL_linestr) + off ) = ' ';
9439             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9440             sv_catsv(PL_linestr,herewas);
9441             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9442             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9443         }
9444         else {
9445             s = PL_bufend;
9446             sv_catsv(tmpstr,PL_linestr);
9447         }
9448     }
9449     s++;
9450 retval:
9451     PL_multi_end = CopLINE(PL_curcop);
9452     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9453         SvPV_shrink_to_cur(tmpstr);
9454     }
9455     SvREFCNT_dec(herewas);
9456     if (!IN_BYTES) {
9457         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9458             SvUTF8_on(tmpstr);
9459         else if (PL_encoding)
9460             sv_recode_to_utf8(tmpstr, PL_encoding);
9461     }
9462     PL_lex_stuff = tmpstr;
9463     pl_yylval.ival = op_type;
9464     return s;
9465 }
9466
9467 /* scan_inputsymbol
9468    takes: current position in input buffer
9469    returns: new position in input buffer
9470    side-effects: pl_yylval and lex_op are set.
9471
9472    This code handles:
9473
9474    <>           read from ARGV
9475    <FH>         read from filehandle
9476    <pkg::FH>    read from package qualified filehandle
9477    <pkg'FH>     read from package qualified filehandle
9478    <$fh>        read from filehandle in $fh
9479    <*.h>        filename glob
9480
9481 */
9482
9483 STATIC char *
9484 S_scan_inputsymbol(pTHX_ char *start)
9485 {
9486     dVAR;
9487     register char *s = start;           /* current position in buffer */
9488     char *end;
9489     I32 len;
9490     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9491     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9492
9493     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9494
9495     end = strchr(s, '\n');
9496     if (!end)
9497         end = PL_bufend;
9498     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9499
9500     /* die if we didn't have space for the contents of the <>,
9501        or if it didn't end, or if we see a newline
9502     */
9503
9504     if (len >= (I32)sizeof PL_tokenbuf)
9505         Perl_croak(aTHX_ "Excessively long <> operator");
9506     if (s >= end)
9507         Perl_croak(aTHX_ "Unterminated <> operator");
9508
9509     s++;
9510
9511     /* check for <$fh>
9512        Remember, only scalar variables are interpreted as filehandles by
9513        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9514        treated as a glob() call.
9515        This code makes use of the fact that except for the $ at the front,
9516        a scalar variable and a filehandle look the same.
9517     */
9518     if (*d == '$' && d[1]) d++;
9519
9520     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9521     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9522         d++;
9523
9524     /* If we've tried to read what we allow filehandles to look like, and
9525        there's still text left, then it must be a glob() and not a getline.
9526        Use scan_str to pull out the stuff between the <> and treat it
9527        as nothing more than a string.
9528     */
9529
9530     if (d - PL_tokenbuf != len) {
9531         pl_yylval.ival = OP_GLOB;
9532         s = scan_str(start,!!PL_madskills,FALSE);
9533         if (!s)
9534            Perl_croak(aTHX_ "Glob not terminated");
9535         return s;
9536     }
9537     else {
9538         bool readline_overriden = FALSE;
9539         GV *gv_readline;
9540         GV **gvp;
9541         /* we're in a filehandle read situation */
9542         d = PL_tokenbuf;
9543
9544         /* turn <> into <ARGV> */
9545         if (!len)
9546             Copy("ARGV",d,5,char);
9547
9548         /* Check whether readline() is overriden */
9549         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9550         if ((gv_readline
9551                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9552                 ||
9553                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9554                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9555                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9556             readline_overriden = TRUE;
9557
9558         /* if <$fh>, create the ops to turn the variable into a
9559            filehandle
9560         */
9561         if (*d == '$') {
9562             /* try to find it in the pad for this block, otherwise find
9563                add symbol table ops
9564             */
9565             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9566             if (tmp != NOT_IN_PAD) {
9567                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9568                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9569                     HEK * const stashname = HvNAME_HEK(stash);
9570                     SV * const sym = sv_2mortal(newSVhek(stashname));
9571                     sv_catpvs(sym, "::");
9572                     sv_catpv(sym, d+1);
9573                     d = SvPVX(sym);
9574                     goto intro_sym;
9575                 }
9576                 else {
9577                     OP * const o = newOP(OP_PADSV, 0);
9578                     o->op_targ = tmp;
9579                     PL_lex_op = readline_overriden
9580                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9581                                 op_append_elem(OP_LIST, o,
9582                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9583                         : (OP*)newUNOP(OP_READLINE, 0, o);
9584                 }
9585             }
9586             else {
9587                 GV *gv;
9588                 ++d;
9589 intro_sym:
9590                 gv = gv_fetchpv(d,
9591                                 (PL_in_eval
9592                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9593                                  : GV_ADDMULTI),
9594                                 SVt_PV);
9595                 PL_lex_op = readline_overriden
9596                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9597                             op_append_elem(OP_LIST,
9598                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9599                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9600                     : (OP*)newUNOP(OP_READLINE, 0,
9601                             newUNOP(OP_RV2SV, 0,
9602                                 newGVOP(OP_GV, 0, gv)));
9603             }
9604             if (!readline_overriden)
9605                 PL_lex_op->op_flags |= OPf_SPECIAL;
9606             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9607             pl_yylval.ival = OP_NULL;
9608         }
9609
9610         /* If it's none of the above, it must be a literal filehandle
9611            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9612         else {
9613             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9614             PL_lex_op = readline_overriden
9615                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9616                         op_append_elem(OP_LIST,
9617                             newGVOP(OP_GV, 0, gv),
9618                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9619                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9620             pl_yylval.ival = OP_NULL;
9621         }
9622     }
9623
9624     return s;
9625 }
9626
9627
9628 /* scan_str
9629    takes: start position in buffer
9630           keep_quoted preserve \ on the embedded delimiter(s)
9631           keep_delims preserve the delimiters around the string
9632    returns: position to continue reading from buffer
9633    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9634         updates the read buffer.
9635
9636    This subroutine pulls a string out of the input.  It is called for:
9637         q               single quotes           q(literal text)
9638         '               single quotes           'literal text'
9639         qq              double quotes           qq(interpolate $here please)
9640         "               double quotes           "interpolate $here please"
9641         qx              backticks               qx(/bin/ls -l)
9642         `               backticks               `/bin/ls -l`
9643         qw              quote words             @EXPORT_OK = qw( func() $spam )
9644         m//             regexp match            m/this/
9645         s///            regexp substitute       s/this/that/
9646         tr///           string transliterate    tr/this/that/
9647         y///            string transliterate    y/this/that/
9648         ($*@)           sub prototypes          sub foo ($)
9649         (stuff)         sub attr parameters     sub foo : attr(stuff)
9650         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9651
9652    In most of these cases (all but <>, patterns and transliterate)
9653    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9654    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9655    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9656    calls scan_str().
9657
9658    It skips whitespace before the string starts, and treats the first
9659    character as the delimiter.  If the delimiter is one of ([{< then
9660    the corresponding "close" character )]}> is used as the closing
9661    delimiter.  It allows quoting of delimiters, and if the string has
9662    balanced delimiters ([{<>}]) it allows nesting.
9663
9664    On success, the SV with the resulting string is put into lex_stuff or,
9665    if that is already non-NULL, into lex_repl. The second case occurs only
9666    when parsing the RHS of the special constructs s/// and tr/// (y///).
9667    For convenience, the terminating delimiter character is stuffed into
9668    SvIVX of the SV.
9669 */
9670
9671 STATIC char *
9672 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9673 {
9674     dVAR;
9675     SV *sv;                             /* scalar value: string */
9676     const char *tmps;                   /* temp string, used for delimiter matching */
9677     register char *s = start;           /* current position in the buffer */
9678     register char term;                 /* terminating character */
9679     register char *to;                  /* current position in the sv's data */
9680     I32 brackets = 1;                   /* bracket nesting level */
9681     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9682     I32 termcode;                       /* terminating char. code */
9683     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9684     STRLEN termlen;                     /* length of terminating string */
9685     int last_off = 0;                   /* last position for nesting bracket */
9686 #ifdef PERL_MAD
9687     int stuffstart;
9688     char *tstart;
9689 #endif
9690
9691     PERL_ARGS_ASSERT_SCAN_STR;
9692
9693     /* skip space before the delimiter */
9694     if (isSPACE(*s)) {
9695         s = PEEKSPACE(s);
9696     }
9697
9698 #ifdef PERL_MAD
9699     if (PL_realtokenstart >= 0) {
9700         stuffstart = PL_realtokenstart;
9701         PL_realtokenstart = -1;
9702     }
9703     else
9704         stuffstart = start - SvPVX(PL_linestr);
9705 #endif
9706     /* mark where we are, in case we need to report errors */
9707     CLINE;
9708
9709     /* after skipping whitespace, the next character is the terminator */
9710     term = *s;
9711     if (!UTF) {
9712         termcode = termstr[0] = term;
9713         termlen = 1;
9714     }
9715     else {
9716         termcode = utf8_to_uvchr((U8*)s, &termlen);
9717         Copy(s, termstr, termlen, U8);
9718         if (!UTF8_IS_INVARIANT(term))
9719             has_utf8 = TRUE;
9720     }
9721
9722     /* mark where we are */
9723     PL_multi_start = CopLINE(PL_curcop);
9724     PL_multi_open = term;
9725
9726     /* find corresponding closing delimiter */
9727     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9728         termcode = termstr[0] = term = tmps[5];
9729
9730     PL_multi_close = term;
9731
9732     /* create a new SV to hold the contents.  79 is the SV's initial length.
9733        What a random number. */
9734     sv = newSV_type(SVt_PVIV);
9735     SvGROW(sv, 80);
9736     SvIV_set(sv, termcode);
9737     (void)SvPOK_only(sv);               /* validate pointer */
9738
9739     /* move past delimiter and try to read a complete string */
9740     if (keep_delims)
9741         sv_catpvn(sv, s, termlen);
9742     s += termlen;
9743 #ifdef PERL_MAD
9744     tstart = SvPVX(PL_linestr) + stuffstart;
9745     if (!PL_thisopen && !keep_delims) {
9746         PL_thisopen = newSVpvn(tstart, s - tstart);
9747         stuffstart = s - SvPVX(PL_linestr);
9748     }
9749 #endif
9750     for (;;) {
9751         if (PL_encoding && !UTF) {
9752             bool cont = TRUE;
9753
9754             while (cont) {
9755                 int offset = s - SvPVX_const(PL_linestr);
9756                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9757                                            &offset, (char*)termstr, termlen);
9758                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9759                 char * const svlast = SvEND(sv) - 1;
9760
9761                 for (; s < ns; s++) {
9762                     if (*s == '\n' && !PL_rsfp)
9763                         CopLINE_inc(PL_curcop);
9764                 }
9765                 if (!found)
9766                     goto read_more_line;
9767                 else {
9768                     /* handle quoted delimiters */
9769                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9770                         const char *t;
9771                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9772                             t--;
9773                         if ((svlast-1 - t) % 2) {
9774                             if (!keep_quoted) {
9775                                 *(svlast-1) = term;
9776                                 *svlast = '\0';
9777                                 SvCUR_set(sv, SvCUR(sv) - 1);
9778                             }
9779                             continue;
9780                         }
9781                     }
9782                     if (PL_multi_open == PL_multi_close) {
9783                         cont = FALSE;
9784                     }
9785                     else {
9786                         const char *t;
9787                         char *w;
9788                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9789                             /* At here, all closes are "was quoted" one,
9790                                so we don't check PL_multi_close. */
9791                             if (*t == '\\') {
9792                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9793                                     t++;
9794                                 else
9795                                     *w++ = *t++;
9796                             }
9797                             else if (*t == PL_multi_open)
9798                                 brackets++;
9799
9800                             *w = *t;
9801                         }
9802                         if (w < t) {
9803                             *w++ = term;
9804                             *w = '\0';
9805                             SvCUR_set(sv, w - SvPVX_const(sv));
9806                         }
9807                         last_off = w - SvPVX(sv);
9808                         if (--brackets <= 0)
9809                             cont = FALSE;
9810                     }
9811                 }
9812             }
9813             if (!keep_delims) {
9814                 SvCUR_set(sv, SvCUR(sv) - 1);
9815                 *SvEND(sv) = '\0';
9816             }
9817             break;
9818         }
9819
9820         /* extend sv if need be */
9821         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9822         /* set 'to' to the next character in the sv's string */
9823         to = SvPVX(sv)+SvCUR(sv);
9824
9825         /* if open delimiter is the close delimiter read unbridle */
9826         if (PL_multi_open == PL_multi_close) {
9827             for (; s < PL_bufend; s++,to++) {
9828                 /* embedded newlines increment the current line number */
9829                 if (*s == '\n' && !PL_rsfp)
9830                     CopLINE_inc(PL_curcop);
9831                 /* handle quoted delimiters */
9832                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9833                     if (!keep_quoted && s[1] == term)
9834                         s++;
9835                 /* any other quotes are simply copied straight through */
9836                     else
9837                         *to++ = *s++;
9838                 }
9839                 /* terminate when run out of buffer (the for() condition), or
9840                    have found the terminator */
9841                 else if (*s == term) {
9842                     if (termlen == 1)
9843                         break;
9844                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9845                         break;
9846                 }
9847                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9848                     has_utf8 = TRUE;
9849                 *to = *s;
9850             }
9851         }
9852
9853         /* if the terminator isn't the same as the start character (e.g.,
9854            matched brackets), we have to allow more in the quoting, and
9855            be prepared for nested brackets.
9856         */
9857         else {
9858             /* read until we run out of string, or we find the terminator */
9859             for (; s < PL_bufend; s++,to++) {
9860                 /* embedded newlines increment the line count */
9861                 if (*s == '\n' && !PL_rsfp)
9862                     CopLINE_inc(PL_curcop);
9863                 /* backslashes can escape the open or closing characters */
9864                 if (*s == '\\' && s+1 < PL_bufend) {
9865                     if (!keep_quoted &&
9866                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9867                         s++;
9868                     else
9869                         *to++ = *s++;
9870                 }
9871                 /* allow nested opens and closes */
9872                 else if (*s == PL_multi_close && --brackets <= 0)
9873                     break;
9874                 else if (*s == PL_multi_open)
9875                     brackets++;
9876                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9877                     has_utf8 = TRUE;
9878                 *to = *s;
9879             }
9880         }
9881         /* terminate the copied string and update the sv's end-of-string */
9882         *to = '\0';
9883         SvCUR_set(sv, to - SvPVX_const(sv));
9884
9885         /*
9886          * this next chunk reads more into the buffer if we're not done yet
9887          */
9888
9889         if (s < PL_bufend)
9890             break;              /* handle case where we are done yet :-) */
9891
9892 #ifndef PERL_STRICT_CR
9893         if (to - SvPVX_const(sv) >= 2) {
9894             if ((to[-2] == '\r' && to[-1] == '\n') ||
9895                 (to[-2] == '\n' && to[-1] == '\r'))
9896             {
9897                 to[-2] = '\n';
9898                 to--;
9899                 SvCUR_set(sv, to - SvPVX_const(sv));
9900             }
9901             else if (to[-1] == '\r')
9902                 to[-1] = '\n';
9903         }
9904         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9905             to[-1] = '\n';
9906 #endif
9907
9908      read_more_line:
9909         /* if we're out of file, or a read fails, bail and reset the current
9910            line marker so we can report where the unterminated string began
9911         */
9912 #ifdef PERL_MAD
9913         if (PL_madskills) {
9914             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9915             if (PL_thisstuff)
9916                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9917             else
9918                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9919         }
9920 #endif
9921         CopLINE_inc(PL_curcop);
9922         PL_bufptr = PL_bufend;
9923         if (!lex_next_chunk(0)) {
9924             sv_free(sv);
9925             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9926             return NULL;
9927         }
9928         s = PL_bufptr;
9929 #ifdef PERL_MAD
9930         stuffstart = 0;
9931 #endif
9932     }
9933
9934     /* at this point, we have successfully read the delimited string */
9935
9936     if (!PL_encoding || UTF) {
9937 #ifdef PERL_MAD
9938         if (PL_madskills) {
9939             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9940             const int len = s - tstart;
9941             if (PL_thisstuff)
9942                 sv_catpvn(PL_thisstuff, tstart, len);
9943             else
9944                 PL_thisstuff = newSVpvn(tstart, len);
9945             if (!PL_thisclose && !keep_delims)
9946                 PL_thisclose = newSVpvn(s,termlen);
9947         }
9948 #endif
9949
9950         if (keep_delims)
9951             sv_catpvn(sv, s, termlen);
9952         s += termlen;
9953     }
9954 #ifdef PERL_MAD
9955     else {
9956         if (PL_madskills) {
9957             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9958             const int len = s - tstart - termlen;
9959             if (PL_thisstuff)
9960                 sv_catpvn(PL_thisstuff, tstart, len);
9961             else
9962                 PL_thisstuff = newSVpvn(tstart, len);
9963             if (!PL_thisclose && !keep_delims)
9964                 PL_thisclose = newSVpvn(s - termlen,termlen);
9965         }
9966     }
9967 #endif
9968     if (has_utf8 || PL_encoding)
9969         SvUTF8_on(sv);
9970
9971     PL_multi_end = CopLINE(PL_curcop);
9972
9973     /* if we allocated too much space, give some back */
9974     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9975         SvLEN_set(sv, SvCUR(sv) + 1);
9976         SvPV_renew(sv, SvLEN(sv));
9977     }
9978
9979     /* decide whether this is the first or second quoted string we've read
9980        for this op
9981     */
9982
9983     if (PL_lex_stuff)
9984         PL_lex_repl = sv;
9985     else
9986         PL_lex_stuff = sv;
9987     return s;
9988 }
9989
9990 /*
9991   scan_num
9992   takes: pointer to position in buffer
9993   returns: pointer to new position in buffer
9994   side-effects: builds ops for the constant in pl_yylval.op
9995
9996   Read a number in any of the formats that Perl accepts:
9997
9998   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9999   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10000   0b[01](_?[01])*
10001   0[0-7](_?[0-7])*
10002   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10003
10004   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10005   thing it reads.
10006
10007   If it reads a number without a decimal point or an exponent, it will
10008   try converting the number to an integer and see if it can do so
10009   without loss of precision.
10010 */
10011
10012 char *
10013 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10014 {
10015     dVAR;
10016     register const char *s = start;     /* current position in buffer */
10017     register char *d;                   /* destination in temp buffer */
10018     register char *e;                   /* end of temp buffer */
10019     NV nv;                              /* number read, as a double */
10020     SV *sv = NULL;                      /* place to put the converted number */
10021     bool floatit;                       /* boolean: int or float? */
10022     const char *lastub = NULL;          /* position of last underbar */
10023     static char const number_too_long[] = "Number too long";
10024
10025     PERL_ARGS_ASSERT_SCAN_NUM;
10026
10027     /* We use the first character to decide what type of number this is */
10028
10029     switch (*s) {
10030     default:
10031       Perl_croak(aTHX_ "panic: scan_num");
10032
10033     /* if it starts with a 0, it could be an octal number, a decimal in
10034        0.13 disguise, or a hexadecimal number, or a binary number. */
10035     case '0':
10036         {
10037           /* variables:
10038              u          holds the "number so far"
10039              shift      the power of 2 of the base
10040                         (hex == 4, octal == 3, binary == 1)
10041              overflowed was the number more than we can hold?
10042
10043              Shift is used when we add a digit.  It also serves as an "are
10044              we in octal/hex/binary?" indicator to disallow hex characters
10045              when in octal mode.
10046            */
10047             NV n = 0.0;
10048             UV u = 0;
10049             I32 shift;
10050             bool overflowed = FALSE;
10051             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10052             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10053             static const char* const bases[5] =
10054               { "", "binary", "", "octal", "hexadecimal" };
10055             static const char* const Bases[5] =
10056               { "", "Binary", "", "Octal", "Hexadecimal" };
10057             static const char* const maxima[5] =
10058               { "",
10059                 "0b11111111111111111111111111111111",
10060                 "",
10061                 "037777777777",
10062                 "0xffffffff" };
10063             const char *base, *Base, *max;
10064
10065             /* check for hex */
10066             if (s[1] == 'x' || s[1] == 'X') {
10067                 shift = 4;
10068                 s += 2;
10069                 just_zero = FALSE;
10070             } else if (s[1] == 'b' || s[1] == 'B') {
10071                 shift = 1;
10072                 s += 2;
10073                 just_zero = FALSE;
10074             }
10075             /* check for a decimal in disguise */
10076             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10077                 goto decimal;
10078             /* so it must be octal */
10079             else {
10080                 shift = 3;
10081                 s++;
10082             }
10083
10084             if (*s == '_') {
10085                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10086                                "Misplaced _ in number");
10087                lastub = s++;
10088             }
10089
10090             base = bases[shift];
10091             Base = Bases[shift];
10092             max  = maxima[shift];
10093
10094             /* read the rest of the number */
10095             for (;;) {
10096                 /* x is used in the overflow test,
10097                    b is the digit we're adding on. */
10098                 UV x, b;
10099
10100                 switch (*s) {
10101
10102                 /* if we don't mention it, we're done */
10103                 default:
10104                     goto out;
10105
10106                 /* _ are ignored -- but warned about if consecutive */
10107                 case '_':
10108                     if (lastub && s == lastub + 1)
10109                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10110                                        "Misplaced _ in number");
10111                     lastub = s++;
10112                     break;
10113
10114                 /* 8 and 9 are not octal */
10115                 case '8': case '9':
10116                     if (shift == 3)
10117                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10118                     /* FALL THROUGH */
10119
10120                 /* octal digits */
10121                 case '2': case '3': case '4':
10122                 case '5': case '6': case '7':
10123                     if (shift == 1)
10124                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10125                     /* FALL THROUGH */
10126
10127                 case '0': case '1':
10128                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10129                     goto digit;
10130
10131                 /* hex digits */
10132                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10133                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10134                     /* make sure they said 0x */
10135                     if (shift != 4)
10136                         goto out;
10137                     b = (*s++ & 7) + 9;
10138
10139                     /* Prepare to put the digit we have onto the end
10140                        of the number so far.  We check for overflows.
10141                     */
10142
10143                   digit:
10144                     just_zero = FALSE;
10145                     if (!overflowed) {
10146                         x = u << shift; /* make room for the digit */
10147
10148                         if ((x >> shift) != u
10149                             && !(PL_hints & HINT_NEW_BINARY)) {
10150                             overflowed = TRUE;
10151                             n = (NV) u;
10152                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10153                                              "Integer overflow in %s number",
10154                                              base);
10155                         } else
10156                             u = x | b;          /* add the digit to the end */
10157                     }
10158                     if (overflowed) {
10159                         n *= nvshift[shift];
10160                         /* If an NV has not enough bits in its
10161                          * mantissa to represent an UV this summing of
10162                          * small low-order numbers is a waste of time
10163                          * (because the NV cannot preserve the
10164                          * low-order bits anyway): we could just
10165                          * remember when did we overflow and in the
10166                          * end just multiply n by the right
10167                          * amount. */
10168                         n += (NV) b;
10169                     }
10170                     break;
10171                 }
10172             }
10173
10174           /* if we get here, we had success: make a scalar value from
10175              the number.
10176           */
10177           out:
10178
10179             /* final misplaced underbar check */
10180             if (s[-1] == '_') {
10181                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10182             }
10183
10184             if (overflowed) {
10185                 if (n > 4294967295.0)
10186                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10187                                    "%s number > %s non-portable",
10188                                    Base, max);
10189                 sv = newSVnv(n);
10190             }
10191             else {
10192 #if UVSIZE > 4
10193                 if (u > 0xffffffff)
10194                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10195                                    "%s number > %s non-portable",
10196                                    Base, max);
10197 #endif
10198                 sv = newSVuv(u);
10199             }
10200             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10201                 sv = new_constant(start, s - start, "integer",
10202                                   sv, NULL, NULL, 0);
10203             else if (PL_hints & HINT_NEW_BINARY)
10204                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10205         }
10206         break;
10207
10208     /*
10209       handle decimal numbers.
10210       we're also sent here when we read a 0 as the first digit
10211     */
10212     case '1': case '2': case '3': case '4': case '5':
10213     case '6': case '7': case '8': case '9': case '.':
10214       decimal:
10215         d = PL_tokenbuf;
10216         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10217         floatit = FALSE;
10218
10219         /* read next group of digits and _ and copy into d */
10220         while (isDIGIT(*s) || *s == '_') {
10221             /* skip underscores, checking for misplaced ones
10222                if -w is on
10223             */
10224             if (*s == '_') {
10225                 if (lastub && s == lastub + 1)
10226                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10227                                    "Misplaced _ in number");
10228                 lastub = s++;
10229             }
10230             else {
10231                 /* check for end of fixed-length buffer */
10232                 if (d >= e)
10233                     Perl_croak(aTHX_ number_too_long);
10234                 /* if we're ok, copy the character */
10235                 *d++ = *s++;
10236             }
10237         }
10238
10239         /* final misplaced underbar check */
10240         if (lastub && s == lastub + 1) {
10241             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10242         }
10243
10244         /* read a decimal portion if there is one.  avoid
10245            3..5 being interpreted as the number 3. followed
10246            by .5
10247         */
10248         if (*s == '.' && s[1] != '.') {
10249             floatit = TRUE;
10250             *d++ = *s++;
10251
10252             if (*s == '_') {
10253                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10254                                "Misplaced _ in number");
10255                 lastub = s;
10256             }
10257
10258             /* copy, ignoring underbars, until we run out of digits.
10259             */
10260             for (; isDIGIT(*s) || *s == '_'; s++) {
10261                 /* fixed length buffer check */
10262                 if (d >= e)
10263                     Perl_croak(aTHX_ number_too_long);
10264                 if (*s == '_') {
10265                    if (lastub && s == lastub + 1)
10266                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10267                                       "Misplaced _ in number");
10268                    lastub = s;
10269                 }
10270                 else
10271                     *d++ = *s;
10272             }
10273             /* fractional part ending in underbar? */
10274             if (s[-1] == '_') {
10275                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10276                                "Misplaced _ in number");
10277             }
10278             if (*s == '.' && isDIGIT(s[1])) {
10279                 /* oops, it's really a v-string, but without the "v" */
10280                 s = start;
10281                 goto vstring;
10282             }
10283         }
10284
10285         /* read exponent part, if present */
10286         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10287             floatit = TRUE;
10288             s++;
10289
10290             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10291             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10292
10293             /* stray preinitial _ */
10294             if (*s == '_') {
10295                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10296                                "Misplaced _ in number");
10297                 lastub = s++;
10298             }
10299
10300             /* allow positive or negative exponent */
10301             if (*s == '+' || *s == '-')
10302                 *d++ = *s++;
10303
10304             /* stray initial _ */
10305             if (*s == '_') {
10306                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10307                                "Misplaced _ in number");
10308                 lastub = s++;
10309             }
10310
10311             /* read digits of exponent */
10312             while (isDIGIT(*s) || *s == '_') {
10313                 if (isDIGIT(*s)) {
10314                     if (d >= e)
10315                         Perl_croak(aTHX_ number_too_long);
10316                     *d++ = *s++;
10317                 }
10318                 else {
10319                    if (((lastub && s == lastub + 1) ||
10320                         (!isDIGIT(s[1]) && s[1] != '_')))
10321                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10322                                       "Misplaced _ in number");
10323                    lastub = s++;
10324                 }
10325             }
10326         }
10327
10328
10329         /*
10330            We try to do an integer conversion first if no characters
10331            indicating "float" have been found.
10332          */
10333
10334         if (!floatit) {
10335             UV uv;
10336             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10337
10338             if (flags == IS_NUMBER_IN_UV) {
10339               if (uv <= IV_MAX)
10340                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10341               else
10342                 sv = newSVuv(uv);
10343             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10344               if (uv <= (UV) IV_MIN)
10345                 sv = newSViv(-(IV)uv);
10346               else
10347                 floatit = TRUE;
10348             } else
10349               floatit = TRUE;
10350         }
10351         if (floatit) {
10352             /* terminate the string */
10353             *d = '\0';
10354             nv = Atof(PL_tokenbuf);
10355             sv = newSVnv(nv);
10356         }
10357
10358         if ( floatit
10359              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10360             const char *const key = floatit ? "float" : "integer";
10361             const STRLEN keylen = floatit ? 5 : 7;
10362             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10363                                 key, keylen, sv, NULL, NULL, 0);
10364         }
10365         break;
10366
10367     /* if it starts with a v, it could be a v-string */
10368     case 'v':
10369 vstring:
10370                 sv = newSV(5); /* preallocate storage space */
10371                 s = scan_vstring(s, PL_bufend, sv);
10372         break;
10373     }
10374
10375     /* make the op for the constant and return */
10376
10377     if (sv)
10378         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10379     else
10380         lvalp->opval = NULL;
10381
10382     return (char *)s;
10383 }
10384
10385 STATIC char *
10386 S_scan_formline(pTHX_ register char *s)
10387 {
10388     dVAR;
10389     register char *eol;
10390     register char *t;
10391     SV * const stuff = newSVpvs("");
10392     bool needargs = FALSE;
10393     bool eofmt = FALSE;
10394 #ifdef PERL_MAD
10395     char *tokenstart = s;
10396     SV* savewhite = NULL;
10397
10398     if (PL_madskills) {
10399         savewhite = PL_thiswhite;
10400         PL_thiswhite = 0;
10401     }
10402 #endif
10403
10404     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10405
10406     while (!needargs) {
10407         if (*s == '.') {
10408             t = s+1;
10409 #ifdef PERL_STRICT_CR
10410             while (SPACE_OR_TAB(*t))
10411                 t++;
10412 #else
10413             while (SPACE_OR_TAB(*t) || *t == '\r')
10414                 t++;
10415 #endif
10416             if (*t == '\n' || t == PL_bufend) {
10417                 eofmt = TRUE;
10418                 break;
10419             }
10420         }
10421         if (PL_in_eval && !PL_rsfp) {
10422             eol = (char *) memchr(s,'\n',PL_bufend-s);
10423             if (!eol++)
10424                 eol = PL_bufend;
10425         }
10426         else
10427             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10428         if (*s != '#') {
10429             for (t = s; t < eol; t++) {
10430                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10431                     needargs = FALSE;
10432                     goto enough;        /* ~~ must be first line in formline */
10433                 }
10434                 if (*t == '@' || *t == '^')
10435                     needargs = TRUE;
10436             }
10437             if (eol > s) {
10438                 sv_catpvn(stuff, s, eol-s);
10439 #ifndef PERL_STRICT_CR
10440                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10441                     char *end = SvPVX(stuff) + SvCUR(stuff);
10442                     end[-2] = '\n';
10443                     end[-1] = '\0';
10444                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10445                 }
10446 #endif
10447             }
10448             else
10449               break;
10450         }
10451         s = (char*)eol;
10452         if (PL_rsfp) {
10453             bool got_some;
10454 #ifdef PERL_MAD
10455             if (PL_madskills) {
10456                 if (PL_thistoken)
10457                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10458                 else
10459                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10460             }
10461 #endif
10462             PL_bufptr = PL_bufend;
10463             CopLINE_inc(PL_curcop);
10464             got_some = lex_next_chunk(0);
10465             CopLINE_dec(PL_curcop);
10466             s = PL_bufptr;
10467 #ifdef PERL_MAD
10468             tokenstart = PL_bufptr;
10469 #endif
10470             if (!got_some)
10471                 break;
10472         }
10473         incline(s);
10474     }
10475   enough:
10476     if (SvCUR(stuff)) {
10477         PL_expect = XTERM;
10478         if (needargs) {
10479             PL_lex_state = LEX_NORMAL;
10480             start_force(PL_curforce);
10481             NEXTVAL_NEXTTOKE.ival = 0;
10482             force_next(',');
10483         }
10484         else
10485             PL_lex_state = LEX_FORMLINE;
10486         if (!IN_BYTES) {
10487             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10488                 SvUTF8_on(stuff);
10489             else if (PL_encoding)
10490                 sv_recode_to_utf8(stuff, PL_encoding);
10491         }
10492         start_force(PL_curforce);
10493         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10494         force_next(THING);
10495         start_force(PL_curforce);
10496         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10497         force_next(LSTOP);
10498     }
10499     else {
10500         SvREFCNT_dec(stuff);
10501         if (eofmt)
10502             PL_lex_formbrack = 0;
10503         PL_bufptr = s;
10504     }
10505 #ifdef PERL_MAD
10506     if (PL_madskills) {
10507         if (PL_thistoken)
10508             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10509         else
10510             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10511         PL_thiswhite = savewhite;
10512     }
10513 #endif
10514     return s;
10515 }
10516
10517 I32
10518 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10519 {
10520     dVAR;
10521     const I32 oldsavestack_ix = PL_savestack_ix;
10522     CV* const outsidecv = PL_compcv;
10523
10524     if (PL_compcv) {
10525         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10526     }
10527     SAVEI32(PL_subline);
10528     save_item(PL_subname);
10529     SAVESPTR(PL_compcv);
10530
10531     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10532     CvFLAGS(PL_compcv) |= flags;
10533
10534     PL_subline = CopLINE(PL_curcop);
10535     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10536     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10537     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10538
10539     return oldsavestack_ix;
10540 }
10541
10542 #ifdef __SC__
10543 #pragma segment Perl_yylex
10544 #endif
10545 static int
10546 S_yywarn(pTHX_ const char *const s)
10547 {
10548     dVAR;
10549
10550     PERL_ARGS_ASSERT_YYWARN;
10551
10552     PL_in_eval |= EVAL_WARNONLY;
10553     yyerror(s);
10554     PL_in_eval &= ~EVAL_WARNONLY;
10555     return 0;
10556 }
10557
10558 int
10559 Perl_yyerror(pTHX_ const char *const s)
10560 {
10561     dVAR;
10562     const char *where = NULL;
10563     const char *context = NULL;
10564     int contlen = -1;
10565     SV *msg;
10566     int yychar  = PL_parser->yychar;
10567
10568     PERL_ARGS_ASSERT_YYERROR;
10569
10570     if (!yychar || (yychar == ';' && !PL_rsfp))
10571         where = "at EOF";
10572     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10573       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10574       PL_oldbufptr != PL_bufptr) {
10575         /*
10576                 Only for NetWare:
10577                 The code below is removed for NetWare because it abends/crashes on NetWare
10578                 when the script has error such as not having the closing quotes like:
10579                     if ($var eq "value)
10580                 Checking of white spaces is anyway done in NetWare code.
10581         */
10582 #ifndef NETWARE
10583         while (isSPACE(*PL_oldoldbufptr))
10584             PL_oldoldbufptr++;
10585 #endif
10586         context = PL_oldoldbufptr;
10587         contlen = PL_bufptr - PL_oldoldbufptr;
10588     }
10589     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10590       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10591         /*
10592                 Only for NetWare:
10593                 The code below is removed for NetWare because it abends/crashes on NetWare
10594                 when the script has error such as not having the closing quotes like:
10595                     if ($var eq "value)
10596                 Checking of white spaces is anyway done in NetWare code.
10597         */
10598 #ifndef NETWARE
10599         while (isSPACE(*PL_oldbufptr))
10600             PL_oldbufptr++;
10601 #endif
10602         context = PL_oldbufptr;
10603         contlen = PL_bufptr - PL_oldbufptr;
10604     }
10605     else if (yychar > 255)
10606         where = "next token ???";
10607     else if (yychar == -2) { /* YYEMPTY */
10608         if (PL_lex_state == LEX_NORMAL ||
10609            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10610             where = "at end of line";
10611         else if (PL_lex_inpat)
10612             where = "within pattern";
10613         else
10614             where = "within string";
10615     }
10616     else {
10617         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10618         if (yychar < 32)
10619             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10620         else if (isPRINT_LC(yychar)) {
10621             const char string = yychar;
10622             sv_catpvn(where_sv, &string, 1);
10623         }
10624         else
10625             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10626         where = SvPVX_const(where_sv);
10627     }
10628     msg = sv_2mortal(newSVpv(s, 0));
10629     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10630         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10631     if (context)
10632         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10633     else
10634         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10635     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10636         Perl_sv_catpvf(aTHX_ msg,
10637         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10638                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10639         PL_multi_end = 0;
10640     }
10641     if (PL_in_eval & EVAL_WARNONLY) {
10642         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10643     }
10644     else
10645         qerror(msg);
10646     if (PL_error_count >= 10) {
10647         if (PL_in_eval && SvCUR(ERRSV))
10648             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10649                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10650         else
10651             Perl_croak(aTHX_ "%s has too many errors.\n",
10652             OutCopFILE(PL_curcop));
10653     }
10654     PL_in_my = 0;
10655     PL_in_my_stash = NULL;
10656     return 0;
10657 }
10658 #ifdef __SC__
10659 #pragma segment Main
10660 #endif
10661
10662 STATIC char*
10663 S_swallow_bom(pTHX_ U8 *s)
10664 {
10665     dVAR;
10666     const STRLEN slen = SvCUR(PL_linestr);
10667
10668     PERL_ARGS_ASSERT_SWALLOW_BOM;
10669
10670     switch (s[0]) {
10671     case 0xFF:
10672         if (s[1] == 0xFE) {
10673             /* UTF-16 little-endian? (or UTF-32LE?) */
10674             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10675                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10676 #ifndef PERL_NO_UTF16_FILTER
10677             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10678             s += 2;
10679             if (PL_bufend > (char*)s) {
10680                 s = add_utf16_textfilter(s, TRUE);
10681             }
10682 #else
10683             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10684 #endif
10685         }
10686         break;
10687     case 0xFE:
10688         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10689 #ifndef PERL_NO_UTF16_FILTER
10690             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10691             s += 2;
10692             if (PL_bufend > (char *)s) {
10693                 s = add_utf16_textfilter(s, FALSE);
10694             }
10695 #else
10696             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10697 #endif
10698         }
10699         break;
10700     case 0xEF:
10701         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10702             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10703             s += 3;                      /* UTF-8 */
10704         }
10705         break;
10706     case 0:
10707         if (slen > 3) {
10708              if (s[1] == 0) {
10709                   if (s[2] == 0xFE && s[3] == 0xFF) {
10710                        /* UTF-32 big-endian */
10711                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10712                   }
10713              }
10714              else if (s[2] == 0 && s[3] != 0) {
10715                   /* Leading bytes
10716                    * 00 xx 00 xx
10717                    * are a good indicator of UTF-16BE. */
10718 #ifndef PERL_NO_UTF16_FILTER
10719                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10720                   s = add_utf16_textfilter(s, FALSE);
10721 #else
10722                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10723 #endif
10724              }
10725         }
10726 #ifdef EBCDIC
10727     case 0xDD:
10728         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10729             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10730             s += 4;                      /* UTF-8 */
10731         }
10732         break;
10733 #endif
10734
10735     default:
10736          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10737                   /* Leading bytes
10738                    * xx 00 xx 00
10739                    * are a good indicator of UTF-16LE. */
10740 #ifndef PERL_NO_UTF16_FILTER
10741               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10742               s = add_utf16_textfilter(s, TRUE);
10743 #else
10744               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10745 #endif
10746          }
10747     }
10748     return (char*)s;
10749 }
10750
10751
10752 #ifndef PERL_NO_UTF16_FILTER
10753 static I32
10754 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10755 {
10756     dVAR;
10757     SV *const filter = FILTER_DATA(idx);
10758     /* We re-use this each time round, throwing the contents away before we
10759        return.  */
10760     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10761     SV *const utf8_buffer = filter;
10762     IV status = IoPAGE(filter);
10763     const bool reverse = cBOOL(IoLINES(filter));
10764     I32 retval;
10765
10766     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10767
10768     /* As we're automatically added, at the lowest level, and hence only called
10769        from this file, we can be sure that we're not called in block mode. Hence
10770        don't bother writing code to deal with block mode.  */
10771     if (maxlen) {
10772         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10773     }
10774     if (status < 0) {
10775         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10776     }
10777     DEBUG_P(PerlIO_printf(Perl_debug_log,
10778                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10779                           FPTR2DPTR(void *, S_utf16_textfilter),
10780                           reverse ? 'l' : 'b', idx, maxlen, status,
10781                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10782
10783     while (1) {
10784         STRLEN chars;
10785         STRLEN have;
10786         I32 newlen;
10787         U8 *end;
10788         /* First, look in our buffer of existing UTF-8 data:  */
10789         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10790
10791         if (nl) {
10792             ++nl;
10793         } else if (status == 0) {
10794             /* EOF */
10795             IoPAGE(filter) = 0;
10796             nl = SvEND(utf8_buffer);
10797         }
10798         if (nl) {
10799             STRLEN got = nl - SvPVX(utf8_buffer);
10800             /* Did we have anything to append?  */
10801             retval = got != 0;
10802             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10803             /* Everything else in this code works just fine if SVp_POK isn't
10804                set.  This, however, needs it, and we need it to work, else
10805                we loop infinitely because the buffer is never consumed.  */
10806             sv_chop(utf8_buffer, nl);
10807             break;
10808         }
10809
10810         /* OK, not a complete line there, so need to read some more UTF-16.
10811            Read an extra octect if the buffer currently has an odd number. */
10812         while (1) {
10813             if (status <= 0)
10814                 break;
10815             if (SvCUR(utf16_buffer) >= 2) {
10816                 /* Location of the high octet of the last complete code point.
10817                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10818                    *coupled* with all the benefits of partial reads and
10819                    endianness.  */
10820                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10821                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10822
10823                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10824                     break;
10825                 }
10826
10827                 /* We have the first half of a surrogate. Read more.  */
10828                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10829             }
10830
10831             status = FILTER_READ(idx + 1, utf16_buffer,
10832                                  160 + (SvCUR(utf16_buffer) & 1));
10833             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10834             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10835             if (status < 0) {
10836                 /* Error */
10837                 IoPAGE(filter) = status;
10838                 return status;
10839             }
10840         }
10841
10842         chars = SvCUR(utf16_buffer) >> 1;
10843         have = SvCUR(utf8_buffer);
10844         SvGROW(utf8_buffer, have + chars * 3 + 1);
10845
10846         if (reverse) {
10847             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10848                                          (U8*)SvPVX_const(utf8_buffer) + have,
10849                                          chars * 2, &newlen);
10850         } else {
10851             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10852                                 (U8*)SvPVX_const(utf8_buffer) + have,
10853                                 chars * 2, &newlen);
10854         }
10855         SvCUR_set(utf8_buffer, have + newlen);
10856         *end = '\0';
10857
10858         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10859            it's private to us, and utf16_to_utf8{,reversed} take a
10860            (pointer,length) pair, rather than a NUL-terminated string.  */
10861         if(SvCUR(utf16_buffer) & 1) {
10862             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10863             SvCUR_set(utf16_buffer, 1);
10864         } else {
10865             SvCUR_set(utf16_buffer, 0);
10866         }
10867     }
10868     DEBUG_P(PerlIO_printf(Perl_debug_log,
10869                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10870                           status,
10871                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10872     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10873     return retval;
10874 }
10875
10876 static U8 *
10877 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10878 {
10879     SV *filter = filter_add(S_utf16_textfilter, NULL);
10880
10881     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10882
10883     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10884     sv_setpvs(filter, "");
10885     IoLINES(filter) = reversed;
10886     IoPAGE(filter) = 1; /* Not EOF */
10887
10888     /* Sadly, we have to return a valid pointer, come what may, so we have to
10889        ignore any error return from this.  */
10890     SvCUR_set(PL_linestr, 0);
10891     if (FILTER_READ(0, PL_linestr, 0)) {
10892         SvUTF8_on(PL_linestr);
10893     } else {
10894         SvUTF8_on(PL_linestr);
10895     }
10896     PL_bufend = SvEND(PL_linestr);
10897     return (U8*)SvPVX(PL_linestr);
10898 }
10899 #endif
10900
10901 /*
10902 Returns a pointer to the next character after the parsed
10903 vstring, as well as updating the passed in sv.
10904
10905 Function must be called like
10906
10907         sv = newSV(5);
10908         s = scan_vstring(s,e,sv);
10909
10910 where s and e are the start and end of the string.
10911 The sv should already be large enough to store the vstring
10912 passed in, for performance reasons.
10913
10914 */
10915
10916 char *
10917 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10918 {
10919     dVAR;
10920     const char *pos = s;
10921     const char *start = s;
10922
10923     PERL_ARGS_ASSERT_SCAN_VSTRING;
10924
10925     if (*pos == 'v') pos++;  /* get past 'v' */
10926     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10927         pos++;
10928     if ( *pos != '.') {
10929         /* this may not be a v-string if followed by => */
10930         const char *next = pos;
10931         while (next < e && isSPACE(*next))
10932             ++next;
10933         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10934             /* return string not v-string */
10935             sv_setpvn(sv,(char *)s,pos-s);
10936             return (char *)pos;
10937         }
10938     }
10939
10940     if (!isALPHA(*pos)) {
10941         U8 tmpbuf[UTF8_MAXBYTES+1];
10942
10943         if (*s == 'v')
10944             s++;  /* get past 'v' */
10945
10946         sv_setpvs(sv, "");
10947
10948         for (;;) {
10949             /* this is atoi() that tolerates underscores */
10950             U8 *tmpend;
10951             UV rev = 0;
10952             const char *end = pos;
10953             UV mult = 1;
10954             while (--end >= s) {
10955                 if (*end != '_') {
10956                     const UV orev = rev;
10957                     rev += (*end - '0') * mult;
10958                     mult *= 10;
10959                     if (orev > rev)
10960                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10961                                          "Integer overflow in decimal number");
10962                 }
10963             }
10964 #ifdef EBCDIC
10965             if (rev > 0x7FFFFFFF)
10966                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10967 #endif
10968             /* Append native character for the rev point */
10969             tmpend = uvchr_to_utf8(tmpbuf, rev);
10970             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10971             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10972                  SvUTF8_on(sv);
10973             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10974                  s = ++pos;
10975             else {
10976                  s = pos;
10977                  break;
10978             }
10979             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10980                  pos++;
10981         }
10982         SvPOK_on(sv);
10983         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10984         SvRMAGICAL_on(sv);
10985     }
10986     return (char *)s;
10987 }
10988
10989 int
10990 Perl_keyword_plugin_standard(pTHX_
10991         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10992 {
10993     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10994     PERL_UNUSED_CONTEXT;
10995     PERL_UNUSED_ARG(keyword_ptr);
10996     PERL_UNUSED_ARG(keyword_len);
10997     PERL_UNUSED_ARG(op_ptr);
10998     return KEYWORD_PLUGIN_DECLINE;
10999 }
11000
11001 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11002 static void
11003 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11004 {
11005     SAVEI32(PL_lex_brackets);
11006     if (PL_lex_brackets > 100)
11007         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11008     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11009     SAVEI32(PL_lex_allbrackets);
11010     PL_lex_allbrackets = 0;
11011     SAVEI8(PL_lex_fakeeof);
11012     PL_lex_fakeeof = (U8)fakeeof;
11013     if(yyparse(gramtype) && !PL_parser->error_count)
11014         qerror(Perl_mess(aTHX_ "Parse error"));
11015 }
11016
11017 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11018 static OP *
11019 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11020 {
11021     OP *o;
11022     ENTER;
11023     SAVEVPTR(PL_eval_root);
11024     PL_eval_root = NULL;
11025     parse_recdescent(gramtype, fakeeof);
11026     o = PL_eval_root;
11027     LEAVE;
11028     return o;
11029 }
11030
11031 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11032 static OP *
11033 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11034 {
11035     OP *exprop;
11036     if (flags & ~PARSE_OPTIONAL)
11037         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11038     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11039     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11040         if (!PL_parser->error_count)
11041             qerror(Perl_mess(aTHX_ "Parse error"));
11042         exprop = newOP(OP_NULL, 0);
11043     }
11044     return exprop;
11045 }
11046
11047 /*
11048 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11049
11050 Parse a Perl arithmetic expression.  This may contain operators of precedence
11051 down to the bit shift operators.  The expression must be followed (and thus
11052 terminated) either by a comparison or lower-precedence operator or by
11053 something that would normally terminate an expression such as semicolon.
11054 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11055 otherwise it is mandatory.  It is up to the caller to ensure that the
11056 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11057 the source of the code to be parsed and the lexical context for the
11058 expression.
11059
11060 The op tree representing the expression is returned.  If an optional
11061 expression is absent, a null pointer is returned, otherwise the pointer
11062 will be non-null.
11063
11064 If an error occurs in parsing or compilation, in most cases a valid op
11065 tree is returned anyway.  The error is reflected in the parser state,
11066 normally resulting in a single exception at the top level of parsing
11067 which covers all the compilation errors that occurred.  Some compilation
11068 errors, however, will throw an exception immediately.
11069
11070 =cut
11071 */
11072
11073 OP *
11074 Perl_parse_arithexpr(pTHX_ U32 flags)
11075 {
11076     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11077 }
11078
11079 /*
11080 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11081
11082 Parse a Perl term expression.  This may contain operators of precedence
11083 down to the assignment operators.  The expression must be followed (and thus
11084 terminated) either by a comma or lower-precedence operator or by
11085 something that would normally terminate an expression such as semicolon.
11086 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11087 otherwise it is mandatory.  It is up to the caller to ensure that the
11088 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11089 the source of the code to be parsed and the lexical context for the
11090 expression.
11091
11092 The op tree representing the expression is returned.  If an optional
11093 expression is absent, a null pointer is returned, otherwise the pointer
11094 will be non-null.
11095
11096 If an error occurs in parsing or compilation, in most cases a valid op
11097 tree is returned anyway.  The error is reflected in the parser state,
11098 normally resulting in a single exception at the top level of parsing
11099 which covers all the compilation errors that occurred.  Some compilation
11100 errors, however, will throw an exception immediately.
11101
11102 =cut
11103 */
11104
11105 OP *
11106 Perl_parse_termexpr(pTHX_ U32 flags)
11107 {
11108     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11109 }
11110
11111 /*
11112 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11113
11114 Parse a Perl list expression.  This may contain operators of precedence
11115 down to the comma operator.  The expression must be followed (and thus
11116 terminated) either by a low-precedence logic operator such as C<or> or by
11117 something that would normally terminate an expression such as semicolon.
11118 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11119 otherwise it is mandatory.  It is up to the caller to ensure that the
11120 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11121 the source of the code to be parsed and the lexical context for the
11122 expression.
11123
11124 The op tree representing the expression is returned.  If an optional
11125 expression is absent, a null pointer is returned, otherwise the pointer
11126 will be non-null.
11127
11128 If an error occurs in parsing or compilation, in most cases a valid op
11129 tree is returned anyway.  The error is reflected in the parser state,
11130 normally resulting in a single exception at the top level of parsing
11131 which covers all the compilation errors that occurred.  Some compilation
11132 errors, however, will throw an exception immediately.
11133
11134 =cut
11135 */
11136
11137 OP *
11138 Perl_parse_listexpr(pTHX_ U32 flags)
11139 {
11140     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11141 }
11142
11143 /*
11144 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11145
11146 Parse a single complete Perl expression.  This allows the full
11147 expression grammar, including the lowest-precedence operators such
11148 as C<or>.  The expression must be followed (and thus terminated) by a
11149 token that an expression would normally be terminated by: end-of-file,
11150 closing bracketing punctuation, semicolon, or one of the keywords that
11151 signals a postfix expression-statement modifier.  If I<flags> includes
11152 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11153 mandatory.  It is up to the caller to ensure that the dynamic parser
11154 state (L</PL_parser> et al) is correctly set to reflect the source of
11155 the code to be parsed and the lexical context for the expression.
11156
11157 The op tree representing the expression is returned.  If an optional
11158 expression is absent, a null pointer is returned, otherwise the pointer
11159 will be non-null.
11160
11161 If an error occurs in parsing or compilation, in most cases a valid op
11162 tree is returned anyway.  The error is reflected in the parser state,
11163 normally resulting in a single exception at the top level of parsing
11164 which covers all the compilation errors that occurred.  Some compilation
11165 errors, however, will throw an exception immediately.
11166
11167 =cut
11168 */
11169
11170 OP *
11171 Perl_parse_fullexpr(pTHX_ U32 flags)
11172 {
11173     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11174 }
11175
11176 /*
11177 =for apidoc Amx|OP *|parse_block|U32 flags
11178
11179 Parse a single complete Perl code block.  This consists of an opening
11180 brace, a sequence of statements, and a closing brace.  The block
11181 constitutes a lexical scope, so C<my> variables and various compile-time
11182 effects can be contained within it.  It is up to the caller to ensure
11183 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11184 reflect the source of the code to be parsed and the lexical context for
11185 the statement.
11186
11187 The op tree representing the code block is returned.  This is always a
11188 real op, never a null pointer.  It will normally be a C<lineseq> list,
11189 including C<nextstate> or equivalent ops.  No ops to construct any kind
11190 of runtime scope are included by virtue of it being a block.
11191
11192 If an error occurs in parsing or compilation, in most cases a valid op
11193 tree (most likely null) is returned anyway.  The error is reflected in
11194 the parser state, normally resulting in a single exception at the top
11195 level of parsing which covers all the compilation errors that occurred.
11196 Some compilation errors, however, will throw an exception immediately.
11197
11198 The I<flags> parameter is reserved for future use, and must always
11199 be zero.
11200
11201 =cut
11202 */
11203
11204 OP *
11205 Perl_parse_block(pTHX_ U32 flags)
11206 {
11207     if (flags)
11208         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11209     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11210 }
11211
11212 /*
11213 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11214
11215 Parse a single unadorned Perl statement.  This may be a normal imperative
11216 statement or a declaration that has compile-time effect.  It does not
11217 include any label or other affixture.  It is up to the caller to ensure
11218 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11219 reflect the source of the code to be parsed and the lexical context for
11220 the statement.
11221
11222 The op tree representing the statement is returned.  This may be a
11223 null pointer if the statement is null, for example if it was actually
11224 a subroutine definition (which has compile-time side effects).  If not
11225 null, it will be ops directly implementing the statement, suitable to
11226 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11227 equivalent op (except for those embedded in a scope contained entirely
11228 within the statement).
11229
11230 If an error occurs in parsing or compilation, in most cases a valid op
11231 tree (most likely null) is returned anyway.  The error is reflected in
11232 the parser state, normally resulting in a single exception at the top
11233 level of parsing which covers all the compilation errors that occurred.
11234 Some compilation errors, however, will throw an exception immediately.
11235
11236 The I<flags> parameter is reserved for future use, and must always
11237 be zero.
11238
11239 =cut
11240 */
11241
11242 OP *
11243 Perl_parse_barestmt(pTHX_ U32 flags)
11244 {
11245     if (flags)
11246         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11247     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11248 }
11249
11250 /*
11251 =for apidoc Amx|SV *|parse_label|U32 flags
11252
11253 Parse a single label, possibly optional, of the type that may prefix a
11254 Perl statement.  It is up to the caller to ensure that the dynamic parser
11255 state (L</PL_parser> et al) is correctly set to reflect the source of
11256 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11257 label is optional, otherwise it is mandatory.
11258
11259 The name of the label is returned in the form of a fresh scalar.  If an
11260 optional label is absent, a null pointer is returned.
11261
11262 If an error occurs in parsing, which can only occur if the label is
11263 mandatory, a valid label is returned anyway.  The error is reflected in
11264 the parser state, normally resulting in a single exception at the top
11265 level of parsing which covers all the compilation errors that occurred.
11266
11267 =cut
11268 */
11269
11270 SV *
11271 Perl_parse_label(pTHX_ U32 flags)
11272 {
11273     if (flags & ~PARSE_OPTIONAL)
11274         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11275     if (PL_lex_state == LEX_KNOWNEXT) {
11276         PL_parser->yychar = yylex();
11277         if (PL_parser->yychar == LABEL) {
11278             char *lpv = pl_yylval.pval;
11279             STRLEN llen = strlen(lpv);
11280             SV *lsv;
11281             PL_parser->yychar = YYEMPTY;
11282             lsv = newSV_type(SVt_PV);
11283             SvPV_set(lsv, lpv);
11284             SvCUR_set(lsv, llen);
11285             SvLEN_set(lsv, llen+1);
11286             SvPOK_on(lsv);
11287             return lsv;
11288         } else {
11289             yyunlex();
11290             goto no_label;
11291         }
11292     } else {
11293         char *s, *t;
11294         U8 c;
11295         STRLEN wlen, bufptr_pos;
11296         lex_read_space(0);
11297         t = s = PL_bufptr;
11298         c = (U8)*s;
11299         if (!isIDFIRST_A(c))
11300             goto no_label;
11301         do {
11302             c = (U8)*++t;
11303         } while(isWORDCHAR_A(c));
11304         wlen = t - s;
11305         if (word_takes_any_delimeter(s, wlen))
11306             goto no_label;
11307         bufptr_pos = s - SvPVX(PL_linestr);
11308         PL_bufptr = t;
11309         lex_read_space(LEX_KEEP_PREVIOUS);
11310         t = PL_bufptr;
11311         s = SvPVX(PL_linestr) + bufptr_pos;
11312         if (t[0] == ':' && t[1] != ':') {
11313             PL_oldoldbufptr = PL_oldbufptr;
11314             PL_oldbufptr = s;
11315             PL_bufptr = t+1;
11316             return newSVpvn(s, wlen);
11317         } else {
11318             PL_bufptr = s;
11319             no_label:
11320             if (flags & PARSE_OPTIONAL) {
11321                 return NULL;
11322             } else {
11323                 qerror(Perl_mess(aTHX_ "Parse error"));
11324                 return newSVpvs("x");
11325             }
11326         }
11327     }
11328 }
11329
11330 /*
11331 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11332
11333 Parse a single complete Perl statement.  This may be a normal imperative
11334 statement or a declaration that has compile-time effect, and may include
11335 optional labels.  It is up to the caller to ensure that the dynamic
11336 parser state (L</PL_parser> et al) is correctly set to reflect the source
11337 of the code to be parsed and the lexical context for the statement.
11338
11339 The op tree representing the statement is returned.  This may be a
11340 null pointer if the statement is null, for example if it was actually
11341 a subroutine definition (which has compile-time side effects).  If not
11342 null, it will be the result of a L</newSTATEOP> call, normally including
11343 a C<nextstate> or equivalent op.
11344
11345 If an error occurs in parsing or compilation, in most cases a valid op
11346 tree (most likely null) is returned anyway.  The error is reflected in
11347 the parser state, normally resulting in a single exception at the top
11348 level of parsing which covers all the compilation errors that occurred.
11349 Some compilation errors, however, will throw an exception immediately.
11350
11351 The I<flags> parameter is reserved for future use, and must always
11352 be zero.
11353
11354 =cut
11355 */
11356
11357 OP *
11358 Perl_parse_fullstmt(pTHX_ U32 flags)
11359 {
11360     if (flags)
11361         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11362     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11363 }
11364
11365 /*
11366 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11367
11368 Parse a sequence of zero or more Perl statements.  These may be normal
11369 imperative statements, including optional labels, or declarations
11370 that have compile-time effect, or any mixture thereof.  The statement
11371 sequence ends when a closing brace or end-of-file is encountered in a
11372 place where a new statement could have validly started.  It is up to
11373 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11374 is correctly set to reflect the source of the code to be parsed and the
11375 lexical context for the statements.
11376
11377 The op tree representing the statement sequence is returned.  This may
11378 be a null pointer if the statements were all null, for example if there
11379 were no statements or if there were only subroutine definitions (which
11380 have compile-time side effects).  If not null, it will be a C<lineseq>
11381 list, normally including C<nextstate> or equivalent ops.
11382
11383 If an error occurs in parsing or compilation, in most cases a valid op
11384 tree is returned anyway.  The error is reflected in the parser state,
11385 normally resulting in a single exception at the top level of parsing
11386 which covers all the compilation errors that occurred.  Some compilation
11387 errors, however, will throw an exception immediately.
11388
11389 The I<flags> parameter is reserved for future use, and must always
11390 be zero.
11391
11392 =cut
11393 */
11394
11395 OP *
11396 Perl_parse_stmtseq(pTHX_ U32 flags)
11397 {
11398     OP *stmtseqop;
11399     I32 c;
11400     if (flags)
11401         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11402     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11403     c = lex_peek_unichar(0);
11404     if (c != -1 && c != /*{*/'}')
11405         qerror(Perl_mess(aTHX_ "Parse error"));
11406     return stmtseqop;
11407 }
11408
11409 void
11410 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11411 {
11412     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11413     deprecate("qw(...) as parentheses");
11414     force_next((4<<24)|')');
11415     if (qwlist->op_type == OP_STUB) {
11416         op_free(qwlist);
11417     }
11418     else {
11419         start_force(PL_curforce);
11420         NEXTVAL_NEXTTOKE.opval = qwlist;
11421         force_next(THING);
11422     }
11423     force_next((2<<24)|'(');
11424 }
11425
11426 /*
11427  * Local variables:
11428  * c-indentation-style: bsd
11429  * c-basic-offset: 4
11430  * indent-tabs-mode: t
11431  * End:
11432  *
11433  * ex: set ts=8 sts=4 sw=4 noet:
11434  */