toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 * Rop          : relational operator <= >= gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
 242 #ifdef DEBUGGING /* Serve -DT. */
 243 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 244 #else
 245 #   define REPORT(retval) (retval)
 246 #endif
 247
 248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 256 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 257 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 258 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 259 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 260 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 261 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 262 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 263 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 264 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 265 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 266 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 267 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 268 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
 270 /* This bit of chicanery makes a unary function followed by
 271  * a parenthesis into a function with one argument, highest precedence.
 272  * The UNIDOR macro is for unary functions that can be followed by the //
 273  * operator (such as C<shift // 0>).
 274  */
 275 #define UNI2(f,x) { \
 276         pl_yylval.ival = f; \
 277         PL_expect = x; \
 278         PL_bufptr = s; \
 279         PL_last_uni = PL_oldbufptr; \
 280         PL_last_lop_op = f; \
 281         if (*s == '(') \
 282             return REPORT( (int)FUNC1 ); \
 283         s = PEEKSPACE(s); \
 284         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 285         }
 286 #define UNI(f)    UNI2(f,XTERM)
 287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 288
 289 #define UNIBRACK(f) { \
 290         pl_yylval.ival = f; \
 291         PL_bufptr = s; \
 292         PL_last_uni = PL_oldbufptr; \
 293         if (*s == '(') \
 294             return REPORT( (int)FUNC1 ); \
 295         s = PEEKSPACE(s); \
 296         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 297         }
 298
 299 /* grandfather return to old style */
 300 #define OLDLOP(f) \
 301         do { \
 302             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 303                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 304             pl_yylval.ival = (f); \
 305             PL_expect = XTERM; \
 306             PL_bufptr = s; \
 307             return (int)LSTOP; \
 308         } while(0)
 309
 310 #ifdef DEBUGGING
 311
 312 /* how to interpret the pl_yylval associated with the token */
 313 enum token_type {
 314     TOKENTYPE_NONE,
 315     TOKENTYPE_IVAL,
 316     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 317     TOKENTYPE_PVAL,
 318     TOKENTYPE_OPVAL,
 319     TOKENTYPE_GVVAL
 320 };
 321
 322 static struct debug_tokens {
 323     const int token;
 324     enum token_type type;
 325     const char *name;
 326 } const debug_tokens[] =
 327 {
 328     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 329     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 330     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 331     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 332     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 333     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 334     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 335     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 336     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 337     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 338     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 339     { DO,               TOKENTYPE_NONE,         "DO" },
 340     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 341     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 342     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 343     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 344     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 345     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 346     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 347     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 348     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 349     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 350     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 351     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 352     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 353     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 354     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 355     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 356     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 357     { IF,               TOKENTYPE_IVAL,         "IF" },
 358     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 359     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 360     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 361     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 362     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 363     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 364     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 365     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 366     { MY,               TOKENTYPE_IVAL,         "MY" },
 367     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 368     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 369     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 370     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 371     { OROR,             TOKENTYPE_NONE,         "OROR" },
 372     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 373     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 374     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 375     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 376     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 377     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 378     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 379     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 380     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 381     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 382     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 383     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 384     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 385     { SUB,              TOKENTYPE_NONE,         "SUB" },
 386     { THING,            TOKENTYPE_OPVAL,        "THING" },
 387     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 388     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 389     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 390     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 391     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 392     { USE,              TOKENTYPE_IVAL,         "USE" },
 393     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 394     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 395     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 396     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 397     { 0,                TOKENTYPE_NONE,         NULL }
 398 };
 399
 400 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 401
 402 STATIC int
 403 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 404 {
 405     dVAR;
 406
 407     PERL_ARGS_ASSERT_TOKEREPORT;
 408
 409     if (DEBUG_T_TEST) {
 410         const char *name = NULL;
 411         enum token_type type = TOKENTYPE_NONE;
 412         const struct debug_tokens *p;
 413         SV* const report = newSVpvs("<== ");
 414
 415         for (p = debug_tokens; p->token; p++) {
 416             if (p->token == (int)rv) {
 417                 name = p->name;
 418                 type = p->type;
 419                 break;
 420             }
 421         }
 422         if (name)
 423             Perl_sv_catpv(aTHX_ report, name);
 424         else if ((char)rv > ' ' && (char)rv < '~')
 425             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 426         else if (!rv)
 427             sv_catpvs(report, "EOF");
 428         else
 429             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 430         switch (type) {
 431         case TOKENTYPE_NONE:
 432         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 433             break;
 434         case TOKENTYPE_IVAL:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 436             break;
 437         case TOKENTYPE_OPNUM:
 438             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 439                                     PL_op_name[lvalp->ival]);
 440             break;
 441         case TOKENTYPE_PVAL:
 442             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 443             break;
 444         case TOKENTYPE_OPVAL:
 445             if (lvalp->opval) {
 446                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 447                                     PL_op_name[lvalp->opval->op_type]);
 448                 if (lvalp->opval->op_type == OP_CONST) {
 449                     Perl_sv_catpvf(aTHX_ report, " %s",
 450                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 451                 }
 452
 453             }
 454             else
 455                 sv_catpvs(report, "(opval=null)");
 456             break;
 457         }
 458         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 459     };
 460     return (int)rv;
 461 }
 462
 463
 464 /* print the buffer with suitable escapes */
 465
 466 STATIC void
 467 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 468 {
 469     SV* const tmp = newSVpvs("");
 470
 471     PERL_ARGS_ASSERT_PRINTBUF;
 472
 473     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 474     SvREFCNT_dec(tmp);
 475 }
 476
 477 #endif
 478
 479 static int
 480 S_deprecate_commaless_var_list(pTHX) {
 481     PL_expect = XTERM;
 482     deprecate("comma-less variable list");
 483     return REPORT(','); /* grandfather non-comma-format format */
 484 }
 485
 486 /*
 487  * S_ao
 488  *
 489  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 490  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 491  */
 492
 493 STATIC int
 494 S_ao(pTHX_ int toketype)
 495 {
 496     dVAR;
 497     if (*PL_bufptr == '=') {
 498         PL_bufptr++;
 499         if (toketype == ANDAND)
 500             pl_yylval.ival = OP_ANDASSIGN;
 501         else if (toketype == OROR)
 502             pl_yylval.ival = OP_ORASSIGN;
 503         else if (toketype == DORDOR)
 504             pl_yylval.ival = OP_DORASSIGN;
 505         toketype = ASSIGNOP;
 506     }
 507     return toketype;
 508 }
 509
 510 /*
 511  * S_no_op
 512  * When Perl expects an operator and finds something else, no_op
 513  * prints the warning.  It always prints "<something> found where
 514  * operator expected.  It prints "Missing semicolon on previous line?"
 515  * if the surprise occurs at the start of the line.  "do you need to
 516  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 517  * where the compiler doesn't know if foo is a method call or a function.
 518  * It prints "Missing operator before end of line" if there's nothing
 519  * after the missing operator, or "... before <...>" if there is something
 520  * after the missing operator.
 521  */
 522
 523 STATIC void
 524 S_no_op(pTHX_ const char *const what, char *s)
 525 {
 526     dVAR;
 527     char * const oldbp = PL_bufptr;
 528     const bool is_first = (PL_oldbufptr == PL_linestart);
 529
 530     PERL_ARGS_ASSERT_NO_OP;
 531
 532     if (!s)
 533         s = oldbp;
 534     else
 535         PL_bufptr = s;
 536     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 537     if (ckWARN_d(WARN_SYNTAX)) {
 538         if (is_first)
 539             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 540                     "\t(Missing semicolon on previous line?)\n");
 541         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 542             const char *t;
 543             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 544                 NOOP;
 545             if (t < PL_bufptr && isSPACE(*t))
 546                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 547                         "\t(Do you need to predeclare %.*s?)\n",
 548                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 549         }
 550         else {
 551             assert(s >= oldbp);
 552             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 553                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 554         }
 555     }
 556     PL_bufptr = oldbp;
 557 }
 558
 559 /*
 560  * S_missingterm
 561  * Complain about missing quote/regexp/heredoc terminator.
 562  * If it's called with NULL then it cauterizes the line buffer.
 563  * If we're in a delimited string and the delimiter is a control
 564  * character, it's reformatted into a two-char sequence like ^C.
 565  * This is fatal.
 566  */
 567
 568 STATIC void
 569 S_missingterm(pTHX_ char *s)
 570 {
 571     dVAR;
 572     char tmpbuf[3];
 573     char q;
 574     if (s) {
 575         char * const nl = strrchr(s,'\n');
 576         if (nl)
 577             *nl = '\0';
 578     }
 579     else if (isCNTRL(PL_multi_close)) {
 580         *tmpbuf = '^';
 581         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 582         tmpbuf[2] = '\0';
 583         s = tmpbuf;
 584     }
 585     else {
 586         *tmpbuf = (char)PL_multi_close;
 587         tmpbuf[1] = '\0';
 588         s = tmpbuf;
 589     }
 590     q = strchr(s,'"') ? '\'' : '"';
 591     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 592 }
 593
 594 /*
 595  * Check whether the named feature is enabled.
 596  */
 597 bool
 598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 599 {
 600     dVAR;
 601     HV * const hinthv = GvHV(PL_hintgv);
 602     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 603
 604     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 605
 606     if (namelen > MAX_FEATURE_LEN)
 607         return FALSE;
 608     memcpy(&he_name[8], name, namelen);
 609
 610     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 611 }
 612
 613 /*
 614  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 615  * utf16-to-utf8-reversed.
 616  */
 617
 618 #ifdef PERL_CR_FILTER
 619 static void
 620 strip_return(SV *sv)
 621 {
 622     register const char *s = SvPVX_const(sv);
 623     register const char * const e = s + SvCUR(sv);
 624
 625     PERL_ARGS_ASSERT_STRIP_RETURN;
 626
 627     /* outer loop optimized to do nothing if there are no CR-LFs */
 628     while (s < e) {
 629         if (*s++ == '\r' && *s == '\n') {
 630             /* hit a CR-LF, need to copy the rest */
 631             register char *d = s - 1;
 632             *d++ = *s++;
 633             while (s < e) {
 634                 if (*s == '\r' && s[1] == '\n')
 635                     s++;
 636                 *d++ = *s++;
 637             }
 638             SvCUR(sv) -= s - d;
 639             return;
 640         }
 641     }
 642 }
 643
 644 STATIC I32
 645 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 646 {
 647     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 648     if (count > 0 && !maxlen)
 649         strip_return(sv);
 650     return count;
 651 }
 652 #endif
 653
 654 /*
 655 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 656
 657 Creates and initialises a new lexer/parser state object, supplying
 658 a context in which to lex and parse from a new source of Perl code.
 659 A pointer to the new state object is placed in L</PL_parser>.  An entry
 660 is made on the save stack so that upon unwinding the new state object
 661 will be destroyed and the former value of L</PL_parser> will be restored.
 662 Nothing else need be done to clean up the parsing context.
 663
 664 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 665 non-null, provides a string (in SV form) containing code to be parsed.
 666 A copy of the string is made, so subsequent modification of I<line>
 667 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 668 from which code will be read to be parsed.  If both are non-null, the
 669 code in I<line> comes first and must consist of complete lines of input,
 670 and I<rsfp> supplies the remainder of the source.
 671
 672 The I<flags> parameter is reserved for future use, and must always
 673 be zero, except for one flag that is currently reserved for perl's internal
 674 use.
 675
 676 =cut
 677 */
 678
 679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 680    can share filters with the current parser. */
 681
 682 void
 683 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 684 {
 685     dVAR;
 686     const char *s = NULL;
 687     STRLEN len;
 688     yy_parser *parser, *oparser;
 689     if (flags && flags != LEX_START_SAME_FILTER)
 690         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 691
 692     /* create and initialise a parser */
 693
 694     Newxz(parser, 1, yy_parser);
 695     parser->old_parser = oparser = PL_parser;
 696     PL_parser = parser;
 697
 698     parser->stack = NULL;
 699     parser->ps = NULL;
 700     parser->stack_size = 0;
 701
 702     /* on scope exit, free this parser and restore any outer one */
 703     SAVEPARSER(parser);
 704     parser->saved_curcop = PL_curcop;
 705
 706     /* initialise lexer state */
 707
 708 #ifdef PERL_MAD
 709     parser->curforce = -1;
 710 #else
 711     parser->nexttoke = 0;
 712 #endif
 713     parser->error_count = oparser ? oparser->error_count : 0;
 714     parser->copline = NOLINE;
 715     parser->lex_state = LEX_NORMAL;
 716     parser->expect = XSTATE;
 717     parser->rsfp = rsfp;
 718     parser->rsfp_filters =
 719       !(flags & LEX_START_SAME_FILTER) || !oparser
 720         ? newAV()
 721         : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 722
 723     Newx(parser->lex_brackstack, 120, char);
 724     Newx(parser->lex_casestack, 12, char);
 725     *parser->lex_casestack = '\0';
 726
 727     if (line) {
 728         s = SvPV_const(line, len);
 729     } else {
 730         len = 0;
 731     }
 732
 733     if (!len) {
 734         parser->linestr = newSVpvs("\n;");
 735     } else {
 736         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 737         if (s[len-1] != ';')
 738             sv_catpvs(parser->linestr, "\n;");
 739     }
 740     parser->oldoldbufptr =
 741         parser->oldbufptr =
 742         parser->bufptr =
 743         parser->linestart = SvPVX(parser->linestr);
 744     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 745     parser->last_lop = parser->last_uni = NULL;
 746
 747     parser->in_pod = 0;
 748 }
 749
 750
 751 /* delete a parser object */
 752
 753 void
 754 Perl_parser_free(pTHX_  const yy_parser *parser)
 755 {
 756     PERL_ARGS_ASSERT_PARSER_FREE;
 757
 758     PL_curcop = parser->saved_curcop;
 759     SvREFCNT_dec(parser->linestr);
 760
 761     if (parser->rsfp == PerlIO_stdin())
 762         PerlIO_clearerr(parser->rsfp);
 763     else if (parser->rsfp && (!parser->old_parser ||
 764                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 765         PerlIO_close(parser->rsfp);
 766     SvREFCNT_dec(parser->rsfp_filters);
 767
 768     Safefree(parser->lex_brackstack);
 769     Safefree(parser->lex_casestack);
 770     PL_parser = parser->old_parser;
 771     Safefree(parser);
 772 }
 773
 774
 775 /*
 776 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 777
 778 Buffer scalar containing the chunk currently under consideration of the
 779 text currently being lexed.  This is always a plain string scalar (for
 780 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 781 normal scalar means; instead refer to the buffer directly by the pointer
 782 variables described below.
 783
 784 The lexer maintains various C<char*> pointers to things in the
 785 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 786 reallocated, all of these pointers must be updated.  Don't attempt to
 787 do this manually, but rather use L</lex_grow_linestr> if you need to
 788 reallocate the buffer.
 789
 790 The content of the text chunk in the buffer is commonly exactly one
 791 complete line of input, up to and including a newline terminator,
 792 but there are situations where it is otherwise.  The octets of the
 793 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 794 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 795 flag on this scalar, which may disagree with it.
 796
 797 For direct examination of the buffer, the variable
 798 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 799 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 800 of these pointers is usually preferable to examination of the scalar
 801 through normal scalar means.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 804
 805 Direct pointer to the end of the chunk of text currently being lexed, the
 806 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 807 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 808 always located at the end of the buffer, and does not count as part of
 809 the buffer's contents.
 810
 811 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 812
 813 Points to the current position of lexing inside the lexer buffer.
 814 Characters around this point may be freely examined, within
 815 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 816 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 817 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 818
 819 Lexing code (whether in the Perl core or not) moves this pointer past
 820 the characters that it consumes.  It is also expected to perform some
 821 bookkeeping whenever a newline character is consumed.  This movement
 822 can be more conveniently performed by the function L</lex_read_to>,
 823 which handles newlines appropriately.
 824
 825 Interpretation of the buffer's octets can be abstracted out by
 826 using the slightly higher-level functions L</lex_peek_unichar> and
 827 L</lex_read_unichar>.
 828
 829 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 830
 831 Points to the start of the current line inside the lexer buffer.
 832 This is useful for indicating at which column an error occurred, and
 833 not much else.  This must be updated by any lexing code that consumes
 834 a newline; the function L</lex_read_to> handles this detail.
 835
 836 =cut
 837 */
 838
 839 /*
 840 =for apidoc Amx|bool|lex_bufutf8
 841
 842 Indicates whether the octets in the lexer buffer
 843 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 844 of Unicode characters.  If not, they should be interpreted as Latin-1
 845 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 846
 847 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 848 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 849 encoding.
 850
 851 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 852 is significant, but not the whole story regarding the input character
 853 encoding.  Normally, when a file is being read, the scalar contains octets
 854 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 855 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 856 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 857 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 858 is in effect.  This logic may change in the future; use this function
 859 instead of implementing the logic yourself.
 860
 861 =cut
 862 */
 863
 864 bool
 865 Perl_lex_bufutf8(pTHX)
 866 {
 867     return UTF;
 868 }
 869
 870 /*
 871 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 872
 873 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 874 at least I<len> octets (including terminating NUL).  Returns a
 875 pointer to the reallocated buffer.  This is necessary before making
 876 any direct modification of the buffer that would increase its length.
 877 L</lex_stuff_pvn> provides a more convenient way to insert text into
 878 the buffer.
 879
 880 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 881 this function updates all of the lexer's variables that point directly
 882 into the buffer.
 883
 884 =cut
 885 */
 886
 887 char *
 888 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 889 {
 890     SV *linestr;
 891     char *buf;
 892     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 893     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 894     linestr = PL_parser->linestr;
 895     buf = SvPVX(linestr);
 896     if (len <= SvLEN(linestr))
 897         return buf;
 898     bufend_pos = PL_parser->bufend - buf;
 899     bufptr_pos = PL_parser->bufptr - buf;
 900     oldbufptr_pos = PL_parser->oldbufptr - buf;
 901     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 902     linestart_pos = PL_parser->linestart - buf;
 903     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 904     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 905     buf = sv_grow(linestr, len);
 906     PL_parser->bufend = buf + bufend_pos;
 907     PL_parser->bufptr = buf + bufptr_pos;
 908     PL_parser->oldbufptr = buf + oldbufptr_pos;
 909     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 910     PL_parser->linestart = buf + linestart_pos;
 911     if (PL_parser->last_uni)
 912         PL_parser->last_uni = buf + last_uni_pos;
 913     if (PL_parser->last_lop)
 914         PL_parser->last_lop = buf + last_lop_pos;
 915     return buf;
 916 }
 917
 918 /*
 919 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 920
 921 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 922 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 923 reallocating the buffer if necessary.  This means that lexing code that
 924 runs later will see the characters as if they had appeared in the input.
 925 It is not recommended to do this as part of normal parsing, and most
 926 uses of this facility run the risk of the inserted characters being
 927 interpreted in an unintended manner.
 928
 929 The string to be inserted is represented by I<len> octets starting
 930 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 931 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 932 The characters are recoded for the lexer buffer, according to how the
 933 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 934 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 935 function is more convenient.
 936
 937 =cut
 938 */
 939
/* Insert len octets from pv into the lexer buffer at bufptr, transcoding
 * between Latin-1 and UTF-8 when the encoding of the inserted text
 * (LEX_STUFF_UTF8 in flags) differs from the buffer's (UTF).
 *
 * Four cases, three code paths:
 *   buffer UTF-8,   text UTF-8    -> plain_copy
 *   buffer UTF-8,   text Latin-1  -> upgrade (high bytes become 2 bytes)
 *   buffer Latin-1, text UTF-8    -> downgrade (2-byte sequences become 1)
 *   buffer Latin-1, text Latin-1  -> plain_copy
 * Each path first makes room by shifting the tail of the buffer
 * (bufptr..bufend, plus the byte at bufend) to the right, then writes
 * the new text into the gap.  bufptr itself is not advanced, so the
 * inserted text is what gets lexed next.
 */
void
Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
{
    dVAR;
    char *bufptr;
    PERL_ARGS_ASSERT_LEX_STUFF_PVN;
    /* LEX_STUFF_UTF8 is the only flag accepted */
    if (flags & ~(LEX_STUFF_UTF8))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
    if (UTF) {
        if (flags & LEX_STUFF_UTF8) {
            /* same encoding on both sides */
            goto plain_copy;
        } else {
            /* Latin-1 text into a UTF-8 buffer.  highhalf counts the bytes
             * with the top bit set; each of those needs one extra octet. */
            STRLEN highhalf = 0;
            const char *p, *e = pv+len;
            for (p = pv; p != e; p++)
                highhalf += !!(((U8)*p) & 0x80);
            if (!highhalf)
                goto plain_copy;    /* pure ASCII: identical in both */
            /* +1 for the byte at bufend, which is moved along too */
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
            /* fetch bufptr only after growing: the buffer may have moved */
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
            SvCUR_set(PL_parser->linestr,
                SvCUR(PL_parser->linestr) + len+highhalf);
            PL_parser->bufend += len+highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c & 0x80) {
                    /* two-byte form: lead byte carries the top 2 bits,
                     * continuation byte the low 6 bits */
                    *bufptr++ = (char)(0xc0 | (c >> 6));
                    *bufptr++ = (char)(0x80 | (c & 0x3f));
                } else {
                    *bufptr++ = (char)c;
                }
            }
        }
    } else {
        if (flags & LEX_STUFF_UTF8) {
            /* UTF-8 text into a Latin-1 buffer.  First pass validates and
             * counts the two-byte sequences (highhalf); each of those
             * shrinks to a single octet. */
            STRLEN highhalf = 0;
            const char *p, *e = pv+len;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c >= 0xc4) {
                    /* lead byte of a character beyond U+00FF */
                    Perl_croak(aTHX_ "Lexing code attempted to stuff "
                                "non-Latin-1 character into Latin-1 input");
                } else if (c >= 0xc2 && p+1 != e &&
                            (((U8)p[1]) & 0xc0) == 0x80) {
                    /* well-formed two-byte sequence: skip continuation */
                    p++;
                    highhalf++;
                } else if (c >= 0x80) {
                    /* malformed UTF-8 */
                    /* Re-decode with the warn hook temporarily set to
                     * PERL_WARNHOOK_FATAL -- presumably so that any
                     * malformation warning is raised as an exception. */
                    ENTER;
                    SAVESPTR(PL_warnhook);
                    PL_warnhook = PERL_WARNHOOK_FATAL;
                    utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
                    LEAVE;
                }
            }
            if (!highhalf)
                goto plain_copy;
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
            SvCUR_set(PL_parser->linestr,
                SvCUR(PL_parser->linestr) + len-highhalf);
            PL_parser->bufend += len-highhalf;
            /* NOTE(review): this loop treats every byte with the top bit
             * set as the start of a two-byte sequence and reads p[1].  It
             * relies on the validation pass above having thrown for any
             * malformed input -- confirm that utf8n_to_uvuni always does
             * so here (e.g. when utf8 warnings are disabled). */
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c & 0x80) {
                    /* recombine: low 2 bits of lead, low 6 of continuation */
                    *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
                    p++;
                } else {
                    *bufptr++ = (char)c;
                }
            }
        } else {
            /* Shared by every case in which no transcoding is needed;
             * reached by goto from the branches above as well. */
            plain_copy:
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
            SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
            PL_parser->bufend += len;
            Copy(pv, bufptr, len, char);
        }
    }
}
1024
1025 /*
1026 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030 reallocating the buffer if necessary.  This means that lexing code that
1031 runs later will see the characters as if they had appeared in the input.
1032 It is not recommended to do this as part of normal parsing, and most
1033 uses of this facility run the risk of the inserted characters being
1034 interpreted in an unintended manner.
1035
1036 The string to be inserted is represented by octets starting at I<pv>
1037 and continuing to the first nul.  These octets are interpreted as either
1038 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039 in I<flags>.  The characters are recoded for the lexer buffer, according
1040 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041 If it is not convenient to nul-terminate a string to be inserted, the
1042 L</lex_stuff_pvn> function is more appropriate.
1043
1044 =cut
1045 */
1046
1047 void
1048 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049 {
1050     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051     lex_stuff_pvn(pv, strlen(pv), flags);
1052 }
1053
1054 /*
1055 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059 reallocating the buffer if necessary.  This means that lexing code that
1060 runs later will see the characters as if they had appeared in the input.
1061 It is not recommended to do this as part of normal parsing, and most
1062 uses of this facility run the risk of the inserted characters being
1063 interpreted in an unintended manner.
1064
1065 The string to be inserted is the string value of I<sv>.  The characters
1066 are recoded for the lexer buffer, according to how the buffer is currently
1067 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1068 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069 need to construct a scalar.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076 {
1077     char *pv;
1078     STRLEN len;
1079     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080     if (flags)
1081         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082     pv = SvPV(sv, len);
1083     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084 }
1085
1086 /*
1087 =for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1091 This hides the discarded text from any lexing code that runs later,
1092 as if the text had never appeared.
1093
1094 This is not the normal way to consume lexed text.  For that, use
1095 L</lex_read_to>.
1096
1097 =cut
1098 */
1099
1100 void
1101 Perl_lex_unstuff(pTHX_ char *ptr)
1102 {
1103     char *buf, *bufend;
1104     STRLEN unstuff_len;
1105     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106     buf = PL_parser->bufptr;
1107     if (ptr < buf)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109     if (ptr == buf)
1110         return;
1111     bufend = PL_parser->bufend;
1112     if (ptr > bufend)
1113         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114     unstuff_len = ptr - buf;
1115     Move(ptr, buf, bufend+1-ptr, char);
1116     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117     PL_parser->bufend = bufend - unstuff_len;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_read_to|char *ptr
1122
1123 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125 performing the correct bookkeeping whenever a newline character is passed.
1126 This is the normal way to consume lexed text.
1127
1128 Interpretation of the buffer's octets can be abstracted out by
1129 using the slightly higher-level functions L</lex_peek_unichar> and
1130 L</lex_read_unichar>.
1131
1132 =cut
1133 */
1134
1135 void
1136 Perl_lex_read_to(pTHX_ char *ptr)
1137 {
1138     char *s;
1139     PERL_ARGS_ASSERT_LEX_READ_TO;
1140     s = PL_parser->bufptr;
1141     if (ptr < s || ptr > PL_parser->bufend)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143     for (; s != ptr; s++)
1144         if (*s == '\n') {
1145             CopLINE_inc(PL_curcop);
1146             PL_parser->linestart = s+1;
1147         }
1148     PL_parser->bufptr = ptr;
1149 }
1150
1151 /*
1152 =for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155 up to I<ptr>.  The remaining content of the buffer will be moved, and
1156 all pointers into the buffer updated appropriately.  I<ptr> must not
1157 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158 it is not permitted to discard text that has yet to be lexed.
1159
1160 Normally it is not necessary to do this directly, because it suffices to
1161 use the implicit discarding behaviour of L</lex_next_chunk> and things
1162 based on it.  However, if a token stretches across multiple lines,
1163 and the lexing code has kept multiple lines of text in the buffer for
1164 that purpose, then after completion of the token it would be wise to
1165 explicitly discard the now-unneeded earlier lines, to avoid future
1166 multi-line tokens growing the buffer without bound.
1167
1168 =cut
1169 */
1170
1171 void
1172 Perl_lex_discard_to(pTHX_ char *ptr)
1173 {
1174     char *buf;
1175     STRLEN discard_len;
1176     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177     buf = SvPVX(PL_parser->linestr);
1178     if (ptr < buf)
1179         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180     if (ptr == buf)
1181         return;
1182     if (ptr > PL_parser->bufptr)
1183         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184     discard_len = ptr - buf;
1185     if (PL_parser->oldbufptr < ptr)
1186         PL_parser->oldbufptr = ptr;
1187     if (PL_parser->oldoldbufptr < ptr)
1188         PL_parser->oldoldbufptr = ptr;
1189     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190         PL_parser->last_uni = NULL;
1191     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192         PL_parser->last_lop = NULL;
1193     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195     PL_parser->bufend -= discard_len;
1196     PL_parser->bufptr -= discard_len;
1197     PL_parser->oldbufptr -= discard_len;
1198     PL_parser->oldoldbufptr -= discard_len;
1199     if (PL_parser->last_uni)
1200         PL_parser->last_uni -= discard_len;
1201     if (PL_parser->last_lop)
1202         PL_parser->last_lop -= discard_len;
1203 }
1204
1205 /*
1206 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208 Reads in the next chunk of text to be lexed, appending it to
1209 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1210 looked to the end of the current chunk and wants to know more.  It is
1211 usual, but not necessary, for lexing to have consumed the entirety of
1212 the current chunk at this time.
1213
1214 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215 chunk (i.e., the current chunk has been entirely consumed), normally the
1216 current chunk will be discarded at the same time that the new chunk is
1217 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218 will not be discarded.  If the current chunk has not been entirely
1219 consumed, then it will not be discarded regardless of the flag.
1220
1221 Returns true if some new text was added to the buffer, or false if the
1222 buffer has reached the end of the input text.
1223
1224 =cut
1225 */
1226
/* Private flag bit for lex_next_chunk(): do not read anything, go
 * straight to the end-of-input handling. */
#define LEX_FAKE_EOF 0x80000000

/* Append the next chunk of input to PL_parser->linestr and re-aim all
 * lexer pointers at the resulting buffer.
 *
 * flags may contain LEX_KEEP_PREVIOUS and/or LEX_FAKE_EOF; anything else
 * croaks.  Returns true if text was added.  Note that the synthetic
 * terminator appended at end of input counts as added text; false is
 * returned only when there is no source handle (rsfp) left.
 */
bool
Perl_lex_next_chunk(pTHX_ U32 flags)
{
    SV *linestr;
    char *buf;
    STRLEN old_bufend_pos, new_bufend_pos;
    STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
    bool got_some_for_debugger = 0;
    bool got_some;
    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
    linestr = PL_parser->linestr;
    buf = SvPVX(linestr);
    if (!(flags & LEX_KEEP_PREVIOUS) &&
            PL_parser->bufptr == PL_parser->bufend) {
        /* Current chunk fully consumed and not asked to keep it: empty
         * the buffer, so that every pointer restarts at offset 0. */
        old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
        linestart_pos = 0;
        /* last_uni/last_lop survive only if they sat exactly at the old
         * end of buffer; they then map onto the start of the new text. */
        if (PL_parser->last_uni != PL_parser->bufend)
            PL_parser->last_uni = NULL;
        if (PL_parser->last_lop != PL_parser->bufend)
            PL_parser->last_lop = NULL;
        last_uni_pos = last_lop_pos = 0;
        *buf = 0;
        SvCUR(linestr) = 0;
    } else {
        /* Keeping the existing text: remember every pointer as an offset,
         * because appending may reallocate the buffer. */
        old_bufend_pos = PL_parser->bufend - buf;
        bufptr_pos = PL_parser->bufptr - buf;
        oldbufptr_pos = PL_parser->oldbufptr - buf;
        oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
        linestart_pos = PL_parser->linestart - buf;
        last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
        last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
    }
    if (flags & LEX_FAKE_EOF) {
        goto eof;
    } else if (!PL_parser->rsfp) {
        /* no source handle (any more): nothing further to read */
        got_some = 0;
    } else if (filter_gets(linestr, old_bufend_pos)) {
        /* presumably appends the next line of input at offset
         * old_bufend_pos -- confirm against filter_gets() */
        got_some = 1;
        got_some_for_debugger = 1;
    } else {
        if (!SvPOK(linestr))   /* can get undefined by filter_gets */
            sv_setpvs(linestr, "");
        eof:
        /* End of real input.  Close filehandle (unless it was STDIN),
         * then add implicit termination.
         */
        if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
            PerlIO_clearerr(PL_parser->rsfp);
        else if (PL_parser->rsfp)
            (void)PerlIO_close(PL_parser->rsfp);
        PL_parser->rsfp = NULL;
        PL_parser->in_pod = 0;
#ifdef PERL_MAD
        if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
            PL_faketokens = 1;
#endif
        /* Outside eval, PL_minus_p / PL_minus_n each get closing text
         * appended exactly once (the flags are cleared afterwards);
         * otherwise just a terminating semicolon is added. */
        if (!PL_in_eval && PL_minus_p) {
            sv_catpvs(linestr,
                /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
            PL_minus_n = PL_minus_p = 0;
        } else if (!PL_in_eval && PL_minus_n) {
            sv_catpvs(linestr, /*{*/";}");
            PL_minus_n = 0;
        } else
            sv_catpvs(linestr, ";");
        /* the synthetic terminator counts as new text */
        got_some = 1;
    }
    /* Re-read the buffer address and rebuild all pointers from the saved
     * offsets. */
    buf = SvPVX(linestr);
    new_bufend_pos = SvCUR(linestr);
    PL_parser->bufend = buf + new_bufend_pos;
    PL_parser->bufptr = buf + bufptr_pos;
    PL_parser->oldbufptr = buf + oldbufptr_pos;
    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
    PL_parser->linestart = buf + linestart_pos;
    if (PL_parser->last_uni)
        PL_parser->last_uni = buf + last_uni_pos;
    if (PL_parser->last_lop)
        PL_parser->last_lop = buf + last_lop_pos;
    /* Only text that really came from filter_gets() is reported to the
     * debugger, never the synthetic terminator. */
    if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
            PL_curstash != PL_debstash) {
        /* debugger active and we're not compiling the debugger code,
         * so store the line into the debugger's array of lines
         */
        update_debugger_info(NULL, buf+old_bufend_pos,
            new_bufend_pos-old_bufend_pos);
    }
    return got_some;
}
1319
1320 /*
1321 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323 Looks ahead one (Unicode) character in the text currently being lexed.
1324 Returns the codepoint (unsigned integer value) of the next character,
1325 or -1 if lexing has reached the end of the input text.  To consume the
1326 peeked character, use L</lex_read_unichar>.
1327
1328 If the next character is in (or extends into) the next chunk of input
1329 text, the next chunk will be read in.  Normally the current chunk will be
1330 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331 then the current chunk will not be discarded.
1332
1333 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334 is encountered, an exception is generated.
1335
1336 =cut
1337 */
1338
1339 I32
1340 Perl_lex_peek_unichar(pTHX_ U32 flags)
1341 {
1342     dVAR;
1343     char *s, *bufend;
1344     if (flags & ~(LEX_KEEP_PREVIOUS))
1345         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346     s = PL_parser->bufptr;
1347     bufend = PL_parser->bufend;
1348     if (UTF) {
1349         U8 head;
1350         I32 unichar;
1351         STRLEN len, retlen;
1352         if (s == bufend) {
1353             if (!lex_next_chunk(flags))
1354                 return -1;
1355             s = PL_parser->bufptr;
1356             bufend = PL_parser->bufend;
1357         }
1358         head = (U8)*s;
1359         if (!(head & 0x80))
1360             return head;
1361         if (head & 0x40) {
1362             len = PL_utf8skip[head];
1363             while ((STRLEN)(bufend-s) < len) {
1364                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365                     break;
1366                 s = PL_parser->bufptr;
1367                 bufend = PL_parser->bufend;
1368             }
1369         }
1370         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371         if (retlen == (STRLEN)-1) {
1372             /* malformed UTF-8 */
1373             ENTER;
1374             SAVESPTR(PL_warnhook);
1375             PL_warnhook = PERL_WARNHOOK_FATAL;
1376             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377             LEAVE;
1378         }
1379         return unichar;
1380     } else {
1381         if (s == bufend) {
1382             if (!lex_next_chunk(flags))
1383                 return -1;
1384             s = PL_parser->bufptr;
1385         }
1386         return (U8)*s;
1387     }
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393 Reads the next (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the character read,
1395 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396 if lexing has reached the end of the input text.  To non-destructively
1397 examine the next character, use L</lex_peek_unichar> instead.
1398
1399 If the next character is in (or extends into) the next chunk of input
1400 text, the next chunk will be read in.  Normally the current chunk will be
1401 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402 then the current chunk will not be discarded.
1403
1404 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405 is encountered, an exception is generated.
1406
1407 =cut
1408 */
1409
1410 I32
1411 Perl_lex_read_unichar(pTHX_ U32 flags)
1412 {
1413     I32 c;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416     c = lex_peek_unichar(flags);
1417     if (c != -1) {
1418         if (c == '\n')
1419             CopLINE_inc(PL_curcop);
1420         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1421     }
1422     return c;
1423 }
1424
1425 /*
1426 =for apidoc Amx|void|lex_read_space|U32 flags
1427
1428 Reads optional spaces, in Perl style, in the text currently being
1429 lexed.  The spaces may include ordinary whitespace characters and
1430 Perl-style comments.  C<#line> directives are processed if encountered.
1431 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1432 at a non-space character (or the end of the input text).
1433
1434 If spaces extend into the next chunk of input text, the next chunk will
1435 be read in.  Normally the current chunk will be discarded at the same
1436 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1437 chunk will not be discarded.
1438
1439 =cut
1440 */
1441
/* Private flag bit for lex_read_space(): stop at the end of the current
 * buffer instead of calling lex_next_chunk() for more input. */
#define LEX_NO_NEXT_CHUNK 0x80000000

/* Advance PL_parser->bufptr past whitespace and '#' comments.
 *
 * flags may contain LEX_KEEP_PREVIOUS (passed on to lex_next_chunk())
 * and LEX_NO_NEXT_CHUNK; anything else croaks.  Every newline consumed
 * updates linestart and is reported to incline(), which bumps the line
 * number and looks for a "# line" directive at the start of the next
 * line.  Under PERL_MAD with PL_madskills set, the skipped text is also
 * collected in PL_skipwhite.
 */
void
Perl_lex_read_space(pTHX_ U32 flags)
{
    char *s, *bufend;
    /* set when a newline ended the buffer, so that incline() could not
     * yet look at the line that follows it */
    bool need_incline = 0;
    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
#ifdef PERL_MAD
    /* start from a fresh PL_skipwhite on every call */
    if (PL_skipwhite) {
        sv_free(PL_skipwhite);
        PL_skipwhite = NULL;
    }
    if (PL_madskills)
        PL_skipwhite = newSVpvs("");
#endif /* PERL_MAD */
    s = PL_parser->bufptr;
    bufend = PL_parser->bufend;
    while (1) {
        char c = *s;
        if (c == '#') {
            /* Comment: skip up to, but not including, the newline (or
             * the end of the buffer).  The newline itself is handled by
             * the next iteration. */
            do {
                c = *++s;
            } while (!(c == '\n' || (c == 0 && s == bufend)));
        } else if (c == '\n') {
            s++;
            PL_parser->linestart = s;
            if (s == bufend)
                /* next line not in the buffer yet: defer incline() */
                need_incline = 1;
            else
                incline(s);
        } else if (isSPACE(c)) {
            s++;
        } else if (c == 0 && s == bufend) {
            /* Ran out of buffered text (a NUL anywhere else is not
             * treated as the end of the buffer). */
            bool got_more;
#ifdef PERL_MAD
            if (PL_madskills)
                sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
#endif /* PERL_MAD */
            if (flags & LEX_NO_NEXT_CHUNK)
                break;
            /* publish the position before the buffer is refilled */
            PL_parser->bufptr = s;
            /* The line number is bumped only for the duration of the
             * read -- presumably so that whatever is recorded while
             * reading is attributed to the upcoming line; TODO confirm. */
            CopLINE_inc(PL_curcop);
            got_more = lex_next_chunk(flags);
            CopLINE_dec(PL_curcop);
            /* the buffer may have been emptied or moved: reload */
            s = PL_parser->bufptr;
            bufend = PL_parser->bufend;
            if (!got_more)
                break;
            /* carry out the deferred incline(), but only while a source
             * handle is still open */
            if (need_incline && PL_parser->rsfp) {
                incline(s);
                need_incline = 0;
            }
        } else {
            /* first character that is neither space nor comment */
            break;
        }
    }
#ifdef PERL_MAD
    if (PL_madskills)
        sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
#endif /* PERL_MAD */
    PL_parser->bufptr = s;
}
1506
1507 /*
1508  * S_incline
1509  * This subroutine has nothing to do with tilting, whether at windmills
1510  * or pinball tables.  Its name is short for "increment line".  It
1511  * increments the current line number in CopLINE(PL_curcop) and checks
1512  * to see whether the line starts with a comment of the form
1513  *    # line 500 "foo.pm"
1514  * If so, it sets the current line number and file to the values in the comment.
1515  */
1516
1517 STATIC void
1518 S_incline(pTHX_ const char *s)
1519 {
1520     dVAR;
1521     const char *t;
1522     const char *n;
1523     const char *e;
1524     line_t line_num;
1525
1526     PERL_ARGS_ASSERT_INCLINE;
1527
1528     CopLINE_inc(PL_curcop);
1529     if (*s++ != '#')
1530         return;
1531     while (SPACE_OR_TAB(*s))
1532         s++;
1533     if (strnEQ(s, "line", 4))
1534         s += 4;
1535     else
1536         return;
1537     if (SPACE_OR_TAB(*s))
1538         s++;
1539     else
1540         return;
1541     while (SPACE_OR_TAB(*s))
1542         s++;
1543     if (!isDIGIT(*s))
1544         return;
1545
1546     n = s;
1547     while (isDIGIT(*s))
1548         s++;
1549     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1550         return;
1551     while (SPACE_OR_TAB(*s))
1552         s++;
1553     if (*s == '"' && (t = strchr(s+1, '"'))) {
1554         s++;
1555         e = t + 1;
1556     }
1557     else {
1558         t = s;
1559         while (!isSPACE(*t))
1560             t++;
1561         e = t;
1562     }
1563     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1564         e++;
1565     if (*e != '\n' && *e != '\0')
1566         return;         /* false alarm */
1567
1568     line_num = atoi(n)-1;
1569
1570     if (t - s > 0) {
1571         const STRLEN len = t - s;
1572         SV *const temp_sv = CopFILESV(PL_curcop);
1573         const char *cf;
1574         STRLEN tmplen;
1575
1576         if (temp_sv) {
1577             cf = SvPVX(temp_sv);
1578             tmplen = SvCUR(temp_sv);
1579         } else {
1580             cf = NULL;
1581             tmplen = 0;
1582         }
1583
1584         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1585             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1586              * to *{"::_<newfilename"} */
1587             /* However, the long form of evals is only turned on by the
1588                debugger - usually they're "(eval %lu)" */
1589             char smallbuf[128];
1590             char *tmpbuf;
1591             GV **gvp;
1592             STRLEN tmplen2 = len;
1593             if (tmplen + 2 <= sizeof smallbuf)
1594                 tmpbuf = smallbuf;
1595             else
1596                 Newx(tmpbuf, tmplen + 2, char);
1597             tmpbuf[0] = '_';
1598             tmpbuf[1] = '<';
1599             memcpy(tmpbuf + 2, cf, tmplen);
1600             tmplen += 2;
1601             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1602             if (gvp) {
1603                 char *tmpbuf2;
1604                 GV *gv2;
1605
1606                 if (tmplen2 + 2 <= sizeof smallbuf)
1607                     tmpbuf2 = smallbuf;
1608                 else
1609                     Newx(tmpbuf2, tmplen2 + 2, char);
1610
1611                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1612                     /* Either they malloc'd it, or we malloc'd it,
1613                        so no prefix is present in ours.  */
1614                     tmpbuf2[0] = '_';
1615                     tmpbuf2[1] = '<';
1616                 }
1617
1618                 memcpy(tmpbuf2 + 2, s, tmplen2);
1619                 tmplen2 += 2;
1620
1621                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1622                 if (!isGV(gv2)) {
1623                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1624                     /* adjust ${"::_<newfilename"} to store the new file name */
1625                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1626                     /* The line number may differ. If that is the case,
1627                        alias the saved lines that are in the array.
1628                        Otherwise alias the whole array. */
1629                     if (CopLINE(PL_curcop) == line_num) {
1630                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1631                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1632                     }
1633                     else if (GvAV(*gvp)) {
1634                         AV * const av = GvAV(*gvp);
1635                         const I32 start = CopLINE(PL_curcop)+1;
1636                         I32 items = AvFILLp(av) - start;
1637                         if (items > 0) {
1638                             AV * const av2 = GvAVn(gv2);
1639                             SV **svp = AvARRAY(av) + start;
1640                             I32 l = (I32)line_num+1;
1641                             while (items--)
1642                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1643                         }
1644                     }
1645                 }
1646
1647                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1648             }
1649             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1650         }
1651         CopFILE_free(PL_curcop);
1652         CopFILE_setn(PL_curcop, s, len);
1653     }
1654     CopLINE_set(PL_curcop, line_num);
1655 }
1656
1657 #ifdef PERL_MAD
1658 /* skip space before PL_thistoken */
1659
1660 STATIC char *
1661 S_skipspace0(pTHX_ register char *s)
1662 {
1663     PERL_ARGS_ASSERT_SKIPSPACE0;
1664
1665     s = skipspace(s);
1666     if (!PL_madskills)
1667         return s;
1668     if (PL_skipwhite) {
1669         if (!PL_thiswhite)
1670             PL_thiswhite = newSVpvs("");
1671         sv_catsv(PL_thiswhite, PL_skipwhite);
1672         sv_free(PL_skipwhite);
1673         PL_skipwhite = 0;
1674     }
1675     PL_realtokenstart = s - SvPVX(PL_linestr);
1676     return s;
1677 }
1678
1679 /* skip space after PL_thistoken */
1680
1681 STATIC char *
1682 S_skipspace1(pTHX_ register char *s)
1683 {
1684     const char *start = s;
1685     I32 startoff = start - SvPVX(PL_linestr);
1686
1687     PERL_ARGS_ASSERT_SKIPSPACE1;
1688
1689     s = skipspace(s);
1690     if (!PL_madskills)
1691         return s;
1692     start = SvPVX(PL_linestr) + startoff;
1693     if (!PL_thistoken && PL_realtokenstart >= 0) {
1694         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1695         PL_thistoken = newSVpvn(tstart, start - tstart);
1696     }
1697     PL_realtokenstart = -1;
1698     if (PL_skipwhite) {
1699         if (!PL_nextwhite)
1700             PL_nextwhite = newSVpvs("");
1701         sv_catsv(PL_nextwhite, PL_skipwhite);
1702         sv_free(PL_skipwhite);
1703         PL_skipwhite = 0;
1704     }
1705     return s;
1706 }
1707
1708 STATIC char *
1709 S_skipspace2(pTHX_ register char *s, SV **svp)
1710 {
1711     char *start;
1712     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1713     const I32 startoff = s - SvPVX(PL_linestr);
1714
1715     PERL_ARGS_ASSERT_SKIPSPACE2;
1716
1717     s = skipspace(s);
1718     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1719     if (!PL_madskills || !svp)
1720         return s;
1721     start = SvPVX(PL_linestr) + startoff;
1722     if (!PL_thistoken && PL_realtokenstart >= 0) {
1723         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1724         PL_thistoken = newSVpvn(tstart, start - tstart);
1725         PL_realtokenstart = -1;
1726     }
1727     if (PL_skipwhite) {
1728         if (!*svp)
1729             *svp = newSVpvs("");
1730         sv_setsv(*svp, PL_skipwhite);
1731         sv_free(PL_skipwhite);
1732         PL_skipwhite = 0;
1733     }
1734
1735     return s;
1736 }
1737 #endif
1738
1739 STATIC void
1740 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1741 {
1742     AV *av = CopFILEAVx(PL_curcop);
1743     if (av) {
1744         SV * const sv = newSV_type(SVt_PVMG);
1745         if (orig_sv)
1746             sv_setsv(sv, orig_sv);
1747         else
1748             sv_setpvn(sv, buf, len);
1749         (void)SvIOK_on(sv);
1750         SvIV_set(sv, 0);
1751         av_store(av, (I32)CopLINE(PL_curcop), sv);
1752     }
1753 }
1754
1755 /*
1756  * S_skipspace
1757  * Called to gobble the appropriate amount and type of whitespace.
1758  * Skips comments as well.
1759  */
1760
1761 STATIC char *
1762 S_skipspace(pTHX_ register char *s)
1763 {
1764 #ifdef PERL_MAD
1765     char *start = s;
1766 #endif /* PERL_MAD */
1767     PERL_ARGS_ASSERT_SKIPSPACE;
1768 #ifdef PERL_MAD
1769     if (PL_skipwhite) {
1770         sv_free(PL_skipwhite);
1771         PL_skipwhite = NULL;
1772     }
1773 #endif /* PERL_MAD */
1774     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1775         while (s < PL_bufend && SPACE_OR_TAB(*s))
1776             s++;
1777     } else {
1778         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1779         PL_bufptr = s;
1780         lex_read_space(LEX_KEEP_PREVIOUS |
1781                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1782                     LEX_NO_NEXT_CHUNK : 0));
1783         s = PL_bufptr;
1784         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1785         if (PL_linestart > PL_bufptr)
1786             PL_bufptr = PL_linestart;
1787         return s;
1788     }
1789 #ifdef PERL_MAD
1790     if (PL_madskills)
1791         PL_skipwhite = newSVpvn(start, s-start);
1792 #endif /* PERL_MAD */
1793     return s;
1794 }
1795
1796 /*
1797  * S_check_uni
1798  * Check the unary operators to ensure there's no ambiguity in how they're
1799  * used.  An ambiguous piece of code would be:
1800  *     rand + 5
1801  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1802  * the +5 is its argument.
1803  */
1804
1805 STATIC void
1806 S_check_uni(pTHX)
1807 {
1808     dVAR;
1809     const char *s;
1810     const char *t;
1811
1812     if (PL_oldoldbufptr != PL_last_uni)
1813         return;
1814     while (isSPACE(*PL_last_uni))
1815         PL_last_uni++;
1816     s = PL_last_uni;
1817     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1818         s++;
1819     if ((t = strchr(s, '(')) && t < PL_bufptr)
1820         return;
1821
1822     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1823                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1824                      (int)(s - PL_last_uni), PL_last_uni);
1825 }
1826
1827 /*
1828  * LOP : macro to build a list operator.  Its behaviour has been replaced
1829  * with a subroutine, S_lop() for which LOP is just another name.
1830  */
1831
1832 #define LOP(f,x) return lop(f,x,s)
1833
1834 /*
1835  * S_lop
1836  * Build a list operator (or something that might be one).  The rules:
1837  *  - if we have a next token, then it's a list operator [why?]
1838  *  - if the next thing is an opening paren, then it's a function
1839  *  - else it's a list operator
1840  */
1841
1842 STATIC I32
1843 S_lop(pTHX_ I32 f, int x, char *s)
1844 {
1845     dVAR;
1846
1847     PERL_ARGS_ASSERT_LOP;
1848
1849     pl_yylval.ival = f;
1850     CLINE;
1851     PL_expect = x;
1852     PL_bufptr = s;
1853     PL_last_lop = PL_oldbufptr;
1854     PL_last_lop_op = (OPCODE)f;
1855 #ifdef PERL_MAD
1856     if (PL_lasttoke)
1857         goto lstop;
1858 #else
1859     if (PL_nexttoke)
1860         goto lstop;
1861 #endif
1862     if (*s == '(')
1863         return REPORT(FUNC);
1864     s = PEEKSPACE(s);
1865     if (*s == '(')
1866         return REPORT(FUNC);
1867     else {
1868         lstop:
1869         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1870             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1871         return REPORT(LSTOP);
1872     }
1873 }
1874
1875 #ifdef PERL_MAD
1876  /*
1877  * S_start_force
1878  * Sets up for an eventual force_next().  start_force(0) basically does
1879  * an unshift, while start_force(-1) does a push.  yylex removes items
1880  * on the "pop" end.
1881  */
1882
1883 STATIC void
1884 S_start_force(pTHX_ int where)
1885 {
1886     int i;
1887
1888     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1889         where = PL_lasttoke;
1890     assert(PL_curforce < 0 || PL_curforce == where);
1891     if (PL_curforce != where) {
1892         for (i = PL_lasttoke; i > where; --i) {
1893             PL_nexttoke[i] = PL_nexttoke[i-1];
1894         }
1895         PL_lasttoke++;
1896     }
1897     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1898         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1899     PL_curforce = where;
1900     if (PL_nextwhite) {
1901         if (PL_madskills)
1902             curmad('^', newSVpvs(""));
1903         CURMAD('_', PL_nextwhite);
1904     }
1905 }
1906
1907 STATIC void
1908 S_curmad(pTHX_ char slot, SV *sv)
1909 {
1910     MADPROP **where;
1911
1912     if (!sv)
1913         return;
1914     if (PL_curforce < 0)
1915         where = &PL_thismad;
1916     else
1917         where = &PL_nexttoke[PL_curforce].next_mad;
1918
1919     if (PL_faketokens)
1920         sv_setpvs(sv, "");
1921     else {
1922         if (!IN_BYTES) {
1923             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1924                 SvUTF8_on(sv);
1925             else if (PL_encoding) {
1926                 sv_recode_to_utf8(sv, PL_encoding);
1927             }
1928         }
1929     }
1930
1931     /* keep a slot open for the head of the list? */
1932     if (slot != '_' && *where && (*where)->mad_key == '^') {
1933         (*where)->mad_key = slot;
1934         sv_free(MUTABLE_SV(((*where)->mad_val)));
1935         (*where)->mad_val = (void*)sv;
1936     }
1937     else
1938         addmad(newMADsv(slot, sv), where, 0);
1939 }
1940 #else
1941 #  define start_force(where)    NOOP
1942 #  define curmad(slot, sv)      NOOP
1943 #endif
1944
1945 /*
1946  * S_force_next
1947  * When the lexer realizes it knows the next token (for instance,
1948  * it is reordering tokens for the parser) then it can call S_force_next
1949  * to know what token to return the next time the lexer is called.  Caller
1950  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1951  * and possibly PL_expect to ensure the lexer handles the token correctly.
1952  */
1953
1954 STATIC void
1955 S_force_next(pTHX_ I32 type)
1956 {
1957     dVAR;
1958 #ifdef DEBUGGING
1959     if (DEBUG_T_TEST) {
1960         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1961         tokereport(type, &NEXTVAL_NEXTTOKE);
1962     }
1963 #endif
1964 #ifdef PERL_MAD
1965     if (PL_curforce < 0)
1966         start_force(PL_lasttoke);
1967     PL_nexttoke[PL_curforce].next_type = type;
1968     if (PL_lex_state != LEX_KNOWNEXT)
1969         PL_lex_defer = PL_lex_state;
1970     PL_lex_state = LEX_KNOWNEXT;
1971     PL_lex_expect = PL_expect;
1972     PL_curforce = -1;
1973 #else
1974     PL_nexttype[PL_nexttoke] = type;
1975     PL_nexttoke++;
1976     if (PL_lex_state != LEX_KNOWNEXT) {
1977         PL_lex_defer = PL_lex_state;
1978         PL_lex_expect = PL_expect;
1979         PL_lex_state = LEX_KNOWNEXT;
1980     }
1981 #endif
1982 }
1983
1984 void
1985 Perl_yyunlex(pTHX)
1986 {
1987     int yyc = PL_parser->yychar;
1988     if (yyc != YYEMPTY) {
1989         if (yyc) {
1990             start_force(-1);
1991             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1992             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1993                 PL_lex_allbrackets--;
1994                 PL_lex_brackets--;
1995                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1996             } else if (yyc == '('/*)*/) {
1997                 PL_lex_allbrackets--;
1998                 yyc |= (2<<24);
1999             }
2000             force_next(yyc);
2001         }
2002         PL_parser->yychar = YYEMPTY;
2003     }
2004 }
2005
2006 STATIC SV *
2007 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2008 {
2009     dVAR;
2010     SV * const sv = newSVpvn_utf8(start, len,
2011                                   !IN_BYTES
2012                                   && UTF
2013                                   && !is_ascii_string((const U8*)start, len)
2014                                   && is_utf8_string((const U8*)start, len));
2015     return sv;
2016 }
2017
2018 /*
2019  * S_force_word
2020  * When the lexer knows the next thing is a word (for instance, it has
2021  * just seen -> and it knows that the next char is a word char, then
2022  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2023  * lookahead.
2024  *
2025  * Arguments:
2026  *   char *start : buffer position (must be within PL_linestr)
2027  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2028  *   int check_keyword : if true, Perl checks to make sure the word isn't
2029  *       a keyword (do this if the word is a label, e.g. goto FOO)
2030  *   int allow_pack : if true, : characters will also be allowed (require,
2031  *       use, etc. do this)
2032  *   int allow_initial_tick : used by the "sub" lexer only.
2033  */
2034
2035 STATIC char *
2036 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2037 {
2038     dVAR;
2039     register char *s;
2040     STRLEN len;
2041
2042     PERL_ARGS_ASSERT_FORCE_WORD;
2043
2044     start = SKIPSPACE1(start);
2045     s = start;
2046     if (isIDFIRST_lazy_if(s,UTF) ||
2047         (allow_pack && *s == ':') ||
2048         (allow_initial_tick && *s == '\'') )
2049     {
2050         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2051         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2052             return start;
2053         start_force(PL_curforce);
2054         if (PL_madskills)
2055             curmad('X', newSVpvn(start,s-start));
2056         if (token == METHOD) {
2057             s = SKIPSPACE1(s);
2058             if (*s == '(')
2059                 PL_expect = XTERM;
2060             else {
2061                 PL_expect = XOPERATOR;
2062             }
2063         }
2064         if (PL_madskills)
2065             curmad('g', newSVpvs( "forced" ));
2066         NEXTVAL_NEXTTOKE.opval
2067             = (OP*)newSVOP(OP_CONST,0,
2068                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2069         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2070         force_next(token);
2071     }
2072     return s;
2073 }
2074
2075 /*
2076  * S_force_ident
2077  * Called when the lexer wants $foo *foo &foo etc, but the program
2078  * text only contains the "foo" portion.  The first argument is a pointer
2079  * to the "foo", and the second argument is the type symbol to prefix.
2080  * Forces the next token to be a "WORD".
2081  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2082  */
2083
2084 STATIC void
2085 S_force_ident(pTHX_ register const char *s, int kind)
2086 {
2087     dVAR;
2088
2089     PERL_ARGS_ASSERT_FORCE_IDENT;
2090
2091     if (*s) {
2092         const STRLEN len = strlen(s);
2093         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2094         start_force(PL_curforce);
2095         NEXTVAL_NEXTTOKE.opval = o;
2096         force_next(WORD);
2097         if (kind) {
2098             o->op_private = OPpCONST_ENTERED;
2099             /* XXX see note in pp_entereval() for why we forgo typo
2100                warnings if the symbol must be introduced in an eval.
2101                GSAR 96-10-12 */
2102             gv_fetchpvn_flags(s, len,
2103                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2104                               : GV_ADD,
2105                               kind == '$' ? SVt_PV :
2106                               kind == '@' ? SVt_PVAV :
2107                               kind == '%' ? SVt_PVHV :
2108                               SVt_PVGV
2109                               );
2110         }
2111     }
2112 }
2113
2114 NV
2115 Perl_str_to_version(pTHX_ SV *sv)
2116 {
2117     NV retval = 0.0;
2118     NV nshift = 1.0;
2119     STRLEN len;
2120     const char *start = SvPV_const(sv,len);
2121     const char * const end = start + len;
2122     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2123
2124     PERL_ARGS_ASSERT_STR_TO_VERSION;
2125
2126     while (start < end) {
2127         STRLEN skip;
2128         UV n;
2129         if (utf)
2130             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2131         else {
2132             n = *(U8*)start;
2133             skip = 1;
2134         }
2135         retval += ((NV)n)/nshift;
2136         start += skip;
2137         nshift *= 1000;
2138     }
2139     return retval;
2140 }
2141
2142 /*
2143  * S_force_version
2144  * Forces the next token to be a version number.
2145  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2146  * and if "guessing" is TRUE, then no new token is created (and the caller
2147  * must use an alternative parsing method).
2148  */
2149
2150 STATIC char *
2151 S_force_version(pTHX_ char *s, int guessing)
2152 {
2153     dVAR;
2154     OP *version = NULL;
2155     char *d;
2156 #ifdef PERL_MAD
2157     I32 startoff = s - SvPVX(PL_linestr);
2158 #endif
2159
2160     PERL_ARGS_ASSERT_FORCE_VERSION;
2161
2162     s = SKIPSPACE1(s);
2163
2164     d = s;
2165     if (*d == 'v')
2166         d++;
2167     if (isDIGIT(*d)) {
2168         while (isDIGIT(*d) || *d == '_' || *d == '.')
2169             d++;
2170 #ifdef PERL_MAD
2171         if (PL_madskills) {
2172             start_force(PL_curforce);
2173             curmad('X', newSVpvn(s,d-s));
2174         }
2175 #endif
2176         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2177             SV *ver;
2178 #ifdef USE_LOCALE_NUMERIC
2179             char *loc = setlocale(LC_NUMERIC, "C");
2180 #endif
2181             s = scan_num(s, &pl_yylval);
2182 #ifdef USE_LOCALE_NUMERIC
2183             setlocale(LC_NUMERIC, loc);
2184 #endif
2185             version = pl_yylval.opval;
2186             ver = cSVOPx(version)->op_sv;
2187             if (SvPOK(ver) && !SvNIOK(ver)) {
2188                 SvUPGRADE(ver, SVt_PVNV);
2189                 SvNV_set(ver, str_to_version(ver));
2190                 SvNOK_on(ver);          /* hint that it is a version */
2191             }
2192         }
2193         else if (guessing) {
2194 #ifdef PERL_MAD
2195             if (PL_madskills) {
2196                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2197                 PL_nextwhite = 0;
2198                 s = SvPVX(PL_linestr) + startoff;
2199             }
2200 #endif
2201             return s;
2202         }
2203     }
2204
2205 #ifdef PERL_MAD
2206     if (PL_madskills && !version) {
2207         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2208         PL_nextwhite = 0;
2209         s = SvPVX(PL_linestr) + startoff;
2210     }
2211 #endif
2212     /* NOTE: The parser sees the package name and the VERSION swapped */
2213     start_force(PL_curforce);
2214     NEXTVAL_NEXTTOKE.opval = version;
2215     force_next(WORD);
2216
2217     return s;
2218 }
2219
2220 /*
2221  * S_force_strict_version
2222  * Forces the next token to be a version number using strict syntax rules.
2223  */
2224
2225 STATIC char *
2226 S_force_strict_version(pTHX_ char *s)
2227 {
2228     dVAR;
2229     OP *version = NULL;
2230 #ifdef PERL_MAD
2231     I32 startoff = s - SvPVX(PL_linestr);
2232 #endif
2233     const char *errstr = NULL;
2234
2235     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2236
2237     while (isSPACE(*s)) /* leading whitespace */
2238         s++;
2239
2240     if (is_STRICT_VERSION(s,&errstr)) {
2241         SV *ver = newSV(0);
2242         s = (char *)scan_version(s, ver, 0);
2243         version = newSVOP(OP_CONST, 0, ver);
2244     }
2245     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2246             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2247     {
2248         PL_bufptr = s;
2249         if (errstr)
2250             yyerror(errstr); /* version required */
2251         return s;
2252     }
2253
2254 #ifdef PERL_MAD
2255     if (PL_madskills && !version) {
2256         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2257         PL_nextwhite = 0;
2258         s = SvPVX(PL_linestr) + startoff;
2259     }
2260 #endif
2261     /* NOTE: The parser sees the package name and the VERSION swapped */
2262     start_force(PL_curforce);
2263     NEXTVAL_NEXTTOKE.opval = version;
2264     force_next(WORD);
2265
2266     return s;
2267 }
2268
2269 /*
2270  * S_tokeq
2271  * Tokenize a quoted string passed in as an SV.  It finds the next
2272  * chunk, up to end of string or a backslash.  It may make a new
2273  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2274  * turns \\ into \.
2275  */
2276
2277 STATIC SV *
2278 S_tokeq(pTHX_ SV *sv)
2279 {
2280     dVAR;
2281     register char *s;
2282     register char *send;
2283     register char *d;
2284     STRLEN len = 0;
2285     SV *pv = sv;
2286
2287     PERL_ARGS_ASSERT_TOKEQ;
2288
2289     if (!SvLEN(sv))
2290         goto finish;
2291
2292     s = SvPV_force(sv, len);
2293     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2294         goto finish;
2295     send = s + len;
2296     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2297     while (s < send && !(*s == '\\' && s[1] == '\\'))
2298         s++;
2299     if (s == send)
2300         goto finish;
2301     d = s;
2302     if ( PL_hints & HINT_NEW_STRING ) {
2303         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2304     }
2305     while (s < send) {
2306         if (*s == '\\') {
2307             if (s + 1 < send && (s[1] == '\\'))
2308                 s++;            /* all that, just for this */
2309         }
2310         *d++ = *s++;
2311     }
2312     *d = '\0';
2313     SvCUR_set(sv, d - SvPVX_const(sv));
2314   finish:
2315     if ( PL_hints & HINT_NEW_STRING )
2316        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2317     return sv;
2318 }
2319
2320 /*
2321  * Now come three functions related to double-quote context,
2322  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2323  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2324  * interact with PL_lex_state, and create fake ( ... ) argument lists
2325  * to handle functions and concatenation.
2326  * They assume that whoever calls them will be setting up a fake
2327  * join call, because each subthing puts a ',' after it.  This lets
2328  *   "lower \luPpEr"
2329  * become
2330  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2331  *
2332  * (I'm not sure whether the spurious commas at the end of lcfirst's
2333  * arguments and join's arguments are created or not).
2334  */
2335
2336 /*
2337  * S_sublex_start
2338  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2339  *
2340  * Pattern matching will set PL_lex_op to the pattern-matching op to
2341  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2342  *
2343  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2344  *
2345  * Everything else becomes a FUNC.
2346  *
2347  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2348  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2349  * call to S_sublex_push().
2350  */
2351
2352 STATIC I32
2353 S_sublex_start(pTHX)
2354 {
2355     dVAR;
2356     register const I32 op_type = pl_yylval.ival;
2357
2358     if (op_type == OP_NULL) {
2359         pl_yylval.opval = PL_lex_op;
2360         PL_lex_op = NULL;
2361         return THING;
2362     }
2363     if (op_type == OP_CONST || op_type == OP_READLINE) {
2364         SV *sv = tokeq(PL_lex_stuff);
2365
2366         if (SvTYPE(sv) == SVt_PVIV) {
2367             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2368             STRLEN len;
2369             const char * const p = SvPV_const(sv, len);
2370             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2371             SvREFCNT_dec(sv);
2372             sv = nsv;
2373         }
2374         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2375         PL_lex_stuff = NULL;
2376         /* Allow <FH> // "foo" */
2377         if (op_type == OP_READLINE)
2378             PL_expect = XTERMORDORDOR;
2379         return THING;
2380     }
2381     else if (op_type == OP_BACKTICK && PL_lex_op) {
2382         /* readpipe() vas overriden */
2383         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2384         pl_yylval.opval = PL_lex_op;
2385         PL_lex_op = NULL;
2386         PL_lex_stuff = NULL;
2387         return THING;
2388     }
2389
2390     PL_sublex_info.super_state = PL_lex_state;
2391     PL_sublex_info.sub_inwhat = (U16)op_type;
2392     PL_sublex_info.sub_op = PL_lex_op;
2393     PL_lex_state = LEX_INTERPPUSH;
2394
2395     PL_expect = XTERM;
2396     if (PL_lex_op) {
2397         pl_yylval.opval = PL_lex_op;
2398         PL_lex_op = NULL;
2399         return PMFUNC;
2400     }
2401     else
2402         return FUNC;
2403 }
2404
2405 /*
2406  * S_sublex_push
2407  * Create a new scope to save the lexing state.  The scope will be
2408  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2409  * to the uc, lc, etc. found before.
2410  * Sets PL_lex_state to LEX_INTERPCONCAT.
2411  */
2412
2413 STATIC I32
2414 S_sublex_push(pTHX)
2415 {
2416     dVAR;
2417     ENTER;
2418
2419     PL_lex_state = PL_sublex_info.super_state;
2420     SAVEBOOL(PL_lex_dojoin);
2421     SAVEI32(PL_lex_brackets);
2422     SAVEI32(PL_lex_allbrackets);
2423     SAVEI8(PL_lex_fakeeof);
2424     SAVEI32(PL_lex_casemods);
2425     SAVEI32(PL_lex_starts);
2426     SAVEI8(PL_lex_state);
2427     SAVEVPTR(PL_lex_inpat);
2428     SAVEI16(PL_lex_inwhat);
2429     SAVECOPLINE(PL_curcop);
2430     SAVEPPTR(PL_bufptr);
2431     SAVEPPTR(PL_bufend);
2432     SAVEPPTR(PL_oldbufptr);
2433     SAVEPPTR(PL_oldoldbufptr);
2434     SAVEPPTR(PL_last_lop);
2435     SAVEPPTR(PL_last_uni);
2436     SAVEPPTR(PL_linestart);
2437     SAVESPTR(PL_linestr);
2438     SAVEGENERICPV(PL_lex_brackstack);
2439     SAVEGENERICPV(PL_lex_casestack);
2440
2441     PL_linestr = PL_lex_stuff;
2442     PL_lex_stuff = NULL;
2443
2444     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2445         = SvPVX(PL_linestr);
2446     PL_bufend += SvCUR(PL_linestr);
2447     PL_last_lop = PL_last_uni = NULL;
2448     SAVEFREESV(PL_linestr);
2449
2450     PL_lex_dojoin = FALSE;
2451     PL_lex_brackets = 0;
2452     PL_lex_allbrackets = 0;
2453     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2454     Newx(PL_lex_brackstack, 120, char);
2455     Newx(PL_lex_casestack, 12, char);
2456     PL_lex_casemods = 0;
2457     *PL_lex_casestack = '\0';
2458     PL_lex_starts = 0;
2459     PL_lex_state = LEX_INTERPCONCAT;
2460     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2461
2462     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2463     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2464     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2465         PL_lex_inpat = PL_sublex_info.sub_op;
2466     else
2467         PL_lex_inpat = NULL;
2468
2469     return '(';
2470 }
2471
2472 /*
2473  * S_sublex_done
2474  * Restores lexer state after a S_sublex_push.
2475  */
2476
2477 STATIC I32
2478 S_sublex_done(pTHX)
2479 {
2480     dVAR;
2481     if (!PL_lex_starts++) {
2482         SV * const sv = newSVpvs("");
2483         if (SvUTF8(PL_linestr))
2484             SvUTF8_on(sv);
2485         PL_expect = XOPERATOR;
2486         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2487         return THING;
2488     }
2489
2490     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2491         PL_lex_state = LEX_INTERPCASEMOD;
2492         return yylex();
2493     }
2494
2495     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2496     assert(PL_lex_inwhat != OP_TRANSR);
2497     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2498         PL_linestr = PL_lex_repl;
2499         PL_lex_inpat = 0;
2500         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2501         PL_bufend += SvCUR(PL_linestr);
2502         PL_last_lop = PL_last_uni = NULL;
2503         SAVEFREESV(PL_linestr);
2504         PL_lex_dojoin = FALSE;
2505         PL_lex_brackets = 0;
2506         PL_lex_allbrackets = 0;
2507         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2508         PL_lex_casemods = 0;
2509         *PL_lex_casestack = '\0';
2510         PL_lex_starts = 0;
2511         if (SvEVALED(PL_lex_repl)) {
2512             PL_lex_state = LEX_INTERPNORMAL;
2513             PL_lex_starts++;
2514             /*  we don't clear PL_lex_repl here, so that we can check later
2515                 whether this is an evalled subst; that means we rely on the
2516                 logic to ensure sublex_done() is called again only via the
2517                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2518         }
2519         else {
2520             PL_lex_state = LEX_INTERPCONCAT;
2521             PL_lex_repl = NULL;
2522         }
2523         return ',';
2524     }
2525     else {
2526 #ifdef PERL_MAD
2527         if (PL_madskills) {
2528             if (PL_thiswhite) {
2529                 if (!PL_endwhite)
2530                     PL_endwhite = newSVpvs("");
2531                 sv_catsv(PL_endwhite, PL_thiswhite);
2532                 PL_thiswhite = 0;
2533             }
2534             if (PL_thistoken)
2535                 sv_setpvs(PL_thistoken,"");
2536             else
2537                 PL_realtokenstart = -1;
2538         }
2539 #endif
2540         LEAVE;
2541         PL_bufend = SvPVX(PL_linestr);
2542         PL_bufend += SvCUR(PL_linestr);
2543         PL_expect = XOPERATOR;
2544         PL_sublex_info.sub_inwhat = 0;
2545         return ')';
2546     }
2547 }
2548
2549 /*
2550   scan_const
2551
2552   Extracts a pattern, double-quoted string, or transliteration.  This
2553   is terrifying code.
2554
2555   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2556   processing a pattern (PL_lex_inpat is true), a transliteration
2557   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2558
2559   Returns a pointer to the character scanned up to. If this is
2560   advanced from the start pointer supplied (i.e. if anything was
2561   successfully parsed), will leave an OP for the substring scanned
2562   in pl_yylval. Caller must intuit reason for not parsing further
2563   by looking at the next characters herself.
2564
2565   In patterns:
2566     backslashes:
2567       constants: \N{NAME} only
2568       case and quoting: \U \Q \E
2569     stops on @ and $, but not for $ as tail anchor
2570
2571   In transliterations:
2572     characters are VERY literal, except for - not at the start or end
2573     of the string, which indicates a range. If the range is in bytes,
2574     scan_const expands the range to the full set of intermediate
2575     characters. If the range is in utf8, the hyphen is replaced with the
2576     illegal utf8 byte 0xff as a range mark, handled by pmtrans() in op.c.
2577
2578   In double-quoted strings:
2579     backslashes:
2580       double-quoted style: \r and \n
2581       constants: \x31, etc.
2582       deprecated backrefs: \1 (in substitution replacements)
2583       case and quoting: \U \Q \E
2584     stops on @ and $
2585
2586   scan_const does *not* construct ops to handle interpolated strings.
2587   It stops processing as soon as it finds an embedded $ or @ variable
2588   and leaves it to the caller to work out what's going on.
2589
2590   embedded arrays could be:
2591       @foo, @::foo, @'foo, @{foo}, @$foo, and (outside patterns only) @+, @-.
2592
2593   $ in double-quoted strings must be the symbol of an embedded scalar.
2594
2595   $ in pattern could be $foo or could be tail anchor.  Assumption:
2596   it's a tail anchor if $ is the last thing in the string, or if it's
2597   followed by one of "()| \r\n\t"
2598
2599   \1 (backreferences) in substitution replacements are turned into $1
2600
2601   The structure of the code is
2602       while (there's a character to process) {
2603           handle transliteration ranges
2604           skip regexp comments /(?#comment)/ and codes /(?{code})/
2605           skip #-initiated comments in //x patterns
2606           check for embedded arrays
2607           check for embedded scalars
2608           if (backslash) {
2609               deprecate \1 in substitution replacements
2610               handle string-changing backslashes \l \U \Q \E, etc.
2611               switch (what was escaped) {
2612                   handle \- in a transliteration (becomes a literal -)
2613                   if a pattern and not \N{, go treat as regular character
2614                   handle \132 and \o{24} (octal characters)
2615                   handle \x15 and \x{1234} (hex characters)
2616                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2617                   handle \cV (control characters)
2618                   handle printf-style backslashes (\f, \r, \n, etc)
2619               } (end switch)
2620               continue
2621           } (end if backslash)
2622           handle regular character
2623     } (end while character to read)
2624
2625 */
2626
2627 STATIC char *
2628 S_scan_const(pTHX_ char *start)
2629 {
2630     dVAR;
2631     register char *send = PL_bufend;            /* end of the constant */
2632     SV *sv = newSV(send - start);               /* sv for the constant.  See
2633                                                    note below on sizing. */
2634     register char *s = start;                   /* start of the constant */
2635     register char *d = SvPVX(sv);               /* destination for copies */
2636     bool dorange = FALSE;                       /* are we in a translit range? */
2637     bool didrange = FALSE;                      /* did we just finish a range? */
2638     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2639     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2640                                                    to be UTF8?  But, this can
2641                                                    show as true when the source
2642                                                    isn't utf8, as for example
2643                                                    when it is entirely composed
2644                                                    of hex constants */
2645
2646     /* Note on sizing:  The scanned constant is placed into sv, which is
2647      * initialized by newSV() assuming one byte of output for every byte of
2648      * input.  This routine expects newSV() to allocate an extra byte for a
2649      * trailing NUL, which this routine will append if it gets to the end of
2650      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2651      * CAPITAL LETTER A}), or more output than input if the constant ends up
2652      * recoded to utf8, but each time a construct is found that might increase
2653      * the needed size, SvGROW() is called.  Its size parameter each time is
2654      * based on the best guess estimate at the time, namely the length used so
2655      * far, plus the length the current construct will occupy, plus room for
2656      * the trailing NUL, plus one byte for every input byte still unscanned */
2657
2658     UV uv;
2659 #ifdef EBCDIC
2660     UV literal_endpoint = 0;
2661     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2662 #endif
2663
2664     PERL_ARGS_ASSERT_SCAN_CONST;
2665
2666     assert(PL_lex_inwhat != OP_TRANSR);
2667     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2668         /* If we are doing a trans and we know we want UTF8 set expectation */
2669         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2670         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2671     }
2672
2673
2674     while (s < send || dorange) {
2675
2676         /* get transliterations out of the way (they're most literal) */
2677         if (PL_lex_inwhat == OP_TRANS) {
2678             /* expand a range A-Z to the full set of characters.  AIE! */
2679             if (dorange) {
2680                 I32 i;                          /* current expanded character */
2681                 I32 min;                        /* first character in range */
2682                 I32 max;                        /* last character in range */
2683
2684 #ifdef EBCDIC
2685                 UV uvmax = 0;
2686 #endif
2687
2688                 if (has_utf8
2689 #ifdef EBCDIC
2690                     && !native_range
2691 #endif
2692                     ) {
2693                     char * const c = (char*)utf8_hop((U8*)d, -1);
2694                     char *e = d++;
2695                     while (e-- > c)
2696                         *(e + 1) = *e;
2697                     *c = (char)UTF_TO_NATIVE(0xff);
2698                     /* mark the range as done, and continue */
2699                     dorange = FALSE;
2700                     didrange = TRUE;
2701                     continue;
2702                 }
2703
2704                 i = d - SvPVX_const(sv);                /* remember current offset */
2705 #ifdef EBCDIC
2706                 SvGROW(sv,
2707                        SvLEN(sv) + (has_utf8 ?
2708                                     (512 - UTF_CONTINUATION_MARK +
2709                                      UNISKIP(0x100))
2710                                     : 256));
2711                 /* How many two-byte within 0..255: 128 in UTF-8,
2712                  * 96 in UTF-8-mod. */
2713 #else
2714                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2715 #endif
2716                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2717 #ifdef EBCDIC
2718                 if (has_utf8) {
2719                     int j;
2720                     for (j = 0; j <= 1; j++) {
2721                         char * const c = (char*)utf8_hop((U8*)d, -1);
2722                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2723                         if (j)
2724                             min = (U8)uv;
2725                         else if (uv < 256)
2726                             max = (U8)uv;
2727                         else {
2728                             max = (U8)0xff; /* only to \xff */
2729                             uvmax = uv; /* \x{100} to uvmax */
2730                         }
2731                         d = c; /* eat endpoint chars */
2732                      }
2733                 }
2734                else {
2735 #endif
2736                    d -= 2;              /* eat the first char and the - */
2737                    min = (U8)*d;        /* first char in range */
2738                    max = (U8)d[1];      /* last char in range  */
2739 #ifdef EBCDIC
2740                }
2741 #endif
2742
2743                 if (min > max) {
2744                     Perl_croak(aTHX_
2745                                "Invalid range \"%c-%c\" in transliteration operator",
2746                                (char)min, (char)max);
2747                 }
2748
2749 #ifdef EBCDIC
2750                 if (literal_endpoint == 2 &&
2751                     ((isLOWER(min) && isLOWER(max)) ||
2752                      (isUPPER(min) && isUPPER(max)))) {
2753                     if (isLOWER(min)) {
2754                         for (i = min; i <= max; i++)
2755                             if (isLOWER(i))
2756                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2757                     } else {
2758                         for (i = min; i <= max; i++)
2759                             if (isUPPER(i))
2760                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2761                     }
2762                 }
2763                 else
2764 #endif
2765                     for (i = min; i <= max; i++)
2766 #ifdef EBCDIC
2767                         if (has_utf8) {
2768                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2769                             if (UNI_IS_INVARIANT(ch))
2770                                 *d++ = (U8)i;
2771                             else {
2772                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2773                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2774                             }
2775                         }
2776                         else
2777 #endif
2778                             *d++ = (char)i;
2779
2780 #ifdef EBCDIC
2781                 if (uvmax) {
2782                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2783                     if (uvmax > 0x101)
2784                         *d++ = (char)UTF_TO_NATIVE(0xff);
2785                     if (uvmax > 0x100)
2786                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2787                 }
2788 #endif
2789
2790                 /* mark the range as done, and continue */
2791                 dorange = FALSE;
2792                 didrange = TRUE;
2793 #ifdef EBCDIC
2794                 literal_endpoint = 0;
2795 #endif
2796                 continue;
2797             }
2798
2799             /* range begins (ignore - as first or last char) */
2800             else if (*s == '-' && s+1 < send  && s != start) {
2801                 if (didrange) {
2802                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2803                 }
2804                 if (has_utf8
2805 #ifdef EBCDIC
2806                     && !native_range
2807 #endif
2808                     ) {
2809                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2810                     s++;
2811                     continue;
2812                 }
2813                 dorange = TRUE;
2814                 s++;
2815             }
2816             else {
2817                 didrange = FALSE;
2818 #ifdef EBCDIC
2819                 literal_endpoint = 0;
2820                 native_range = TRUE;
2821 #endif
2822             }
2823         }
2824
2825         /* if we get here, we're not doing a transliteration */
2826
2827         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2828            except for the last char, which will be done separately. */
2829         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2830             if (s[2] == '#') {
2831                 while (s+1 < send && *s != ')')
2832                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2833             }
2834             else if (s[2] == '{' /* This should match regcomp.c */
2835                     || (s[2] == '?' && s[3] == '{'))
2836             {
2837                 I32 count = 1;
2838                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2839                 char c;
2840
2841                 while (count && (c = *regparse)) {
2842                     if (c == '\\' && regparse[1])
2843                         regparse++;
2844                     else if (c == '{')
2845                         count++;
2846                     else if (c == '}')
2847                         count--;
2848                     regparse++;
2849                 }
2850                 if (*regparse != ')')
2851                     regparse--;         /* Leave one char for continuation. */
2852                 while (s < regparse)
2853                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2854             }
2855         }
2856
2857         /* likewise skip #-initiated comments in //x patterns */
2858         else if (*s == '#' && PL_lex_inpat &&
2859           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2860             while (s+1 < send && *s != '\n')
2861                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2862         }
2863
2864         /* check for embedded arrays
2865            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2866            */
2867         else if (*s == '@' && s[1]) {
2868             if (isALNUM_lazy_if(s+1,UTF))
2869                 break;
2870             if (strchr(":'{$", s[1]))
2871                 break;
2872             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2873                 break; /* in regexp, neither @+ nor @- are interpolated */
2874         }
2875
2876         /* check for embedded scalars.  only stop if we're sure it's a
2877            variable.
2878         */
2879         else if (*s == '$') {
2880             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2881                 break;
2882             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2883                 if (s[1] == '\\') {
2884                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2885                                    "Possible unintended interpolation of $\\ in regex");
2886                 }
2887                 break;          /* in regexp, $ might be tail anchor */
2888             }
2889         }
2890
2891         /* End of else if chain - OP_TRANS rejoin rest */
2892
2893         /* backslashes */
2894         if (*s == '\\' && s+1 < send) {
2895             char* e;    /* Can be used for ending '}', etc. */
2896
2897             s++;
2898
2899             /* warn on \1 - \9 in substitution replacements, but note that \11
2900              * is an octal; and \19 is \1 followed by '9' */
2901             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2902                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2903             {
2904                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2905                 *--s = '$';
2906                 break;
2907             }
2908
2909             /* string-change backslash escapes */
2910             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2911                 --s;
2912                 break;
2913             }
2914             /* In a pattern, process \N, but skip any other backslash escapes.
2915              * This is because we don't want to translate an escape sequence
2916              * into a meta symbol and have the regex compiler use the meta
2917              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2918              * in spite of this, we do have to process \N here while the proper
2919              * charnames handler is in scope.  See bugs #56444 and #62056.
2920              * There is a complication because \N in a pattern may also stand
2921              * for 'match a non-nl', and not mean a charname, in which case its
2922              * processing should be deferred to the regex compiler.  To be a
2923              * charname it must be followed immediately by a '{', and not look
2924              * like \N followed by a curly quantifier, i.e., not something like
2925              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2926              * quantifier */
2927             else if (PL_lex_inpat
2928                     && (*s != 'N'
2929                         || s[1] != '{'
2930                         || regcurly(s + 1)))
2931             {
2932                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2933                 goto default_action;
2934             }
2935
2936             switch (*s) {
2937
2938             /* quoted - in transliterations */
2939             case '-':
2940                 if (PL_lex_inwhat == OP_TRANS) {
2941                     *d++ = *s++;
2942                     continue;
2943                 }
2944                 /* FALL THROUGH */
2945             default:
2946                 {
2947                     if ((isALPHA(*s) || isDIGIT(*s)))
2948                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2949                                        "Unrecognized escape \\%c passed through",
2950                                        *s);
2951                     /* default action is to copy the quoted character */
2952                     goto default_action;
2953                 }
2954
2955             /* eg. \132 indicates the octal constant 0132 */
2956             case '0': case '1': case '2': case '3':
2957             case '4': case '5': case '6': case '7':
2958                 {
2959                     I32 flags = 0;
2960                     STRLEN len = 3;
2961                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2962                     s += len;
2963                 }
2964                 goto NUM_ESCAPE_INSERT;
2965
2966             /* eg. \o{24} indicates the octal constant \024 */
2967             case 'o':
2968                 {
2969                     STRLEN len;
2970                     const char* error;
2971
2972                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2973                     s += len;
2974                     if (! valid) {
2975                         yyerror(error);
2976                         continue;
2977                     }
2978                     goto NUM_ESCAPE_INSERT;
2979                 }
2980
2981             /* eg. \x24 indicates the hex constant 0x24 */
2982             case 'x':
2983                 ++s;
2984                 if (*s == '{') {
2985                     char* const e = strchr(s, '}');
2986                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2987                       PERL_SCAN_DISALLOW_PREFIX;
2988                     STRLEN len;
2989
2990                     ++s;
2991                     if (!e) {
2992                         yyerror("Missing right brace on \\x{}");
2993                         continue;
2994                     }
2995                     len = e - s;
2996                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2997                     s = e + 1;
2998                 }
2999                 else {
3000                     {
3001                         STRLEN len = 2;
3002                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3003                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3004                         s += len;
3005                     }
3006                 }
3007
3008               NUM_ESCAPE_INSERT:
3009                 /* Insert oct or hex escaped character.  There will always be
3010                  * enough room in sv since such escapes will be longer than any
3011                  * UTF-8 sequence they can end up as, except if they force us
3012                  * to recode the rest of the string into utf8 */
3013
3014                 /* Here uv is the ordinal of the next character being added in
3015                  * unicode (converted from native). */
3016                 if (!UNI_IS_INVARIANT(uv)) {
3017                     if (!has_utf8 && uv > 255) {
3018                         /* Might need to recode whatever we have accumulated so
3019                          * far if it contains any chars variant in utf8 or
3020                          * utf-ebcdic. */
3021
3022                         SvCUR_set(sv, d - SvPVX_const(sv));
3023                         SvPOK_on(sv);
3024                         *d = '\0';
3025                         /* See Note on sizing above.  */
3026                         sv_utf8_upgrade_flags_grow(sv,
3027                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3028                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3029                         d = SvPVX(sv) + SvCUR(sv);
3030                         has_utf8 = TRUE;
3031                     }
3032
3033                     if (has_utf8) {
3034                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3035                         if (PL_lex_inwhat == OP_TRANS &&
3036                             PL_sublex_info.sub_op) {
3037                             PL_sublex_info.sub_op->op_private |=
3038                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3039                                              : OPpTRANS_TO_UTF);
3040                         }
3041 #ifdef EBCDIC
3042                         if (uv > 255 && !dorange)
3043                             native_range = FALSE;
3044 #endif
3045                     }
3046                     else {
3047                         *d++ = (char)uv;
3048                     }
3049                 }
3050                 else {
3051                     *d++ = (char) uv;
3052                 }
3053                 continue;
3054
3055             case 'N':
3056                 /* In a non-pattern \N must be a named character, like \N{LATIN
3057                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3058                  * mean to match a non-newline.  For non-patterns, named
3059                  * characters are converted to their string equivalents. In
3060                  * patterns, named characters are not converted to their
3061                  * ultimate forms for the same reasons that other escapes
3062                  * aren't.  Instead, they are converted to the \N{U+...} form
3063                  * to get the value from the charnames that is in effect right
3064                  * now, while preserving the fact that it was a named character
3065                  * so that the regex compiler knows this */
3066
3067                 /* This section of code doesn't generally use the
3068                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3069                  * a close examination of this macro and determined it is a
3070                  * no-op except on utfebcdic variant characters.  Every
3071                  * character generated by this that would normally need to be
3072                  * enclosed by this macro is invariant, so the macro is not
3073                  * needed, and would complicate use of copy().  XXX There are
3074                  * other parts of this file where the macro is used
3075                  * inconsistently, but are saved by it being a no-op */
3076
3077                 /* The structure of this section of code (besides checking for
3078                  * errors and upgrading to utf8) is:
3079                  *  Further disambiguate between the two meanings of \N, and if
3080                  *      not a charname, go process it elsewhere
3081                  *  If of form \N{U+...}, pass it through if a pattern;
3082                  *      otherwise convert to utf8
3083                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3084                  *  pattern; otherwise convert to utf8 */
3085
3086                 /* Here, s points to the 'N'; the test below is guaranteed to
3087                  * succeed if we are being called on a pattern as we already
3088                  * know from a test above that the next character is a '{'.
3089                  * On a non-pattern \N must mean 'named sequence, which
3090                  * requires braces */
3091                 s++;
3092                 if (*s != '{') {
3093                     yyerror("Missing braces on \\N{}");
3094                     continue;
3095                 }
3096                 s++;
3097
3098                 /* If there is no matching '}', it is an error. */
3099                 if (! (e = strchr(s, '}'))) {
3100                     if (! PL_lex_inpat) {
3101                         yyerror("Missing right brace on \\N{}");
3102                     } else {
3103                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3104                     }
3105                     continue;
3106                 }
3107
3108                 /* Here it looks like a named character */
3109
3110                 if (PL_lex_inpat) {
3111
3112                     /* XXX This block is temporary code.  \N{} implies that the
3113                      * pattern is to have Unicode semantics, and therefore
3114                      * currently has to be encoded in utf8.  By putting it in
3115                      * utf8 now, we save a whole pass in the regular expression
3116                      * compiler.  Once that code is changed so Unicode
3117                      * semantics doesn't necessarily have to be in utf8, this
3118                      * block should be removed.  However, the code that parses
3119                      * the output of this would have to be changed to not
3120                      * necessarily expect utf8 */
3121                     if (!has_utf8) {
3122                         SvCUR_set(sv, d - SvPVX_const(sv));
3123                         SvPOK_on(sv);
3124                         *d = '\0';
3125                         /* See Note on sizing above.  */
3126                         sv_utf8_upgrade_flags_grow(sv,
3127                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3128                                         /* 5 = '\N{' + cur char + NUL */
3129                                         (STRLEN)(send - s) + 5);
3130                         d = SvPVX(sv) + SvCUR(sv);
3131                         has_utf8 = TRUE;
3132                     }
3133                 }
3134
3135                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3136                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3137                                 | PERL_SCAN_DISALLOW_PREFIX;
3138                     STRLEN len;
3139
3140                     /* For \N{U+...}, the '...' is a unicode value even on
3141                      * EBCDIC machines */
3142                     s += 2;         /* Skip to next char after the 'U+' */
3143                     len = e - s;
3144                     uv = grok_hex(s, &len, &flags, NULL);
3145                     if (len == 0 || len != (STRLEN)(e - s)) {
3146                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3147                         s = e + 1;
3148                         continue;
3149                     }
3150
3151                     if (PL_lex_inpat) {
3152
3153                         /* On non-EBCDIC platforms, pass through to the regex
3154                          * compiler unchanged.  The reason we evaluated the
3155                          * number above is to make sure there wasn't a syntax
3156                          * error.  But on EBCDIC we convert to native so
3157                          * downstream code can continue to assume it's native
3158                          */
3159                         s -= 5;     /* Include the '\N{U+' */
3160 #ifdef EBCDIC
3161                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3162                                                                and the \0 */
3163                                     "\\N{U+%X}",
3164                                     (unsigned int) UNI_TO_NATIVE(uv));
3165 #else
3166                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3167                         d += e - s + 1;
3168 #endif
3169                     }
3170                     else {  /* Not a pattern: convert the hex to string */
3171
3172                          /* If destination is not in utf8, unconditionally
3173                           * recode it to be so.  This is because \N{} implies
3174                           * Unicode semantics, and scalars have to be in utf8
3175                           * to guarantee those semantics */
3176                         if (! has_utf8) {
3177                             SvCUR_set(sv, d - SvPVX_const(sv));
3178                             SvPOK_on(sv);
3179                             *d = '\0';
3180                             /* See Note on sizing above.  */
3181                             sv_utf8_upgrade_flags_grow(
3182                                         sv,
3183                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3184                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3185                             d = SvPVX(sv) + SvCUR(sv);
3186                             has_utf8 = TRUE;
3187                         }
3188
3189                         /* Add the string to the output */
3190                         if (UNI_IS_INVARIANT(uv)) {
3191                             *d++ = (char) uv;
3192                         }
3193                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3194                     }
3195                 }
3196                 else { /* Here is \N{NAME} but not \N{U+...}. */
3197
3198                     SV *res;            /* result from charnames */
3199                     const char *str;    /* the string in 'res' */
3200                     STRLEN len;         /* its length */
3201
3202                     /* Get the value for NAME */
3203                     res = newSVpvn(s, e - s);
3204                     res = new_constant( NULL, 0, "charnames",
3205                                         /* includes all of: \N{...} */
3206                                         res, NULL, s - 3, e - s + 4 );
3207
3208                     /* Most likely res will be in utf8 already since the
3209                      * standard charnames uses pack U, but a custom translator
3210                      * can leave it otherwise, so make sure.  XXX This can be
3211                      * revisited to not have charnames use utf8 for characters
3212                      * that don't need it when regexes don't have to be in utf8
3213                      * for Unicode semantics.  If doing so, remember EBCDIC */
3214                     sv_utf8_upgrade(res);
3215                     str = SvPV_const(res, len);
3216
3217                     /* Don't accept malformed input */
3218                     if (! is_utf8_string((U8 *) str, len)) {
3219                         yyerror("Malformed UTF-8 returned by \\N");
3220                     }
3221                     else if (PL_lex_inpat) {
3222
3223                         if (! len) { /* The name resolved to an empty string */
3224                             Copy("\\N{}", d, 4, char);
3225                             d += 4;
3226                         }
3227                         else {
3228                             /* In order to not lose information for the regex
3229                             * compiler, pass the result in the specially made
3230                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3231                             * the code points in hex of each character
3232                             * returned by charnames */
3233
3234                             const char *str_end = str + len;
3235                             STRLEN char_length;     /* cur char's byte length */
3236                             STRLEN output_length;   /* and the number of bytes
3237                                                        after this is translated
3238                                                        into hex digits */
3239                             const STRLEN off = d - SvPVX_const(sv);
3240
3241                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3242                              * max('U+', '.'); and 1 for NUL */
3243                             char hex_string[2 * UTF8_MAXBYTES + 5];
3244
3245                             /* Get the first character of the result. */
3246                             U32 uv = utf8n_to_uvuni((U8 *) str,
3247                                                     len,
3248                                                     &char_length,
3249                                                     UTF8_ALLOW_ANYUV);
3250
3251                             /* The call to is_utf8_string() above hopefully
3252                              * guarantees that there won't be an error.  But
3253                              * it's easy here to make sure.  The function just
3254                              * above warns and returns 0 if invalid utf8, but
3255                              * it can also return 0 if the input is validly a
3256                              * NUL. Disambiguate */
3257                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3258                                 uv = UNICODE_REPLACEMENT;
3259                             }
3260
3261                             /* Convert first code point to hex, including the
3262                              * boiler plate before it.  For all these, we
3263                              * convert to native format so that downstream code
3264                              * can continue to assume the input is native */
3265                             output_length =
3266                                 my_snprintf(hex_string, sizeof(hex_string),
3267                                             "\\N{U+%X",
3268                                             (unsigned int) UNI_TO_NATIVE(uv));
3269
3270                             /* Make sure there is enough space to hold it */
3271                             d = off + SvGROW(sv, off
3272                                                  + output_length
3273                                                  + (STRLEN)(send - e)
3274                                                  + 2);  /* '}' + NUL */
3275                             /* And output it */
3276                             Copy(hex_string, d, output_length, char);
3277                             d += output_length;
3278
3279                             /* For each subsequent character, append dot and
3280                              * its ordinal in hex */
3281                             while ((str += char_length) < str_end) {
3282                                 const STRLEN off = d - SvPVX_const(sv);
3283                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3284                                                         str_end - str,
3285                                                         &char_length,
3286                                                         UTF8_ALLOW_ANYUV);
3287                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3288                                     uv = UNICODE_REPLACEMENT;
3289                                 }
3290
3291                                 output_length =
3292                                     my_snprintf(hex_string, sizeof(hex_string),
3293                                             ".%X",
3294                                             (unsigned int) UNI_TO_NATIVE(uv));
3295
3296                                 d = off + SvGROW(sv, off
3297                                                      + output_length
3298                                                      + (STRLEN)(send - e)
3299                                                      + 2);      /* '}' +  NUL */
3300                                 Copy(hex_string, d, output_length, char);
3301                                 d += output_length;
3302                             }
3303
3304                             *d++ = '}'; /* Done.  Add the trailing brace */
3305                         }
3306                     }
3307                     else { /* Here, not in a pattern.  Convert the name to a
3308                             * string. */
3309
3310                          /* If destination is not in utf8, unconditionally
3311                           * recode it to be so.  This is because \N{} implies
3312                           * Unicode semantics, and scalars have to be in utf8
3313                           * to guarantee those semantics */
3314                         if (! has_utf8) {
3315                             SvCUR_set(sv, d - SvPVX_const(sv));
3316                             SvPOK_on(sv);
3317                             *d = '\0';
3318                             /* See Note on sizing above.  */
3319                             sv_utf8_upgrade_flags_grow(sv,
3320                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3321                                                 len + (STRLEN)(send - s) + 1);
3322                             d = SvPVX(sv) + SvCUR(sv);
3323                             has_utf8 = TRUE;
3324                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3325
3326                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3327                              * set correctly here). */
3328                             const STRLEN off = d - SvPVX_const(sv);
3329                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3330                         }
3331                         Copy(str, d, len, char);
3332                         d += len;
3333                     }
3334                     SvREFCNT_dec(res);
3335
3336                     /* Deprecate non-approved name syntax */
3337                     if (ckWARN_d(WARN_DEPRECATED)) {
3338                         bool problematic = FALSE;
3339                         char* i = s;
3340
3341                         /* For non-utf8 input, look to see that the first
3342                          * character is an alpha, then loop through the rest
3343                          * checking that each is a continuation */
3344                         if (! this_utf8) {
3345                             if (! isALPHAU(*i)) problematic = TRUE;
3346                             else for (i = s + 1; i < e; i++) {
3347                                 if (isCHARNAME_CONT(*i)) continue;
3348                                 problematic = TRUE;
3349                                 break;
3350                             }
3351                         }
3352                         else {
3353                             /* Similarly for utf8.  For invariants can check
3354                              * directly.  We accept anything above the latin1
3355                              * range because it is immaterial to Perl if it is
3356                              * correct or not, and is expensive to check.  But
3357                              * it is fairly easy in the latin1 range to convert
3358                              * the variants into a single character and check
3359                              * those */
3360                             if (UTF8_IS_INVARIANT(*i)) {
3361                                 if (! isALPHAU(*i)) problematic = TRUE;
3362                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3363                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3364                                                                             *(i+1)))))
3365                                 {
3366                                     problematic = TRUE;
3367                                 }
3368                             }
3369                             if (! problematic) for (i = s + UTF8SKIP(s);
3370                                                     i < e;
3371                                                     i+= UTF8SKIP(i))
3372                             {
3373                                 if (UTF8_IS_INVARIANT(*i)) {
3374                                     if (isCHARNAME_CONT(*i)) continue;
3375                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3376                                     continue;
3377                                 } else if (isCHARNAME_CONT(
3378                                             UNI_TO_NATIVE(
3379                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3380                                 {
3381                                     continue;
3382                                 }
3383                                 problematic = TRUE;
3384                                 break;
3385                             }
3386                         }
3387                         if (problematic) {
3388                             /* The e-i passed to the final %.*s makes sure that
3389                              * should the trailing NUL be missing that this
3390                              * print won't run off the end of the string */
3391                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3392                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3393                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3394                         }
3395                     }
3396                 } /* End \N{NAME} */
3397 #ifdef EBCDIC
3398                 if (!dorange)
3399                     native_range = FALSE; /* \N{} is defined to be Unicode */
3400 #endif
3401                 s = e + 1;  /* Point to just after the '}' */
3402                 continue;
3403
3404             /* \c is a control character */
3405             case 'c':
3406                 s++;
3407                 if (s < send) {
3408                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3409                 }
3410                 else {
3411                     yyerror("Missing control char name in \\c");
3412                 }
3413                 continue;
3414
3415             /* printf-style backslashes, formfeeds, newlines, etc */
3416             case 'b':
3417                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3418                 break;
3419             case 'n':
3420                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3421                 break;
3422             case 'r':
3423                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3424                 break;
3425             case 'f':
3426                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3427                 break;
3428             case 't':
3429                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3430                 break;
3431             case 'e':
3432                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3433                 break;
3434             case 'a':
3435                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3436                 break;
3437             } /* end switch */
3438
3439             s++;
3440             continue;
3441         } /* end if (backslash) */
3442 #ifdef EBCDIC
3443         else
3444             literal_endpoint++;
3445 #endif
3446
3447     default_action:
3448         /* If we started with encoded form, or already know we want it,
3449            then encode the next character */
3450         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3451             STRLEN len  = 1;
3452
3453
3454             /* One might think that it is wasted effort in the case of the
3455              * source being utf8 (this_utf8 == TRUE) to take the next character
3456              * in the source, convert it to an unsigned value, and then convert
3457              * it back again.  But the source has not been validated here.  The
3458              * routine that does the conversion checks for errors like
3459              * malformed utf8 */
3460
3461             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3462             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3463             if (!has_utf8) {
3464                 SvCUR_set(sv, d - SvPVX_const(sv));
3465                 SvPOK_on(sv);
3466                 *d = '\0';
3467                 /* See Note on sizing above.  */
3468                 sv_utf8_upgrade_flags_grow(sv,
3469                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3470                                         need + (STRLEN)(send - s) + 1);
3471                 d = SvPVX(sv) + SvCUR(sv);
3472                 has_utf8 = TRUE;
3473             } else if (need > len) {
3474                 /* encoded value larger than old, may need extra space (NOTE:
3475                  * SvCUR() is not set correctly here).   See Note on sizing
3476                  * above.  */
3477                 const STRLEN off = d - SvPVX_const(sv);
3478                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3479             }
3480             s += len;
3481
3482             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3483 #ifdef EBCDIC
3484             if (uv > 255 && !dorange)
3485                 native_range = FALSE;
3486 #endif
3487         }
3488         else {
3489             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3490         }
3491     } /* while loop to process each character */
3492
3493     /* terminate the string and set up the sv */
3494     *d = '\0';
3495     SvCUR_set(sv, d - SvPVX_const(sv));
3496     if (SvCUR(sv) >= SvLEN(sv))
3497         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3498
3499     SvPOK_on(sv);
3500     if (PL_encoding && !has_utf8) {
3501         sv_recode_to_utf8(sv, PL_encoding);
3502         if (SvUTF8(sv))
3503             has_utf8 = TRUE;
3504     }
3505     if (has_utf8) {
3506         SvUTF8_on(sv);
3507         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3508             PL_sublex_info.sub_op->op_private |=
3509                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3510         }
3511     }
3512
3513     /* shrink the sv if we allocated more than we used */
3514     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3515         SvPV_shrink_to_cur(sv);
3516     }
3517
3518     /* return the substring (via pl_yylval) only if we parsed anything */
3519     if (s > PL_bufptr) {
3520         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3521             const char *const key = PL_lex_inpat ? "qr" : "q";
3522             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3523             const char *type;
3524             STRLEN typelen;
3525
3526             if (PL_lex_inwhat == OP_TRANS) {
3527                 type = "tr";
3528                 typelen = 2;
3529             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3530                 type = "s";
3531                 typelen = 1;
3532             } else  {
3533                 type = "qq";
3534                 typelen = 2;
3535             }
3536
3537             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3538                                 type, typelen);
3539         }
3540         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3541     } else
3542         SvREFCNT_dec(sv);
3543     return s;
3544 }
3545
3546 /* S_intuit_more
3547  * Returns TRUE if there's more to the expression (e.g., a subscript),
3548  * FALSE otherwise.
3549  *
3550  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3551  *
3552  * ->[ and ->{ return TRUE
3553  * { and [ outside a pattern are always subscripts, so return TRUE
3554  * if we're outside a pattern and it's not { or [, then return FALSE
3555  * if we're in a pattern and the first char is a {
3556  *   {4,5} (any digits around the comma) returns FALSE
3557  * if we're in a pattern and the first char is a [
3558  *   [] returns FALSE
3559  *   [SOMETHING] has a funky algorithm to decide whether it's a
3560  *      character class or not.  It has to deal with things like
3561  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3562  * anything else returns TRUE
3563  */
3564
3565 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3566
3567 STATIC int
3568 S_intuit_more(pTHX_ register char *s)
3569 {
3570     dVAR;
3571     /* TRUE: the text at s continues the expression (e.g. a subscript). */
3572     PERL_ARGS_ASSERT_INTUIT_MORE;
3573     /* FALSE: it does not, e.g. it is a {n,m} quantifier or a char class. */
3574     if (PL_lex_brackets)
3575         return TRUE;
3576     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) /* ->[ ->{ */
3577         return TRUE;
3578     if (*s != '{' && *s != '[')
3579         return FALSE;
3580     if (!PL_lex_inpat)          /* outside a pattern { and [ are subscripts */
3581         return TRUE;
3582
3583     /* In a pattern, so maybe we have {n,m}. */
3584     if (*s == '{') {
3585         if (regcurly(s)) {
3586             return FALSE;
3587         }
3588         return TRUE;
3589     }
3590
3591     /* On the other hand, maybe we have a character class */
3592
3593     s++;
3594     if (*s == ']' || *s == '^')
3595         return FALSE;
3596     else {
3597         /* this is terrifying, and it works */
3598         int weight = 2;         /* let's weigh the evidence */
3599         char seen[256];         /* how often each char occurred so far */
3600         unsigned char un_char = 255, last_un_char; /* 255: none yet; 254: '\\' */
3601         const char * const send = strchr(s,']');  /* first ']', if any */
3602         char tmpbuf[sizeof PL_tokenbuf * 4];      /* scratch for scan_ident() */
3603
3604         if (!send)              /* has to be an expression */
3605             return TRUE;
3606
3607         Zero(seen,256,char);
3608         if (*s == '$')
3609             weight -= 3;
3610         else if (isDIGIT(*s)) {
3611             if (s[1] != ']') {
3612                 if (isDIGIT(s[1]) && s[2] == ']')
3613                     weight -= 10;
3614             }
3615             else
3616                 weight -= 100;
3617         }
3618         for (; s < send; s++) {         /* score each char before the ']' */
3619             last_un_char = un_char;
3620             un_char = (unsigned char)*s;
3621             switch (*s) {
3622             case '@':
3623             case '&':
3624             case '$':
3625                 weight -= seen[un_char] * 10;
3626                 if (isALNUM_lazy_if(s+1,UTF)) {
3627                     int len;
3628                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3629                     len = (int)strlen(tmpbuf);
3630                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3631                         weight -= 100;
3632                     else
3633                         weight -= 10;
3634                 }
3635                 else if (*s == '$' && s[1] &&
3636                   strchr("[#!%*<>()-=",s[1])) {
3637                     if (/*{*/ strchr("])} =",s[2]))
3638                         weight -= 10;
3639                     else
3640                         weight -= 1;
3641                 }
3642                 break;
3643             case '\\':
3644                 un_char = 254;
3645                 if (s[1]) {
3646                     if (strchr("wds]",s[1]))
3647                         weight += 100;
3648                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3649                         weight += 1;
3650                     else if (strchr("rnftbxcav",s[1]))
3651                         weight += 40;
3652                     else if (isDIGIT(s[1])) {
3653                         weight += 40;
3654                         while (s[1] && isDIGIT(s[1]))
3655                             s++;
3656                     }
3657                 }
3658                 else
3659                     weight += 100;
3660                 break;
3661             case '-':
3662                 if (s[1] == '\\')
3663                     weight += 50;
3664                 if (strchr("aA01! ",last_un_char))
3665                     weight += 30;
3666                 if (strchr("zZ79~",s[1]))
3667                     weight += 30;
3668                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3669                     weight -= 5;        /* cope with negative subscript */
3670                 break;
3671             default:
3672                 if (!isALNUM(last_un_char)
3673                     && !(last_un_char == '$' || last_un_char == '@'
3674                          || last_un_char == '&')
3675                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3676                     char * const word = s;  /* start of the alphabetic run */
3677                     while (isALPHA(*s))
3678                         s++;
3679                     /* Checked in place, with a length: no copy, no tmpbuf overrun */
3680                     if (keyword(word, s - word, 0))
3681                         weight -= 150;
3682                 }
3683                 if (un_char == last_un_char + 1)
3684                     weight += 5;
3685                 weight -= seen[un_char];
3686                 break;
3687             }
3688             seen[un_char]++;
3689         }
3690         if (weight >= 0)        /* probably a character class */
3691             return FALSE;
3692     }
3693
3694     return TRUE;
3695 }
3696
3697 /*
3698  * S_intuit_method
3699  *
3700  * Does all the checking to disambiguate
3701  *   foo bar
3702  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3703  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3704  *
3705  * First argument is the stuff after the first token, e.g. "bar".
3706  *
3707  * Not a method if bar is a filehandle.
3708  * Not a method if foo is a subroutine prototyped to take a filehandle.
3709  * Not a method if it's really "Foo $bar"
3710  * Method if it's "foo $bar"
3711  * Not a method if it's really "print foo $bar"
3712  * Method if it's really "foo package::" (interpreted as package->foo)
3713  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3714  * Not a method if bar is a filehandle or package, but is quoted with
3715  *   =>
3716  */
3717
3718 STATIC int
3719 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3720 {
3721     dVAR;
3722     char *s = start + (*start == '$');  /* step over a leading '$', if any */
3723     char tmpbuf[sizeof PL_tokenbuf];
3724     STRLEN len;
3725     GV* indirgv;
3726 #ifdef PERL_MAD
3727     int soff;
3728 #endif
3729     /* Returns 0 (not a method), FUNCMETH or METHOD. */
3730     PERL_ARGS_ASSERT_INTUIT_METHOD;
3731     /* NOTE(review): gv/cv look like the glob and sub of the first token (foo) -- confirm against callers */
3732     if (gv) {
3733         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv)) /* a glob with an IO slot */
3734             return 0;
3735         if (cv) {
3736             if (SvPOK(cv)) {
3737                 const char *proto = SvPVX_const(cv);
3738                 if (proto) {
3739                     if (*proto == ';')
3740                         proto++;
3741                     if (*proto == '*')  /* '*' first, after an optional ';' */
3742                         return 0;
3743                 }
3744             }
3745         } else
3746             gv = NULL;          /* a gv without a cv is ignored from here on */
3747     }
3748     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3749     /* start is the beginning of the possible filehandle/object,
3750      * and s is the end of it
3751      * tmpbuf is a copy of it
3752      */
3753
3754     if (*start == '$') {        /* the "foo $bar" forms */
3755         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3756                 isUPPER(*PL_tokenbuf))
3757             return 0;   /* gv kept, print/say just seen, or token starts uppercase */
3758 #ifdef PERL_MAD
3759         len = start - SvPVX(PL_linestr);        /* keep start as an offset; presumably PEEKSPACE can move the buffer -- confirm */
3760 #endif
3761         s = PEEKSPACE(s);
3762 #ifdef PERL_MAD
3763         start = SvPVX(PL_linestr) + len;        /* rebuild start from the saved offset */
3764 #endif
3765         PL_bufptr = start;
3766         PL_expect = XREF;
3767         return *s == '(' ? FUNCMETH : METHOD;   /* '(' comes next: FUNCMETH */
3768     }
3769     if (!keyword(tmpbuf, len, 0)) {     /* the word is not a keyword */
3770         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3771             len -= 2;                   /* "foo package::": drop the "::" */
3772             tmpbuf[len] = '\0';
3773 #ifdef PERL_MAD
3774             soff = s - SvPVX(PL_linestr);
3775 #endif
3776             goto bare_package;
3777         }
3778         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3779         if (indirgv && GvCVu(indirgv))
3780             return 0;                   /* the word names a subroutine */
3781         /* filehandle or package name makes it a method */
3782         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3783 #ifdef PERL_MAD
3784             soff = s - SvPVX(PL_linestr);
3785 #endif
3786             s = PEEKSPACE(s);
3787             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3788                 return 0;       /* no assumptions -- "=>" quotes bareword */
3789       bare_package:
3790             start_force(PL_curforce);   /* queue tmpbuf as a bareword constant, forced as a WORD token */
3791             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3792                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3793             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3794             if (PL_madskills)
3795                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3796             PL_expect = XTERM;
3797             force_next(WORD);
3798             PL_bufptr = s;
3799 #ifdef PERL_MAD
3800             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3801 #endif
3802             return *s == '(' ? FUNCMETH : METHOD;       /* '(' comes next: FUNCMETH */
3803         }
3804     }
3805     return 0;
3806 }
3807
3808 /* Encoded script support. filter_add() effectively inserts a
3809  * 'pre-processing' function into the current source input stream.
3810  * Note that the filter function only applies to the current source file
3811  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3812  *
3813  * The datasv parameter (which may be NULL) can be used to pass
3814  * private data to this instance of the filter. The filter function
3815  * can recover the SV using the FILTER_DATA macro and use it to
3816  * store private buffers and state information.
3817  *
3818  * The supplied datasv parameter is upgraded to a PVIO type
3819  * and the IoDIRP/IoANY field is used to store the function pointer,
3820  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3821  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3822  * private use must be set using malloc'd pointers.
3823  */
3824
3825 SV *
3826 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3827 {
3828     dVAR;
3829     /* Nothing to install without a filter function or a live parser. */
3830     if (!funcp || !PL_parser)
3831         return NULL;
3832
3833     /* Create the filter list and the private-data SV on first need. */
3834     if (PL_rsfp_filters == NULL)
3835         PL_rsfp_filters = newAV();
3836     if (datasv == NULL)
3837         datasv = newSV(0);
3838     /* Make datasv a PVIO, keep funcp in its IoANY slot and flag it. */
3839     SvUPGRADE(datasv, SVt_PVIO);
3840     IoANY(datasv) = FPTR2DPTR(void *, funcp);
3841     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3842     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3843                           FPTR2DPTR(void *, IoANY(datasv)), SvPV_nolen(datasv)));
3844     /* The new entry goes in at index 0 of the list. */
3845     av_unshift(PL_rsfp_filters, 1);
3846     av_store(PL_rsfp_filters, 0, datasv);
3847     return datasv;
3848 }
3849
3850
3851 /* Drop funcp's filter if it is the entry at the highest index of the list. */
3852 void
3853 Perl_filter_del(pTHX_ filter_t funcp)
3854 {
3855     dVAR;
3856     SV *last;   /* data SV stored at the highest index */
3857     /* NOTE(review): Perl_filter_add stores new entries at index 0 -- confirm the order intended here */
3858     PERL_ARGS_ASSERT_FILTER_DEL;
3859
3860 #ifdef DEBUGGING
3861     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3862                           FPTR2DPTR(void*, funcp)));
3863 #endif
3864     /* No parser, no filter list, or an empty list: nothing to do. */
3865     if (!PL_parser || !PL_rsfp_filters)
3866         return;
3867     if (AvFILLp(PL_rsfp_filters) < 0)
3868         return;
3869     /* Any other position would need a search-and-clear, which is not done. */
3870     last = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3871     if (IoANY(last) != FPTR2DPTR(void *, funcp))
3872         Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3873     else
3874         sv_free(av_pop(PL_rsfp_filters));
3875 }
3876
3877
3878 /* Invoke the idx'th filter for the current rsfp; past the last filter, read PL_rsfp itself. */
3879 /* maxlen 0 = read one text line, otherwise a block; result <0: error, 0: eof, >0: not eof */
3880 I32
3881 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3882 {
3883     dVAR;
3884     filter_t funcp;
3885     SV *datasv = NULL;
3886     /* This API is bad. It should have been using unsigned int for maxlen.
3887        Not sure if we want to change the API, but if not we should sanity
3888        check the value here.  */
3889     const unsigned int correct_length
3890         = maxlen < 0 ?
3891 #ifdef PERL_MICRO
3892         0x7FFFFFFF
3893 #else
3894         INT_MAX
3895 #endif
3896         : maxlen;
3897     /* correct_length is maxlen, except that a negative maxlen becomes the largest int */
3898     PERL_ARGS_ASSERT_FILTER_READ;
3899
3900     if (!PL_parser || !PL_rsfp_filters)
3901         return -1;              /* no parser or no filter list */
3902     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3903         /* Provide a default input filter to make life easy.    */
3904         /* Note that we append to the line. This is handy.      */
3905         DEBUG_P(PerlIO_printf(Perl_debug_log,
3906                               "filter_read %d: from rsfp\n", idx));
3907         if (correct_length) {
3908             /* Want a block */
3909             int len ;
3910             const int old_len = SvCUR(buf_sv);  /* bytes already in buf_sv */
3911
3912             /* ensure buf_sv is large enough */
3913             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3914             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3915                                    correct_length)) <= 0) {
3916                 if (PerlIO_error(PL_rsfp))
3917                     return -1;          /* error */
3918                 else
3919                     return 0 ;          /* end of file */
3920             }
3921             SvCUR_set(buf_sv, old_len + len) ;  /* old data + bytes just read */
3922             SvPVX(buf_sv)[old_len + len] = '\0';        /* keep it NUL-terminated */
3923         } else {
3924             /* Want a line */
3925             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {     /* append after current contents */
3926                 if (PerlIO_error(PL_rsfp))
3927                     return -1;          /* error */
3928                 else
3929                     return 0 ;          /* end of file */
3930             }
3931         }
3932         return SvCUR(buf_sv);   /* whole length of buf_sv, earlier contents included */
3933     }
3934     /* Skip this filter slot if filter has been deleted */
3935     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3936         DEBUG_P(PerlIO_printf(Perl_debug_log,
3937                               "filter_read %d: skipped (filter deleted)\n",
3938                               idx));
3939         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3940     }
3941     /* Get function pointer hidden within datasv        */
3942     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3943     DEBUG_P(PerlIO_printf(Perl_debug_log,
3944                           "filter_read %d: via function %p (%s)\n",
3945                           idx, (void*)datasv, SvPV_nolen_const(datasv))); /* NOTE(review): prints datasv's address, not funcp */
3946     /* Call function. The function is expected to       */
3947     /* call "FILTER_READ(idx+1, buf_sv, maxlen)" first. */
3948     /* Return: <0:error, =0:eof, >0:not eof             */
3949     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3950 }
3951
3952 STATIC char *
3953 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3954 {
3955     dVAR;
3956
3957     PERL_ARGS_ASSERT_FILTER_GETS;
3958
3959 #ifdef PERL_CR_FILTER
3960     /* If no filter list exists yet, install S_cr_textfilter. */
3961     if (PL_rsfp_filters == NULL)
3962         filter_add(S_cr_textfilter,NULL);
3963 #endif
3964
3965     /* No filters: read the line straight from PL_rsfp. */
3966     if (PL_rsfp_filters == NULL)
3967         return sv_gets(sv, PL_rsfp, append);
3968
3969     /* Filters present: a zero 'append' empties sv first; then the chain is
3970      * run from filter 0 with maxlen 0, i.e. one text line is requested. */
3971     if (append == 0)
3972         SvCUR_set(sv, 0);
3973     return (FILTER_READ(0, sv, 0) > 0) ? SvPVX(sv) : NULL;
3974 }
3975
     /*
      * S_find_in_my_stash
      * Maps the package name pkgname (len bytes long) to a stash:
      *   - "__PACKAGE__" yields PL_curstash;
      *   - a name longer than two bytes that ends in "::" and whose glob can
      *     be fetched yields that glob's hash (GvHV);
      *   - otherwise, if the name fetches a glob with a CV for which
      *     cv_const_sv() returns a value, that value's string replaces the
      *     name (the "use constant CLASS => 'MyClass'" case);
      * and the resulting name is looked up with gv_stashpvn(), whose result
      * is returned.
      */
3976 STATIC HV *
3977 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3978 {
3979     dVAR;
3980     GV *gv;
3981
3982     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3983
         /* len is already known to be 11 here, so compare exactly that many
          * bytes.  This honours the explicit length like every other lookup
          * in this function instead of depending on a NUL right after the
          * name. */
3984     if (len == 11 && *pkgname == '_' && strnEQ(pkgname, "__PACKAGE__", 11))
3985         return PL_curstash;
3986
3987     if (len > 2 &&
3988         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3989         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3990     {
3991         return GvHV(gv);                        /* Foo:: */
3992     }
3993
3994     /* use constant CLASS => 'MyClass' */
3995     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3996     if (gv && GvCV(gv)) {
3997         SV * const sv = cv_const_sv(GvCV(gv));
             /* SvPV_const() also updates len to the constant's length */
3998         if (sv)
3999             pkgname = SvPV_const(sv, len);
4000     }
4001
4002     return gv_stashpvn(pkgname, len, 0);
4003 }
4004
4005 /*
4006  * S_readpipe_override
4007  * Checks whether readpipe() is overridden and, if so, generates the
4008  * appropriate optree, provided sublex_start() is called afterwards.
      *
      * pl_yylval.ival is always set to OP_BACKTICK.  An override is recognised
      * when the "readpipe" glob found by gv_fetchpvs(), or failing that the
      * "readpipe" entry of PL_globalstash (which must be a real glob), has a
      * CV and is flagged as an imported CV.  In that case PL_lex_op is set to
      * an ENTERSUB op that calls that CV with a placeholder undef constant as
      * its argument.  Nothing is returned.
4009  */
4010 STATIC void
4011 S_readpipe_override(pTHX)
4012 {
4013     GV **gvp;
4014     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4015     pl_yylval.ival = OP_BACKTICK;
         /* first alternative: the glob fetched above; second alternative:
          * the entry in PL_globalstash, which replaces gv_readpipe */
4016     if ((gv_readpipe
4017                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4018             ||
4019             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4020              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4021              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4022     {
4023         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4024             op_append_elem(OP_LIST,
4025                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4026                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4027     }
4028 }
4029
4030 #ifdef PERL_MAD
4031  /*
4032  * Perl_madlex
4033  * The intent of this yylex wrapper is to minimize the changes to the
4034  * tokener when we aren't interested in collecting madprops.  It remains
4035  * to be seen how successful this strategy will be...
      *
      * Calls yylex() once (or S_pending_ident() when an identifier is
      * pending), then records the pieces collected for the token
      * (PL_thistoken, PL_thiswhite, PL_thisopen, PL_thisstuff, PL_thisclose,
      * PL_endwhite) as madprops.  For op-valued tokens the madprops are
      * appended to the op and the token type is returned directly; every
      * other token is wrapped in a new TOKEN stored in pl_yylval.tkval.
      *
      * Returns the token type.  A 0 from yylex() is turned into PEG, except
      * that 0 itself is returned when no madprops and no whitespace were
      * collected (PL_thismad, PL_thiswhite and PL_endwhite all unset).
4036  */
4037
4038 int
4039 Perl_madlex(pTHX)
4040 {
4041     int optype;
4042     char *s = PL_bufptr;
4043
4044     /* make sure PL_thiswhite is initialized */
4045     PL_thiswhite = 0;
4046     PL_thismad = 0;
4047
4048     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4049     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4050         return S_pending_ident(aTHX);
4051
4052     /* previous token ate up our whitespace? */
4053     if (!PL_lasttoke && PL_nextwhite) {
4054         PL_thiswhite = PL_nextwhite;
4055         PL_nextwhite = 0;
4056     }
4057
4058     /* isolate the token, and figure out where it is without whitespace */
4059     PL_realtokenstart = -1;
4060     PL_thistoken = 0;
4061     optype = yylex();
4062     s = PL_bufptr;
4063     assert(PL_curforce < 0);
4064
4065     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4066         if (!PL_thistoken) {
                 /* no token text recorded by yylex(): use an empty string
                  * when the start is unknown or the line number is 0,
                  * otherwise take the text from the token start up to
                  * PL_bufptr */
4067             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4068                 PL_thistoken = newSVpvs("");
4069             else {
4070                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4071                 PL_thistoken = newSVpvn(tstart, s - tstart);
4072             }
4073         }
4074         if (PL_thismad) /* install head */
4075             CURMAD('X', PL_thistoken);
4076     }
4077
4078     /* last whitespace of a sublex? */
4079     if (optype == ')' && PL_endwhite) {
4080         CURMAD('X', PL_endwhite);
4081     }
4082
4083     if (!PL_thismad) {
4084
4085         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4086         if (!PL_thiswhite && !PL_endwhite && !optype) {
4087             sv_free(PL_thistoken);
4088             PL_thistoken = 0;
4089             return 0;
4090         }
4091
4092         /* put off final whitespace till peg */
4093         if (optype == ';' && !PL_rsfp) {
4094             PL_nextwhite = PL_thiswhite;
4095             PL_thiswhite = 0;
4096         }
4097         else if (PL_thisopen) {
                 /* opening quote recorded as madprop q; the plain token text
                  * is discarded in this case */
4098             CURMAD('q', PL_thisopen);
4099             if (PL_thistoken)
4100                 sv_free(PL_thistoken);
4101             PL_thistoken = 0;
4102         }
4103         else {
4104             /* Store actual token text as madprop X */
4105             CURMAD('X', PL_thistoken);
4106         }
4107
4108         if (PL_thiswhite) {
4109             /* add preceding whitespace as madprop _ */
4110             CURMAD('_', PL_thiswhite);
4111         }
4112
4113         if (PL_thisstuff) {
4114             /* add quoted material as madprop = */
4115             CURMAD('=', PL_thisstuff);
4116         }
4117
4118         if (PL_thisclose) {
4119             /* add terminating quote as madprop Q */
4120             CURMAD('Q', PL_thisclose);
4121         }
4122     }
4123
4124     /* special processing based on optype */
4125
4126     switch (optype) {
4127
4128     /* opval doesn't need a TOKEN since it can already store mp */
4129     case WORD:
4130     case METHOD:
4131     case FUNCMETH:
4132     case THING:
4133     case PMFUNC:
4134     case PRIVATEREF:
4135     case FUNC0SUB:
4136     case UNIOPSUB:
4137     case LSTOPSUB:
4138         if (pl_yylval.opval)
4139             append_madprops(PL_thismad, pl_yylval.opval, 0);
4140         PL_thismad = 0;
4141         return optype;
4142
4143     /* fake EOF */
4144     case 0:
4145         optype = PEG;
4146         if (PL_endwhite) {
4147             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4148             PL_endwhite = 0;
4149         }
4150         break;
4151
4152     case ']':
4153     case '}':
4154         if (PL_faketokens)
4155             break;
4156         /* remember any fake bracket that lexer is about to discard */
4157         if (PL_lex_brackets == 1 &&
4158             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4159         {
4160             s = PL_bufptr;
4161             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4162                 s++;
4163             if (*s == '}') {
                     /* capture the blanks plus the '}' itself (++s steps past
                      * it), then leave PL_bufptr pointing at that '}' */
4164                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4165                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4166                 PL_thiswhite = 0;
4167                 PL_bufptr = s - 1;
4168                 break;  /* don't bother looking for trailing comment */
4169             }
4170             else
4171                 s = PL_bufptr;
4172         }
4173         if (optype == ']')
4174             break;
4175         /* FALLTHROUGH */
4176
4177     /* attach a trailing comment to its statement instead of next token */
4178     case ';':
4179         if (PL_faketokens)
4180             break;
             /* only when the character just before PL_bufptr is the token
              * character itself */
4181         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4182             s = PL_bufptr;
4183             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4184                 s++;
4185             if (*s == '\n' || *s == '#') {
                     /* consume through the end of the line, newline included
                      * when there is one, and advance PL_bufptr past it */
4186                 while (s < PL_bufend && *s != '\n')
4187                     s++;
4188                 if (s < PL_bufend)
4189                     s++;
4190                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4191                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4192                 PL_thiswhite = 0;
4193                 PL_bufptr = s;
4194             }
4195         }
4196         break;
4197
4198     /* pval */
4199     case LABEL:
4200         break;
4201
4202     /* ival */
4203     default:
4204         break;
4205
4206     }
4207
4208     /* Create new token struct.  Note: opvals return early above. */
4209     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4210     PL_thismad = 0;
4211     return optype;
4212 }
4213 #endif
4214
     /*
      * S_tokenize_use
      * Queues the tokens for what follows a "use" or "no" keyword, starting
      * at s.  is_use selects which keyword is named in the error message and
      * is stored in pl_yylval.ival.
      *
      * Reports "... not allowed in expression" through yyerror() when
      * PL_expect is not XSTATE, then carries on.
      *
      * Returns the scan position after the forced tokens.
      */
4215 STATIC char *
4216 S_tokenize_use(pTHX_ int is_use, char *s) {
4217     dVAR;
4218
4219     PERL_ARGS_ASSERT_TOKENIZE_USE;
4220
4221     if (PL_expect != XSTATE)
4222         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4223                     is_use ? "use" : "no"));
4224     s = SKIPSPACE1(s);
         /* text starting with a digit, or with 'v' followed by a digit, is
          * handed to force_version() first */
4225     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4226         s = force_version(s, TRUE);
             /* followed (possibly after whitespace) by ';' or '}': force a
              * WORD token whose opval is NULL */
4227         if (*s == ';' || *s == '}'
4228                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4229             start_force(PL_curforce);
4230             NEXTVAL_NEXTTOKE.opval = NULL;
4231             force_next(WORD);
4232         }
4233         else if (*s == 'v') {
4234             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4235             s = force_version(s, FALSE);
4236         }
4237     }
4238     else {
             /* anything else: force a word, then let force_version() look at
              * what follows it */
4239         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4240         s = force_version(s, FALSE);
4241     }
4242     pl_yylval.ival = is_use;
4243     return s;
4244 }
4245 #ifdef DEBUGGING
         /* Printable names indexed by PL_expect, used by the DEBUG_T trace at
          * the top of yylex().  NOTE(review): the order presumably mirrors
          * the values PL_expect can take -- confirm against the expectation
          * enum before adding or reordering entries. */
4246     static const char* const exp_name[] =
4247         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4248           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4249         };
4250 #endif
4251
4252 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
     /*
      * S_word_takes_any_delimeter
      * Returns true when the len-byte word at p is exactly one of
      * m, s, y, q, tr, qq, qw, qx or qr, and false for anything else.
      *
      * strchr() also matches the terminating NUL of the set string, so each
      * strchr() test is guarded by an explicit non-NUL check; without it a
      * NUL byte in the word would be treated as a match.
      */
4253 STATIC bool
4254 S_word_takes_any_delimeter(char *p, STRLEN len)
4255 {
4256     return (len == 1 && p[0] && strchr("msyq", p[0])) ||
4257            (len == 2 && (
4258             (p[0] == 't' && p[1] == 'r') ||
4259             (p[0] == 'q' && p[1] && strchr("qwxr", p[1]))));
4260 }
4261
4262 /*
4263   yylex
4264
4265   Works out what to call the token just pulled out of the input
4266   stream.  The yacc parser takes care of taking the ops we return and
4267   stitching them into a tree.
4268
4269   Returns:
4270     PRIVATEREF
4271
4272   Structure:
4273       if read an identifier
4274           if we're in a my declaration
4275               croak if they tried to say my($foo::bar)
4276               build the ops for a my() declaration
4277           if it's an access to a my() variable
4278               are we in a sort block?
4279                   croak if my($a); $a <=> $b
4280               build ops for access to a my() variable
4281           if in a dq string, and they've said @foo and we can't find @foo
4282               croak
4283           build ops for a bareword
4284       if we already built the token before, use it.
4285 */
4286
4287
4288 #ifdef __SC__
4289 #pragma segment Perl_yylex
4290 #endif
4291 int
4292 Perl_yylex(pTHX)
4293 {
4294     dVAR;
4295     register char *s = PL_bufptr;
4296     register char *d;
4297     STRLEN len;
4298     bool bof = FALSE;
4299     U32 fake_eof = 0;
4300
4301     /* orig_keyword, gvp, and gv are initialized here because
4302      * jump to the label just_a_word_zero can bypass their
4303      * initialization later. */
4304     I32 orig_keyword = 0;
4305     GV *gv = NULL;
4306     GV **gvp = NULL;
4307
4308     DEBUG_T( {
4309         SV* tmp = newSVpvs("");
4310         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4311             (IV)CopLINE(PL_curcop),
4312             lex_state_names[PL_lex_state],
4313             exp_name[PL_expect],
4314             pv_display(tmp, s, strlen(s), 0, 60));
4315         SvREFCNT_dec(tmp);
4316     } );
4317     /* check if there's an identifier for us to look at */
4318     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4319         return REPORT(S_pending_ident(aTHX));
4320
4321     /* no identifier pending identification */
4322
4323     switch (PL_lex_state) {
4324 #ifdef COMMENTARY
4325     case LEX_NORMAL:            /* Some compilers will produce faster */
4326     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4327         break;
4328 #endif
4329
4330     /* when we've already built the next token, just pull it out of the queue */
4331     case LEX_KNOWNEXT:
4332 #ifdef PERL_MAD
4333         PL_lasttoke--;
4334         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4335         if (PL_madskills) {
4336             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4337             PL_nexttoke[PL_lasttoke].next_mad = 0;
4338             if (PL_thismad && PL_thismad->mad_key == '_') {
4339                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4340                 PL_thismad->mad_val = 0;
4341                 mad_free(PL_thismad);
4342                 PL_thismad = 0;
4343             }
4344         }
4345         if (!PL_lasttoke) {
4346             PL_lex_state = PL_lex_defer;
4347             PL_expect = PL_lex_expect;
4348             PL_lex_defer = LEX_NORMAL;
4349             if (!PL_nexttoke[PL_lasttoke].next_type)
4350                 return yylex();
4351         }
4352 #else
4353         PL_nexttoke--;
4354         pl_yylval = PL_nextval[PL_nexttoke];
4355         if (!PL_nexttoke) {
4356             PL_lex_state = PL_lex_defer;
4357             PL_expect = PL_lex_expect;
4358             PL_lex_defer = LEX_NORMAL;
4359         }
4360 #endif
4361         {
4362             I32 next_type;
4363 #ifdef PERL_MAD
4364             next_type = PL_nexttoke[PL_lasttoke].next_type;
4365 #else
4366             next_type = PL_nexttype[PL_nexttoke];
4367 #endif
4368             if (next_type & (7<<24)) {
4369                 if (next_type & (1<<24)) {
4370                     if (PL_lex_brackets > 100)
4371                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4372                     PL_lex_brackstack[PL_lex_brackets++] =
4373                         (char) ((next_type >> 16) & 0xff);
4374                 }
4375                 if (next_type & (2<<24))
4376                     PL_lex_allbrackets++;
4377                 if (next_type & (4<<24))
4378                     PL_lex_allbrackets--;
4379                 next_type &= 0xffff;
4380             }
4381 #ifdef PERL_MAD
4382             /* FIXME - can these be merged?  */
4383             return next_type;
4384 #else
4385             return REPORT(next_type);
4386 #endif
4387         }
4388
4389     /* interpolated case modifiers like \L \U, including \Q and \E.
4390        when we get here, PL_bufptr is at the \
4391     */
4392     case LEX_INTERPCASEMOD:
4393 #ifdef DEBUGGING
4394         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4395             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4396 #endif
4397         /* handle \E or end of string */
4398         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4399             /* if at a \E */
4400             if (PL_lex_casemods) {
4401                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4402                 PL_lex_casestack[PL_lex_casemods] = '\0';
4403
4404                 if (PL_bufptr != PL_bufend
4405                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4406                     PL_bufptr += 2;
4407                     PL_lex_state = LEX_INTERPCONCAT;
4408 #ifdef PERL_MAD
4409                     if (PL_madskills)
4410                         PL_thistoken = newSVpvs("\\E");
4411 #endif
4412                 }
4413                 PL_lex_allbrackets--;
4414                 return REPORT(')');
4415             }
4416 #ifdef PERL_MAD
4417             while (PL_bufptr != PL_bufend &&
4418               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4419                 if (!PL_thiswhite)
4420                     PL_thiswhite = newSVpvs("");
4421                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4422                 PL_bufptr += 2;
4423             }
4424 #else
4425             if (PL_bufptr != PL_bufend)
4426                 PL_bufptr += 2;
4427 #endif
4428             PL_lex_state = LEX_INTERPCONCAT;
4429             return yylex();
4430         }
4431         else {
4432             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4433               "### Saw case modifier\n"); });
4434             s = PL_bufptr + 1;
4435             if (s[1] == '\\' && s[2] == 'E') {
4436 #ifdef PERL_MAD
4437                 if (!PL_thiswhite)
4438                     PL_thiswhite = newSVpvs("");
4439                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4440 #endif
4441                 PL_bufptr = s + 3;
4442                 PL_lex_state = LEX_INTERPCONCAT;
4443                 return yylex();
4444             }
4445             else {
4446                 I32 tmp;
4447                 if (!PL_madskills) /* when just compiling don't need correct */
4448                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4449                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4450                 if ((*s == 'L' || *s == 'U') &&
4451                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4452                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4453                     PL_lex_allbrackets--;
4454                     return REPORT(')');
4455                 }
4456                 if (PL_lex_casemods > 10)
4457                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4458                 PL_lex_casestack[PL_lex_casemods++] = *s;
4459                 PL_lex_casestack[PL_lex_casemods] = '\0';
4460                 PL_lex_state = LEX_INTERPCONCAT;
4461                 start_force(PL_curforce);
4462                 NEXTVAL_NEXTTOKE.ival = 0;
4463                 force_next((2<<24)|'(');
4464                 start_force(PL_curforce);
4465                 if (*s == 'l')
4466                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4467                 else if (*s == 'u')
4468                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4469                 else if (*s == 'L')
4470                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4471                 else if (*s == 'U')
4472                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4473                 else if (*s == 'Q')
4474                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4475                 else
4476                     Perl_croak(aTHX_ "panic: yylex");
4477                 if (PL_madskills) {
4478                     SV* const tmpsv = newSVpvs("\\ ");
4479                     /* replace the space with the character we want to escape
4480                      */
4481                     SvPVX(tmpsv)[1] = *s;
4482                     curmad('_', tmpsv);
4483                 }
4484                 PL_bufptr = s + 1;
4485             }
4486             force_next(FUNC);
4487             if (PL_lex_starts) {
4488                 s = PL_bufptr;
4489                 PL_lex_starts = 0;
4490 #ifdef PERL_MAD
4491                 if (PL_madskills) {
4492                     if (PL_thistoken)
4493                         sv_free(PL_thistoken);
4494                     PL_thistoken = newSVpvs("");
4495                 }
4496 #endif
4497                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4498                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4499                     OPERATOR(',');
4500                 else
4501                     Aop(OP_CONCAT);
4502             }
4503             else
4504                 return yylex();
4505         }
4506
4507     case LEX_INTERPPUSH:
4508         return REPORT(sublex_push());
4509
4510     case LEX_INTERPSTART:
4511         if (PL_bufptr == PL_bufend)
4512             return REPORT(sublex_done());
4513         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4514               "### Interpolated variable\n"); });
4515         PL_expect = XTERM;
4516         PL_lex_dojoin = (*PL_bufptr == '@');
4517         PL_lex_state = LEX_INTERPNORMAL;
4518         if (PL_lex_dojoin) {
4519             start_force(PL_curforce);
4520             NEXTVAL_NEXTTOKE.ival = 0;
4521             force_next(',');
4522             start_force(PL_curforce);
4523             force_ident("\"", '$');
4524             start_force(PL_curforce);
4525             NEXTVAL_NEXTTOKE.ival = 0;
4526             force_next('$');
4527             start_force(PL_curforce);
4528             NEXTVAL_NEXTTOKE.ival = 0;
4529             force_next((2<<24)|'(');
4530             start_force(PL_curforce);
4531             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4532             force_next(FUNC);
4533         }
4534         if (PL_lex_starts++) {
4535             s = PL_bufptr;
4536 #ifdef PERL_MAD
4537             if (PL_madskills) {
4538                 if (PL_thistoken)
4539                     sv_free(PL_thistoken);
4540                 PL_thistoken = newSVpvs("");
4541             }
4542 #endif
4543             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4544             if (!PL_lex_casemods && PL_lex_inpat)
4545                 OPERATOR(',');
4546             else
4547                 Aop(OP_CONCAT);
4548         }
4549         return yylex();
4550
4551     case LEX_INTERPENDMAYBE:
4552         if (intuit_more(PL_bufptr)) {
4553             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4554             break;
4555         }
4556         /* FALL THROUGH */
4557
4558     case LEX_INTERPEND:
4559         if (PL_lex_dojoin) {
4560             PL_lex_dojoin = FALSE;
4561             PL_lex_state = LEX_INTERPCONCAT;
4562 #ifdef PERL_MAD
4563             if (PL_madskills) {
4564                 if (PL_thistoken)
4565                     sv_free(PL_thistoken);
4566                 PL_thistoken = newSVpvs("");
4567             }
4568 #endif
4569             PL_lex_allbrackets--;
4570             return REPORT(')');
4571         }
4572         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4573             && SvEVALED(PL_lex_repl))
4574         {
4575             if (PL_bufptr != PL_bufend)
4576                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4577             PL_lex_repl = NULL;
4578         }
4579         /* FALLTHROUGH */
4580     case LEX_INTERPCONCAT:
4581 #ifdef DEBUGGING
4582         if (PL_lex_brackets)
4583             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4584 #endif
4585         if (PL_bufptr == PL_bufend)
4586             return REPORT(sublex_done());
4587
4588         if (SvIVX(PL_linestr) == '\'') {
4589             SV *sv = newSVsv(PL_linestr);
4590             if (!PL_lex_inpat)
4591                 sv = tokeq(sv);
4592             else if ( PL_hints & HINT_NEW_RE )
4593                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4594             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4595             s = PL_bufend;
4596         }
4597         else {
4598             s = scan_const(PL_bufptr);
4599             if (*s == '\\')
4600                 PL_lex_state = LEX_INTERPCASEMOD;
4601             else
4602                 PL_lex_state = LEX_INTERPSTART;
4603         }
4604
4605         if (s != PL_bufptr) {
4606             start_force(PL_curforce);
4607             if (PL_madskills) {
4608                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4609             }
4610             NEXTVAL_NEXTTOKE = pl_yylval;
4611             PL_expect = XTERM;
4612             force_next(THING);
4613             if (PL_lex_starts++) {
4614 #ifdef PERL_MAD
4615                 if (PL_madskills) {
4616                     if (PL_thistoken)
4617                         sv_free(PL_thistoken);
4618                     PL_thistoken = newSVpvs("");
4619                 }
4620 #endif
4621                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4622                 if (!PL_lex_casemods && PL_lex_inpat)
4623                     OPERATOR(',');
4624                 else
4625                     Aop(OP_CONCAT);
4626             }
4627             else {
4628                 PL_bufptr = s;
4629                 return yylex();
4630             }
4631         }
4632
4633         return yylex();
4634     case LEX_FORMLINE:
4635         PL_lex_state = LEX_NORMAL;
4636         s = scan_formline(PL_bufptr);
4637         if (!PL_lex_formbrack)
4638             goto rightbracket;
4639         OPERATOR(';');
4640     }
4641
4642     s = PL_bufptr;
4643     PL_oldoldbufptr = PL_oldbufptr;
4644     PL_oldbufptr = s;
4645
4646   retry:
4647 #ifdef PERL_MAD
4648     if (PL_thistoken) {
4649         sv_free(PL_thistoken);
4650         PL_thistoken = 0;
4651     }
4652     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4653 #endif
4654     switch (*s) {
4655     default:
4656         if (isIDFIRST_lazy_if(s,UTF))
4657             goto keylookup;
4658         {
4659         unsigned char c = *s;
4660         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4661         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4662             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4663         } else {
4664             d = PL_linestart;
4665         }
4666         *s = '\0';
4667         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4668     }
4669     case 4:
4670     case 26:
4671         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4672     case 0:
4673 #ifdef PERL_MAD
4674         if (PL_madskills)
4675             PL_faketokens = 0;
4676 #endif
4677         if (!PL_rsfp) {
4678             PL_last_uni = 0;
4679             PL_last_lop = 0;
4680             if (PL_lex_brackets &&
4681                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4682                 yyerror((const char *)
4683                         (PL_lex_formbrack
4684                          ? "Format not terminated"
4685                          : "Missing right curly or square bracket"));
4686             }
4687             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4688                         "### Tokener got EOF\n");
4689             } );
4690             TOKEN(0);
4691         }
4692         if (s++ < PL_bufend)
4693             goto retry;                 /* ignore stray nulls */
4694         PL_last_uni = 0;
4695         PL_last_lop = 0;
4696         if (!PL_in_eval && !PL_preambled) {
4697             PL_preambled = TRUE;
4698 #ifdef PERL_MAD
4699             if (PL_madskills)
4700                 PL_faketokens = 1;
4701 #endif
4702             if (PL_perldb) {
4703                 /* Generate a string of Perl code to load the debugger.
4704                  * If PERL5DB is set, it will return the contents of that,
4705                  * otherwise a compile-time require of perl5db.pl.  */
4706
4707                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4708
4709                 if (pdb) {
4710                     sv_setpv(PL_linestr, pdb);
4711                     sv_catpvs(PL_linestr,";");
4712                 } else {
4713                     SETERRNO(0,SS_NORMAL);
4714                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4715                 }
4716             } else
4717                 sv_setpvs(PL_linestr,"");
4718             if (PL_preambleav) {
4719                 SV **svp = AvARRAY(PL_preambleav);
4720                 SV **const end = svp + AvFILLp(PL_preambleav);
4721                 while(svp <= end) {
4722                     sv_catsv(PL_linestr, *svp);
4723                     ++svp;
4724                     sv_catpvs(PL_linestr, ";");
4725                 }
4726                 sv_free(MUTABLE_SV(PL_preambleav));
4727                 PL_preambleav = NULL;
4728             }
4729             if (PL_minus_E)
4730                 sv_catpvs(PL_linestr,
4731                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4732             if (PL_minus_n || PL_minus_p) {
4733                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4734                 if (PL_minus_l)
4735                     sv_catpvs(PL_linestr,"chomp;");
4736                 if (PL_minus_a) {
4737                     if (PL_minus_F) {
4738                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4739                              || *PL_splitstr == '"')
4740                               && strchr(PL_splitstr + 1, *PL_splitstr))
4741                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4742                         else {
4743                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4744                                bytes can be used as quoting characters.  :-) */
4745                             const char *splits = PL_splitstr;
4746                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4747                             do {
4748                                 /* Need to \ \s  */
4749                                 if (*splits == '\\')
4750                                     sv_catpvn(PL_linestr, splits, 1);
4751                                 sv_catpvn(PL_linestr, splits, 1);
4752                             } while (*splits++);
4753                             /* This loop will embed the trailing NUL of
4754                                PL_linestr as the last thing it does before
4755                                terminating.  */
4756                             sv_catpvs(PL_linestr, ");");
4757                         }
4758                     }
4759                     else
4760                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4761                 }
4762             }
4763             sv_catpvs(PL_linestr, "\n");
4764             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4765             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4766             PL_last_lop = PL_last_uni = NULL;
4767             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4768                 update_debugger_info(PL_linestr, NULL, 0);
4769             goto retry;
4770         }
4771         do {
4772             fake_eof = 0;
4773             bof = PL_rsfp ? TRUE : FALSE;
4774             if (0) {
4775               fake_eof:
4776                 fake_eof = LEX_FAKE_EOF;
4777             }
4778             PL_bufptr = PL_bufend;
4779             CopLINE_inc(PL_curcop);
4780             if (!lex_next_chunk(fake_eof)) {
4781                 CopLINE_dec(PL_curcop);
4782                 s = PL_bufptr;
4783                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4784             }
4785             CopLINE_dec(PL_curcop);
4786 #ifdef PERL_MAD
4787             if (!PL_rsfp)
4788                 PL_realtokenstart = -1;
4789 #endif
4790             s = PL_bufptr;
4791             /* If it looks like the start of a BOM or raw UTF-16,
4792              * check if it in fact is. */
4793             if (bof && PL_rsfp &&
4794                      (*s == 0 ||
4795                       *(U8*)s == 0xEF ||
4796                       *(U8*)s >= 0xFE ||
4797                       s[1] == 0)) {
4798                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4799                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4800 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4801                 /* offset may include swallowed CR */
4802                 if (!bof)
4803                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4804 #endif
4805                 if (bof) {
4806                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4807                     s = swallow_bom((U8*)s);
4808                 }
4809             }
4810             if (PL_parser->in_pod) {
4811                 /* Incest with pod. */
4812 #ifdef PERL_MAD
4813                 if (PL_madskills)
4814                     sv_catsv(PL_thiswhite, PL_linestr);
4815 #endif
4816                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4817                     sv_setpvs(PL_linestr, "");
4818                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4819                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4820                     PL_last_lop = PL_last_uni = NULL;
4821                     PL_parser->in_pod = 0;
4822                 }
4823             }
4824             if (PL_rsfp)
4825                 incline(s);
4826         } while (PL_parser->in_pod);
4827         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4828         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4829         PL_last_lop = PL_last_uni = NULL;
4830         if (CopLINE(PL_curcop) == 1) {
4831             while (s < PL_bufend && isSPACE(*s))
4832                 s++;
4833             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4834                 s++;
4835 #ifdef PERL_MAD
4836             if (PL_madskills)
4837                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4838 #endif
4839             d = NULL;
4840             if (!PL_in_eval) {
4841                 if (*s == '#' && *(s+1) == '!')
4842                     d = s + 2;
4843 #ifdef ALTERNATE_SHEBANG
4844                 else {
4845                     static char const as[] = ALTERNATE_SHEBANG;
4846                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4847                         d = s + (sizeof(as) - 1);
4848                 }
4849 #endif /* ALTERNATE_SHEBANG */
4850             }
4851             if (d) {
4852                 char *ipath;
4853                 char *ipathend;
4854
4855                 while (isSPACE(*d))
4856                     d++;
4857                 ipath = d;
4858                 while (*d && !isSPACE(*d))
4859                     d++;
4860                 ipathend = d;
4861
4862 #ifdef ARG_ZERO_IS_SCRIPT
4863                 if (ipathend > ipath) {
4864                     /*
4865                      * HP-UX (at least) sets argv[0] to the script name,
4866                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4867                      * at least, set argv[0] to the basename of the Perl
4868                      * interpreter. So, having found "#!", we'll set it right.
4869                      */
4870                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4871                                                     SVt_PV)); /* $^X */
4872                     assert(SvPOK(x) || SvGMAGICAL(x));
4873                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4874                         sv_setpvn(x, ipath, ipathend - ipath);
4875                         SvSETMAGIC(x);
4876                     }
4877                     else {
4878                         STRLEN blen;
4879                         STRLEN llen;
4880                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4881                         const char * const lstart = SvPV_const(x,llen);
4882                         if (llen < blen) {
4883                             bstart += blen - llen;
4884                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4885                                 sv_setpvn(x, ipath, ipathend - ipath);
4886                                 SvSETMAGIC(x);
4887                             }
4888                         }
4889                     }
4890                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4891                 }
4892 #endif /* ARG_ZERO_IS_SCRIPT */
4893
4894                 /*
4895                  * Look for options.
4896                  */
4897                 d = instr(s,"perl -");
4898                 if (!d) {
4899                     d = instr(s,"perl");
4900 #if defined(DOSISH)
4901                     /* avoid getting into infinite loops when shebang
4902                      * line contains "Perl" rather than "perl" */
4903                     if (!d) {
4904                         for (d = ipathend-4; d >= ipath; --d) {
4905                             if ((*d == 'p' || *d == 'P')
4906                                 && !ibcmp(d, "perl", 4))
4907                             {
4908                                 break;
4909                             }
4910                         }
4911                         if (d < ipath)
4912                             d = NULL;
4913                     }
4914 #endif
4915                 }
4916 #ifdef ALTERNATE_SHEBANG
4917                 /*
4918                  * If the ALTERNATE_SHEBANG on this system starts with a
4919                  * character that can be part of a Perl expression, then if
4920                  * we see it but not "perl", we're probably looking at the
4921                  * start of Perl code, not a request to hand off to some
4922                  * other interpreter.  Similarly, if "perl" is there, but
4923                  * not in the first 'word' of the line, we assume the line
4924                  * contains the start of the Perl program.
4925                  */
4926                 if (d && *s != '#') {
4927                     const char *c = ipath;
4928                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4929                         c++;
4930                     if (c < d)
4931                         d = NULL;       /* "perl" not in first word; ignore */
4932                     else
4933                         *s = '#';       /* Don't try to parse shebang line */
4934                 }
4935 #endif /* ALTERNATE_SHEBANG */
4936                 if (!d &&
4937                     *s == '#' &&
4938                     ipathend > ipath &&
4939                     !PL_minus_c &&
4940                     !instr(s,"indir") &&
4941                     instr(PL_origargv[0],"perl"))
4942                 {
4943                     dVAR;
4944                     char **newargv;
4945
4946                     *ipathend = '\0';
4947                     s = ipathend + 1;
4948                     while (s < PL_bufend && isSPACE(*s))
4949                         s++;
4950                     if (s < PL_bufend) {
4951                         Newx(newargv,PL_origargc+3,char*);
4952                         newargv[1] = s;
4953                         while (s < PL_bufend && !isSPACE(*s))
4954                             s++;
4955                         *s = '\0';
4956                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4957                     }
4958                     else
4959                         newargv = PL_origargv;
4960                     newargv[0] = ipath;
4961                     PERL_FPU_PRE_EXEC
4962                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4963                     PERL_FPU_POST_EXEC
4964                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4965                 }
4966                 if (d) {
4967                     while (*d && !isSPACE(*d))
4968                         d++;
4969                     while (SPACE_OR_TAB(*d))
4970                         d++;
4971
4972                     if (*d++ == '-') {
4973                         const bool switches_done = PL_doswitches;
4974                         const U32 oldpdb = PL_perldb;
4975                         const bool oldn = PL_minus_n;
4976                         const bool oldp = PL_minus_p;
4977                         const char *d1 = d;
4978
4979                         do {
4980                             bool baduni = FALSE;
4981                             if (*d1 == 'C') {
4982                                 const char *d2 = d1 + 1;
4983                                 if (parse_unicode_opts((const char **)&d2)
4984                                     != PL_unicode)
4985                                     baduni = TRUE;
4986                             }
4987                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4988                                 const char * const m = d1;
4989                                 while (*d1 && !isSPACE(*d1))
4990                                     d1++;
4991                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4992                                       (int)(d1 - m), m);
4993                             }
4994                             d1 = moreswitches(d1);
4995                         } while (d1);
4996                         if (PL_doswitches && !switches_done) {
4997                             int argc = PL_origargc;
4998                             char **argv = PL_origargv;
4999                             do {
5000                                 argc--,argv++;
5001                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5002                             init_argv_symbols(argc,argv);
5003                         }
5004                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5005                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5006                               /* if we have already added "LINE: while (<>) {",
5007                                  we must not do it again */
5008                         {
5009                             sv_setpvs(PL_linestr, "");
5010                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5011                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5012                             PL_last_lop = PL_last_uni = NULL;
5013                             PL_preambled = FALSE;
5014                             if (PERLDB_LINE || PERLDB_SAVESRC)
5015                                 (void)gv_fetchfile(PL_origfilename);
5016                             goto retry;
5017                         }
5018                     }
5019                 }
5020             }
5021         }
5022         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5023             PL_bufptr = s;
5024             PL_lex_state = LEX_FORMLINE;
5025             return yylex();
5026         }
5027         goto retry;
5028     case '\r':
5029 #ifdef PERL_STRICT_CR
5030         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5031         Perl_croak(aTHX_
5032       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5033 #endif
5034     case ' ': case '\t': case '\f': case 013:
5035 #ifdef PERL_MAD
5036         PL_realtokenstart = -1;
5037         if (!PL_thiswhite)
5038             PL_thiswhite = newSVpvs("");
5039         sv_catpvn(PL_thiswhite, s, 1);
5040 #endif
5041         s++;
5042         goto retry;
5043     case '#':
5044     case '\n':
5045 #ifdef PERL_MAD
5046         PL_realtokenstart = -1;
5047         if (PL_madskills)
5048             PL_faketokens = 0;
5049 #endif
5050         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5051             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5052                 /* handle eval qq[#line 1 "foo"\n ...] */
5053                 CopLINE_dec(PL_curcop);
5054                 incline(s);
5055             }
5056             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5057                 s = SKIPSPACE0(s);
5058                 if (!PL_in_eval || PL_rsfp)
5059                     incline(s);
5060             }
5061             else {
5062                 d = s;
5063                 while (d < PL_bufend && *d != '\n')
5064                     d++;
5065                 if (d < PL_bufend)
5066                     d++;
5067                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5068                   Perl_croak(aTHX_ "panic: input overflow");
5069 #ifdef PERL_MAD
5070                 if (PL_madskills)
5071                     PL_thiswhite = newSVpvn(s, d - s);
5072 #endif
5073                 s = d;
5074                 incline(s);
5075             }
5076             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5077                 PL_bufptr = s;
5078                 PL_lex_state = LEX_FORMLINE;
5079                 return yylex();
5080             }
5081         }
5082         else {
5083 #ifdef PERL_MAD
5084             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5085                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5086                     PL_faketokens = 0;
5087                     s = SKIPSPACE0(s);
5088                     TOKEN(PEG); /* make sure any #! line is accessible */
5089                 }
5090                 s = SKIPSPACE0(s);
5091             }
5092             else {
5093 /*              if (PL_madskills && PL_lex_formbrack) { */
5094                     d = s;
5095                     while (d < PL_bufend && *d != '\n')
5096                         d++;
5097                     if (d < PL_bufend)
5098                         d++;
5099                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5100                       Perl_croak(aTHX_ "panic: input overflow");
5101                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5102                         if (!PL_thiswhite)
5103                             PL_thiswhite = newSVpvs("");
5104                         if (CopLINE(PL_curcop) == 1) {
5105                             sv_setpvs(PL_thiswhite, "");
5106                             PL_faketokens = 0;
5107                         }
5108                         sv_catpvn(PL_thiswhite, s, d - s);
5109                     }
5110                     s = d;
5111 /*              }
5112                 *s = '\0';
5113                 PL_bufend = s; */
5114             }
5115 #else
5116             *s = '\0';
5117             PL_bufend = s;
5118 #endif
5119         }
5120         goto retry;
5121     case '-':
5122         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5123             I32 ftst = 0;
5124             char tmp;
5125
5126             s++;
5127             PL_bufptr = s;
5128             tmp = *s++;
5129
5130             while (s < PL_bufend && SPACE_OR_TAB(*s))
5131                 s++;
5132
5133             if (strnEQ(s,"=>",2)) {
5134                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5135                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5136                 OPERATOR('-');          /* unary minus */
5137             }
5138             PL_last_uni = PL_oldbufptr;
5139             switch (tmp) {
5140             case 'r': ftst = OP_FTEREAD;        break;
5141             case 'w': ftst = OP_FTEWRITE;       break;
5142             case 'x': ftst = OP_FTEEXEC;        break;
5143             case 'o': ftst = OP_FTEOWNED;       break;
5144             case 'R': ftst = OP_FTRREAD;        break;
5145             case 'W': ftst = OP_FTRWRITE;       break;
5146             case 'X': ftst = OP_FTREXEC;        break;
5147             case 'O': ftst = OP_FTROWNED;       break;
5148             case 'e': ftst = OP_FTIS;           break;
5149             case 'z': ftst = OP_FTZERO;         break;
5150             case 's': ftst = OP_FTSIZE;         break;
5151             case 'f': ftst = OP_FTFILE;         break;
5152             case 'd': ftst = OP_FTDIR;          break;
5153             case 'l': ftst = OP_FTLINK;         break;
5154             case 'p': ftst = OP_FTPIPE;         break;
5155             case 'S': ftst = OP_FTSOCK;         break;
5156             case 'u': ftst = OP_FTSUID;         break;
5157             case 'g': ftst = OP_FTSGID;         break;
5158             case 'k': ftst = OP_FTSVTX;         break;
5159             case 'b': ftst = OP_FTBLK;          break;
5160             case 'c': ftst = OP_FTCHR;          break;
5161             case 't': ftst = OP_FTTTY;          break;
5162             case 'T': ftst = OP_FTTEXT;         break;
5163             case 'B': ftst = OP_FTBINARY;       break;
5164             case 'M': case 'A': case 'C':
5165                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5166                 switch (tmp) {
5167                 case 'M': ftst = OP_FTMTIME;    break;
5168                 case 'A': ftst = OP_FTATIME;    break;
5169                 case 'C': ftst = OP_FTCTIME;    break;
5170                 default:                        break;
5171                 }
5172                 break;
5173             default:
5174                 break;
5175             }
5176             if (ftst) {
5177                 PL_last_lop_op = (OPCODE)ftst;
5178                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5179                         "### Saw file test %c\n", (int)tmp);
5180                 } );
5181                 FTST(ftst);
5182             }
5183             else {
5184                 /* Assume it was a minus followed by a one-letter named
5185                  * subroutine call (or a -bareword), then. */
5186                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5187                         "### '-%c' looked like a file test but was not\n",
5188                         (int) tmp);
5189                 } );
5190                 s = --PL_bufptr;
5191             }
5192         }
5193         {
5194             const char tmp = *s++;
5195             if (*s == tmp) {
5196                 s++;
5197                 if (PL_expect == XOPERATOR)
5198                     TERM(POSTDEC);
5199                 else
5200                     OPERATOR(PREDEC);
5201             }
5202             else if (*s == '>') {
5203                 s++;
5204                 s = SKIPSPACE1(s);
5205                 if (isIDFIRST_lazy_if(s,UTF)) {
5206                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5207                     TOKEN(ARROW);
5208                 }
5209                 else if (*s == '$')
5210                     OPERATOR(ARROW);
5211                 else
5212                     TERM(ARROW);
5213             }
5214             if (PL_expect == XOPERATOR) {
5215                 if (*s == '=' && !PL_lex_allbrackets &&
5216                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5217                     s--;
5218                     TOKEN(0);
5219                 }
5220                 Aop(OP_SUBTRACT);
5221             }
5222             else {
5223                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5224                     check_uni();
5225                 OPERATOR('-');          /* unary minus */
5226             }
5227         }
5228
5229     case '+':
5230         {
5231             const char tmp = *s++;
5232             if (*s == tmp) {
5233                 s++;
5234                 if (PL_expect == XOPERATOR)
5235                     TERM(POSTINC);
5236                 else
5237                     OPERATOR(PREINC);
5238             }
5239             if (PL_expect == XOPERATOR) {
5240                 if (*s == '=' && !PL_lex_allbrackets &&
5241                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5242                     s--;
5243                     TOKEN(0);
5244                 }
5245                 Aop(OP_ADD);
5246             }
5247             else {
5248                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5249                     check_uni();
5250                 OPERATOR('+');
5251             }
5252         }
5253
5254     case '*':
5255         if (PL_expect != XOPERATOR) {
5256             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5257             PL_expect = XOPERATOR;
5258             force_ident(PL_tokenbuf, '*');
5259             if (!*PL_tokenbuf)
5260                 PREREF('*');
5261             TERM('*');
5262         }
5263         s++;
5264         if (*s == '*') {
5265             s++;
5266             if (*s == '=' && !PL_lex_allbrackets &&
5267                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5268                 s -= 2;
5269                 TOKEN(0);
5270             }
5271             PWop(OP_POW);
5272         }
5273         if (*s == '=' && !PL_lex_allbrackets &&
5274                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5275             s--;
5276             TOKEN(0);
5277         }
5278         Mop(OP_MULTIPLY);
5279
5280     case '%':
5281         if (PL_expect == XOPERATOR) {
5282             if (s[1] == '=' && !PL_lex_allbrackets &&
5283                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5284                 TOKEN(0);
5285             ++s;
5286             Mop(OP_MODULO);
5287         }
5288         PL_tokenbuf[0] = '%';
5289         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5290                 sizeof PL_tokenbuf - 1, FALSE);
5291         if (!PL_tokenbuf[1]) {
5292             PREREF('%');
5293         }
5294         PL_pending_ident = '%';
5295         TERM('%');
5296
5297     case '^':
5298         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5299                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5300             TOKEN(0);
5301         s++;
5302         BOop(OP_BIT_XOR);
5303     case '[':
5304         if (PL_lex_brackets > 100)
5305             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5306         PL_lex_brackstack[PL_lex_brackets++] = 0;
5307         PL_lex_allbrackets++;
5308         {
5309             const char tmp = *s++;
5310             OPERATOR(tmp);
5311         }
5312     case '~':
5313         if (s[1] == '~'
5314             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5315         {
5316             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5317                 TOKEN(0);
5318             s += 2;
5319             Eop(OP_SMARTMATCH);
5320         }
5321         s++;
5322         OPERATOR('~');
5323     case ',':
5324         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5325             TOKEN(0);
5326         s++;
5327         OPERATOR(',');
5328     case ':':
5329         if (s[1] == ':') {
5330             len = 0;
5331             goto just_a_word_zero_gv;
5332         }
5333         s++;
5334         switch (PL_expect) {
5335             OP *attrs;
5336 #ifdef PERL_MAD
5337             I32 stuffstart;
5338 #endif
5339         case XOPERATOR:
5340             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5341                 break;
5342             PL_bufptr = s;      /* update in case we back off */
5343             if (*s == '=') {
5344                 Perl_croak(aTHX_
5345                            "Use of := for an empty attribute list is not allowed");
5346             }
5347             goto grabattrs;
5348         case XATTRBLOCK:
5349             PL_expect = XBLOCK;
5350             goto grabattrs;
5351         case XATTRTERM:
5352             PL_expect = XTERMBLOCK;
5353          grabattrs:
5354 #ifdef PERL_MAD
5355             stuffstart = s - SvPVX(PL_linestr) - 1;
5356 #endif
5357             s = PEEKSPACE(s);
5358             attrs = NULL;
5359             while (isIDFIRST_lazy_if(s,UTF)) {
5360                 I32 tmp;
5361                 SV *sv;
5362                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5363                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5364                     if (tmp < 0) tmp = -tmp;
5365                     switch (tmp) {
5366                     case KEY_or:
5367                     case KEY_and:
5368                     case KEY_for:
5369                     case KEY_foreach:
5370                     case KEY_unless:
5371                     case KEY_if:
5372                     case KEY_while:
5373                     case KEY_until:
5374                         goto got_attrs;
5375                     default:
5376                         break;
5377                     }
5378                 }
5379                 sv = newSVpvn(s, len);
5380                 if (*d == '(') {
5381                     d = scan_str(d,TRUE,TRUE);
5382                     if (!d) {
5383                         /* MUST advance bufptr here to avoid bogus
5384                            "at end of line" context messages from yyerror().
5385                          */
5386                         PL_bufptr = s + len;
5387                         yyerror("Unterminated attribute parameter in attribute list");
5388                         if (attrs)
5389                             op_free(attrs);
5390                         sv_free(sv);
5391                         return REPORT(0);       /* EOF indicator */
5392                     }
5393                 }
5394                 if (PL_lex_stuff) {
5395                     sv_catsv(sv, PL_lex_stuff);
5396                     attrs = op_append_elem(OP_LIST, attrs,
5397                                         newSVOP(OP_CONST, 0, sv));
5398                     SvREFCNT_dec(PL_lex_stuff);
5399                     PL_lex_stuff = NULL;
5400                 }
5401                 else {
5402                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5403                         sv_free(sv);
5404                         if (PL_in_my == KEY_our) {
5405                             deprecate(":unique");
5406                         }
5407                         else
5408                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5409                     }
5410
5411                     /* NOTE: any CV attrs applied here need to be part of
5412                        the CVf_BUILTIN_ATTRS define in cv.h! */
5413                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5414                         sv_free(sv);
5415                         CvLVALUE_on(PL_compcv);
5416                     }
5417                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5418                         sv_free(sv);
5419                         deprecate(":locked");
5420                     }
5421                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5422                         sv_free(sv);
5423                         CvMETHOD_on(PL_compcv);
5424                     }
5425                     /* After we've set the flags, it could be argued that
5426                        we don't need to do the attributes.pm-based setting
5427                        process, and shouldn't bother appending recognized
5428                        flags.  To experiment with that, uncomment the
5429                        following "else".  (Note that's already been
5430                        uncommented.  That keeps the above-applied built-in
5431                        attributes from being intercepted (and possibly
5432                        rejected) by a package's attribute routines, but is
5433                        justified by the performance win for the common case
5434                        of applying only built-in attributes.) */
5435                     else
5436                         attrs = op_append_elem(OP_LIST, attrs,
5437                                             newSVOP(OP_CONST, 0,
5438                                                     sv));
5439                 }
5440                 s = PEEKSPACE(d);
5441                 if (*s == ':' && s[1] != ':')
5442                     s = PEEKSPACE(s+1);
5443                 else if (s == d)
5444                     break;      /* require real whitespace or :'s */
5445                 /* XXX losing whitespace on sequential attributes here */
5446             }
5447             {
5448                 const char tmp
5449                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5450                 if (*s != ';' && *s != '}' && *s != tmp
5451                     && (tmp != '=' || *s != ')')) {
5452                     const char q = ((*s == '\'') ? '"' : '\'');
5453                     /* If here for an expression, and parsed no attrs, back
5454                        off. */
5455                     if (tmp == '=' && !attrs) {
5456                         s = PL_bufptr;
5457                         break;
5458                     }
5459                     /* MUST advance bufptr here to avoid bogus "at end of line"
5460                        context messages from yyerror().
5461                     */
5462                     PL_bufptr = s;
5463                     yyerror( (const char *)
5464                              (*s
5465                               ? Perl_form(aTHX_ "Invalid separator character "
5466                                           "%c%c%c in attribute list", q, *s, q)
5467                               : "Unterminated attribute list" ) );
5468                     if (attrs)
5469                         op_free(attrs);
5470                     OPERATOR(':');
5471                 }
5472             }
5473         got_attrs:
5474             if (attrs) {
5475                 start_force(PL_curforce);
5476                 NEXTVAL_NEXTTOKE.opval = attrs;
5477                 CURMAD('_', PL_nextwhite);
5478                 force_next(THING);
5479             }
5480 #ifdef PERL_MAD
5481             if (PL_madskills) {
5482                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5483                                      (s - SvPVX(PL_linestr)) - stuffstart);
5484             }
5485 #endif
5486             TOKEN(COLONATTR);
5487         }
5488         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5489             s--;
5490             TOKEN(0);
5491         }
5492         PL_lex_allbrackets--;
5493         OPERATOR(':');
5494     case '(':
5495         s++;
5496         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5497             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5498         else
5499             PL_expect = XTERM;
5500         s = SKIPSPACE1(s);
5501         PL_lex_allbrackets++;
5502         TOKEN('(');
5503     case ';':
5504         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5505             TOKEN(0);
5506         CLINE;
5507         s++;
5508         OPERATOR(';');
5509     case ')':
5510         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5511             TOKEN(0);
5512         s++;
5513         PL_lex_allbrackets--;
5514         s = SKIPSPACE1(s);
5515         if (*s == '{')
5516             PREBLOCK(')');
5517         TERM(')');
5518     case ']':
5519         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5520             TOKEN(0);
5521         s++;
5522         if (PL_lex_brackets <= 0)
5523             yyerror("Unmatched right square bracket");
5524         else
5525             --PL_lex_brackets;
5526         PL_lex_allbrackets--;
5527         if (PL_lex_state == LEX_INTERPNORMAL) {
5528             if (PL_lex_brackets == 0) {
5529                 if (*s == '-' && s[1] == '>')
5530                     PL_lex_state = LEX_INTERPENDMAYBE;
5531                 else if (*s != '[' && *s != '{')
5532                     PL_lex_state = LEX_INTERPEND;
5533             }
5534         }
5535         TERM(']');
5536     case '{':
5537       leftbracket:
5538         s++;
5539         if (PL_lex_brackets > 100) {
5540             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5541         }
5542         switch (PL_expect) {
5543         case XTERM:
5544             if (PL_lex_formbrack) {
5545                 s--;
5546                 PRETERMBLOCK(DO);
5547             }
5548             if (PL_oldoldbufptr == PL_last_lop)
5549                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5550             else
5551                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5552             PL_lex_allbrackets++;
5553             OPERATOR(HASHBRACK);
5554         case XOPERATOR:
5555             while (s < PL_bufend && SPACE_OR_TAB(*s))
5556                 s++;
5557             d = s;
5558             PL_tokenbuf[0] = '\0';
5559             if (d < PL_bufend && *d == '-') {
5560                 PL_tokenbuf[0] = '-';
5561                 d++;
5562                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5563                     d++;
5564             }
5565             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5566                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5567                               FALSE, &len);
5568                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5569                     d++;
5570                 if (*d == '}') {
5571                     const char minus = (PL_tokenbuf[0] == '-');
5572                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5573                     if (minus)
5574                         force_next('-');
5575                 }
5576             }
5577             /* FALL THROUGH */
5578         case XATTRBLOCK:
5579         case XBLOCK:
5580             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5581             PL_lex_allbrackets++;
5582             PL_expect = XSTATE;
5583             break;
5584         case XATTRTERM:
5585         case XTERMBLOCK:
5586             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5587             PL_lex_allbrackets++;
5588             PL_expect = XSTATE;
5589             break;
5590         default: {
5591                 const char *t;
5592                 if (PL_oldoldbufptr == PL_last_lop)
5593                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5594                 else
5595                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5596                 PL_lex_allbrackets++;
5597                 s = SKIPSPACE1(s);
5598                 if (*s == '}') {
5599                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5600                         PL_expect = XTERM;
5601                         /* This hack is to get the ${} in the message. */
5602                         PL_bufptr = s+1;
5603                         yyerror("syntax error");
5604                         break;
5605                     }
5606                     OPERATOR(HASHBRACK);
5607                 }
5608                 /* This hack serves to disambiguate a pair of curlies
5609                  * as being a block or an anon hash.  Normally, expectation
5610                  * determines that, but in cases where we're not in a
5611                  * position to expect anything in particular (like inside
5612                  * eval"") we have to resolve the ambiguity.  This code
5613                  * covers the case where the first term in the curlies is a
5614                  * quoted string.  Most other cases need to be explicitly
5615                  * disambiguated by prepending a "+" before the opening
5616                  * curly in order to force resolution as an anon hash.
5617                  *
5618                  * XXX should probably propagate the outer expectation
5619                  * into eval"" to rely less on this hack, but that could
5620                  * potentially break current behavior of eval"".
5621                  * GSAR 97-07-21
5622                  */
5623                 t = s;
5624                 if (*s == '\'' || *s == '"' || *s == '`') {
5625                     /* common case: get past first string, handling escapes */
5626                     for (t++; t < PL_bufend && *t != *s;)
5627                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5628                             t++;
5629                     t++;
5630                 }
5631                 else if (*s == 'q') {
5632                     if (++t < PL_bufend
5633                         && (!isALNUM(*t)
5634                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5635                                 && !isALNUM(*t))))
5636                     {
5637                         /* skip q//-like construct */
5638                         const char *tmps;
5639                         char open, close, term;
5640                         I32 brackets = 1;
5641
5642                         while (t < PL_bufend && isSPACE(*t))
5643                             t++;
5644                         /* check for q => */
5645                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5646                             OPERATOR(HASHBRACK);
5647                         }
5648                         term = *t;
5649                         open = term;
5650                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5651                             term = tmps[5];
5652                         close = term;
5653                         if (open == close)
5654                             for (t++; t < PL_bufend; t++) {
5655                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5656                                     t++;
5657                                 else if (*t == open)
5658                                     break;
5659                             }
5660                         else {
5661                             for (t++; t < PL_bufend; t++) {
5662                                 if (*t == '\\' && t+1 < PL_bufend)
5663                                     t++;
5664                                 else if (*t == close && --brackets <= 0)
5665                                     break;
5666                                 else if (*t == open)
5667                                     brackets++;
5668                             }
5669                         }
5670                         t++;
5671                     }
5672                     else
5673                         /* skip plain q word */
5674                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5675                              t += UTF8SKIP(t);
5676                 }
5677                 else if (isALNUM_lazy_if(t,UTF)) {
5678                     t += UTF8SKIP(t);
5679                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5680                          t += UTF8SKIP(t);
5681                 }
5682                 while (t < PL_bufend && isSPACE(*t))
5683                     t++;
5684                 /* if comma follows first term, call it an anon hash */
5685                 /* XXX it could be a comma expression with loop modifiers */
5686                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5687                                    || (*t == '=' && t[1] == '>')))
5688                     OPERATOR(HASHBRACK);
5689                 if (PL_expect == XREF)
5690                     PL_expect = XTERM;
5691                 else {
5692                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5693                     PL_expect = XSTATE;
5694                 }
5695             }
5696             break;
5697         }
5698         pl_yylval.ival = CopLINE(PL_curcop);
5699         if (isSPACE(*s) || *s == '#')
5700             PL_copline = NOLINE;   /* invalidate current command line number */
5701         TOKEN('{');
5702     case '}':
5703         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5704             TOKEN(0);
5705       rightbracket:
5706         s++;
5707         if (PL_lex_brackets <= 0)
5708             yyerror("Unmatched right curly bracket");
5709         else
5710             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5711         PL_lex_allbrackets--;
5712         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5713             PL_lex_formbrack = 0;
5714         if (PL_lex_state == LEX_INTERPNORMAL) {
5715             if (PL_lex_brackets == 0) {
5716                 if (PL_expect & XFAKEBRACK) {
5717                     PL_expect &= XENUMMASK;
5718                     PL_lex_state = LEX_INTERPEND;
5719                     PL_bufptr = s;
5720 #if 0
5721                     if (PL_madskills) {
5722                         if (!PL_thiswhite)
5723                             PL_thiswhite = newSVpvs("");
5724                         sv_catpvs(PL_thiswhite,"}");
5725                     }
5726 #endif
5727                     return yylex();     /* ignore fake brackets */
5728                 }
5729                 if (*s == '-' && s[1] == '>')
5730                     PL_lex_state = LEX_INTERPENDMAYBE;
5731                 else if (*s != '[' && *s != '{')
5732                     PL_lex_state = LEX_INTERPEND;
5733             }
5734         }
5735         if (PL_expect & XFAKEBRACK) {
5736             PL_expect &= XENUMMASK;
5737             PL_bufptr = s;
5738             return yylex();             /* ignore fake brackets */
5739         }
5740         start_force(PL_curforce);
5741         if (PL_madskills) {
5742             curmad('X', newSVpvn(s-1,1));
5743             CURMAD('_', PL_thiswhite);
5744         }
5745         force_next('}');
5746 #ifdef PERL_MAD
5747         if (!PL_thistoken)
5748             PL_thistoken = newSVpvs("");
5749 #endif
5750         TOKEN(';');
5751     case '&':
5752         s++;
5753         if (*s++ == '&') {
5754             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5756                 s -= 2;
5757                 TOKEN(0);
5758             }
5759             AOPERATOR(ANDAND);
5760         }
5761         s--;
5762         if (PL_expect == XOPERATOR) {
5763             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5764                 && isIDFIRST_lazy_if(s,UTF))
5765             {
5766                 CopLINE_dec(PL_curcop);
5767                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5768                 CopLINE_inc(PL_curcop);
5769             }
5770             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5771                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5772                 s--;
5773                 TOKEN(0);
5774             }
5775             BAop(OP_BIT_AND);
5776         }
5777
5778         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5779         if (*PL_tokenbuf) {
5780             PL_expect = XOPERATOR;
5781             force_ident(PL_tokenbuf, '&');
5782         }
5783         else
5784             PREREF('&');
5785         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5786         TERM('&');
5787
5788     case '|':
5789         s++;
5790         if (*s++ == '|') {
5791             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5792                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5793                 s -= 2;
5794                 TOKEN(0);
5795             }
5796             AOPERATOR(OROR);
5797         }
5798         s--;
5799         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5800                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5801             s--;
5802             TOKEN(0);
5803         }
5804         BOop(OP_BIT_OR);
5805     case '=':
5806         s++;
5807         {
5808             const char tmp = *s++;
5809             if (tmp == '=') {
5810                 if (!PL_lex_allbrackets &&
5811                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5812                     s -= 2;
5813                     TOKEN(0);
5814                 }
5815                 Eop(OP_EQ);
5816             }
5817             if (tmp == '>') {
5818                 if (!PL_lex_allbrackets &&
5819                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5820                     s -= 2;
5821                     TOKEN(0);
5822                 }
5823                 OPERATOR(',');
5824             }
5825             if (tmp == '~')
5826                 PMop(OP_MATCH);
5827             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5828                 && strchr("+-*/%.^&|<",tmp))
5829                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5830                             "Reversed %c= operator",(int)tmp);
5831             s--;
5832             if (PL_expect == XSTATE && isALPHA(tmp) &&
5833                 (s == PL_linestart+1 || s[-2] == '\n') )
5834                 {
5835                     if (PL_in_eval && !PL_rsfp) {
5836                         d = PL_bufend;
5837                         while (s < d) {
5838                             if (*s++ == '\n') {
5839                                 incline(s);
5840                                 if (strnEQ(s,"=cut",4)) {
5841                                     s = strchr(s,'\n');
5842                                     if (s)
5843                                         s++;
5844                                     else
5845                                         s = d;
5846                                     incline(s);
5847                                     goto retry;
5848                                 }
5849                             }
5850                         }
5851                         goto retry;
5852                     }
5853 #ifdef PERL_MAD
5854                     if (PL_madskills) {
5855                         if (!PL_thiswhite)
5856                             PL_thiswhite = newSVpvs("");
5857                         sv_catpvn(PL_thiswhite, PL_linestart,
5858                                   PL_bufend - PL_linestart);
5859                     }
5860 #endif
5861                     s = PL_bufend;
5862                     PL_parser->in_pod = 1;
5863                     goto retry;
5864                 }
5865         }
5866         if (PL_lex_brackets < PL_lex_formbrack) {
5867             const char *t = s;
5868 #ifdef PERL_STRICT_CR
5869             while (SPACE_OR_TAB(*t))
5870 #else
5871             while (SPACE_OR_TAB(*t) || *t == '\r')
5872 #endif
5873                 t++;
5874             if (*t == '\n' || *t == '#') {
5875                 s--;
5876                 PL_expect = XBLOCK;
5877                 goto leftbracket;
5878             }
5879         }
5880         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5881             s--;
5882             TOKEN(0);
5883         }
5884         pl_yylval.ival = 0;
5885         OPERATOR(ASSIGNOP);
5886     case '!':
5887         s++;
5888         {
5889             const char tmp = *s++;
5890             if (tmp == '=') {
5891                 /* was this !=~ where !~ was meant?
5892                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5893
5894                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5895                     const char *t = s+1;
5896
5897                     while (t < PL_bufend && isSPACE(*t))
5898                         ++t;
5899
5900                     if (*t == '/' || *t == '?' ||
5901                         ((*t == 'm' || *t == 's' || *t == 'y')
5902                          && !isALNUM(t[1])) ||
5903                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5904                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5905                                     "!=~ should be !~");
5906                 }
5907                 if (!PL_lex_allbrackets &&
5908                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5909                     s -= 2;
5910                     TOKEN(0);
5911                 }
5912                 Eop(OP_NE);
5913             }
5914             if (tmp == '~')
5915                 PMop(OP_NOT);
5916         }
5917         s--;
5918         OPERATOR('!');
5919     case '<':
5920         if (PL_expect != XOPERATOR) {
5921             if (s[1] != '<' && !strchr(s,'>'))
5922                 check_uni();
5923             if (s[1] == '<')
5924                 s = scan_heredoc(s);
5925             else
5926                 s = scan_inputsymbol(s);
5927             TERM(sublex_start());
5928         }
5929         s++;
5930         {
5931             char tmp = *s++;
5932             if (tmp == '<') {
5933                 if (*s == '=' && !PL_lex_allbrackets &&
5934                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5935                     s -= 2;
5936                     TOKEN(0);
5937                 }
5938                 SHop(OP_LEFT_SHIFT);
5939             }
5940             if (tmp == '=') {
5941                 tmp = *s++;
5942                 if (tmp == '>') {
5943                     if (!PL_lex_allbrackets &&
5944                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5945                         s -= 3;
5946                         TOKEN(0);
5947                     }
5948                     Eop(OP_NCMP);
5949                 }
5950                 s--;
5951                 if (!PL_lex_allbrackets &&
5952                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5953                     s -= 2;
5954                     TOKEN(0);
5955                 }
5956                 Rop(OP_LE);
5957             }
5958         }
5959         s--;
5960         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5961             s--;
5962             TOKEN(0);
5963         }
5964         Rop(OP_LT);
5965     case '>':
5966         s++;
5967         {
5968             const char tmp = *s++;
5969             if (tmp == '>') {
5970                 if (*s == '=' && !PL_lex_allbrackets &&
5971                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5972                     s -= 2;
5973                     TOKEN(0);
5974                 }
5975                 SHop(OP_RIGHT_SHIFT);
5976             }
5977             else if (tmp == '=') {
5978                 if (!PL_lex_allbrackets &&
5979                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5980                     s -= 2;
5981                     TOKEN(0);
5982                 }
5983                 Rop(OP_GE);
5984             }
5985         }
5986         s--;
5987         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5988             s--;
5989             TOKEN(0);
5990         }
5991         Rop(OP_GT);
5992
5993     case '$':
5994         CLINE;
5995
5996         if (PL_expect == XOPERATOR) {
5997             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5998                 return deprecate_commaless_var_list();
5999             }
6000         }
6001
6002         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6003             PL_tokenbuf[0] = '@';
6004             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6005                            sizeof PL_tokenbuf - 1, FALSE);
6006             if (PL_expect == XOPERATOR)
6007                 no_op("Array length", s);
6008             if (!PL_tokenbuf[1])
6009                 PREREF(DOLSHARP);
6010             PL_expect = XOPERATOR;
6011             PL_pending_ident = '#';
6012             TOKEN(DOLSHARP);
6013         }
6014
6015         PL_tokenbuf[0] = '$';
6016         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6017                        sizeof PL_tokenbuf - 1, FALSE);
6018         if (PL_expect == XOPERATOR)
6019             no_op("Scalar", s);
6020         if (!PL_tokenbuf[1]) {
6021             if (s == PL_bufend)
6022                 yyerror("Final $ should be \\$ or $name");
6023             PREREF('$');
6024         }
6025
6026         /* This kludge not intended to be bulletproof. */
6027         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6028             pl_yylval.opval = newSVOP(OP_CONST, 0,
6029                                    newSViv(CopARYBASE_get(&PL_compiling)));
6030             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
6031             TERM(THING);
6032         }
6033
6034         d = s;
6035         {
6036             const char tmp = *s;
6037             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6038                 s = SKIPSPACE1(s);
6039
6040             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6041                 && intuit_more(s)) {
6042                 if (*s == '[') {
6043                     PL_tokenbuf[0] = '@';
6044                     if (ckWARN(WARN_SYNTAX)) {
6045                         char *t = s+1;
6046
6047                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6048                             t++;
6049                         if (*t++ == ',') {
6050                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6051                             while (t < PL_bufend && *t != ']')
6052                                 t++;
6053                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6054                                         "Multidimensional syntax %.*s not supported",
6055                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6056                         }
6057                     }
6058                 }
6059                 else if (*s == '{') {
6060                     char *t;
6061                     PL_tokenbuf[0] = '%';
6062                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6063                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6064                         {
6065                             char tmpbuf[sizeof PL_tokenbuf];
6066                             do {
6067                                 t++;
6068                             } while (isSPACE(*t));
6069                             if (isIDFIRST_lazy_if(t,UTF)) {
6070                                 STRLEN len;
6071                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6072                                               &len);
6073                                 while (isSPACE(*t))
6074                                     t++;
6075                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6076                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6077                                                 "You need to quote \"%s\"",
6078                                                 tmpbuf);
6079                             }
6080                         }
6081                 }
6082             }
6083
6084             PL_expect = XOPERATOR;
6085             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6086                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6087                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6088                     PL_expect = XOPERATOR;
6089                 else if (strchr("$@\"'`q", *s))
6090                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6091                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6092                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6093                 else if (isIDFIRST_lazy_if(s,UTF)) {
6094                     char tmpbuf[sizeof PL_tokenbuf];
6095                     int t2;
6096                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6097                     if ((t2 = keyword(tmpbuf, len, 0))) {
6098                         /* binary operators exclude handle interpretations */
6099                         switch (t2) {
6100                         case -KEY_x:
6101                         case -KEY_eq:
6102                         case -KEY_ne:
6103                         case -KEY_gt:
6104                         case -KEY_lt:
6105                         case -KEY_ge:
6106                         case -KEY_le:
6107                         case -KEY_cmp:
6108                             break;
6109                         default:
6110                             PL_expect = XTERM;  /* e.g. print $fh length() */
6111                             break;
6112                         }
6113                     }
6114                     else {
6115                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6116                     }
6117                 }
6118                 else if (isDIGIT(*s))
6119                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6120                 else if (*s == '.' && isDIGIT(s[1]))
6121                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6122                 else if ((*s == '?' || *s == '-' || *s == '+')
6123                          && !isSPACE(s[1]) && s[1] != '=')
6124                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6125                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6126                          && s[1] != '/')
6127                     PL_expect = XTERM;          /* e.g. print $fh /.../
6128                                                    XXX except DORDOR operator
6129                                                 */
6130                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6131                          && s[2] != '=')
6132                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6133             }
6134         }
6135         PL_pending_ident = '$';
6136         TOKEN('$');
6137
6138     case '@':
6139         if (PL_expect == XOPERATOR)
6140             no_op("Array", s);
6141         PL_tokenbuf[0] = '@';
6142         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6143         if (!PL_tokenbuf[1]) {
6144             PREREF('@');
6145         }
6146         if (PL_lex_state == LEX_NORMAL)
6147             s = SKIPSPACE1(s);
6148         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6149             if (*s == '{')
6150                 PL_tokenbuf[0] = '%';
6151
6152             /* Warn about @ where they meant $. */
6153             if (*s == '[' || *s == '{') {
6154                 if (ckWARN(WARN_SYNTAX)) {
6155                     const char *t = s + 1;
6156                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6157                         t++;
6158                     if (*t == '}' || *t == ']') {
6159                         t++;
6160                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6161                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6162                             "Scalar value %.*s better written as $%.*s",
6163                             (int)(t-PL_bufptr), PL_bufptr,
6164                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6165                     }
6166                 }
6167             }
6168         }
6169         PL_pending_ident = '@';
6170         TERM('@');
6171
6172      case '/':                  /* may be division, defined-or, or pattern */
6173         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6174             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6175                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6176                 TOKEN(0);
6177             s += 2;
6178             AOPERATOR(DORDOR);
6179         }
6180      case '?':                  /* may either be conditional or pattern */
6181         if (PL_expect == XOPERATOR) {
6182              char tmp = *s++;
6183              if(tmp == '?') {
6184                 if (!PL_lex_allbrackets &&
6185                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6186                     s--;
6187                     TOKEN(0);
6188                 }
6189                 PL_lex_allbrackets++;
6190                 OPERATOR('?');
6191              }
6192              else {
6193                  tmp = *s++;
6194                  if(tmp == '/') {
6195                      /* A // operator. */
6196                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6197                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6198                                             LEX_FAKEEOF_LOGIC)) {
6199                         s -= 2;
6200                         TOKEN(0);
6201                     }
6202                     AOPERATOR(DORDOR);
6203                  }
6204                  else {
6205                      s--;
6206                      if (*s == '=' && !PL_lex_allbrackets &&
6207                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6208                          s--;
6209                          TOKEN(0);
6210                      }
6211                      Mop(OP_DIVIDE);
6212                  }
6213              }
6214          }
6215          else {
6216              /* Disable warning on "study /blah/" */
6217              if (PL_oldoldbufptr == PL_last_uni
6218               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6219                   || memNE(PL_last_uni, "study", 5)
6220                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6221               ))
6222                  check_uni();
6223              if (*s == '?')
6224                  deprecate("?PATTERN? without explicit operator");
6225              s = scan_pat(s,OP_MATCH);
6226              TERM(sublex_start());
6227          }
6228
6229     case '.':
6230         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6231 #ifdef PERL_STRICT_CR
6232             && s[1] == '\n'
6233 #else
6234             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6235 #endif
6236             && (s == PL_linestart || s[-1] == '\n') )
6237         {
6238             PL_lex_formbrack = 0;
6239             PL_expect = XSTATE;
6240             goto rightbracket;
6241         }
6242         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6243             s += 3;
6244             OPERATOR(YADAYADA);
6245         }
6246         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6247             char tmp = *s++;
6248             if (*s == tmp) {
6249                 if (!PL_lex_allbrackets &&
6250                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6251                     s--;
6252                     TOKEN(0);
6253                 }
6254                 s++;
6255                 if (*s == tmp) {
6256                     s++;
6257                     pl_yylval.ival = OPf_SPECIAL;
6258                 }
6259                 else
6260                     pl_yylval.ival = 0;
6261                 OPERATOR(DOTDOT);
6262             }
6263             if (*s == '=' && !PL_lex_allbrackets &&
6264                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6265                 s--;
6266                 TOKEN(0);
6267             }
6268             Aop(OP_CONCAT);
6269         }
6270         /* FALL THROUGH */
6271     case '0': case '1': case '2': case '3': case '4':
6272     case '5': case '6': case '7': case '8': case '9':
6273         s = scan_num(s, &pl_yylval);
6274         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6275         if (PL_expect == XOPERATOR)
6276             no_op("Number",s);
6277         TERM(THING);
6278
6279     case '\'':
6280         s = scan_str(s,!!PL_madskills,FALSE);
6281         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6282         if (PL_expect == XOPERATOR) {
6283             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6284                 return deprecate_commaless_var_list();
6285             }
6286             else
6287                 no_op("String",s);
6288         }
6289         if (!s)
6290             missingterm(NULL);
6291         pl_yylval.ival = OP_CONST;
6292         TERM(sublex_start());
6293
6294     case '"':
6295         s = scan_str(s,!!PL_madskills,FALSE);
6296         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6297         if (PL_expect == XOPERATOR) {
6298             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6299                 return deprecate_commaless_var_list();
6300             }
6301             else
6302                 no_op("String",s);
6303         }
6304         if (!s)
6305             missingterm(NULL);
6306         pl_yylval.ival = OP_CONST;
6307         /* FIXME. I think that this can be const if char *d is replaced by
6308            more localised variables.  */
6309         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6310             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6311                 pl_yylval.ival = OP_STRINGIFY;
6312                 break;
6313             }
6314         }
6315         TERM(sublex_start());
6316
6317     case '`':
6318         s = scan_str(s,!!PL_madskills,FALSE);
6319         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6320         if (PL_expect == XOPERATOR)
6321             no_op("Backticks",s);
6322         if (!s)
6323             missingterm(NULL);
6324         readpipe_override();
6325         TERM(sublex_start());
6326
6327     case '\\':
6328         s++;
6329         if (PL_lex_inwhat && isDIGIT(*s))
6330             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6331                            *s, *s);
6332         if (PL_expect == XOPERATOR)
6333             no_op("Backslash",s);
6334         OPERATOR(REFGEN);
6335
6336     case 'v':
6337         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6338             char *start = s + 2;
6339             while (isDIGIT(*start) || *start == '_')
6340                 start++;
6341             if (*start == '.' && isDIGIT(start[1])) {
6342                 s = scan_num(s, &pl_yylval);
6343                 TERM(THING);
6344             }
6345             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6346             else if (!isALPHA(*start) && (PL_expect == XTERM
6347                         || PL_expect == XREF || PL_expect == XSTATE
6348                         || PL_expect == XTERMORDORDOR)) {
6349                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6350                 if (!gv) {
6351                     s = scan_num(s, &pl_yylval);
6352                     TERM(THING);
6353                 }
6354             }
6355         }
6356         goto keylookup;
6357     case 'x':
6358         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6359             s++;
6360             Mop(OP_REPEAT);
6361         }
6362         goto keylookup;
6363
6364     case '_':
6365     case 'a': case 'A':
6366     case 'b': case 'B':
6367     case 'c': case 'C':
6368     case 'd': case 'D':
6369     case 'e': case 'E':
6370     case 'f': case 'F':
6371     case 'g': case 'G':
6372     case 'h': case 'H':
6373     case 'i': case 'I':
6374     case 'j': case 'J':
6375     case 'k': case 'K':
6376     case 'l': case 'L':
6377     case 'm': case 'M':
6378     case 'n': case 'N':
6379     case 'o': case 'O':
6380     case 'p': case 'P':
6381     case 'q': case 'Q':
6382     case 'r': case 'R':
6383     case 's': case 'S':
6384     case 't': case 'T':
6385     case 'u': case 'U':
6386               case 'V':
6387     case 'w': case 'W':
6388               case 'X':
6389     case 'y': case 'Y':
6390     case 'z': case 'Z':
6391
6392       keylookup: {
6393         bool anydelim;
6394         I32 tmp;
6395
6396         orig_keyword = 0;
6397         gv = NULL;
6398         gvp = NULL;
6399
6400         PL_bufptr = s;
6401         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6402
6403         /* Some keywords can be followed by any delimiter, including ':' */
6404         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6405
6406         /* x::* is just a word, unless x is "CORE" */
6407         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6408             goto just_a_word;
6409
6410         d = s;
6411         while (d < PL_bufend && isSPACE(*d))
6412                 d++;    /* no comments skipped here, or s### is misparsed */
6413
6414         /* Is this a word before a => operator? */
6415         if (*d == '=' && d[1] == '>') {
6416             CLINE;
6417             pl_yylval.opval
6418                 = (OP*)newSVOP(OP_CONST, 0,
6419                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6420             pl_yylval.opval->op_private = OPpCONST_BARE;
6421             TERM(WORD);
6422         }
6423
6424         /* Check for plugged-in keyword */
6425         {
6426             OP *o;
6427             int result;
6428             char *saved_bufptr = PL_bufptr;
6429             PL_bufptr = s;
6430             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6431             s = PL_bufptr;
6432             if (result == KEYWORD_PLUGIN_DECLINE) {
6433                 /* not a plugged-in keyword */
6434                 PL_bufptr = saved_bufptr;
6435             } else if (result == KEYWORD_PLUGIN_STMT) {
6436                 pl_yylval.opval = o;
6437                 CLINE;
6438                 PL_expect = XSTATE;
6439                 return REPORT(PLUGSTMT);
6440             } else if (result == KEYWORD_PLUGIN_EXPR) {
6441                 pl_yylval.opval = o;
6442                 CLINE;
6443                 PL_expect = XOPERATOR;
6444                 return REPORT(PLUGEXPR);
6445             } else {
6446                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6447                                         PL_tokenbuf);
6448             }
6449         }
6450
6451         /* Check for built-in keyword */
6452         tmp = keyword(PL_tokenbuf, len, 0);
6453
6454         /* Is this a label? */
6455         if (!anydelim && PL_expect == XSTATE
6456               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6457             s = d + 1;
6458             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6459             CLINE;
6460             TOKEN(LABEL);
6461         }
6462
6463         if (tmp < 0) {                  /* second-class keyword? */
6464             GV *ogv = NULL;     /* override (winner) */
6465             GV *hgv = NULL;     /* hidden (loser) */
6466             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6467                 CV *cv;
6468                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6469                     (cv = GvCVu(gv)))
6470                 {
6471                     if (GvIMPORTED_CV(gv))
6472                         ogv = gv;
6473                     else if (! CvMETHOD(cv))
6474                         hgv = gv;
6475                 }
6476                 if (!ogv &&
6477                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6478                     (gv = *gvp) && isGV_with_GP(gv) &&
6479                     GvCVu(gv) && GvIMPORTED_CV(gv))
6480                 {
6481                     ogv = gv;
6482                 }
6483             }
6484             if (ogv) {
6485                 orig_keyword = tmp;
6486                 tmp = 0;                /* overridden by import or by GLOBAL */
6487             }
6488             else if (gv && !gvp
6489                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6490                      && GvCVu(gv))
6491             {
6492                 tmp = 0;                /* any sub overrides "weak" keyword */
6493             }
6494             else {                      /* no override */
6495                 tmp = -tmp;
6496                 if (tmp == KEY_dump) {
6497                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6498                                    "dump() better written as CORE::dump()");
6499                 }
6500                 gv = NULL;
6501                 gvp = 0;
6502                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6503                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6504                                    "Ambiguous call resolved as CORE::%s(), "
6505                                    "qualify as such or use &",
6506                                    GvENAME(hgv));
6507             }
6508         }
6509
6510       reserved_word:
6511         switch (tmp) {
6512
6513         default:                        /* not a keyword */
6514             /* Trade off - by using this evil construction we can pull the
6515                variable gv into the block labelled keylookup. If not, then
6516                we have to give it function scope so that the goto from the
6517                earlier ':' case doesn't bypass the initialisation.  */
6518             if (0) {
6519             just_a_word_zero_gv:
6520                 gv = NULL;
6521                 gvp = NULL;
6522                 orig_keyword = 0;
6523             }
6524           just_a_word: {
6525                 SV *sv;
6526                 int pkgname = 0;
6527                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6528                 OP *rv2cv_op;
6529                 CV *cv;
6530 #ifdef PERL_MAD
6531                 SV *nextPL_nextwhite = 0;
6532 #endif
6533
6534
6535                 /* Get the rest if it looks like a package qualifier */
6536
6537                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6538                     STRLEN morelen;
6539                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6540                                   TRUE, &morelen);
6541                     if (!morelen)
6542                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6543                                 *s == '\'' ? "'" : "::");
6544                     len += morelen;
6545                     pkgname = 1;
6546                 }
6547
6548                 if (PL_expect == XOPERATOR) {
6549                     if (PL_bufptr == PL_linestart) {
6550                         CopLINE_dec(PL_curcop);
6551                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6552                         CopLINE_inc(PL_curcop);
6553                     }
6554                     else
6555                         no_op("Bareword",s);
6556                 }
6557
6558                 /* Look for a subroutine with this name in current package,
6559                    unless name is "Foo::", in which case Foo is a bareword
6560                    (and a package name). */
6561
6562                 if (len > 2 && !PL_madskills &&
6563                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6564                 {
6565                     if (ckWARN(WARN_BAREWORD)
6566                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6567                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6568                             "Bareword \"%s\" refers to nonexistent package",
6569                              PL_tokenbuf);
6570                     len -= 2;
6571                     PL_tokenbuf[len] = '\0';
6572                     gv = NULL;
6573                     gvp = 0;
6574                 }
6575                 else {
6576                     if (!gv) {
6577                         /* Mustn't actually add anything to a symbol table.
6578                            But also don't want to "initialise" any placeholder
6579                            constants that might already be there into full
6580                            blown PVGVs with attached PVCV.  */
6581                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6582                                                GV_NOADD_NOINIT, SVt_PVCV);
6583                     }
6584                     len = 0;
6585                 }
6586
6587                 /* if we saw a global override before, get the right name */
6588
6589                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6590                     len ? len : strlen(PL_tokenbuf));
6591                 if (gvp) {
6592                     SV * const tmp_sv = sv;
6593                     sv = newSVpvs("CORE::GLOBAL::");
6594                     sv_catsv(sv, tmp_sv);
6595                     SvREFCNT_dec(tmp_sv);
6596                 }
6597
6598 #ifdef PERL_MAD
6599                 if (PL_madskills && !PL_thistoken) {
6600                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6601                     PL_thistoken = newSVpvn(start,s - start);
6602                     PL_realtokenstart = s - SvPVX(PL_linestr);
6603                 }
6604 #endif
6605
6606                 /* Presume this is going to be a bareword of some sort. */
6607                 CLINE;
6608                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6609                 pl_yylval.opval->op_private = OPpCONST_BARE;
6610
6611                 /* And if "Foo::", then that's what it certainly is. */
6612                 if (len)
6613                     goto safe_bareword;
6614
6615                 {
6616                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6617                     const_op->op_private = OPpCONST_BARE;
6618                     rv2cv_op = newCVREF(0, const_op);
6619                 }
6620                 cv = rv2cv_op_cv(rv2cv_op, 0);
6621
6622                 /* See if it's the indirect object for a list operator. */
6623
6624                 if (PL_oldoldbufptr &&
6625                     PL_oldoldbufptr < PL_bufptr &&
6626                     (PL_oldoldbufptr == PL_last_lop
6627                      || PL_oldoldbufptr == PL_last_uni) &&
6628                     /* NO SKIPSPACE BEFORE HERE! */
6629                     (PL_expect == XREF ||
6630                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6631                 {
6632                     bool immediate_paren = *s == '(';
6633
6634                     /* (Now we can afford to cross a potential line boundary.) */
6635                     s = SKIPSPACE2(s,nextPL_nextwhite);
6636 #ifdef PERL_MAD
6637                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6638 #endif
6639
6640                     /* Two barewords in a row may indicate method call. */
6641
6642                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6643                         (tmp = intuit_method(s, gv, cv))) {
6644                         op_free(rv2cv_op);
6645                         if (tmp == METHOD && !PL_lex_allbrackets &&
6646                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6647                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6648                         return REPORT(tmp);
6649                     }
6650
6651                     /* If not a declared subroutine, it's an indirect object. */
6652                     /* (But it's an indir obj regardless for sort.) */
6653                     /* Also, if "_" follows a filetest operator, it's a bareword */
6654
6655                     if (
6656                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6657                          (!cv &&
6658                         (PL_last_lop_op != OP_MAPSTART &&
6659                          PL_last_lop_op != OP_GREPSTART))))
6660                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6661                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6662                        )
6663                     {
6664                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6665                         goto bareword;
6666                     }
6667                 }
6668
6669                 PL_expect = XOPERATOR;
6670 #ifdef PERL_MAD
6671                 if (isSPACE(*s))
6672                     s = SKIPSPACE2(s,nextPL_nextwhite);
6673                 PL_nextwhite = nextPL_nextwhite;
6674 #else
6675                 s = skipspace(s);
6676 #endif
6677
6678                 /* Is this a word before a => operator? */
6679                 if (*s == '=' && s[1] == '>' && !pkgname) {
6680                     op_free(rv2cv_op);
6681                     CLINE;
6682                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6683                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6684                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6685                     TERM(WORD);
6686                 }
6687
6688                 /* If followed by a paren, it's certainly a subroutine. */
6689                 if (*s == '(') {
6690                     CLINE;
6691                     if (cv) {
6692                         d = s + 1;
6693                         while (SPACE_OR_TAB(*d))
6694                             d++;
6695                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6696                             s = d + 1;
6697                             goto its_constant;
6698                         }
6699                     }
6700 #ifdef PERL_MAD
6701                     if (PL_madskills) {
6702                         PL_nextwhite = PL_thiswhite;
6703                         PL_thiswhite = 0;
6704                     }
6705                     start_force(PL_curforce);
6706 #endif
6707                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6708                     PL_expect = XOPERATOR;
6709 #ifdef PERL_MAD
6710                     if (PL_madskills) {
6711                         PL_nextwhite = nextPL_nextwhite;
6712                         curmad('X', PL_thistoken);
6713                         PL_thistoken = newSVpvs("");
6714                     }
6715 #endif
6716                     op_free(rv2cv_op);
6717                     force_next(WORD);
6718                     pl_yylval.ival = 0;
6719                     TOKEN('&');
6720                 }
6721
6722                 /* If followed by var or block, call it a method (unless sub) */
6723
6724                 if ((*s == '$' || *s == '{') && !cv) {
6725                     op_free(rv2cv_op);
6726                     PL_last_lop = PL_oldbufptr;
6727                     PL_last_lop_op = OP_METHOD;
6728                     if (!PL_lex_allbrackets &&
6729                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6730                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6731                     PREBLOCK(METHOD);
6732                 }
6733
6734                 /* If followed by a bareword, see if it looks like indir obj. */
6735
6736                 if (!orig_keyword
6737                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6738                         && (tmp = intuit_method(s, gv, cv))) {
6739                     op_free(rv2cv_op);
6740                     if (tmp == METHOD && !PL_lex_allbrackets &&
6741                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6742                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6743                     return REPORT(tmp);
6744                 }
6745
6746                 /* Not a method, so call it a subroutine (if defined) */
6747
6748                 if (cv) {
6749                     if (lastchar == '-')
6750                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6751                                          "Ambiguous use of -%s resolved as -&%s()",
6752                                          PL_tokenbuf, PL_tokenbuf);
6753                     /* Check for a constant sub */
6754                     if ((sv = cv_const_sv(cv))) {
6755                   its_constant:
6756                         op_free(rv2cv_op);
6757                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6758                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6759                         pl_yylval.opval->op_private = 0;
6760                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6761                         TOKEN(WORD);
6762                     }
6763
6764                     op_free(pl_yylval.opval);
6765                     pl_yylval.opval = rv2cv_op;
6766                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6767                     PL_last_lop = PL_oldbufptr;
6768                     PL_last_lop_op = OP_ENTERSUB;
6769                     /* Is there a prototype? */
6770                     if (
6771 #ifdef PERL_MAD
6772                         cv &&
6773 #endif
6774                         SvPOK(cv))
6775                     {
6776                         STRLEN protolen;
6777                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6778                         if (!protolen)
6779                             TERM(FUNC0SUB);
6780                         while (*proto == ';')
6781                             proto++;
6782                         if (
6783                             (
6784                                 (
6785                                     *proto == '$' || *proto == '_'
6786                                  || *proto == '*' || *proto == '+'
6787                                 )
6788                              && proto[1] == '\0'
6789                             )
6790                          || (
6791                              *proto == '\\' && proto[1] && proto[2] == '\0'
6792                             )
6793                         )
6794                             OPERATOR(UNIOPSUB);
6795                         if (*proto == '\\' && proto[1] == '[') {
6796                             const char *p = proto + 2;
6797                             while(*p && *p != ']')
6798                                 ++p;
6799                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6800                         }
6801                         if (*proto == '&' && *s == '{') {
6802                             if (PL_curstash)
6803                                 sv_setpvs(PL_subname, "__ANON__");
6804                             else
6805                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6806                             if (!PL_lex_allbrackets &&
6807                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6808                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6809                             PREBLOCK(LSTOPSUB);
6810                         }
6811                     }
6812 #ifdef PERL_MAD
6813                     {
6814                         if (PL_madskills) {
6815                             PL_nextwhite = PL_thiswhite;
6816                             PL_thiswhite = 0;
6817                         }
6818                         start_force(PL_curforce);
6819                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6820                         PL_expect = XTERM;
6821                         if (PL_madskills) {
6822                             PL_nextwhite = nextPL_nextwhite;
6823                             curmad('X', PL_thistoken);
6824                             PL_thistoken = newSVpvs("");
6825                         }
6826                         force_next(WORD);
6827                         if (!PL_lex_allbrackets &&
6828                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6829                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6830                         TOKEN(NOAMP);
6831                     }
6832                 }
6833
6834                 /* Guess harder when madskills require "best effort". */
6835                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6836                     int probable_sub = 0;
6837                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6838                         probable_sub = 1;
6839                     else if (isALPHA(*s)) {
6840                         char tmpbuf[1024];
6841                         STRLEN tmplen;
6842                         d = s;
6843                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6844                         if (!keyword(tmpbuf, tmplen, 0))
6845                             probable_sub = 1;
6846                         else {
6847                             while (d < PL_bufend && isSPACE(*d))
6848                                 d++;
6849                             if (*d == '=' && d[1] == '>')
6850                                 probable_sub = 1;
6851                         }
6852                     }
6853                     if (probable_sub) {
6854                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6855                         op_free(pl_yylval.opval);
6856                         pl_yylval.opval = rv2cv_op;
6857                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6858                         PL_last_lop = PL_oldbufptr;
6859                         PL_last_lop_op = OP_ENTERSUB;
6860                         PL_nextwhite = PL_thiswhite;
6861                         PL_thiswhite = 0;
6862                         start_force(PL_curforce);
6863                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6864                         PL_expect = XTERM;
6865                         PL_nextwhite = nextPL_nextwhite;
6866                         curmad('X', PL_thistoken);
6867                         PL_thistoken = newSVpvs("");
6868                         force_next(WORD);
6869                         if (!PL_lex_allbrackets &&
6870                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6871                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6872                         TOKEN(NOAMP);
6873                     }
6874 #else
6875                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6876                     PL_expect = XTERM;
6877                     force_next(WORD);
6878                     if (!PL_lex_allbrackets &&
6879                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6880                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6881                     TOKEN(NOAMP);
6882 #endif
6883                 }
6884
6885                 /* Call it a bare word */
6886
6887                 if (PL_hints & HINT_STRICT_SUBS)
6888                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6889                 else {
6890                 bareword:
6891                     /* after "print" and similar functions (corresponding to
6892                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6893                      * a filehandle should be subject to "strict subs".
6894                      * Likewise for the optional indirect-object argument to system
6895                      * or exec, which can't be a bareword */
6896                     if ((PL_last_lop_op == OP_PRINT
6897                             || PL_last_lop_op == OP_PRTF
6898                             || PL_last_lop_op == OP_SAY
6899                             || PL_last_lop_op == OP_SYSTEM
6900                             || PL_last_lop_op == OP_EXEC)
6901                             && (PL_hints & HINT_STRICT_SUBS))
6902                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6903                     if (lastchar != '-') {
6904                         if (ckWARN(WARN_RESERVED)) {
6905                             d = PL_tokenbuf;
6906                             while (isLOWER(*d))
6907                                 d++;
6908                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6909                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6910                                        PL_tokenbuf);
6911                         }
6912                     }
6913                 }
6914                 op_free(rv2cv_op);
6915
6916             safe_bareword:
6917                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6918                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6919                                      "Operator or semicolon missing before %c%s",
6920                                      lastchar, PL_tokenbuf);
6921                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6922                                      "Ambiguous use of %c resolved as operator %c",
6923                                      lastchar, lastchar);
6924                 }
6925                 TOKEN(WORD);
6926             }
6927
6928         case KEY___FILE__:
6929             FUN0OP(
6930                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6931                                         newSVpv(CopFILE(PL_curcop),0))
6932             );
6933
6934         case KEY___LINE__:
6935             FUN0OP(
6936                 (OP*)newSVOP(OP_CONST, 0,
6937                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6938             );
6939
6940         case KEY___PACKAGE__:
6941             FUN0OP(
6942                 (OP*)newSVOP(OP_CONST, 0,
6943                                         (PL_curstash
6944                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6945                                          : &PL_sv_undef))
6946             );
6947
6948         case KEY___DATA__:
6949         case KEY___END__: {
6950             GV *gv;
6951             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6952                 const char *pname = "main";
6953                 if (PL_tokenbuf[2] == 'D')
6954                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6955                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6956                                 SVt_PVIO);
6957                 GvMULTI_on(gv);
6958                 if (!GvIO(gv))
6959                     GvIOp(gv) = newIO();
6960                 IoIFP(GvIOp(gv)) = PL_rsfp;
6961 #if defined(HAS_FCNTL) && defined(F_SETFD)
6962                 {
6963                     const int fd = PerlIO_fileno(PL_rsfp);
6964                     fcntl(fd,F_SETFD,fd >= 3);
6965                 }
6966 #endif
6967                 /* Mark this internal pseudo-handle as clean */
6968                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6969                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6970                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6971                 else
6972                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6973 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6974                 /* if the script was opened in binmode, we need to revert
6975                  * it to text mode for compatibility; but only if it has CRs
6976                  * XXX this is a questionable hack at best. */
6977                 if (PL_bufend-PL_bufptr > 2
6978                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6979                 {
6980                     Off_t loc = 0;
6981                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6982                         loc = PerlIO_tell(PL_rsfp);
6983                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6984                     }
6985 #ifdef NETWARE
6986                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6987 #else
6988                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6989 #endif  /* NETWARE */
6990 #ifdef PERLIO_IS_STDIO /* really? */
6991 #  if defined(__BORLANDC__)
6992                         /* XXX see note in do_binmode() */
6993                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6994 #  endif
6995 #endif
6996                         if (loc > 0)
6997                             PerlIO_seek(PL_rsfp, loc, 0);
6998                     }
6999                 }
7000 #endif
7001 #ifdef PERLIO_LAYERS
7002                 if (!IN_BYTES) {
7003                     if (UTF)
7004                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7005                     else if (PL_encoding) {
7006                         SV *name;
7007                         dSP;
7008                         ENTER;
7009                         SAVETMPS;
7010                         PUSHMARK(sp);
7011                         EXTEND(SP, 1);
7012                         XPUSHs(PL_encoding);
7013                         PUTBACK;
7014                         call_method("name", G_SCALAR);
7015                         SPAGAIN;
7016                         name = POPs;
7017                         PUTBACK;
7018                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7019                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7020                                                       SVfARG(name)));
7021                         FREETMPS;
7022                         LEAVE;
7023                     }
7024                 }
7025 #endif
7026 #ifdef PERL_MAD
7027                 if (PL_madskills) {
7028                     if (PL_realtokenstart >= 0) {
7029                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7030                         if (!PL_endwhite)
7031                             PL_endwhite = newSVpvs("");
7032                         sv_catsv(PL_endwhite, PL_thiswhite);
7033                         PL_thiswhite = 0;
7034                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7035                         PL_realtokenstart = -1;
7036                     }
7037                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7038                            != NULL) ;
7039                 }
7040 #endif
7041                 PL_rsfp = NULL;
7042             }
7043             goto fake_eof;
7044         }
7045
7046         case KEY_AUTOLOAD:
7047         case KEY_DESTROY:
7048         case KEY_BEGIN:
7049         case KEY_UNITCHECK:
7050         case KEY_CHECK:
7051         case KEY_INIT:
7052         case KEY_END:
7053             if (PL_expect == XSTATE) {
7054                 s = PL_bufptr;
7055                 goto really_sub;
7056             }
7057             goto just_a_word;
7058
7059         case KEY_CORE:
7060             if (*s == ':' && s[1] == ':') {
7061                 s += 2;
7062                 d = s;
7063                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7064                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7065                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7066                 if (tmp < 0)
7067                     tmp = -tmp;
7068                 else if (tmp == KEY_require || tmp == KEY_do)
7069                     /* that's a way to remember we saw "CORE::" */
7070                     orig_keyword = tmp;
7071                 goto reserved_word;
7072             }
7073             goto just_a_word;
7074
7075         case KEY_abs:
7076             UNI(OP_ABS);
7077
7078         case KEY_alarm:
7079             UNI(OP_ALARM);
7080
7081         case KEY_accept:
7082             LOP(OP_ACCEPT,XTERM);
7083
7084         case KEY_and:
7085             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7086                 return REPORT(0);
7087             OPERATOR(ANDOP);
7088
7089         case KEY_atan2:
7090             LOP(OP_ATAN2,XTERM);
7091
7092         case KEY_bind:
7093             LOP(OP_BIND,XTERM);
7094
7095         case KEY_binmode:
7096             LOP(OP_BINMODE,XTERM);
7097
7098         case KEY_bless:
7099             LOP(OP_BLESS,XTERM);
7100
7101         case KEY_break:
7102             FUN0(OP_BREAK);
7103
7104         case KEY_chop:
7105             UNI(OP_CHOP);
7106
7107         case KEY_continue:
7108                     /* We have to disambiguate the two senses of
7109                       "continue". If the next token is a '{' then
7110                       treat it as the start of a continue block;
7111                       otherwise treat it as a control operator.
7112                      */
7113                     s = skipspace(s);
7114                     if (*s == '{')
7115             PREBLOCK(CONTINUE);
7116                     else
7117                         FUN0(OP_CONTINUE);
7118
7119         case KEY_chdir:
7120             /* may use HOME */
7121             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7122             UNI(OP_CHDIR);
7123
7124         case KEY_close:
7125             UNI(OP_CLOSE);
7126
7127         case KEY_closedir:
7128             UNI(OP_CLOSEDIR);
7129
7130         case KEY_cmp:
7131             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7132                 return REPORT(0);
7133             Eop(OP_SCMP);
7134
7135         case KEY_caller:
7136             UNI(OP_CALLER);
7137
7138         case KEY_crypt:
7139 #ifdef FCRYPT
7140             if (!PL_cryptseen) {
7141                 PL_cryptseen = TRUE;
7142                 init_des();
7143             }
7144 #endif
7145             LOP(OP_CRYPT,XTERM);
7146
7147         case KEY_chmod:
7148             LOP(OP_CHMOD,XTERM);
7149
7150         case KEY_chown:
7151             LOP(OP_CHOWN,XTERM);
7152
7153         case KEY_connect:
7154             LOP(OP_CONNECT,XTERM);
7155
7156         case KEY_chr:
7157             UNI(OP_CHR);
7158
7159         case KEY_cos:
7160             UNI(OP_COS);
7161
7162         case KEY_chroot:
7163             UNI(OP_CHROOT);
7164
7165         case KEY_default:
7166             PREBLOCK(DEFAULT);
7167
7168         case KEY_do:
7169             s = SKIPSPACE1(s);
7170             if (*s == '{')
7171                 PRETERMBLOCK(DO);
7172             if (*s != '\'')
7173                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7174             if (orig_keyword == KEY_do) {
7175                 orig_keyword = 0;
7176                 pl_yylval.ival = 1;
7177             }
7178             else
7179                 pl_yylval.ival = 0;
7180             OPERATOR(DO);
7181
7182         case KEY_die:
7183             PL_hints |= HINT_BLOCK_SCOPE;
7184             LOP(OP_DIE,XTERM);
7185
7186         case KEY_defined:
7187             UNI(OP_DEFINED);
7188
7189         case KEY_delete:
7190             UNI(OP_DELETE);
7191
7192         case KEY_dbmopen:
7193             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7194                               STR_WITH_LEN("NDBM_File::"),
7195                               STR_WITH_LEN("DB_File::"),
7196                               STR_WITH_LEN("GDBM_File::"),
7197                               STR_WITH_LEN("SDBM_File::"),
7198                               STR_WITH_LEN("ODBM_File::"),
7199                               NULL);
7200             LOP(OP_DBMOPEN,XTERM);
7201
7202         case KEY_dbmclose:
7203             UNI(OP_DBMCLOSE);
7204
7205         case KEY_dump:
7206             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7207             LOOPX(OP_DUMP);
7208
7209         case KEY_else:
7210             PREBLOCK(ELSE);
7211
7212         case KEY_elsif:
7213             pl_yylval.ival = CopLINE(PL_curcop);
7214             OPERATOR(ELSIF);
7215
7216         case KEY_eq:
7217             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7218                 return REPORT(0);
7219             Eop(OP_SEQ);
7220
7221         case KEY_exists:
7222             UNI(OP_EXISTS);
7223
7224         case KEY_exit:
7225             if (PL_madskills)
7226                 UNI(OP_INT);
7227             UNI(OP_EXIT);
7228
7229         case KEY_eval:
7230             s = SKIPSPACE1(s);
7231             if (*s == '{') { /* block eval */
7232                 PL_expect = XTERMBLOCK;
7233                 UNIBRACK(OP_ENTERTRY);
7234             }
7235             else { /* string eval */
7236                 PL_expect = XTERM;
7237                 UNIBRACK(OP_ENTEREVAL);
7238             }
7239
7240         case KEY_eof:
7241             UNI(OP_EOF);
7242
7243         case KEY_exp:
7244             UNI(OP_EXP);
7245
7246         case KEY_each:
7247             UNI(OP_EACH);
7248
7249         case KEY_exec:
7250             LOP(OP_EXEC,XREF);
7251
7252         case KEY_endhostent:
7253             FUN0(OP_EHOSTENT);
7254
7255         case KEY_endnetent:
7256             FUN0(OP_ENETENT);
7257
7258         case KEY_endservent:
7259             FUN0(OP_ESERVENT);
7260
7261         case KEY_endprotoent:
7262             FUN0(OP_EPROTOENT);
7263
7264         case KEY_endpwent:
7265             FUN0(OP_EPWENT);
7266
7267         case KEY_endgrent:
7268             FUN0(OP_EGRENT);
7269
7270         case KEY_for:
7271         case KEY_foreach:
7272             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7273                 return REPORT(0);
7274             pl_yylval.ival = CopLINE(PL_curcop);
7275             s = SKIPSPACE1(s);
7276             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7277                 char *p = s;
7278 #ifdef PERL_MAD
7279                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7280 #endif
7281
7282                 if ((PL_bufend - p) >= 3 &&
7283                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7284                     p += 2;
7285                 else if ((PL_bufend - p) >= 4 &&
7286                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7287                     p += 3;
7288                 p = PEEKSPACE(p);
7289                 if (isIDFIRST_lazy_if(p,UTF)) {
7290                     p = scan_ident(p, PL_bufend,
7291                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7292                     p = PEEKSPACE(p);
7293                 }
7294                 if (*p != '$')
7295                     Perl_croak(aTHX_ "Missing $ on loop variable");
7296 #ifdef PERL_MAD
7297                 s = SvPVX(PL_linestr) + soff;
7298 #endif
7299             }
7300             OPERATOR(FOR);
7301
7302         case KEY_formline:
7303             LOP(OP_FORMLINE,XTERM);
7304
7305         case KEY_fork:
7306             FUN0(OP_FORK);
7307
7308         case KEY_fcntl:
7309             LOP(OP_FCNTL,XTERM);
7310
7311         case KEY_fileno:
7312             UNI(OP_FILENO);
7313
7314         case KEY_flock:
7315             LOP(OP_FLOCK,XTERM);
7316
7317         case KEY_gt:
7318             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7319                 return REPORT(0);
7320             Rop(OP_SGT);
7321
7322         case KEY_ge:
7323             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7324                 return REPORT(0);
7325             Rop(OP_SGE);
7326
7327         case KEY_grep:
7328             LOP(OP_GREPSTART, XREF);
7329
7330         case KEY_goto:
7331             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7332             LOOPX(OP_GOTO);
7333
7334         case KEY_gmtime:
7335             UNI(OP_GMTIME);
7336
7337         case KEY_getc:
7338             UNIDOR(OP_GETC);
7339
7340         case KEY_getppid:
7341             FUN0(OP_GETPPID);
7342
7343         case KEY_getpgrp:
7344             UNI(OP_GETPGRP);
7345
7346         case KEY_getpriority:
7347             LOP(OP_GETPRIORITY,XTERM);
7348
7349         case KEY_getprotobyname:
7350             UNI(OP_GPBYNAME);
7351
7352         case KEY_getprotobynumber:
7353             LOP(OP_GPBYNUMBER,XTERM);
7354
7355         case KEY_getprotoent:
7356             FUN0(OP_GPROTOENT);
7357
7358         case KEY_getpwent:
7359             FUN0(OP_GPWENT);
7360
7361         case KEY_getpwnam:
7362             UNI(OP_GPWNAM);
7363
7364         case KEY_getpwuid:
7365             UNI(OP_GPWUID);
7366
7367         case KEY_getpeername:
7368             UNI(OP_GETPEERNAME);
7369
7370         case KEY_gethostbyname:
7371             UNI(OP_GHBYNAME);
7372
7373         case KEY_gethostbyaddr:
7374             LOP(OP_GHBYADDR,XTERM);
7375
7376         case KEY_gethostent:
7377             FUN0(OP_GHOSTENT);
7378
7379         case KEY_getnetbyname:
7380             UNI(OP_GNBYNAME);
7381
7382         case KEY_getnetbyaddr:
7383             LOP(OP_GNBYADDR,XTERM);
7384
7385         case KEY_getnetent:
7386             FUN0(OP_GNETENT);
7387
7388         case KEY_getservbyname:
7389             LOP(OP_GSBYNAME,XTERM);
7390
7391         case KEY_getservbyport:
7392             LOP(OP_GSBYPORT,XTERM);
7393
7394         case KEY_getservent:
7395             FUN0(OP_GSERVENT);
7396
7397         case KEY_getsockname:
7398             UNI(OP_GETSOCKNAME);
7399
7400         case KEY_getsockopt:
7401             LOP(OP_GSOCKOPT,XTERM);
7402
7403         case KEY_getgrent:
7404             FUN0(OP_GGRENT);
7405
7406         case KEY_getgrnam:
7407             UNI(OP_GGRNAM);
7408
7409         case KEY_getgrgid:
7410             UNI(OP_GGRGID);
7411
7412         case KEY_getlogin:
7413             FUN0(OP_GETLOGIN);
7414
7415         case KEY_given:
7416             pl_yylval.ival = CopLINE(PL_curcop);
7417             OPERATOR(GIVEN);
7418
7419         case KEY_glob:
7420             LOP(OP_GLOB,XTERM);
7421
7422         case KEY_hex:
7423             UNI(OP_HEX);
7424
7425         case KEY_if:
7426             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7427                 return REPORT(0);
7428             pl_yylval.ival = CopLINE(PL_curcop);
7429             OPERATOR(IF);
7430
7431         case KEY_index:
7432             LOP(OP_INDEX,XTERM);
7433
7434         case KEY_int:
7435             UNI(OP_INT);
7436
7437         case KEY_ioctl:
7438             LOP(OP_IOCTL,XTERM);
7439
7440         case KEY_join:
7441             LOP(OP_JOIN,XTERM);
7442
7443         case KEY_keys:
7444             UNI(OP_KEYS);
7445
7446         case KEY_kill:
7447             LOP(OP_KILL,XTERM);
7448
7449         case KEY_last:
7450             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7451             LOOPX(OP_LAST);
7452
7453         case KEY_lc:
7454             UNI(OP_LC);
7455
7456         case KEY_lcfirst:
7457             UNI(OP_LCFIRST);
7458
7459         case KEY_local:
7460             pl_yylval.ival = 0;
7461             OPERATOR(LOCAL);
7462
7463         case KEY_length:
7464             UNI(OP_LENGTH);
7465
7466         case KEY_lt:
7467             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7468                 return REPORT(0);
7469             Rop(OP_SLT);
7470
7471         case KEY_le:
7472             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7473                 return REPORT(0);
7474             Rop(OP_SLE);
7475
7476         case KEY_localtime:
7477             UNI(OP_LOCALTIME);
7478
7479         case KEY_log:
7480             UNI(OP_LOG);
7481
7482         case KEY_link:
7483             LOP(OP_LINK,XTERM);
7484
7485         case KEY_listen:
7486             LOP(OP_LISTEN,XTERM);
7487
7488         case KEY_lock:
7489             UNI(OP_LOCK);
7490
7491         case KEY_lstat:
7492             UNI(OP_LSTAT);
7493
7494         case KEY_m:
7495             s = scan_pat(s,OP_MATCH);
7496             TERM(sublex_start());
7497
7498         case KEY_map:
7499             LOP(OP_MAPSTART, XREF);
7500
7501         case KEY_mkdir:
7502             LOP(OP_MKDIR,XTERM);
7503
7504         case KEY_msgctl:
7505             LOP(OP_MSGCTL,XTERM);
7506
7507         case KEY_msgget:
7508             LOP(OP_MSGGET,XTERM);
7509
7510         case KEY_msgrcv:
7511             LOP(OP_MSGRCV,XTERM);
7512
7513         case KEY_msgsnd:
7514             LOP(OP_MSGSND,XTERM);
7515
7516         case KEY_our:
7517         case KEY_my:
7518         case KEY_state:
7519             PL_in_my = (U16)tmp;
7520             s = SKIPSPACE1(s);
7521             if (isIDFIRST_lazy_if(s,UTF)) {
7522 #ifdef PERL_MAD
7523                 char* start = s;
7524 #endif
7525                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7526                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7527                     goto really_sub;
7528                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7529                 if (!PL_in_my_stash) {
7530                     char tmpbuf[1024];
7531                     PL_bufptr = s;
7532                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7533                     yyerror(tmpbuf);
7534                 }
7535 #ifdef PERL_MAD
7536                 if (PL_madskills) {     /* just add type to declarator token */
7537                     sv_catsv(PL_thistoken, PL_nextwhite);
7538                     PL_nextwhite = 0;
7539                     sv_catpvn(PL_thistoken, start, s - start);
7540                 }
7541 #endif
7542             }
7543             pl_yylval.ival = 1;
7544             OPERATOR(MY);
7545
7546         case KEY_next:
7547             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7548             LOOPX(OP_NEXT);
7549
7550         case KEY_ne:
7551             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7552                 return REPORT(0);
7553             Eop(OP_SNE);
7554
7555         case KEY_no:
7556             s = tokenize_use(0, s);
7557             OPERATOR(USE);
7558
7559         case KEY_not:
7560             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7561                 FUN1(OP_NOT);
7562             else {
7563                 if (!PL_lex_allbrackets &&
7564                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7565                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7566                 OPERATOR(NOTOP);
7567             }
7568
7569         case KEY_open:
7570             s = SKIPSPACE1(s);
7571             if (isIDFIRST_lazy_if(s,UTF)) {
7572                 const char *t;
7573                 for (d = s; isALNUM_lazy_if(d,UTF);)
7574                     d++;
7575                 for (t=d; isSPACE(*t);)
7576                     t++;
7577                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7578                     /* [perl #16184] */
7579                     && !(t[0] == '=' && t[1] == '>')
7580                 ) {
7581                     int parms_len = (int)(d-s);
7582                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7583                            "Precedence problem: open %.*s should be open(%.*s)",
7584                             parms_len, s, parms_len, s);
7585                 }
7586             }
7587             LOP(OP_OPEN,XTERM);
7588
7589         case KEY_or:
7590             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7591                 return REPORT(0);
7592             pl_yylval.ival = OP_OR;
7593             OPERATOR(OROP);
7594
7595         case KEY_ord:
7596             UNI(OP_ORD);
7597
7598         case KEY_oct:
7599             UNI(OP_OCT);
7600
7601         case KEY_opendir:
7602             LOP(OP_OPEN_DIR,XTERM);
7603
7604         case KEY_print:
7605             checkcomma(s,PL_tokenbuf,"filehandle");
7606             LOP(OP_PRINT,XREF);
7607
7608         case KEY_printf:
7609             checkcomma(s,PL_tokenbuf,"filehandle");
7610             LOP(OP_PRTF,XREF);
7611
7612         case KEY_prototype:
7613             UNI(OP_PROTOTYPE);
7614
7615         case KEY_push:
7616             LOP(OP_PUSH,XTERM);
7617
7618         case KEY_pop:
7619             UNIDOR(OP_POP);
7620
7621         case KEY_pos:
7622             UNIDOR(OP_POS);
7623
7624         case KEY_pack:
7625             LOP(OP_PACK,XTERM);
7626
7627         case KEY_package:
7628             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7629             s = SKIPSPACE1(s);
7630             s = force_strict_version(s);
7631             PL_lex_expect = XBLOCK;
7632             OPERATOR(PACKAGE);
7633
7634         case KEY_pipe:
7635             LOP(OP_PIPE_OP,XTERM);
7636
7637         case KEY_q:
7638             s = scan_str(s,!!PL_madskills,FALSE);
7639             if (!s)
7640                 missingterm(NULL);
7641             pl_yylval.ival = OP_CONST;
7642             TERM(sublex_start());
7643
7644         case KEY_quotemeta:
7645             UNI(OP_QUOTEMETA);
7646
7647         case KEY_qw: {
7648             OP *words = NULL;
7649             s = scan_str(s,!!PL_madskills,FALSE);
7650             if (!s)
7651                 missingterm(NULL);
7652             PL_expect = XOPERATOR;
7653             if (SvCUR(PL_lex_stuff)) {
7654                 int warned_comma = !ckWARN(WARN_QW);
7655                 int warned_comment = warned_comma;
7656                 d = SvPV_force(PL_lex_stuff, len);
7657                 while (len) {
7658                     for (; isSPACE(*d) && len; --len, ++d)
7659                         /**/;
7660                     if (len) {
7661                         SV *sv;
7662                         const char *b = d;
7663                         if (!warned_comma || !warned_comment) {
7664                             for (; !isSPACE(*d) && len; --len, ++d) {
7665                                 if (!warned_comma && *d == ',') {
7666                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7667                                         "Possible attempt to separate words with commas");
7668                                     ++warned_comma;
7669                                 }
7670                                 else if (!warned_comment && *d == '#') {
7671                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7672                                         "Possible attempt to put comments in qw() list");
7673                                     ++warned_comment;
7674                                 }
7675                             }
7676                         }
7677                         else {
7678                             for (; !isSPACE(*d) && len; --len, ++d)
7679                                 /**/;
7680                         }
7681                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7682                         words = op_append_elem(OP_LIST, words,
7683                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7684                     }
7685                 }
7686             }
7687             if (!words)
7688                 words = newNULLLIST();
7689             if (PL_lex_stuff) {
7690                 SvREFCNT_dec(PL_lex_stuff);
7691                 PL_lex_stuff = NULL;
7692             }
7693             PL_expect = XOPERATOR;
7694             pl_yylval.opval = sawparens(words);
7695             TOKEN(QWLIST);
7696         }
7697
7698         case KEY_qq:
7699             s = scan_str(s,!!PL_madskills,FALSE);
7700             if (!s)
7701                 missingterm(NULL);
7702             pl_yylval.ival = OP_STRINGIFY;
7703             if (SvIVX(PL_lex_stuff) == '\'')
7704                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7705             TERM(sublex_start());
7706
7707         case KEY_qr:
7708             s = scan_pat(s,OP_QR);
7709             TERM(sublex_start());
7710
7711         case KEY_qx:
7712             s = scan_str(s,!!PL_madskills,FALSE);
7713             if (!s)
7714                 missingterm(NULL);
7715             readpipe_override();
7716             TERM(sublex_start());
7717
7718         case KEY_return:
7719             OLDLOP(OP_RETURN);
7720
7721         case KEY_require:
7722             s = SKIPSPACE1(s);
7723             if (isDIGIT(*s)) {
7724                 s = force_version(s, FALSE);
7725             }
7726             else if (*s != 'v' || !isDIGIT(s[1])
7727                     || (s = force_version(s, TRUE), *s == 'v'))
7728             {
7729                 *PL_tokenbuf = '\0';
7730                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7731                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7732                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7733                 else if (*s == '<')
7734                     yyerror("<> should be quotes");
7735             }
7736             if (orig_keyword == KEY_require) {
7737                 orig_keyword = 0;
7738                 pl_yylval.ival = 1;
7739             }
7740             else
7741                 pl_yylval.ival = 0;
7742             PL_expect = XTERM;
7743             PL_bufptr = s;
7744             PL_last_uni = PL_oldbufptr;
7745             PL_last_lop_op = OP_REQUIRE;
7746             s = skipspace(s);
7747             return REPORT( (int)REQUIRE );
7748
7749         case KEY_reset:
7750             UNI(OP_RESET);
7751
7752         case KEY_redo:
7753             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7754             LOOPX(OP_REDO);
7755
7756         case KEY_rename:
7757             LOP(OP_RENAME,XTERM);
7758
7759         case KEY_rand:
7760             UNI(OP_RAND);
7761
7762         case KEY_rmdir:
7763             UNI(OP_RMDIR);
7764
7765         case KEY_rindex:
7766             LOP(OP_RINDEX,XTERM);
7767
7768         case KEY_read:
7769             LOP(OP_READ,XTERM);
7770
7771         case KEY_readdir:
7772             UNI(OP_READDIR);
7773
7774         case KEY_readline:
7775             UNIDOR(OP_READLINE);
7776
7777         case KEY_readpipe:
7778             UNIDOR(OP_BACKTICK);
7779
7780         case KEY_rewinddir:
7781             UNI(OP_REWINDDIR);
7782
7783         case KEY_recv:
7784             LOP(OP_RECV,XTERM);
7785
7786         case KEY_reverse:
7787             LOP(OP_REVERSE,XTERM);
7788
7789         case KEY_readlink:
7790             UNIDOR(OP_READLINK);
7791
7792         case KEY_ref:
7793             UNI(OP_REF);
7794
7795         case KEY_s:
7796             s = scan_subst(s);
7797             if (pl_yylval.opval)
7798                 TERM(sublex_start());
7799             else
7800                 TOKEN(1);       /* force error */
7801
7802         case KEY_say:
7803             checkcomma(s,PL_tokenbuf,"filehandle");
7804             LOP(OP_SAY,XREF);
7805
7806         case KEY_chomp:
7807             UNI(OP_CHOMP);
7808
7809         case KEY_scalar:
7810             UNI(OP_SCALAR);
7811
7812         case KEY_select:
7813             LOP(OP_SELECT,XTERM);
7814
7815         case KEY_seek:
7816             LOP(OP_SEEK,XTERM);
7817
7818         case KEY_semctl:
7819             LOP(OP_SEMCTL,XTERM);
7820
7821         case KEY_semget:
7822             LOP(OP_SEMGET,XTERM);
7823
7824         case KEY_semop:
7825             LOP(OP_SEMOP,XTERM);
7826
7827         case KEY_send:
7828             LOP(OP_SEND,XTERM);
7829
7830         case KEY_setpgrp:
7831             LOP(OP_SETPGRP,XTERM);
7832
7833         case KEY_setpriority:
7834             LOP(OP_SETPRIORITY,XTERM);
7835
7836         case KEY_sethostent:
7837             UNI(OP_SHOSTENT);
7838
7839         case KEY_setnetent:
7840             UNI(OP_SNETENT);
7841
7842         case KEY_setservent:
7843             UNI(OP_SSERVENT);
7844
7845         case KEY_setprotoent:
7846             UNI(OP_SPROTOENT);
7847
7848         case KEY_setpwent:
7849             FUN0(OP_SPWENT);
7850
7851         case KEY_setgrent:
7852             FUN0(OP_SGRENT);
7853
7854         case KEY_seekdir:
7855             LOP(OP_SEEKDIR,XTERM);
7856
7857         case KEY_setsockopt:
7858             LOP(OP_SSOCKOPT,XTERM);
7859
7860         case KEY_shift:
7861             UNIDOR(OP_SHIFT);
7862
7863         case KEY_shmctl:
7864             LOP(OP_SHMCTL,XTERM);
7865
7866         case KEY_shmget:
7867             LOP(OP_SHMGET,XTERM);
7868
7869         case KEY_shmread:
7870             LOP(OP_SHMREAD,XTERM);
7871
7872         case KEY_shmwrite:
7873             LOP(OP_SHMWRITE,XTERM);
7874
7875         case KEY_shutdown:
7876             LOP(OP_SHUTDOWN,XTERM);
7877
7878         case KEY_sin:
7879             UNI(OP_SIN);
7880
7881         case KEY_sleep:
7882             UNI(OP_SLEEP);
7883
7884         case KEY_socket:
7885             LOP(OP_SOCKET,XTERM);
7886
7887         case KEY_socketpair:
7888             LOP(OP_SOCKPAIR,XTERM);
7889
7890         case KEY_sort:
7891             checkcomma(s,PL_tokenbuf,"subroutine name");
7892             s = SKIPSPACE1(s);
7893             if (*s == ';' || *s == ')')         /* probably a close */
7894                 Perl_croak(aTHX_ "sort is now a reserved word");
7895             PL_expect = XTERM;
7896             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7897             LOP(OP_SORT,XREF);
7898
7899         case KEY_split:
7900             LOP(OP_SPLIT,XTERM);
7901
7902         case KEY_sprintf:
7903             LOP(OP_SPRINTF,XTERM);
7904
7905         case KEY_splice:
7906             LOP(OP_SPLICE,XTERM);
7907
7908         case KEY_sqrt:
7909             UNI(OP_SQRT);
7910
7911         case KEY_srand:
7912             UNI(OP_SRAND);
7913
7914         case KEY_stat:
7915             UNI(OP_STAT);
7916
7917         case KEY_study:
7918             UNI(OP_STUDY);
7919
7920         case KEY_substr:
7921             LOP(OP_SUBSTR,XTERM);
7922
7923         case KEY_format:
7924         case KEY_sub:
7925           really_sub:
7926             {
7927                 char tmpbuf[sizeof PL_tokenbuf];
7928                 SSize_t tboffset = 0;
7929                 expectation attrful;
7930                 bool have_name, have_proto;
7931                 const int key = tmp;
7932
7933 #ifdef PERL_MAD
7934                 SV *tmpwhite = 0;
7935
7936                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7937                 SV *subtoken = newSVpvn(tstart, s - tstart);
7938                 PL_thistoken = 0;
7939
7940                 d = s;
7941                 s = SKIPSPACE2(s,tmpwhite);
7942 #else
7943                 s = skipspace(s);
7944 #endif
7945
7946                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7947                     (*s == ':' && s[1] == ':'))
7948                 {
7949 #ifdef PERL_MAD
7950                     SV *nametoke = NULL;
7951 #endif
7952
7953                     PL_expect = XBLOCK;
7954                     attrful = XATTRBLOCK;
7955                     /* remember buffer pos'n for later force_word */
7956                     tboffset = s - PL_oldbufptr;
7957                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7958 #ifdef PERL_MAD
7959                     if (PL_madskills)
7960                         nametoke = newSVpvn(s, d - s);
7961 #endif
7962                     if (memchr(tmpbuf, ':', len))
7963                         sv_setpvn(PL_subname, tmpbuf, len);
7964                     else {
7965                         sv_setsv(PL_subname,PL_curstname);
7966                         sv_catpvs(PL_subname,"::");
7967                         sv_catpvn(PL_subname,tmpbuf,len);
7968                     }
7969                     have_name = TRUE;
7970
7971 #ifdef PERL_MAD
7972
7973                     start_force(0);
7974                     CURMAD('X', nametoke);
7975                     CURMAD('_', tmpwhite);
7976                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7977                                       FALSE, TRUE, TRUE);
7978
7979                     s = SKIPSPACE2(d,tmpwhite);
7980 #else
7981                     s = skipspace(d);
7982 #endif
7983                 }
7984                 else {
7985                     if (key == KEY_my)
7986                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7987                     PL_expect = XTERMBLOCK;
7988                     attrful = XATTRTERM;
7989                     sv_setpvs(PL_subname,"?");
7990                     have_name = FALSE;
7991                 }
7992
7993                 if (key == KEY_format) {
7994                     if (*s == '=')
7995                         PL_lex_formbrack = PL_lex_brackets + 1;
7996 #ifdef PERL_MAD
7997                     PL_thistoken = subtoken;
7998                     s = d;
7999 #else
8000                     if (have_name)
8001                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8002                                           FALSE, TRUE, TRUE);
8003 #endif
8004                     OPERATOR(FORMAT);
8005                 }
8006
8007                 /* Look for a prototype */
8008                 if (*s == '(') {
8009                     char *p;
8010                     bool bad_proto = FALSE;
8011                     bool in_brackets = FALSE;
8012                     char greedy_proto = ' ';
8013                     bool proto_after_greedy_proto = FALSE;
8014                     bool must_be_last = FALSE;
8015                     bool underscore = FALSE;
8016                     bool seen_underscore = FALSE;
8017                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8018
8019                     s = scan_str(s,!!PL_madskills,FALSE);
8020                     if (!s)
8021                         Perl_croak(aTHX_ "Prototype not terminated");
8022                     /* strip spaces and check for bad characters */
8023                     d = SvPVX(PL_lex_stuff);
8024                     tmp = 0;
8025                     for (p = d; *p; ++p) {
8026                         if (!isSPACE(*p)) {
8027                             d[tmp++] = *p;
8028
8029                             if (warnillegalproto) {
8030                                 if (must_be_last)
8031                                     proto_after_greedy_proto = TRUE;
8032                                 if (!strchr("$@%*;[]&\\_+", *p)) {
8033                                     bad_proto = TRUE;
8034                                 }
8035                                 else {
8036                                     if ( underscore ) {
8037                                         if ( *p != ';' )
8038                                             bad_proto = TRUE;
8039                                         underscore = FALSE;
8040                                     }
8041                                     if ( *p == '[' ) {
8042                                         in_brackets = TRUE;
8043                                     }
8044                                     else if ( *p == ']' ) {
8045                                         in_brackets = FALSE;
8046                                     }
8047                                     else if ( (*p == '@' || *p == '%') &&
8048                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8049                                          !in_brackets ) {
8050                                         must_be_last = TRUE;
8051                                         greedy_proto = *p;
8052                                     }
8053                                     else if ( *p == '_' ) {
8054                                         underscore = seen_underscore = TRUE;
8055                                     }
8056                                 }
8057                             }
8058                         }
8059                     }
8060                     d[tmp] = '\0';
8061                     if (proto_after_greedy_proto)
8062                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8063                                     "Prototype after '%c' for %"SVf" : %s",
8064                                     greedy_proto, SVfARG(PL_subname), d);
8065                     if (bad_proto)
8066                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8067                                     "Illegal character %sin prototype for %"SVf" : %s",
8068                                     seen_underscore ? "after '_' " : "",
8069                                     SVfARG(PL_subname), d);
8070                     SvCUR_set(PL_lex_stuff, tmp);
8071                     have_proto = TRUE;
8072
8073 #ifdef PERL_MAD
8074                     start_force(0);
8075                     CURMAD('q', PL_thisopen);
8076                     CURMAD('_', tmpwhite);
8077                     CURMAD('=', PL_thisstuff);
8078                     CURMAD('Q', PL_thisclose);
8079                     NEXTVAL_NEXTTOKE.opval =
8080                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8081                     PL_lex_stuff = NULL;
8082                     force_next(THING);
8083
8084                     s = SKIPSPACE2(s,tmpwhite);
8085 #else
8086                     s = skipspace(s);
8087 #endif
8088                 }
8089                 else
8090                     have_proto = FALSE;
8091
8092                 if (*s == ':' && s[1] != ':')
8093                     PL_expect = attrful;
8094                 else if (*s != '{' && key == KEY_sub) {
8095                     if (!have_name)
8096                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8097                     else if (*s != ';' && *s != '}')
8098                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8099                 }
8100
8101 #ifdef PERL_MAD
8102                 start_force(0);
8103                 if (tmpwhite) {
8104                     if (PL_madskills)
8105                         curmad('^', newSVpvs(""));
8106                     CURMAD('_', tmpwhite);
8107                 }
8108                 force_next(0);
8109
8110                 PL_thistoken = subtoken;
8111 #else
8112                 if (have_proto) {
8113                     NEXTVAL_NEXTTOKE.opval =
8114                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8115                     PL_lex_stuff = NULL;
8116                     force_next(THING);
8117                 }
8118 #endif
8119                 if (!have_name) {
8120                     if (PL_curstash)
8121                         sv_setpvs(PL_subname, "__ANON__");
8122                     else
8123                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8124                     TOKEN(ANONSUB);
8125                 }
8126 #ifndef PERL_MAD
8127                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8128                                   FALSE, TRUE, TRUE);
8129 #endif
8130                 if (key == KEY_my)
8131                     TOKEN(MYSUB);
8132                 TOKEN(SUB);
8133             }
8134
8135         case KEY_system:
8136             LOP(OP_SYSTEM,XREF);
8137
8138         case KEY_symlink:
8139             LOP(OP_SYMLINK,XTERM);
8140
8141         case KEY_syscall:
8142             LOP(OP_SYSCALL,XTERM);
8143
8144         case KEY_sysopen:
8145             LOP(OP_SYSOPEN,XTERM);
8146
8147         case KEY_sysseek:
8148             LOP(OP_SYSSEEK,XTERM);
8149
8150         case KEY_sysread:
8151             LOP(OP_SYSREAD,XTERM);
8152
8153         case KEY_syswrite:
8154             LOP(OP_SYSWRITE,XTERM);
8155
8156         case KEY_tr:
8157             s = scan_trans(s);
8158             TERM(sublex_start());
8159
8160         case KEY_tell:
8161             UNI(OP_TELL);
8162
8163         case KEY_telldir:
8164             UNI(OP_TELLDIR);
8165
8166         case KEY_tie:
8167             LOP(OP_TIE,XTERM);
8168
8169         case KEY_tied:
8170             UNI(OP_TIED);
8171
8172         case KEY_time:
8173             FUN0(OP_TIME);
8174
8175         case KEY_times:
8176             FUN0(OP_TMS);
8177
8178         case KEY_truncate:
8179             LOP(OP_TRUNCATE,XTERM);
8180
8181         case KEY_uc:
8182             UNI(OP_UC);
8183
8184         case KEY_ucfirst:
8185             UNI(OP_UCFIRST);
8186
8187         case KEY_untie:
8188             UNI(OP_UNTIE);
8189
8190         case KEY_until:
8191             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8192                 return REPORT(0);
8193             pl_yylval.ival = CopLINE(PL_curcop);
8194             OPERATOR(UNTIL);
8195
8196         case KEY_unless:
8197             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8198                 return REPORT(0);
8199             pl_yylval.ival = CopLINE(PL_curcop);
8200             OPERATOR(UNLESS);
8201
8202         case KEY_unlink:
8203             LOP(OP_UNLINK,XTERM);
8204
8205         case KEY_undef:
8206             UNIDOR(OP_UNDEF);
8207
8208         case KEY_unpack:
8209             LOP(OP_UNPACK,XTERM);
8210
8211         case KEY_utime:
8212             LOP(OP_UTIME,XTERM);
8213
8214         case KEY_umask:
8215             UNIDOR(OP_UMASK);
8216
8217         case KEY_unshift:
8218             LOP(OP_UNSHIFT,XTERM);
8219
8220         case KEY_use:
8221             s = tokenize_use(1, s);
8222             OPERATOR(USE);
8223
8224         case KEY_values:
8225             UNI(OP_VALUES);
8226
8227         case KEY_vec:
8228             LOP(OP_VEC,XTERM);
8229
8230         case KEY_when:
8231             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8232                 return REPORT(0);
8233             pl_yylval.ival = CopLINE(PL_curcop);
8234             OPERATOR(WHEN);
8235
8236         case KEY_while:
8237             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8238                 return REPORT(0);
8239             pl_yylval.ival = CopLINE(PL_curcop);
8240             OPERATOR(WHILE);
8241
8242         case KEY_warn:
8243             PL_hints |= HINT_BLOCK_SCOPE;
8244             LOP(OP_WARN,XTERM);
8245
8246         case KEY_wait:
8247             FUN0(OP_WAIT);
8248
8249         case KEY_waitpid:
8250             LOP(OP_WAITPID,XTERM);
8251
8252         case KEY_wantarray:
8253             FUN0(OP_WANTARRAY);
8254
8255         case KEY_write:
8256 #ifdef EBCDIC
8257         {
8258             char ctl_l[2];
8259             ctl_l[0] = toCTRL('L');
8260             ctl_l[1] = '\0';
8261             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8262         }
8263 #else
8264             /* Make sure $^L is defined */
8265             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8266 #endif
8267             UNI(OP_ENTERWRITE);
8268
8269         case KEY_x:
8270             if (PL_expect == XOPERATOR) {
8271                 if (*s == '=' && !PL_lex_allbrackets &&
8272                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8273                     return REPORT(0);
8274                 Mop(OP_REPEAT);
8275             }
8276             check_uni();
8277             goto just_a_word;
8278
8279         case KEY_xor:
8280             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8281                 return REPORT(0);
8282             pl_yylval.ival = OP_XOR;
8283             OPERATOR(OROP);
8284
8285         case KEY_y:
8286             s = scan_trans(s);
8287             TERM(sublex_start());
8288         }
8289     }}
8290 }
8291 #ifdef __SC__
8292 #pragma segment Main
8293 #endif
8294
/*
 * S_pending_ident
 *
 * Turns the identifier currently held in PL_tokenbuf (sigil included) into
 * a token.  PL_pending_ident is cleared on entry.
 *
 * Returns PRIVATEREF, with pl_yylval.opval set to an OP_PADANY whose
 * op_targ is the pad offset, when the name is being declared with
 * my/state or is found in the pad as a lexical.
 *
 * Returns WORD, with pl_yylval.opval set to an OP_CONST holding the name
 * without its sigil, for an "our" variable (name qualified with the stash
 * recorded in the pad) and for anything not found in the pad.
 *
 * A ':' in a name being declared with my/our/state is reported through
 * yyerror().  Croaks for "my $a"/"my $b" used in a sort comparison.
 */
8295 static int
8296 S_pending_ident(pTHX)
8297 {
8298     dVAR;
8299     register char *d;
8300     PADOFFSET tmp = 0;
8301     /* pit keeps the value of PL_pending_ident (tested against '@' below), because PL_pending_ident itself is reset here */
8302     char pit = PL_pending_ident;
8303     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8304     /* All routes through this function want to know if there is a colon.  */
8305     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8306     PL_pending_ident = 0;
8307
8308     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8309     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8310           "### Pending identifier '%s'\n", PL_tokenbuf); });
8311
8312     /* if we're in a my(), we can't allow dynamics here.
8313        $foo'bar has already been turned into $foo::bar, so
8314        just check for colons.
8315
8316        if it's a legal name, the OP is a PADANY.
8317     */
8318     if (PL_in_my) {
8319         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8320             if (has_colon)
8321                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8322                                   "variable %s in \"our\"",
8323                                   PL_tokenbuf));
                 /* no return here: "our" falls through to the pad handling
                    below, which uses the offset stored in tmp */
8324             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8325         }
8326         else {
8327             if (has_colon)
8328                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8329                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8330
8331             pl_yylval.opval = newOP(OP_PADANY, 0);
8332             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8333                                                         UTF ? SVf_UTF8 : 0);
8334             return PRIVATEREF;
8335         }
8336     }
8337
8338     /*
8339        build the ops for accesses to a my() variable.
8340
8341        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8342        then used in a comparison.  This catches most, but not
8343        all cases.  For instance, it catches
8344            sort { my($a); $a <=> $b }
8345        but not
8346            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8347        (although why you'd do that is anyone's guess).
8348     */
8349
8350     if (!has_colon) {
             /* Only "our" reaches this point with PL_in_my set, and then tmp
                already holds the offset returned by allocmy() above. */
8351         if (!PL_in_my)
8352             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8353                                     UTF ? SVf_UTF8 : 0);
8354         if (tmp != NOT_IN_PAD) {
8355             /* might be an "our" variable */
8356             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8357                 /* build ops for a bareword */
8358                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8359                 HEK * const stashname = HvNAME_HEK(stash);
8360                 SV *  const sym = newSVhek(stashname);
8361                 sv_catpvs(sym, "::");
                     /* append the name without its sigil */
8362                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8363                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8364                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
                     /* fetch the glob for the qualified name; the sigil
                        selects the slot type (scalar, array or hash) */
8365                 gv_fetchsv(sym,
8366                     (PL_in_eval
8367                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8368                         : GV_ADDMULTI
8369                     ),
8370                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8371                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8372                      : SVt_PVHV));
8373                 return WORD;
8374             }
8375
8376             /* if it's a sort block and they're naming $a or $b */
8377             if (PL_last_lop_op == OP_SORT &&
8378                 PL_tokenbuf[0] == '$' &&
8379                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8380                 && !PL_tokenbuf[2])
8381             {
                     /* scan the rest of the current line for a "<=>" or
                        "cmp" operator */
8382                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8383                      d < PL_bufend && *d != '\n';
8384                      d++)
8385                 {
8386                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8387                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8388                               PL_tokenbuf);
8389                     }
8390                 }
8391             }
8392
8393             pl_yylval.opval = newOP(OP_PADANY, 0);
8394             pl_yylval.opval->op_targ = tmp;
8395             return PRIVATEREF;
8396         }
8397     }
8398
8399     /*
8400        Whine if they've said @foo in a doublequoted string,
8401        and @foo isn't a variable we can find in the symbol
8402        table.
8403     */
8404     if (ckWARN(WARN_AMBIGUOUS) &&
8405         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
             /* flags of 0: look the glob up without creating it */
8406         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8407                                          SVt_PVAV);
8408         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8409                 /* DO NOT warn for @- and @+ */
8410                 && !( PL_tokenbuf[2] == '\0' &&
8411                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8412            )
8413         {
8414             /* Downgraded from fatal to warning 20000522 mjd */
8415             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8416                         "Possible unintended interpolation of %s in string",
8417                         PL_tokenbuf);
8418         }
8419     }
8420
8421     /* build ops for a bareword */
8422     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8423                                                       tokenbuf_len - 1));
8424     pl_yylval.opval->op_private = OPpCONST_ENTERED;
         /* fetch the glob for the unqualified name; the sigil selects the
            slot type (scalar, array or hash) */
8425     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8426                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8427                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8428                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8429                       : SVt_PVHV));
8430     return WORD;
8431 }
8432
8433 STATIC void
8434 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8435 {
8436     dVAR;
8437
8438     PERL_ARGS_ASSERT_CHECKCOMMA;
8439
8440     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8441         if (ckWARN(WARN_SYNTAX)) {
8442             int level = 1;
8443             const char *w;
8444             for (w = s+2; *w && level; w++) {
8445                 if (*w == '(')
8446                     ++level;
8447                 else if (*w == ')')
8448                     --level;
8449             }
8450             while (isSPACE(*w))
8451                 ++w;
8452             /* the list of chars below is for end of statements or
8453              * block / parens, boolean operators (&&, ||, //) and branch
8454              * constructs (or, and, if, until, unless, while, err, for).
8455              * Not a very solid hack... */
8456             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8457                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8458                             "%s (...) interpreted as function",name);
8459         }
8460     }
8461     while (s < PL_bufend && isSPACE(*s))
8462         s++;
8463     if (*s == '(')
8464         s++;
8465     while (s < PL_bufend && isSPACE(*s))
8466         s++;
8467     if (isIDFIRST_lazy_if(s,UTF)) {
8468         const char * const w = s++;
8469         while (isALNUM_lazy_if(s,UTF))
8470             s++;
8471         while (s < PL_bufend && isSPACE(*s))
8472             s++;
8473         if (*s == ',') {
8474             GV* gv;
8475             if (keyword(w, s - w, 0))
8476                 return;
8477
8478             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8479             if (gv && GvCVu(gv))
8480                 return;
8481             Perl_croak(aTHX_ "No comma allowed after %s", what);
8482         }
8483     }
8484 }
8485
8486 /* Either returns sv, or mortalizes sv and returns a new SV*.
8487    Best used as sv=new_constant(..., sv, ...).
8488    If s, pv are NULL, calls subroutine with one argument,
8489    and type is used with error messages only.

        s, len        source text; used to build pv when pv is NULL and s
                      is not
        key, keylen   name of the handler entry looked up in %^H
        sv            the value to hand to the handler
        pv            optional first argument for the handler
        type, typelen optional type string; passed to the handler as the
                      third argument when pv is present, and always used
                      in error messages ("undef" when NULL)

        The handler is called in scalar context with (pv, sv, type), or
        with (sv) alone when there is no pv.

        Errors are reported through yyerror().  When %^H is not available
        or has no defined entry for key, sv is returned unchanged.  When
        the handler returns an undefined value, that value is returned
        after the error has been reported.  For key "charnames",
        &PL_sv_undef is returned without calling anything if errors have
        already been counted.
     */
8490
8491 STATIC SV *
8492 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8493                SV *sv, SV *pv, const char *type, STRLEN typelen)
8494 {
8495     dVAR; dSP;
8496     HV * const table = GvHV(PL_hintgv);          /* ^H */
8497     SV *res;
8498     SV **cvp;
8499     SV *cv, *typesv;
         /* the three pieces of the message assembled at "report" */
8500     const char *why1 = "", *why2 = "", *why3 = "";
8501
8502     PERL_ARGS_ASSERT_NEW_CONSTANT;
8503
8504     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8505         SV *msg;
8506
8507         why2 = (const char *)
8508             (strEQ(key,"charnames")
8509              ? "(possibly a missing \"use charnames ...\")"
8510              : "");
8511         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8512                             (type ? type: "undef"), why2);
8513
8514         /* This is convoluted and evil ("goto considered harmful")
8515          * but I do not understand the intricacies of all the different
8516          * failure modes of %^H in here.  The goal here is to make
8517          * the most probable error message user-friendly. --jhi */
8518
8519         goto msgdone;
8520
             /* "report" is entered only by goto from the code after this
                block, with why1/why2/why3 (and sv) already set up */
8521     report:
8522         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8523                             (type ? type: "undef"), why1, why2, why3);
8524     msgdone:
8525         yyerror(SvPVX_const(msg));
8526         SvREFCNT_dec(msg);
8527         return sv;
8528     }
8529
8530     /* charnames doesn't work well if there have been errors found */
8531     if (PL_error_count > 0 && strEQ(key,"charnames"))
8532         return &PL_sv_undef;
8533
8534     cvp = hv_fetch(table, key, keylen, FALSE);
8535     if (!cvp || !SvOK(*cvp)) {
8536         why1 = "$^H{";
8537         why2 = key;
8538         why3 = "} is not defined";
8539         goto report;
8540     }
8541     sv_2mortal(sv);                     /* Parent created it permanently */
8542     cv = *cvp;
8543     if (!pv && s)
8544         pv = newSVpvn_flags(s, len, SVs_TEMP);
8545     if (type && pv)
8546         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8547     else
8548         typesv = &PL_sv_undef;
8549
8550     PUSHSTACKi(PERLSI_OVERLOAD);
8551     ENTER ;
8552     SAVETMPS;
8553
8554     PUSHMARK(SP) ;
8555     EXTEND(sp, 3);
         /* arguments are (pv, sv, typesv), or just (sv) when there is no pv */
8556     if (pv)
8557         PUSHs(pv);
8558     PUSHs(sv);
8559     if (pv)
8560         PUSHs(typesv);
8561     PUTBACK;
         /* request G_EVAL only when not already inside an eval */
8562     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8563
8564     SPAGAIN ;
8565
8566     /* Check the eval first */
8567     if (!PL_in_eval && SvTRUE(ERRSV)) {
8568         sv_catpvs(ERRSV, "Propagated");
8569         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8570         (void)POPs;
             /* the handler failed: hand back the original value instead */
8571         res = SvREFCNT_inc_simple(sv);
8572     }
8573     else {
8574         res = POPs;
             /* take a reference before FREETMPS below runs */
8575         SvREFCNT_inc_simple_void(res);
8576     }
8577
8578     PUTBACK ;
8579     FREETMPS ;
8580     LEAVE ;
8581     POPSTACK;
8582
8583     if (!SvOK(res)) {
8584         why1 = "Call to &{$^H{";
8585         why2 = key;
8586         why3 = "}} did not return a defined value";
             /* "report" returns sv, so this makes it return res */
8587         sv = res;
8588         goto report;
8589     }
8590
8591     return res;
8592 }
8593
8594 /* Returns a NUL terminated string, with the length of the string written to
8595    *slp
8596    */
8597 STATIC char *
8598 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8599 {
8600     dVAR;
8601     register char *d = dest;
8602     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8603
8604     PERL_ARGS_ASSERT_SCAN_WORD;
8605
8606     for (;;) {
8607         if (d >= e)
8608             Perl_croak(aTHX_ ident_too_long);
8609         if (isALNUM(*s))        /* UTF handled below */
8610             *d++ = *s++;
8611         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8612             *d++ = ':';
8613             *d++ = ':';
8614             s++;
8615         }
8616         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8617             *d++ = *s++;
8618             *d++ = *s++;
8619         }
8620         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8621             char *t = s + UTF8SKIP(s);
8622             size_t len;
8623             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8624                 t += UTF8SKIP(t);
8625             len = t - s;
8626             if (d + len > e)
8627                 Perl_croak(aTHX_ ident_too_long);
8628             Copy(s, d, len, char);
8629             d += len;
8630             s = t;
8631         }
8632         else {
8633             *d = '\0';
8634             *slp = d - dest;
8635             return s;
8636         }
8637     }
8638 }
8639
8640 STATIC char *
8641 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8642 {
8643     dVAR;
8644     char *bracket = NULL;
8645     char funny = *s++;
8646     register char *d = dest;
8647     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8648
8649     PERL_ARGS_ASSERT_SCAN_IDENT;
8650
8651     if (isSPACE(*s))
8652         s = PEEKSPACE(s);
8653     if (isDIGIT(*s)) {
8654         while (isDIGIT(*s)) {
8655             if (d >= e)
8656                 Perl_croak(aTHX_ ident_too_long);
8657             *d++ = *s++;
8658         }
8659     }
8660     else {
8661         for (;;) {
8662             if (d >= e)
8663                 Perl_croak(aTHX_ ident_too_long);
8664             if (isALNUM(*s))    /* UTF handled below */
8665                 *d++ = *s++;
8666             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8667                 *d++ = ':';
8668                 *d++ = ':';
8669                 s++;
8670             }
8671             else if (*s == ':' && s[1] == ':') {
8672                 *d++ = *s++;
8673                 *d++ = *s++;
8674             }
8675             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8676                 char *t = s + UTF8SKIP(s);
8677                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8678                     t += UTF8SKIP(t);
8679                 if (d + (t - s) > e)
8680                     Perl_croak(aTHX_ ident_too_long);
8681                 Copy(s, d, t - s, char);
8682                 d += t - s;
8683                 s = t;
8684             }
8685             else
8686                 break;
8687         }
8688     }
8689     *d = '\0';
8690     d = dest;
8691     if (*d) {
8692         if (PL_lex_state != LEX_NORMAL)
8693             PL_lex_state = LEX_INTERPENDMAYBE;
8694         return s;
8695     }
8696     if (*s == '$' && s[1] &&
8697         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8698     {
8699         return s;
8700     }
8701     if (*s == '{') {
8702         bracket = s;
8703         s++;
8704     }
8705     else if (ck_uni)
8706         check_uni();
8707     if (s < send)
8708         *d = *s++;
8709     d[1] = '\0';
8710     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8711         *d = toCTRL(*s);
8712         s++;
8713     }
8714     if (bracket) {
8715         if (isSPACE(s[-1])) {
8716             while (s < send) {
8717                 const char ch = *s++;
8718                 if (!SPACE_OR_TAB(ch)) {
8719                     *d = ch;
8720                     break;
8721                 }
8722             }
8723         }
8724         if (isIDFIRST_lazy_if(d,UTF)) {
8725             d++;
8726             if (UTF) {
8727                 char *end = s;
8728                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8729                     end += UTF8SKIP(end);
8730                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8731                         end += UTF8SKIP(end);
8732                 }
8733                 Copy(s, d, end - s, char);
8734                 d += end - s;
8735                 s = end;
8736             }
8737             else {
8738                 while ((isALNUM(*s) || *s == ':') && d < e)
8739                     *d++ = *s++;
8740                 if (d >= e)
8741                     Perl_croak(aTHX_ ident_too_long);
8742             }
8743             *d = '\0';
8744             while (s < send && SPACE_OR_TAB(*s))
8745                 s++;
8746             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8747                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8748                     const char * const brack =
8749                         (const char *)
8750                         ((*s == '[') ? "[...]" : "{...}");
8751    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8752                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8753                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8754                         funny, dest, brack, funny, dest, brack);
8755                 }
8756                 bracket++;
8757                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8758                 PL_lex_allbrackets++;
8759                 return s;
8760             }
8761         }
8762         /* Handle extended ${^Foo} variables
8763          * 1999-02-27 mjd-perl-patch@plover.com */
8764         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8765                  && isALNUM(*s))
8766         {
8767             d++;
8768             while (isALNUM(*s) && d < e) {
8769                 *d++ = *s++;
8770             }
8771             if (d >= e)
8772                 Perl_croak(aTHX_ ident_too_long);
8773             *d = '\0';
8774         }
8775         if (*s == '}') {
8776             s++;
8777             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8778                 PL_lex_state = LEX_INTERPEND;
8779                 PL_expect = XREF;
8780             }
8781             if (PL_lex_state == LEX_NORMAL) {
8782                 if (ckWARN(WARN_AMBIGUOUS) &&
8783                     (keyword(dest, d - dest, 0)
8784                      || get_cvn_flags(dest, d - dest, 0)))
8785                 {
8786                     if (funny == '#')
8787                         funny = '@';
8788                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8789                         "Ambiguous use of %c{%s} resolved to %c%s",
8790                         funny, dest, funny, dest);
8791                 }
8792             }
8793         }
8794         else {
8795             s = bracket;                /* let the parser handle it */
8796             *dest = '\0';
8797         }
8798     }
8799     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8800         PL_lex_state = LEX_INTERPEND;
8801     return s;
8802 }
8803
8804 static bool
8805 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8806
8807     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8808      * the parse starting at 's', based on the subset that are valid in this
8809      * context input to this routine in 'valid_flags'. Advances s.  Returns
8810      * TRUE if the input was a valid flag, so the next char may be as well;
8811      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8812      * current regex.  This routine will set it to any charset modifier found.
8813      * The caller shouldn't change it.  This way, another charset modifier
8814      * encountered in the parse can be detected as an error, as we have decided
8815      * allow only one */
8816
8817     const char c = **s;
8818
8819     if (! strchr(valid_flags, c)) {
8820         if (isALNUM(c)) {
8821             goto deprecate;
8822         }
8823         return FALSE;
8824     }
8825
8826     switch (c) {
8827
8828         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8829         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8830         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8831         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8832         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8833         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8834         case LOCALE_PAT_MOD:
8835
8836             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8837              * can't be regex modifiers.
8838              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8839              * much as easily done.  s///lei, for example, has to mean regex
8840              * modifiers if it's not an error (as does any word character
8841              * following the 'e').  Otherwise, we resolve to the backwards-
8842              * compatible, but less likely 's/// le ...', i.e. as meaning
8843              * less-than-or-equal.  The reason it's not likely is that s//
8844              * returns a number for code in the field (/r returns a string, but
8845              * that wasn't added until the 5.13 series), and so '<=' should be
8846              * used for comparing, not 'le'. */
8847             if (*((*s) + 1) == 't') {
8848                 goto deprecate;
8849             }
8850             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8851
8852                 /* 'e' is valid only for substitutes, s///e.  If it is not
8853                  * valid in the current context, then 'm//le' must mean the
8854                  * comparison operator, so use the regular deprecation message.
8855                  */
8856                 if (! strchr(valid_flags, 'e')) {
8857                     goto deprecate;
8858                 }
8859                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8860                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8861                 return FALSE;
8862             }
8863             if (*charset) {
8864                 goto multiple_charsets;
8865             }
8866             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8867             *charset = c;
8868             break;
8869         case UNICODE_PAT_MOD:
8870             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8871              * 'n' means they can't be regex modifiers */
8872             if (*((*s) + 1) == 'n') {
8873                 goto deprecate;
8874             }
8875             if (*charset) {
8876                 goto multiple_charsets;
8877             }
8878             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8879             *charset = c;
8880             break;
8881         case ASCII_RESTRICT_PAT_MOD:
8882             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8883              * can't be regex modifiers */
8884             if (*((*s) + 1) == 'n') {
8885                 goto deprecate;
8886             }
8887
8888             if (! *charset) {
8889                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8890             }
8891             else {
8892
8893                 /* Error if previous modifier wasn't an 'a', but if it was, see
8894                  * if, and accept, a second occurrence (only) */
8895                 if (*charset != 'a'
8896                     || get_regex_charset(*pmfl)
8897                         != REGEX_ASCII_RESTRICTED_CHARSET)
8898                 {
8899                         goto multiple_charsets;
8900                 }
8901                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8902             }
8903             *charset = c;
8904             break;
8905         case DEPENDS_PAT_MOD:
8906             if (*charset) {
8907                 goto multiple_charsets;
8908             }
8909             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8910             *charset = c;
8911             break;
8912     }
8913
8914     (*s)++;
8915     return TRUE;
8916
8917     deprecate:
8918         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8919             "Having no space between pattern and following word is deprecated");
8920         return FALSE;
8921
8922     multiple_charsets:
8923         if (*charset != c) {
8924             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8925         }
8926         else if (c == 'a') {
8927             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8928         }
8929         else {
8930             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8931         }
8932
8933         /* Pretend that it worked, so that parsing continues before dying */
8934         (*s)++;
8935         return TRUE;
8936 }
8937
8938 STATIC char *
8939 S_scan_pat(pTHX_ char *start, I32 type)
8940 {
8941     dVAR;
8942     PMOP *pm;
8943     char *s = scan_str(start,!!PL_madskills,FALSE);
8944     const char * const valid_flags =
8945         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8946     char charset = '\0';    /* character set modifier */
8947 #ifdef PERL_MAD
8948     char *modstart;
8949 #endif
8950
8951     PERL_ARGS_ASSERT_SCAN_PAT;
8952
8953     if (!s) {
8954         const char * const delimiter = skipspace(start);
8955         Perl_croak(aTHX_
8956                    (const char *)
8957                    (*delimiter == '?'
8958                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8959                     : "Search pattern not terminated" ));
8960     }
8961
8962     pm = (PMOP*)newPMOP(type, 0);
8963     if (PL_multi_open == '?') {
8964         /* This is the only point in the code that sets PMf_ONCE:  */
8965         pm->op_pmflags |= PMf_ONCE;
8966
8967         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8968            allows us to restrict the list needed by reset to just the ??
8969            matches.  */
8970         assert(type != OP_TRANS);
8971         if (PL_curstash) {
8972             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8973             U32 elements;
8974             if (!mg) {
8975                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8976                                  0);
8977             }
8978             elements = mg->mg_len / sizeof(PMOP**);
8979             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8980             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8981             mg->mg_len = elements * sizeof(PMOP**);
8982             PmopSTASH_set(pm,PL_curstash);
8983         }
8984     }
8985 #ifdef PERL_MAD
8986     modstart = s;
8987 #endif
8988     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8989 #ifdef PERL_MAD
8990     if (PL_madskills && modstart != s) {
8991         SV* tmptoken = newSVpvn(modstart, s - modstart);
8992         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8993     }
8994 #endif
8995     /* issue a warning if /c is specified,but /g is not */
8996     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8997     {
8998         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8999                        "Use of /c modifier is meaningless without /g" );
9000     }
9001
9002     PL_lex_op = (OP*)pm;
9003     pl_yylval.ival = OP_MATCH;
9004     return s;
9005 }
9006
9007 STATIC char *
9008 S_scan_subst(pTHX_ char *start)
9009 {
9010     dVAR;
9011     char *s;
9012     register PMOP *pm;
9013     I32 first_start;
9014     I32 es = 0;
9015     char charset = '\0';    /* character set modifier */
9016 #ifdef PERL_MAD
9017     char *modstart;
9018 #endif
9019
9020     PERL_ARGS_ASSERT_SCAN_SUBST;
9021
9022     pl_yylval.ival = OP_NULL;
9023
9024     s = scan_str(start,!!PL_madskills,FALSE);
9025
9026     if (!s)
9027         Perl_croak(aTHX_ "Substitution pattern not terminated");
9028
9029     if (s[-1] == PL_multi_open)
9030         s--;
9031 #ifdef PERL_MAD
9032     if (PL_madskills) {
9033         CURMAD('q', PL_thisopen);
9034         CURMAD('_', PL_thiswhite);
9035         CURMAD('E', PL_thisstuff);
9036         CURMAD('Q', PL_thisclose);
9037         PL_realtokenstart = s - SvPVX(PL_linestr);
9038     }
9039 #endif
9040
9041     first_start = PL_multi_start;
9042     s = scan_str(s,!!PL_madskills,FALSE);
9043     if (!s) {
9044         if (PL_lex_stuff) {
9045             SvREFCNT_dec(PL_lex_stuff);
9046             PL_lex_stuff = NULL;
9047         }
9048         Perl_croak(aTHX_ "Substitution replacement not terminated");
9049     }
9050     PL_multi_start = first_start;       /* so whole substitution is taken together */
9051
9052     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9053
9054 #ifdef PERL_MAD
9055     if (PL_madskills) {
9056         CURMAD('z', PL_thisopen);
9057         CURMAD('R', PL_thisstuff);
9058         CURMAD('Z', PL_thisclose);
9059     }
9060     modstart = s;
9061 #endif
9062
9063     while (*s) {
9064         if (*s == EXEC_PAT_MOD) {
9065             s++;
9066             es++;
9067         }
9068         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9069         {
9070             break;
9071         }
9072     }
9073
9074 #ifdef PERL_MAD
9075     if (PL_madskills) {
9076         if (modstart != s)
9077             curmad('m', newSVpvn(modstart, s - modstart));
9078         append_madprops(PL_thismad, (OP*)pm, 0);
9079         PL_thismad = 0;
9080     }
9081 #endif
9082     if ((pm->op_pmflags & PMf_CONTINUE)) {
9083         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9084     }
9085
9086     if (es) {
9087         SV * const repl = newSVpvs("");
9088
9089         PL_sublex_info.super_bufptr = s;
9090         PL_sublex_info.super_bufend = PL_bufend;
9091         PL_multi_end = 0;
9092         pm->op_pmflags |= PMf_EVAL;
9093         while (es-- > 0) {
9094             if (es)
9095                 sv_catpvs(repl, "eval ");
9096             else
9097                 sv_catpvs(repl, "do ");
9098         }
9099         sv_catpvs(repl, "{");
9100         sv_catsv(repl, PL_lex_repl);
9101         if (strchr(SvPVX(PL_lex_repl), '#'))
9102             sv_catpvs(repl, "\n");
9103         sv_catpvs(repl, "}");
9104         SvEVALED_on(repl);
9105         SvREFCNT_dec(PL_lex_repl);
9106         PL_lex_repl = repl;
9107     }
9108
9109     PL_lex_op = (OP*)pm;
9110     pl_yylval.ival = OP_SUBST;
9111     return s;
9112 }
9113
9114 STATIC char *
9115 S_scan_trans(pTHX_ char *start)
9116 {
9117     dVAR;
9118     register char* s;
9119     OP *o;
9120     short *tbl;
9121     U8 squash;
9122     U8 del;
9123     U8 complement;
9124     bool nondestruct = 0;
9125 #ifdef PERL_MAD
9126     char *modstart;
9127 #endif
9128
9129     PERL_ARGS_ASSERT_SCAN_TRANS;
9130
9131     pl_yylval.ival = OP_NULL;
9132
9133     s = scan_str(start,!!PL_madskills,FALSE);
9134     if (!s)
9135         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9136
9137     if (s[-1] == PL_multi_open)
9138         s--;
9139 #ifdef PERL_MAD
9140     if (PL_madskills) {
9141         CURMAD('q', PL_thisopen);
9142         CURMAD('_', PL_thiswhite);
9143         CURMAD('E', PL_thisstuff);
9144         CURMAD('Q', PL_thisclose);
9145         PL_realtokenstart = s - SvPVX(PL_linestr);
9146     }
9147 #endif
9148
9149     s = scan_str(s,!!PL_madskills,FALSE);
9150     if (!s) {
9151         if (PL_lex_stuff) {
9152             SvREFCNT_dec(PL_lex_stuff);
9153             PL_lex_stuff = NULL;
9154         }
9155         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9156     }
9157     if (PL_madskills) {
9158         CURMAD('z', PL_thisopen);
9159         CURMAD('R', PL_thisstuff);
9160         CURMAD('Z', PL_thisclose);
9161     }
9162
9163     complement = del = squash = 0;
9164 #ifdef PERL_MAD
9165     modstart = s;
9166 #endif
9167     while (1) {
9168         switch (*s) {
9169         case 'c':
9170             complement = OPpTRANS_COMPLEMENT;
9171             break;
9172         case 'd':
9173             del = OPpTRANS_DELETE;
9174             break;
9175         case 's':
9176             squash = OPpTRANS_SQUASH;
9177             break;
9178         case 'r':
9179             nondestruct = 1;
9180             break;
9181         default:
9182             goto no_more;
9183         }
9184         s++;
9185     }
9186   no_more:
9187
9188     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9189     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9190     o->op_private &= ~OPpTRANS_ALL;
9191     o->op_private |= del|squash|complement|
9192       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9193       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9194
9195     PL_lex_op = o;
9196     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9197
9198 #ifdef PERL_MAD
9199     if (PL_madskills) {
9200         if (modstart != s)
9201             curmad('m', newSVpvn(modstart, s - modstart));
9202         append_madprops(PL_thismad, o, 0);
9203         PL_thismad = 0;
9204     }
9205 #endif
9206
9207     return s;
9208 }
9209
/* scan_heredoc
   takes: s - current position in the input buffer.  The code steps over
          two characters before reading the terminator, so s presumably
          points at the "<<" introducer (caller not visible -- confirm).
   returns: position in the input buffer at which scanning should resume
   side-effects: the here-document body is collected into a new SV that is
        stored in PL_lex_stuff; pl_yylval.ival is set to OP_SCALAR (bare or
        double-quoted terminator), OP_CONST (<<'TAG' and <<\TAG) or
        OP_BACKTICK (<<`TAG`); PL_multi_start and PL_multi_end receive the
        current line number before and after the body is read; PL_linestr
        and the PL_buf* pointers may be rewritten.  missingterm() is called
        when the terminator cannot be found.
*/
9210 STATIC char *
9211 S_scan_heredoc(pTHX_ register char *s)
9212 {
9213     dVAR;
9214     SV *herewas;
9215     I32 op_type = OP_SCALAR;
9216     I32 len;
9217     SV *tmpstr;
9218     char term;
9219     const char *found_newline;
9220     register char *d;
9221     register char *e;
9222     char *peek;
         /* 'outer' is true when PL_rsfp is set and PL_lex_inwhat is not
          * OP_SCALAR.  It decides below whether the body is searched for in
          * text already buffered (!outer) or pulled in line by line through
          * lex_next_chunk() (outer). */
9223     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9224 #ifdef PERL_MAD
9225     I32 stuffstart = s - SvPVX(PL_linestr);
9226     char *tstart;
9227
9228     PL_realtokenstart = -1;
9229 #endif
9230
9231     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9232
         /* step over two characters -- presumably the "<<" itself
          * (TODO confirm against the caller) */
9233     s += 2;
         /* Build the terminator line in PL_tokenbuf; e is the last byte that
          * may be written.  When not 'outer' the terminator is stored with a
          * leading newline, so the buffer searches below can only match it
          * directly after a newline. */
9234     d = PL_tokenbuf;
9235     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9236     if (!outer)
9237         *d++ = '\n';
         /* blanks are allowed between "<<" and a quoted terminator */
9238     peek = s;
9239     while (SPACE_OR_TAB(*peek))
9240         peek++;
9241     if (*peek == '`' || *peek == '\'' || *peek =='"') {
             /* quoted terminator: the quote character becomes term and
              * delimcpy() copies the text up to the matching quote; len is
              * then used to advance d past the copied text */
9242         s = peek;
9243         term = *s++;
9244         s = delimcpy(d, e, s, PL_bufend, term, &len);
9245         d += len;
             /* step over the closing quote when there is one */
9246         if (s < PL_bufend)
9247             s++;
9248     }
9249     else {
             /* unquoted terminator: <<\TAG is treated like <<'TAG', a plain
              * <<TAG like <<"TAG" */
9250         if (*s == '\\')
9251             s++, term = '\'';
9252         else
9253             term = '"';
9254         if (!isALNUM_lazy_if(s,UTF))
9255             deprecate("bare << to mean <<\"\"");
             /* copy the word; characters that no longer fit are dropped
              * here and the overflow is reported just below */
9256         for (; isALNUM_lazy_if(s,UTF); s++) {
9257             if (d < e)
9258                 *d++ = *s;
9259         }
9260     }
9261     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9262         Perl_croak(aTHX_ "Delimiter for here document is too long");
         /* the terminator is matched as a whole line: append newline and
          * NUL; len is its total length as stored in PL_tokenbuf */
9263     *d++ = '\n';
9264     *d = '\0';
9265     len = d - PL_tokenbuf;
9266
9267 #ifdef PERL_MAD
9268     if (PL_madskills) {
9269         tstart = PL_tokenbuf + !outer;
9270         PL_thisclose = newSVpvn(tstart, len - !outer);
9271         tstart = SvPVX(PL_linestr) + stuffstart;
9272         PL_thisopen = newSVpvn(tstart, s - tstart);
9273         stuffstart = s - SvPVX(PL_linestr);
9274     }
9275 #endif
9276 #ifndef PERL_STRICT_CR
         /* From the first CR onward, rewrite the rest of the buffer in place
          * so that CR LF, a lone CR and LF CR each become a single LF, then
          * shorten PL_bufend and SvCUR(PL_linestr) to match.  s is restored
          * to its previous value afterwards. */
9277     d = strchr(s, '\r');
9278     if (d) {
9279         char * const olds = s;
9280         s = d;
9281         while (s < PL_bufend) {
9282             if (*s == '\r') {
9283                 *d++ = '\n';
9284                 if (*++s == '\n')
9285                     s++;
9286             }
                 /* LF followed by CR: keep the LF and skip the CR */
9287             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9288                 *d++ = *s++;
9289                 s++;
9290             }
9291             else
9292                 *d++ = *s++;
9293         }
9294         *d = '\0';
9295         PL_bufend = d;
9296         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9297         s = olds;
9298     }
9299 #endif
9300 #ifdef PERL_MAD
9301     found_newline = 0;
9302 #endif
         /* herewas keeps the source text that follows the terminator token:
          * the whole rest of the buffer when 'outer' or when no newline
          * remains, otherwise the text from one character before s up to
          * (not including) the next newline. */
9303     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9304         herewas = newSVpvn(s,PL_bufend-s);
9305     }
9306     else {
9307 #ifdef PERL_MAD
9308         herewas = newSVpvn(s-1,found_newline-s+1);
9309 #else
9310         s--;
9311         herewas = newSVpvn(s,found_newline-s);
9312 #endif
9313     }
9314 #ifdef PERL_MAD
9315     if (PL_madskills) {
9316         tstart = SvPVX(PL_linestr) + stuffstart;
9317         if (PL_thisstuff)
9318             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9319         else
9320             PL_thisstuff = newSVpvn(tstart, s - tstart);
9321     }
9322 #endif
         /* advance s by the length of the text just saved */
9323     s += SvCUR(herewas);
9324
9325 #ifdef PERL_MAD
9326     stuffstart = s - SvPVX(PL_linestr);
9327
9328     if (found_newline)
9329         s--;
9330 #endif
9331
         /* tmpstr accumulates the body.  Its IV slot is set to -1 for the
          * single-quote flavour and to '\\' for the backtick flavour, and is
          * not set here otherwise.  NOTE(review): the consumer of that IV is
          * not visible in this function. */
9332     tmpstr = newSV_type(SVt_PVIV);
9333     SvGROW(tmpstr, 80);
9334     if (term == '\'') {
9335         op_type = OP_CONST;
9336         SvIV_set(tmpstr, -1);
9337     }
9338     else if (term == '`') {
9339         op_type = OP_BACKTICK;
9340         SvIV_set(tmpstr, '\\');
9341     }
9342
9343     CLINE;
9344     PL_multi_start = CopLINE(PL_curcop);
9345     PL_multi_open = PL_multi_close = '<';
         /* from here on term is the first byte of the stored terminator,
          * used as a cheap pre-check before the full memNE/memEQ compare */
9346     term = *PL_tokenbuf;
         /* Case 1: inside a substitution, in an eval, with PL_rsfp unset.
          * The body is looked for in the buffer recorded in
          * PL_sublex_info.super_bufptr .. super_bufend. */
9347     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9348         char * const bufptr = PL_sublex_info.super_bufptr;
9349         char * const bufend = PL_sublex_info.super_bufend;
9350         char * const olds = s - SvCUR(herewas);
             /* the body starts after the first newline of that buffer */
9351         s = strchr(bufptr, '\n');
9352         if (!s)
9353             s = bufend;
9354         d = s;
             /* scan for the terminator, counting the lines passed */
9355         while (s < bufend &&
9356           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9357             if (*s++ == '\n')
9358                 CopLINE_inc(PL_curcop);
9359         }
9360         if (s >= bufend) {
9361             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9362             missingterm(PL_tokenbuf);
9363         }
             /* herewas := text before the body (through the newline at d);
              * tmpstr := the body; then the text following the terminator is
              * appended to herewas and the result is copied back over the
              * buffer, which removes the body and terminator from it */
9364         sv_setpvn(herewas,bufptr,d-bufptr+1);
9365         sv_setpvn(tmpstr,d+1,s-d);
9366         s += len - 1;
9367         sv_catpvn(herewas,s,bufend-s);
9368         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9369
9370         s = olds;
9371         goto retval;
9372     }
         /* Case 2: not 'outer' -- the body is searched for in the text
          * already held between s and PL_bufend. */
9373     else if (!outer) {
9374         d = s;
9375         while (s < PL_bufend &&
9376           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9377             if (*s++ == '\n')
9378                 CopLINE_inc(PL_curcop);
9379         }
9380         if (s >= PL_bufend) {
9381             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9382             missingterm(PL_tokenbuf);
9383         }
9384         sv_setpvn(tmpstr,d+1,s-d);
9385 #ifdef PERL_MAD
9386         if (PL_madskills) {
9387             if (PL_thisstuff)
9388                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9389             else
9390                 PL_thisstuff = newSVpvn(d + 1, s - d);
9391             stuffstart = s - SvPVX(PL_linestr);
9392         }
9393 #endif
9394         s += len - 1;
9395         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9396
             /* PL_linestr becomes: the saved text (herewas) followed by
              * whatever comes after the terminator; all buffer pointers are
              * reset to the start of the new PL_linestr */
9397         sv_catpvn(herewas,s,PL_bufend-s);
9398         sv_setsv(PL_linestr,herewas);
9399         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9400         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9401         PL_last_lop = PL_last_uni = NULL;
9402     }
         /* Case 3: 'outer' -- start with an empty body; the loop below
          * collects it line by line. */
9403     else
9404         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
         /* Keep going while s has reached the end of the buffer.  Only the
          * 'outer' case may fetch more input; in any other case, or when
          * lex_next_chunk() fails, the terminator is reported missing. */
9405     while (s >= PL_bufend) {    /* multiple line string? */
9406 #ifdef PERL_MAD
9407         if (PL_madskills) {
9408             tstart = SvPVX(PL_linestr) + stuffstart;
9409             if (PL_thisstuff)
9410                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9411             else
9412                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9413         }
9414 #endif
9415         PL_bufptr = s;
9416         CopLINE_inc(PL_curcop);
9417         if (!outer || !lex_next_chunk(0)) {
9418             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9419             missingterm(PL_tokenbuf);
9420         }
9421         CopLINE_dec(PL_curcop);
9422         s = PL_bufptr;
9423 #ifdef PERL_MAD
9424         stuffstart = s - SvPVX(PL_linestr);
9425 #endif
9426         CopLINE_inc(PL_curcop);
9427         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9428         PL_last_lop = PL_last_uni = NULL;
9429 #ifndef PERL_STRICT_CR
             /* normalise the line ending of the buffer's last line: a
              * trailing CR LF or LF CR pair becomes one LF, a lone trailing
              * CR becomes LF */
9430         if (PL_bufend - PL_linestart >= 2) {
9431             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9432                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9433             {
9434                 PL_bufend[-2] = '\n';
9435                 PL_bufend--;
9436                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9437             }
9438             else if (PL_bufend[-1] == '\r')
9439                 PL_bufend[-1] = '\n';
9440         }
9441         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9442             PL_bufend[-1] = '\n';
9443 #endif
9444         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
                 /* terminator line: overwrite the buffer's final byte with a
                  * space, append the saved text (herewas), and leave s on
                  * that space so the loop ends */
9445             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9446             *(SvPVX(PL_linestr) + off ) = ' ';
9447             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9448             sv_catsv(PL_linestr,herewas);
9449             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9450             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9451         }
9452         else {
                 /* ordinary body text: append the buffer to the body and
                  * force another iteration */
9453             s = PL_bufend;
9454             sv_catsv(tmpstr,PL_linestr);
9455         }
9456     }
9457     s++;
9458 retval:
9459     PL_multi_end = CopLINE(PL_curcop);
         /* give back the slack when more than 5 bytes are unused */
9460     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9461         SvPV_shrink_to_cur(tmpstr);
9462     }
9463     SvREFCNT_dec(herewas);
         /* unless IN_BYTES: flag the body as UTF-8 when UTF is on and the
          * bytes are valid UTF-8; otherwise recode it when PL_encoding is set */
9464     if (!IN_BYTES) {
9465         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9466             SvUTF8_on(tmpstr);
9467         else if (PL_encoding)
9468             sv_recode_to_utf8(tmpstr, PL_encoding);
9469     }
9470     PL_lex_stuff = tmpstr;
9471     pl_yylval.ival = op_type;
9472     return s;
9473 }
9474
9475 /* scan_inputsymbol
9476    takes: current position in input buffer
9477    returns: new position in input buffer
9478    side-effects: pl_yylval and lex_op are set.
9479
9480    This code handles:
9481
9482    <>           read from ARGV
9483    <FH>         read from filehandle
9484    <pkg::FH>    read from package qualified filehandle
9485    <pkg'FH>     read from package qualified filehandle
9486    <$fh>        read from filehandle in $fh
9487    <*.h>        filename glob
9488
9489 */
9490
9491 STATIC char *
9492 S_scan_inputsymbol(pTHX_ char *start)
9493 {
9494     dVAR;
9495     register char *s = start;           /* current position in buffer */
9496     char *end;
9497     I32 len;
9498     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9499     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9500
9501     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9502
9503     end = strchr(s, '\n');
9504     if (!end)
9505         end = PL_bufend;
9506     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9507
9508     /* die if we didn't have space for the contents of the <>,
9509        or if it didn't end, or if we see a newline
9510     */
9511
9512     if (len >= (I32)sizeof PL_tokenbuf)
9513         Perl_croak(aTHX_ "Excessively long <> operator");
9514     if (s >= end)
9515         Perl_croak(aTHX_ "Unterminated <> operator");
9516
9517     s++;
9518
9519     /* check for <$fh>
9520        Remember, only scalar variables are interpreted as filehandles by
9521        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9522        treated as a glob() call.
9523        This code makes use of the fact that except for the $ at the front,
9524        a scalar variable and a filehandle look the same.
9525     */
9526     if (*d == '$' && d[1]) d++;
9527
9528     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9529     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9530         d++;
9531
9532     /* If we've tried to read what we allow filehandles to look like, and
9533        there's still text left, then it must be a glob() and not a getline.
9534        Use scan_str to pull out the stuff between the <> and treat it
9535        as nothing more than a string.
9536     */
9537
9538     if (d - PL_tokenbuf != len) {
9539         pl_yylval.ival = OP_GLOB;
9540         s = scan_str(start,!!PL_madskills,FALSE);
9541         if (!s)
9542            Perl_croak(aTHX_ "Glob not terminated");
9543         return s;
9544     }
9545     else {
9546         bool readline_overriden = FALSE;
9547         GV *gv_readline;
9548         GV **gvp;
9549         /* we're in a filehandle read situation */
9550         d = PL_tokenbuf;
9551
9552         /* turn <> into <ARGV> */
9553         if (!len)
9554             Copy("ARGV",d,5,char);
9555
9556         /* Check whether readline() is overriden */
9557         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9558         if ((gv_readline
9559                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9560                 ||
9561                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9562                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9563                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9564             readline_overriden = TRUE;
9565
9566         /* if <$fh>, create the ops to turn the variable into a
9567            filehandle
9568         */
9569         if (*d == '$') {
9570             /* try to find it in the pad for this block, otherwise find
9571                add symbol table ops
9572             */
9573             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9574             if (tmp != NOT_IN_PAD) {
9575                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9576                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9577                     HEK * const stashname = HvNAME_HEK(stash);
9578                     SV * const sym = sv_2mortal(newSVhek(stashname));
9579                     sv_catpvs(sym, "::");
9580                     sv_catpv(sym, d+1);
9581                     d = SvPVX(sym);
9582                     goto intro_sym;
9583                 }
9584                 else {
9585                     OP * const o = newOP(OP_PADSV, 0);
9586                     o->op_targ = tmp;
9587                     PL_lex_op = readline_overriden
9588                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9589                                 op_append_elem(OP_LIST, o,
9590                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9591                         : (OP*)newUNOP(OP_READLINE, 0, o);
9592                 }
9593             }
9594             else {
9595                 GV *gv;
9596                 ++d;
9597 intro_sym:
9598                 gv = gv_fetchpv(d,
9599                                 (PL_in_eval
9600                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9601                                  : GV_ADDMULTI),
9602                                 SVt_PV);
9603                 PL_lex_op = readline_overriden
9604                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9605                             op_append_elem(OP_LIST,
9606                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9607                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9608                     : (OP*)newUNOP(OP_READLINE, 0,
9609                             newUNOP(OP_RV2SV, 0,
9610                                 newGVOP(OP_GV, 0, gv)));
9611             }
9612             if (!readline_overriden)
9613                 PL_lex_op->op_flags |= OPf_SPECIAL;
9614             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9615             pl_yylval.ival = OP_NULL;
9616         }
9617
9618         /* If it's none of the above, it must be a literal filehandle
9619            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9620         else {
9621             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9622             PL_lex_op = readline_overriden
9623                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9624                         op_append_elem(OP_LIST,
9625                             newGVOP(OP_GV, 0, gv),
9626                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9627                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9628             pl_yylval.ival = OP_NULL;
9629         }
9630     }
9631
9632     return s;
9633 }
9634
9635
9636 /* scan_str
9637    takes: start position in buffer
9638           keep_quoted preserve \ on the embedded delimiter(s)
9639           keep_delims preserve the delimiters around the string
9640    returns: position to continue reading from buffer
9641    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9642         updates the read buffer.
9643
9644    This subroutine pulls a string out of the input.  It is called for:
9645         q               single quotes           q(literal text)
9646         '               single quotes           'literal text'
9647         qq              double quotes           qq(interpolate $here please)
9648         "               double quotes           "interpolate $here please"
9649         qx              backticks               qx(/bin/ls -l)
9650         `               backticks               `/bin/ls -l`
9651         qw              quote words             @EXPORT_OK = qw( func() $spam )
9652         m//             regexp match            m/this/
9653         s///            regexp substitute       s/this/that/
9654         tr///           string transliterate    tr/this/that/
9655         y///            string transliterate    y/this/that/
9656         ($*@)           sub prototypes          sub foo ($)
9657         (stuff)         sub attr parameters     sub foo : attr(stuff)
9658         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9659
9660    In most of these cases (all but <>, patterns and transliterate)
9661    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9662    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9663    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9664    calls scan_str().
9665
9666    It skips whitespace before the string starts, and treats the first
9667    character as the delimiter.  If the delimiter is one of ([{< then
9668    the corresponding "close" character )]}> is used as the closing
9669    delimiter.  It allows quoting of delimiters, and if the string has
9670    balanced delimiters ([{<>}]) it allows nesting.
9671
9672    On success, the SV with the resulting string is put into lex_stuff or,
9673    if that is already non-NULL, into lex_repl. The second case occurs only
9674    when parsing the RHS of the special constructs s/// and tr/// (y///).
9675    For convenience, the terminating delimiter character is stuffed into
9676    SvIVX of the SV.
9677 */
9678
9679 STATIC char *
9680 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9681 {
9682     dVAR;
9683     SV *sv;                             /* scalar value: string */
9684     const char *tmps;                   /* temp string, used for delimiter matching */
9685     register char *s = start;           /* current position in the buffer */
9686     register char term;                 /* terminating character */
9687     register char *to;                  /* current position in the sv's data */
9688     I32 brackets = 1;                   /* bracket nesting level */
9689     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9690     I32 termcode;                       /* terminating char. code */
9691     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9692     STRLEN termlen;                     /* length of terminating string */
9693     int last_off = 0;                   /* last position for nesting bracket */
9694 #ifdef PERL_MAD
9695     int stuffstart;
9696     char *tstart;
9697 #endif
9698
9699     PERL_ARGS_ASSERT_SCAN_STR;
9700
9701     /* skip space before the delimiter */
9702     if (isSPACE(*s)) {
9703         s = PEEKSPACE(s);
9704     }
9705
9706 #ifdef PERL_MAD
9707     if (PL_realtokenstart >= 0) {
9708         stuffstart = PL_realtokenstart;
9709         PL_realtokenstart = -1;
9710     }
9711     else
9712         stuffstart = start - SvPVX(PL_linestr);
9713 #endif
9714     /* mark where we are, in case we need to report errors */
9715     CLINE;
9716
9717     /* after skipping whitespace, the next character is the terminator */
9718     term = *s;
9719     if (!UTF) {
9720         termcode = termstr[0] = term;
9721         termlen = 1;
9722     }
9723     else {
9724         termcode = utf8_to_uvchr((U8*)s, &termlen);
9725         Copy(s, termstr, termlen, U8);
9726         if (!UTF8_IS_INVARIANT(term))
9727             has_utf8 = TRUE;
9728     }
9729
9730     /* mark where we are */
9731     PL_multi_start = CopLINE(PL_curcop);
9732     PL_multi_open = term;
9733
9734     /* find corresponding closing delimiter */
9735     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9736         termcode = termstr[0] = term = tmps[5];
9737
9738     PL_multi_close = term;
9739
9740     /* create a new SV to hold the contents.  79 is the SV's initial length.
9741        What a random number. */
9742     sv = newSV_type(SVt_PVIV);
9743     SvGROW(sv, 80);
9744     SvIV_set(sv, termcode);
9745     (void)SvPOK_only(sv);               /* validate pointer */
9746
9747     /* move past delimiter and try to read a complete string */
9748     if (keep_delims)
9749         sv_catpvn(sv, s, termlen);
9750     s += termlen;
9751 #ifdef PERL_MAD
9752     tstart = SvPVX(PL_linestr) + stuffstart;
9753     if (!PL_thisopen && !keep_delims) {
9754         PL_thisopen = newSVpvn(tstart, s - tstart);
9755         stuffstart = s - SvPVX(PL_linestr);
9756     }
9757 #endif
9758     for (;;) {
9759         if (PL_encoding && !UTF) {
9760             bool cont = TRUE;
9761
9762             while (cont) {
9763                 int offset = s - SvPVX_const(PL_linestr);
9764                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9765                                            &offset, (char*)termstr, termlen);
9766                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9767                 char * const svlast = SvEND(sv) - 1;
9768
9769                 for (; s < ns; s++) {
9770                     if (*s == '\n' && !PL_rsfp)
9771                         CopLINE_inc(PL_curcop);
9772                 }
9773                 if (!found)
9774                     goto read_more_line;
9775                 else {
9776                     /* handle quoted delimiters */
9777                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9778                         const char *t;
9779                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9780                             t--;
9781                         if ((svlast-1 - t) % 2) {
9782                             if (!keep_quoted) {
9783                                 *(svlast-1) = term;
9784                                 *svlast = '\0';
9785                                 SvCUR_set(sv, SvCUR(sv) - 1);
9786                             }
9787                             continue;
9788                         }
9789                     }
9790                     if (PL_multi_open == PL_multi_close) {
9791                         cont = FALSE;
9792                     }
9793                     else {
9794                         const char *t;
9795                         char *w;
9796                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9797                             /* At here, all closes are "was quoted" one,
9798                                so we don't check PL_multi_close. */
9799                             if (*t == '\\') {
9800                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9801                                     t++;
9802                                 else
9803                                     *w++ = *t++;
9804                             }
9805                             else if (*t == PL_multi_open)
9806                                 brackets++;
9807
9808                             *w = *t;
9809                         }
9810                         if (w < t) {
9811                             *w++ = term;
9812                             *w = '\0';
9813                             SvCUR_set(sv, w - SvPVX_const(sv));
9814                         }
9815                         last_off = w - SvPVX(sv);
9816                         if (--brackets <= 0)
9817                             cont = FALSE;
9818                     }
9819                 }
9820             }
9821             if (!keep_delims) {
9822                 SvCUR_set(sv, SvCUR(sv) - 1);
9823                 *SvEND(sv) = '\0';
9824             }
9825             break;
9826         }
9827
9828         /* extend sv if need be */
9829         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9830         /* set 'to' to the next character in the sv's string */
9831         to = SvPVX(sv)+SvCUR(sv);
9832
9833         /* if open delimiter is the close delimiter read unbridle */
9834         if (PL_multi_open == PL_multi_close) {
9835             for (; s < PL_bufend; s++,to++) {
9836                 /* embedded newlines increment the current line number */
9837                 if (*s == '\n' && !PL_rsfp)
9838                     CopLINE_inc(PL_curcop);
9839                 /* handle quoted delimiters */
9840                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9841                     if (!keep_quoted && s[1] == term)
9842                         s++;
9843                 /* any other quotes are simply copied straight through */
9844                     else
9845                         *to++ = *s++;
9846                 }
9847                 /* terminate when run out of buffer (the for() condition), or
9848                    have found the terminator */
9849                 else if (*s == term) {
9850                     if (termlen == 1)
9851                         break;
9852                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9853                         break;
9854                 }
9855                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9856                     has_utf8 = TRUE;
9857                 *to = *s;
9858             }
9859         }
9860
9861         /* if the terminator isn't the same as the start character (e.g.,
9862            matched brackets), we have to allow more in the quoting, and
9863            be prepared for nested brackets.
9864         */
9865         else {
9866             /* read until we run out of string, or we find the terminator */
9867             for (; s < PL_bufend; s++,to++) {
9868                 /* embedded newlines increment the line count */
9869                 if (*s == '\n' && !PL_rsfp)
9870                     CopLINE_inc(PL_curcop);
9871                 /* backslashes can escape the open or closing characters */
9872                 if (*s == '\\' && s+1 < PL_bufend) {
9873                     if (!keep_quoted &&
9874                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9875                         s++;
9876                     else
9877                         *to++ = *s++;
9878                 }
9879                 /* allow nested opens and closes */
9880                 else if (*s == PL_multi_close && --brackets <= 0)
9881                     break;
9882                 else if (*s == PL_multi_open)
9883                     brackets++;
9884                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9885                     has_utf8 = TRUE;
9886                 *to = *s;
9887             }
9888         }
9889         /* terminate the copied string and update the sv's end-of-string */
9890         *to = '\0';
9891         SvCUR_set(sv, to - SvPVX_const(sv));
9892
9893         /*
9894          * this next chunk reads more into the buffer if we're not done yet
9895          */
9896
9897         if (s < PL_bufend)
9898             break;              /* handle case where we are done yet :-) */
9899
9900 #ifndef PERL_STRICT_CR
9901         if (to - SvPVX_const(sv) >= 2) {
9902             if ((to[-2] == '\r' && to[-1] == '\n') ||
9903                 (to[-2] == '\n' && to[-1] == '\r'))
9904             {
9905                 to[-2] = '\n';
9906                 to--;
9907                 SvCUR_set(sv, to - SvPVX_const(sv));
9908             }
9909             else if (to[-1] == '\r')
9910                 to[-1] = '\n';
9911         }
9912         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9913             to[-1] = '\n';
9914 #endif
9915
9916      read_more_line:
9917         /* if we're out of file, or a read fails, bail and reset the current
9918            line marker so we can report where the unterminated string began
9919         */
9920 #ifdef PERL_MAD
9921         if (PL_madskills) {
9922             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9923             if (PL_thisstuff)
9924                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9925             else
9926                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9927         }
9928 #endif
9929         CopLINE_inc(PL_curcop);
9930         PL_bufptr = PL_bufend;
9931         if (!lex_next_chunk(0)) {
9932             sv_free(sv);
9933             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9934             return NULL;
9935         }
9936         s = PL_bufptr;
9937 #ifdef PERL_MAD
9938         stuffstart = 0;
9939 #endif
9940     }
9941
9942     /* at this point, we have successfully read the delimited string */
9943
9944     if (!PL_encoding || UTF) {
9945 #ifdef PERL_MAD
9946         if (PL_madskills) {
9947             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9948             const int len = s - tstart;
9949             if (PL_thisstuff)
9950                 sv_catpvn(PL_thisstuff, tstart, len);
9951             else
9952                 PL_thisstuff = newSVpvn(tstart, len);
9953             if (!PL_thisclose && !keep_delims)
9954                 PL_thisclose = newSVpvn(s,termlen);
9955         }
9956 #endif
9957
9958         if (keep_delims)
9959             sv_catpvn(sv, s, termlen);
9960         s += termlen;
9961     }
9962 #ifdef PERL_MAD
9963     else {
9964         if (PL_madskills) {
9965             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9966             const int len = s - tstart - termlen;
9967             if (PL_thisstuff)
9968                 sv_catpvn(PL_thisstuff, tstart, len);
9969             else
9970                 PL_thisstuff = newSVpvn(tstart, len);
9971             if (!PL_thisclose && !keep_delims)
9972                 PL_thisclose = newSVpvn(s - termlen,termlen);
9973         }
9974     }
9975 #endif
9976     if (has_utf8 || PL_encoding)
9977         SvUTF8_on(sv);
9978
9979     PL_multi_end = CopLINE(PL_curcop);
9980
9981     /* if we allocated too much space, give some back */
9982     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9983         SvLEN_set(sv, SvCUR(sv) + 1);
9984         SvPV_renew(sv, SvLEN(sv));
9985     }
9986
9987     /* decide whether this is the first or second quoted string we've read
9988        for this op
9989     */
9990
9991     if (PL_lex_stuff)
9992         PL_lex_repl = sv;
9993     else
9994         PL_lex_stuff = sv;
9995     return s;
9996 }
9997
9998 /*
9999   scan_num
10000   takes: pointer to position in buffer, and the YYSTYPE to fill in
10001   returns: pointer to new position in buffer
10002   side-effects: builds an OP_CONST op for the constant in lvalp->opval
10003
10004   Read a number in any of the formats that Perl accepts:
10005
10006   \d(_?\d)*(\.(\d(_?\d)*)?)?([Ee][\+\-]?(\d(_?\d)*))?   12 12.34 12.
10007   \.\d(_?\d)*([Ee][\+\-]?(\d(_?\d)*))?                  .34
10008   0b[01](_?[01])*
10009   0[0-7](_?[0-7])*
10010   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10011
10012   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10013   thing it reads.
10014
10015   If it reads a number without a decimal point or an exponent, it will
10016   try converting the number to an integer and see if it can do so
10017   without loss of precision.
10018 */
10019
10020 char *
10021 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10022 {
          /* start: first character of the numeric literal (its value selects
           *        the branch of the switch below).
           * lvalp: receives the result in lvalp->opval: an OP_CONST op wrapping
           *        the SV that was built, or NULL if no SV was produced.
           * Returns s, the scan position reached after the literal. */
10023     dVAR;
10024     register const char *s = start;     /* current position in buffer */
10025     register char *d;                   /* destination in temp buffer */
10026     register char *e;                   /* end of temp buffer */
10027     NV nv;                              /* number read, as a double */
10028     SV *sv = NULL;                      /* place to put the converted number */
10029     bool floatit;                       /* boolean: int or float? */
10030     const char *lastub = NULL;          /* position of last underbar */
10031     static char const number_too_long[] = "Number too long";
10032
10033     PERL_ARGS_ASSERT_SCAN_NUM;
10034
10035     /* We use the first character to decide what type of number this is */
10036
10037     switch (*s) {
10038     default:
            /* anything that is not a digit, '.' or 'v' is not handled here */
10039       Perl_croak(aTHX_ "panic: scan_num");
10040
10041     /* if it starts with a 0, it could be an octal number, a decimal in
10042        0.13 disguise, or a hexadecimal number, or a binary number. */
10043     case '0':
10044         {
10045           /* variables:
10046              u          holds the "number so far"
10047              shift      the power of 2 of the base
10048                         (hex == 4, octal == 3, binary == 1)
10049              overflowed was the number more than we can hold?
10050
10051              Shift is used when we add a digit.  It also serves as an "are
10052              we in octal/hex/binary?" indicator to disallow hex characters
10053              when in octal mode.
10054            */
10055             NV n = 0.0;
10056             UV u = 0;
10057             I32 shift;
10058             bool overflowed = FALSE;
10059             bool just_zero  = TRUE;     /* still a lone 0 (no 0x/0b, no digit)? */
                  /* the tables below are indexed by shift; slots 0 and 2 are unused */
10060             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10061             static const char* const bases[5] =
10062               { "", "binary", "", "octal", "hexadecimal" };
10063             static const char* const Bases[5] =
10064               { "", "Binary", "", "Octal", "Hexadecimal" };
10065             static const char* const maxima[5] =
10066               { "",
10067                 "0b11111111111111111111111111111111",
10068                 "",
10069                 "037777777777",
10070                 "0xffffffff" };
10071             const char *base, *Base, *max;
10072
10073             /* check for hex */
10074             if (s[1] == 'x' || s[1] == 'X') {
10075                 shift = 4;
10076                 s += 2;
10077                 just_zero = FALSE;
10078             } else if (s[1] == 'b' || s[1] == 'B') {
                      /* binary: 0b / 0B prefix */
10079                 shift = 1;
10080                 s += 2;
10081                 just_zero = FALSE;
10082             }
10083             /* check for a decimal in disguise */
10084             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10085                 goto decimal;
10086             /* so it must be octal */
10087             else {
10088                 shift = 3;
10089                 s++;
10090             }
10091
                  /* an underscore directly after the prefix is warned about,
                     then skipped */
10092             if (*s == '_') {
10093                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10094                                "Misplaced _ in number");
10095                lastub = s++;
10096             }
10097
                  /* pick the names/limit used in the warnings for this base */
10098             base = bases[shift];
10099             Base = Bases[shift];
10100             max  = maxima[shift];
10101
10102             /* read the rest of the number */
10103             for (;;) {
10104                 /* x is used in the overflow test,
10105                    b is the digit we're adding on. */
10106                 UV x, b;
10107
10108                 switch (*s) {
10109
10110                 /* if we don't mention it, we're done */
10111                 default:
10112                     goto out;
10113
10114                 /* _ are ignored -- but warned about if consecutive */
10115                 case '_':
10116                     if (lastub && s == lastub + 1)
10117                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10118                                        "Misplaced _ in number");
10119                     lastub = s++;
10120                     break;
10121
10122                 /* 8 and 9 are not octal */
10123                 case '8': case '9':
10124                     if (shift == 3)
10125                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10126                     /* FALL THROUGH */
10127
10128                 /* octal digits */
10129                 case '2': case '3': case '4':
10130                 case '5': case '6': case '7':
10131                     if (shift == 1)
10132                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10133                     /* FALL THROUGH */
10134
                      /* after yyerror() the offending digit is still consumed
                         and accumulated below */
10135                 case '0': case '1':
10136                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10137                     goto digit;
10138
10139                 /* hex digits */
10140                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10141                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10142                     /* make sure they said 0x */
10143                     if (shift != 4)
10144                         goto out;
10145                     b = (*s++ & 7) + 9;         /* ASCII a/A..f/F -> 10..15 */
10146
10147                     /* Prepare to put the digit we have onto the end
10148                        of the number so far.  We check for overflows.
10149                     */
10150
10151                   digit:
10152                     just_zero = FALSE;
10153                     if (!overflowed) {
10154                         x = u << shift; /* make room for the digit */
10155
                              /* if shifting back does not give u again, high
                                 bits were lost.  With HINT_NEW_BINARY set the
                                 loss is not flagged and u keeps the truncated
                                 value. */
10156                         if ((x >> shift) != u
10157                             && !(PL_hints & HINT_NEW_BINARY)) {
10158                             overflowed = TRUE;
10159                             n = (NV) u;         /* continue accumulating in n */
10160                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10161                                              "Integer overflow in %s number",
10162                                              base);
10163                         } else
10164                             u = x | b;          /* add the digit to the end */
10165                     }
10166                     if (overflowed) {
10167                         n *= nvshift[shift];
10168                         /* If an NV has not enough bits in its
10169                          * mantissa to represent an UV this summing of
10170                          * small low-order numbers is a waste of time
10171                          * (because the NV cannot preserve the
10172                          * low-order bits anyway): we could just
10173                          * remember when did we overflow and in the
10174                          * end just multiply n by the right
10175                          * amount. */
10176                         n += (NV) b;
10177                     }
10178                     break;
10179                 }
10180             }
10181
10182           /* if we get here, we had success: make a scalar value from
10183              the number.
10184           */
10185           out:
10186
10187             /* final misplaced underbar check */
10188             if (s[-1] == '_') {
10189                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10190             }
10191
                  /* overflowed values live in n (an NV), the rest in u (a UV);
                     either way warn if the value exceeds 32 bits */
10192             if (overflowed) {
10193                 if (n > 4294967295.0)
10194                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10195                                    "%s number > %s non-portable",
10196                                    Base, max);
10197                 sv = newSVnv(n);
10198             }
10199             else {
10200 #if UVSIZE > 4
10201                 if (u > 0xffffffff)
10202                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10203                                    "%s number > %s non-portable",
10204                                    Base, max);
10205 #endif
10206                 sv = newSVuv(u);
10207             }
                  /* a lone 0 goes through new_constant() with key "integer",
                     any other literal handled here with key "binary", each
                     only when the corresponding hint bit is set */
10208             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10209                 sv = new_constant(start, s - start, "integer",
10210                                   sv, NULL, NULL, 0);
10211             else if (PL_hints & HINT_NEW_BINARY)
10212                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10213         }
10214         break;
10215
10216     /*
10217       handle decimal numbers.
10218       we're also sent here when we read a 0 as the first digit
10219     */
10220     case '1': case '2': case '3': case '4': case '5':
10221     case '6': case '7': case '8': case '9': case '.':
10222       decimal:
10223         d = PL_tokenbuf;
10224         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10225         floatit = FALSE;
10226
10227         /* read next group of digits and _ and copy into d */
10228         while (isDIGIT(*s) || *s == '_') {
10229             /* skip underscores, checking for misplaced ones
10230                if -w is on
10231             */
10232             if (*s == '_') {
10233                 if (lastub && s == lastub + 1)
10234                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10235                                    "Misplaced _ in number");
10236                 lastub = s++;
10237             }
10238             else {
10239                 /* check for end of fixed-length buffer */
10240                 if (d >= e)
10241                     Perl_croak(aTHX_ number_too_long);
10242                 /* if we're ok, copy the character */
10243                 *d++ = *s++;
10244             }
10245         }
10246
10247         /* final misplaced underbar check */
10248         if (lastub && s == lastub + 1) {
10249             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10250         }
10251
10252         /* read a decimal portion if there is one.  avoid
10253            3..5 being interpreted as the number 3. followed
10254            by .5
10255         */
10256         if (*s == '.' && s[1] != '.') {
10257             floatit = TRUE;
10258             *d++ = *s++;        /* copy the '.' itself */
10259
10260             if (*s == '_') {
10261                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10262                                "Misplaced _ in number");
10263                 lastub = s;
10264             }
10265
10266             /* copy, ignoring underbars, until we run out of digits.
10267             */
10268             for (; isDIGIT(*s) || *s == '_'; s++) {
10269                 /* fixed length buffer check */
10270                 if (d >= e)
10271                     Perl_croak(aTHX_ number_too_long);
10272                 if (*s == '_') {
10273                    if (lastub && s == lastub + 1)
10274                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10275                                       "Misplaced _ in number");
10276                    lastub = s;
10277                 }
10278                 else
10279                     *d++ = *s;
10280             }
10281             /* fractional part ending in underbar? */
10282             if (s[-1] == '_') {
10283                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10284                                "Misplaced _ in number");
10285             }
10286             if (*s == '.' && isDIGIT(s[1])) {
10287                 /* oops, it's really a v-string, but without the "v" */
10288                 s = start;      /* rescan the whole literal as a v-string */
10289                 goto vstring;
10290             }
10291         }
10292
10293         /* read exponent part, if present */
              /* NOTE(review): strchr() also matches the string's terminating
                 NUL, so this test is true when s[1] == '\0' as well -- confirm
                 that is intended */
10294         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10295             floatit = TRUE;
10296             s++;
10297
10298             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10299             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10300
10301             /* stray preinitial _ */
10302             if (*s == '_') {
10303                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10304                                "Misplaced _ in number");
10305                 lastub = s++;
10306             }
10307
10308             /* allow positive or negative exponent */
10309             if (*s == '+' || *s == '-')
10310                 *d++ = *s++;
10311
10312             /* stray initial _ */
10313             if (*s == '_') {
10314                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10315                                "Misplaced _ in number");
10316                 lastub = s++;
10317             }
10318
10319             /* read digits of exponent */
10320             while (isDIGIT(*s) || *s == '_') {
10321                 if (isDIGIT(*s)) {
10322                     if (d >= e)
10323                         Perl_croak(aTHX_ number_too_long);
10324                     *d++ = *s++;
10325                 }
10326                 else {
                         /* warn on a doubled underscore, or on one that is not
                            followed by a digit or another underscore */
10327                    if (((lastub && s == lastub + 1) ||
10328                         (!isDIGIT(s[1]) && s[1] != '_')))
10329                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10330                                       "Misplaced _ in number");
10331                    lastub = s++;
10332                 }
10333             }
10334         }
10335
10336
10337         /*
10338            We try to do an integer conversion first if no characters
10339            indicating "float" have been found.
10340          */
10341
10342         if (!floatit) {
10343             UV uv;
                  /* the digits collected so far are PL_tokenbuf .. d (not yet
                     NUL-terminated, hence the explicit length) */
10344             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10345
10346             if (flags == IS_NUMBER_IN_UV) {
10347               if (uv <= IV_MAX)
10348                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10349               else
10350                 sv = newSVuv(uv);
10351             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10352               if (uv <= (UV) IV_MIN)
10353                 sv = newSViv(-(IV)uv);
10354               else
10355                 floatit = TRUE;
10356             } else
10357               floatit = TRUE;   /* any other result: fall back to an NV */
10358         }
10359         if (floatit) {
10360             /* terminate the string */
10361             *d = '\0';
10362             nv = Atof(PL_tokenbuf);
10363             sv = newSVnv(nv);
10364         }
10365
              /* S_new_constant is called directly here: the key is chosen at
                 run time, whereas the new_constant() macro applies
                 STR_WITH_LEN() to its key argument */
10366         if ( floatit
10367              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10368             const char *const key = floatit ? "float" : "integer";
10369             const STRLEN keylen = floatit ? 5 : 7;
10370             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10371                                 key, keylen, sv, NULL, NULL, 0);
10372         }
10373         break;
10374
10375     /* if it starts with a v, it could be a v-string */
          /* (the decimal code above also jumps to vstring: with s reset to start) */
10376     case 'v':
10377 vstring:
10378                 sv = newSV(5); /* preallocate storage space */
10379                 s = scan_vstring(s, PL_bufend, sv);
10380         break;
10381     }
10382
10383     /* make the op for the constant and return */
10384
10385     if (sv)
10386         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10387     else
10388         lvalp->opval = NULL;
10389
10390     return (char *)s;   /* s is declared const char *; the cast drops the const */
10391 }
10392
/*
 * S_scan_formline
 *
 * Collects consecutive lines of a format body, starting at s, into one SV
 * ("stuff").  Collection stops at a line consisting of a lone '.', after a
 * line containing '@' or '^' (the loop runs while !needargs), at a
 * "~~" line that is not the first collected line, or when no more input
 * can be read.
 *
 * If anything was collected, the SV is queued as an OP_CONST THING token
 * followed by an LSTOP token carrying OP_FORMLINE (preceded by a ',' token
 * when needargs is set) and the lexer state is updated; otherwise the SV is
 * released and PL_bufptr is set to s.
 *
 * Returns the scan position s reached.
 */
10393 STATIC char *
10394 S_scan_formline(pTHX_ register char *s)
10395 {
10396     dVAR;
10397     register char *eol;         /* end of the line being examined */
10398     register char *t;           /* scratch scan pointer */
10399     SV * const stuff = newSVpvs("");    /* accumulated picture text */
10400     bool needargs = FALSE;      /* saw '@' or '^' on a collected line */
10401     bool eofmt = FALSE;         /* saw the terminating "." line */
10402 #ifdef PERL_MAD
10403     char *tokenstart = s;
10404     SV* savewhite = NULL;
10405
          /* set PL_thiswhite aside for the duration; it is put back before
             returning */
10406     if (PL_madskills) {
10407         savewhite = PL_thiswhite;
10408         PL_thiswhite = 0;
10409     }
10410 #endif
10411
10412     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10413
10414     while (!needargs) {
              /* a '.' followed only by spaces/tabs (and '\r' unless
                 PERL_STRICT_CR) up to the newline or the end of the buffer
                 ends the format */
10415         if (*s == '.') {
10416             t = s+1;
10417 #ifdef PERL_STRICT_CR
10418             while (SPACE_OR_TAB(*t))
10419                 t++;
10420 #else
10421             while (SPACE_OR_TAB(*t) || *t == '\r')
10422                 t++;
10423 #endif
10424             if (*t == '\n' || t == PL_bufend) {
10425                 eofmt = TRUE;
10426                 break;
10427             }
10428         }
              /* find the end of the current line: with PL_in_eval set and no
                 PL_rsfp, search the buffer for the next newline; otherwise the
                 line is taken to run to the end of PL_linestr */
10429         if (PL_in_eval && !PL_rsfp) {
10430             eol = (char *) memchr(s,'\n',PL_bufend-s);
                  /* NULL: no newline found, use the buffer end; otherwise the
                     post-increment leaves eol one past the '\n' */
10431             if (!eol++)
10432                 eol = PL_bufend;
10433         }
10434         else
10435             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
              /* lines starting with '#' are not added to stuff */
10436         if (*s != '#') {
10437             for (t = s; t < eol; t++) {
                      /* "~~" after text has already been collected: stop here
                         without consuming this line (s is not advanced) */
10438                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10439                     needargs = FALSE;
10440                     goto enough;        /* ~~ must be first line in formline */
10441                 }
10442                 if (*t == '@' || *t == '^')
10443                     needargs = TRUE;
10444             }
10445             if (eol > s) {
10446                 sv_catpvn(stuff, s, eol-s);
10447 #ifndef PERL_STRICT_CR
                      /* turn a trailing "\r\n" of the appended line into "\n" */
10448                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10449                     char *end = SvPVX(stuff) + SvCUR(stuff);
10450                     end[-2] = '\n';
10451                     end[-1] = '\0';
10452                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10453                 }
10454 #endif
10455             }
10456             else
10457               break;            /* empty line range: nothing left to collect */
10458         }
10459         s = (char*)eol;
              /* when PL_rsfp is set, fetch the next chunk of input */
10460         if (PL_rsfp) {
10461             bool got_some;
10462 #ifdef PERL_MAD
10463             if (PL_madskills) {
10464                 if (PL_thistoken)
10465                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10466                 else
10467                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10468             }
10469 #endif
10470             PL_bufptr = PL_bufend;
                  /* the line number is bumped only around the lex_next_chunk()
                     call and restored right after it */
10471             CopLINE_inc(PL_curcop);
10472             got_some = lex_next_chunk(0);
10473             CopLINE_dec(PL_curcop);
10474             s = PL_bufptr;
10475 #ifdef PERL_MAD
10476             tokenstart = PL_bufptr;
10477 #endif
10478             if (!got_some)
10479                 break;
10480         }
10481         incline(s);
10482     }
10483   enough:
10484     if (SvCUR(stuff)) {
10485         PL_expect = XTERM;
10486         if (needargs) {
                  /* arguments follow: return to normal lexing and queue a ','
                     token */
10487             PL_lex_state = LEX_NORMAL;
10488             start_force(PL_curforce);
10489             NEXTVAL_NEXTTOKE.ival = 0;
10490             force_next(',');
10491         }
10492         else
10493             PL_lex_state = LEX_FORMLINE;
              /* outside "use bytes": flag stuff as UTF-8 if the source is UTF
                 and the text is valid UTF-8, else recode it when PL_encoding
                 is set */
10494         if (!IN_BYTES) {
10495             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10496                 SvUTF8_on(stuff);
10497             else if (PL_encoding)
10498                 sv_recode_to_utf8(stuff, PL_encoding);
10499         }
              /* queue the picture text as a constant, then the formline op.
                 NOTE(review): presumably forced tokens are delivered
                 last-queued-first, so LSTOP reaches the parser before THING
                 and ',' -- confirm against force_next() */
10500         start_force(PL_curforce);
10501         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10502         force_next(THING);
10503         start_force(PL_curforce);
10504         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10505         force_next(LSTOP);
10506     }
10507     else {
              /* nothing collected: drop the empty SV; if the terminating "."
                 line was seen, clear PL_lex_formbrack */
10508         SvREFCNT_dec(stuff);
10509         if (eofmt)
10510             PL_lex_formbrack = 0;
10511         PL_bufptr = s;
10512     }
10513 #ifdef PERL_MAD
10514     if (PL_madskills) {
10515         if (PL_thistoken)
10516             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10517         else
10518             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10519         PL_thiswhite = savewhite;
10520     }
10521 #endif
10522     return s;
10523 }
10524
/*
 * Perl_start_subparse
 *
 * Makes a fresh CV the current compilation target (PL_compcv).
 *
 * is_format: non-zero creates an SVt_PVFM, zero an SVt_PVCV.
 * flags:     OR-ed into the new CV's CvFLAGS.
 *
 * PL_subline, PL_subname and the previous PL_compcv are saved first; the
 * previous PL_compcv becomes the new CV's CvOUTSIDE (with its reference
 * count incremented).
 *
 * Returns the value PL_savestack_ix had on entry, before any of the saves.
 * NOTE(review): presumably the caller uses it to unwind the savestack when
 * the sub has been compiled -- confirm against the callers.
 */
10525 I32
10526 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10527 {
10528     dVAR;
          /* captured before anything below touches the savestack */
10529     const I32 oldsavestack_ix = PL_savestack_ix;
          /* the CV being compiled on entry (may be NULL, see the test below) */
10530     CV* const outsidecv = PL_compcv;
10531
10532     if (PL_compcv) {
10533         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10534     }
10535     SAVEI32(PL_subline);
10536     save_item(PL_subname);
10537     SAVESPTR(PL_compcv);
10538
10539     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10540     CvFLAGS(PL_compcv) |= flags;
10541
          /* record the line the new sub starts on */
10542     PL_subline = CopLINE(PL_curcop);
10543     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
          /* link the new CV to the one that was being compiled on entry */
10544     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10545     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10546
10547     return oldsavestack_ix;
10548 }
10549
10550 #ifdef __SC__
10551 #pragma segment Perl_yylex
10552 #endif
10553 static int
10554 S_yywarn(pTHX_ const char *const s)
10555 {
10556     dVAR;
10557
10558     PERL_ARGS_ASSERT_YYWARN;
10559
10560     PL_in_eval |= EVAL_WARNONLY;
10561     yyerror(s);
10562     PL_in_eval &= ~EVAL_WARNONLY;
10563     return 0;
10564 }
10565
10566 int
10567 Perl_yyerror(pTHX_ const char *const s)
10568 {
10569     dVAR;
10570     const char *where = NULL;
10571     const char *context = NULL;
10572     int contlen = -1;
10573     SV *msg;
10574     int yychar  = PL_parser->yychar;
10575
10576     PERL_ARGS_ASSERT_YYERROR;
10577
10578     if (!yychar || (yychar == ';' && !PL_rsfp))
10579         where = "at EOF";
10580     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10581       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10582       PL_oldbufptr != PL_bufptr) {
10583         /*
10584                 Only for NetWare:
10585                 The code below is removed for NetWare because it abends/crashes on NetWare
10586                 when the script has error such as not having the closing quotes like:
10587                     if ($var eq "value)
10588                 Checking of white spaces is anyway done in NetWare code.
10589         */
10590 #ifndef NETWARE
10591         while (isSPACE(*PL_oldoldbufptr))
10592             PL_oldoldbufptr++;
10593 #endif
10594         context = PL_oldoldbufptr;
10595         contlen = PL_bufptr - PL_oldoldbufptr;
10596     }
10597     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10598       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10599         /*
10600                 Only for NetWare:
10601                 The code below is removed for NetWare because it abends/crashes on NetWare
10602                 when the script has error such as not having the closing quotes like:
10603                     if ($var eq "value)
10604                 Checking of white spaces is anyway done in NetWare code.
10605         */
10606 #ifndef NETWARE
10607         while (isSPACE(*PL_oldbufptr))
10608             PL_oldbufptr++;
10609 #endif
10610         context = PL_oldbufptr;
10611         contlen = PL_bufptr - PL_oldbufptr;
10612     }
10613     else if (yychar > 255)
10614         where = "next token ???";
10615     else if (yychar == -2) { /* YYEMPTY */
10616         if (PL_lex_state == LEX_NORMAL ||
10617            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10618             where = "at end of line";
10619         else if (PL_lex_inpat)
10620             where = "within pattern";
10621         else
10622             where = "within string";
10623     }
10624     else {
10625         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10626         if (yychar < 32)
10627             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10628         else if (isPRINT_LC(yychar)) {
10629             const char string = yychar;
10630             sv_catpvn(where_sv, &string, 1);
10631         }
10632         else
10633             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10634         where = SvPVX_const(where_sv);
10635     }
10636     msg = sv_2mortal(newSVpv(s, 0));
10637     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10638         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10639     if (context)
10640         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10641     else
10642         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10643     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10644         Perl_sv_catpvf(aTHX_ msg,
10645         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10646                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10647         PL_multi_end = 0;
10648     }
10649     if (PL_in_eval & EVAL_WARNONLY) {
10650         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10651     }
10652     else
10653         qerror(msg);
10654     if (PL_error_count >= 10) {
10655         if (PL_in_eval && SvCUR(ERRSV))
10656             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10657                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10658         else
10659             Perl_croak(aTHX_ "%s has too many errors.\n",
10660             OutCopFILE(PL_curcop));
10661     }
10662     PL_in_my = 0;
10663     PL_in_my_stash = NULL;
10664     return 0;
10665 }
10666 #ifdef __SC__
10667 #pragma segment Main
10668 #endif
10669
10670 STATIC char*
10671 S_swallow_bom(pTHX_ U8 *s)
10672 {
10673     dVAR;
10674     const STRLEN slen = SvCUR(PL_linestr);
10675
10676     PERL_ARGS_ASSERT_SWALLOW_BOM;
10677
10678     switch (s[0]) {
10679     case 0xFF:
10680         if (s[1] == 0xFE) {
10681             /* UTF-16 little-endian? (or UTF-32LE?) */
10682             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10683                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10684 #ifndef PERL_NO_UTF16_FILTER
10685             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10686             s += 2;
10687             if (PL_bufend > (char*)s) {
10688                 s = add_utf16_textfilter(s, TRUE);
10689             }
10690 #else
10691             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10692 #endif
10693         }
10694         break;
10695     case 0xFE:
10696         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10697 #ifndef PERL_NO_UTF16_FILTER
10698             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10699             s += 2;
10700             if (PL_bufend > (char *)s) {
10701                 s = add_utf16_textfilter(s, FALSE);
10702             }
10703 #else
10704             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10705 #endif
10706         }
10707         break;
10708     case 0xEF:
10709         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10710             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10711             s += 3;                      /* UTF-8 */
10712         }
10713         break;
10714     case 0:
10715         if (slen > 3) {
10716              if (s[1] == 0) {
10717                   if (s[2] == 0xFE && s[3] == 0xFF) {
10718                        /* UTF-32 big-endian */
10719                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10720                   }
10721              }
10722              else if (s[2] == 0 && s[3] != 0) {
10723                   /* Leading bytes
10724                    * 00 xx 00 xx
10725                    * are a good indicator of UTF-16BE. */
10726 #ifndef PERL_NO_UTF16_FILTER
10727                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10728                   s = add_utf16_textfilter(s, FALSE);
10729 #else
10730                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10731 #endif
10732              }
10733         }
10734 #ifdef EBCDIC
10735     case 0xDD:
10736         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10737             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10738             s += 4;                      /* UTF-8 */
10739         }
10740         break;
10741 #endif
10742
10743     default:
10744          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10745                   /* Leading bytes
10746                    * xx 00 xx 00
10747                    * are a good indicator of UTF-16LE. */
10748 #ifndef PERL_NO_UTF16_FILTER
10749               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10750               s = add_utf16_textfilter(s, TRUE);
10751 #else
10752               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10753 #endif
10754          }
10755     }
10756     return (char*)s;
10757 }
10758
10759
10760 #ifndef PERL_NO_UTF16_FILTER
10761 static I32
10762 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10763 {
10764     dVAR;
10765     SV *const filter = FILTER_DATA(idx);
10766     /* We re-use this each time round, throwing the contents away before we
10767        return.  */
10768     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10769     SV *const utf8_buffer = filter;
10770     IV status = IoPAGE(filter);
10771     const bool reverse = cBOOL(IoLINES(filter));
10772     I32 retval;
10773
10774     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10775
10776     /* As we're automatically added, at the lowest level, and hence only called
10777        from this file, we can be sure that we're not called in block mode. Hence
10778        don't bother writing code to deal with block mode.  */
10779     if (maxlen) {
10780         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10781     }
10782     if (status < 0) {
10783         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10784     }
10785     DEBUG_P(PerlIO_printf(Perl_debug_log,
10786                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10787                           FPTR2DPTR(void *, S_utf16_textfilter),
10788                           reverse ? 'l' : 'b', idx, maxlen, status,
10789                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10790
10791     while (1) {
10792         STRLEN chars;
10793         STRLEN have;
10794         I32 newlen;
10795         U8 *end;
10796         /* First, look in our buffer of existing UTF-8 data:  */
10797         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10798
10799         if (nl) {
10800             ++nl;
10801         } else if (status == 0) {
10802             /* EOF */
10803             IoPAGE(filter) = 0;
10804             nl = SvEND(utf8_buffer);
10805         }
10806         if (nl) {
10807             STRLEN got = nl - SvPVX(utf8_buffer);
10808             /* Did we have anything to append?  */
10809             retval = got != 0;
10810             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10811             /* Everything else in this code works just fine if SVp_POK isn't
10812                set.  This, however, needs it, and we need it to work, else
10813                we loop infinitely because the buffer is never consumed.  */
10814             sv_chop(utf8_buffer, nl);
10815             break;
10816         }
10817
10818         /* OK, not a complete line there, so need to read some more UTF-16.
10819            Read an extra octect if the buffer currently has an odd number. */
10820         while (1) {
10821             if (status <= 0)
10822                 break;
10823             if (SvCUR(utf16_buffer) >= 2) {
10824                 /* Location of the high octet of the last complete code point.
10825                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10826                    *coupled* with all the benefits of partial reads and
10827                    endianness.  */
10828                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10829                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10830
10831                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10832                     break;
10833                 }
10834
10835                 /* We have the first half of a surrogate. Read more.  */
10836                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10837             }
10838
10839             status = FILTER_READ(idx + 1, utf16_buffer,
10840                                  160 + (SvCUR(utf16_buffer) & 1));
10841             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10842             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10843             if (status < 0) {
10844                 /* Error */
10845                 IoPAGE(filter) = status;
10846                 return status;
10847             }
10848         }
10849
10850         chars = SvCUR(utf16_buffer) >> 1;
10851         have = SvCUR(utf8_buffer);
10852         SvGROW(utf8_buffer, have + chars * 3 + 1);
10853
10854         if (reverse) {
10855             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10856                                          (U8*)SvPVX_const(utf8_buffer) + have,
10857                                          chars * 2, &newlen);
10858         } else {
10859             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10860                                 (U8*)SvPVX_const(utf8_buffer) + have,
10861                                 chars * 2, &newlen);
10862         }
10863         SvCUR_set(utf8_buffer, have + newlen);
10864         *end = '\0';
10865
10866         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10867            it's private to us, and utf16_to_utf8{,reversed} take a
10868            (pointer,length) pair, rather than a NUL-terminated string.  */
10869         if(SvCUR(utf16_buffer) & 1) {
10870             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10871             SvCUR_set(utf16_buffer, 1);
10872         } else {
10873             SvCUR_set(utf16_buffer, 0);
10874         }
10875     }
10876     DEBUG_P(PerlIO_printf(Perl_debug_log,
10877                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10878                           status,
10879                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10880     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10881     return retval;
10882 }
10883
10884 static U8 *
10885 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10886 {
10887     SV *filter = filter_add(S_utf16_textfilter, NULL);
10888
10889     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10890
10891     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10892     sv_setpvs(filter, "");
10893     IoLINES(filter) = reversed;
10894     IoPAGE(filter) = 1; /* Not EOF */
10895
10896     /* Sadly, we have to return a valid pointer, come what may, so we have to
10897        ignore any error return from this.  */
10898     SvCUR_set(PL_linestr, 0);
10899     if (FILTER_READ(0, PL_linestr, 0)) {
10900         SvUTF8_on(PL_linestr);
10901     } else {
10902         SvUTF8_on(PL_linestr);
10903     }
10904     PL_bufend = SvEND(PL_linestr);
10905     return (U8*)SvPVX(PL_linestr);
10906 }
10907 #endif
10908
10909 /*
10910 Returns a pointer to the next character after the parsed
10911 vstring, as well as updating the passed in sv.
10912
10913 Function must be called like
10914
10915         sv = newSV(5);
10916         s = scan_vstring(s,e,sv);
10917
10918 where s and e are the start and end of the string.
10919 The sv should already be large enough to store the vstring
10920 passed in, for performance reasons.
10921
10922 */
10923
10924 char *
10925 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10926 {
10927     dVAR;
10928     const char *pos = s;
10929     const char *start = s;
10930
10931     PERL_ARGS_ASSERT_SCAN_VSTRING;
10932
10933     if (*pos == 'v') pos++;  /* get past 'v' */
10934     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10935         pos++;
10936     if ( *pos != '.') {
10937         /* this may not be a v-string if followed by => */
10938         const char *next = pos;
10939         while (next < e && isSPACE(*next))
10940             ++next;
10941         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10942             /* return string not v-string */
10943             sv_setpvn(sv,(char *)s,pos-s);
10944             return (char *)pos;
10945         }
10946     }
10947
10948     if (!isALPHA(*pos)) {
10949         U8 tmpbuf[UTF8_MAXBYTES+1];
10950
10951         if (*s == 'v')
10952             s++;  /* get past 'v' */
10953
10954         sv_setpvs(sv, "");
10955
10956         for (;;) {
10957             /* this is atoi() that tolerates underscores */
10958             U8 *tmpend;
10959             UV rev = 0;
10960             const char *end = pos;
10961             UV mult = 1;
10962             while (--end >= s) {
10963                 if (*end != '_') {
10964                     const UV orev = rev;
10965                     rev += (*end - '0') * mult;
10966                     mult *= 10;
10967                     if (orev > rev)
10968                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10969                                          "Integer overflow in decimal number");
10970                 }
10971             }
10972 #ifdef EBCDIC
10973             if (rev > 0x7FFFFFFF)
10974                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10975 #endif
10976             /* Append native character for the rev point */
10977             tmpend = uvchr_to_utf8(tmpbuf, rev);
10978             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10979             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10980                  SvUTF8_on(sv);
10981             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10982                  s = ++pos;
10983             else {
10984                  s = pos;
10985                  break;
10986             }
10987             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10988                  pos++;
10989         }
10990         SvPOK_on(sv);
10991         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10992         SvRMAGICAL_on(sv);
10993     }
10994     return (char *)s;
10995 }
10996
10997 int
10998 Perl_keyword_plugin_standard(pTHX_
10999         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11000 {
11001     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11002     PERL_UNUSED_CONTEXT;
11003     PERL_UNUSED_ARG(keyword_ptr);
11004     PERL_UNUSED_ARG(keyword_len);
11005     PERL_UNUSED_ARG(op_ptr);
11006     return KEYWORD_PLUGIN_DECLINE;
11007 }
11008
11009 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11010 static void
11011 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11012 {
11013     SAVEI32(PL_lex_brackets);
11014     if (PL_lex_brackets > 100)
11015         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11016     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11017     SAVEI32(PL_lex_allbrackets);
11018     PL_lex_allbrackets = 0;
11019     SAVEI8(PL_lex_fakeeof);
11020     PL_lex_fakeeof = (U8)fakeeof;
11021     if(yyparse(gramtype) && !PL_parser->error_count)
11022         qerror(Perl_mess(aTHX_ "Parse error"));
11023 }
11024
11025 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11026 static OP *
11027 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11028 {
11029     OP *o;
11030     ENTER;
11031     SAVEVPTR(PL_eval_root);
11032     PL_eval_root = NULL;
11033     parse_recdescent(gramtype, fakeeof);
11034     o = PL_eval_root;
11035     LEAVE;
11036     return o;
11037 }
11038
11039 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11040 static OP *
11041 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11042 {
11043     OP *exprop;
11044     if (flags & ~PARSE_OPTIONAL)
11045         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11046     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11047     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11048         if (!PL_parser->error_count)
11049             qerror(Perl_mess(aTHX_ "Parse error"));
11050         exprop = newOP(OP_NULL, 0);
11051     }
11052     return exprop;
11053 }
11054
11055 /*
11056 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11057
11058 Parse a Perl arithmetic expression.  This may contain operators of precedence
11059 down to the bit shift operators.  The expression must be followed (and thus
11060 terminated) either by a comparison or lower-precedence operator or by
11061 something that would normally terminate an expression such as semicolon.
11062 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11063 otherwise it is mandatory.  It is up to the caller to ensure that the
11064 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11065 the source of the code to be parsed and the lexical context for the
11066 expression.
11067
11068 The op tree representing the expression is returned.  If an optional
11069 expression is absent, a null pointer is returned, otherwise the pointer
11070 will be non-null.
11071
11072 If an error occurs in parsing or compilation, in most cases a valid op
11073 tree is returned anyway.  The error is reflected in the parser state,
11074 normally resulting in a single exception at the top level of parsing
11075 which covers all the compilation errors that occurred.  Some compilation
11076 errors, however, will throw an exception immediately.
11077
11078 =cut
11079 */
11080
11081 OP *
11082 Perl_parse_arithexpr(pTHX_ U32 flags)
11083 {
11084     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11085 }
11086
11087 /*
11088 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11089
11090 Parse a Perl term expression.  This may contain operators of precedence
11091 down to the assignment operators.  The expression must be followed (and thus
11092 terminated) either by a comma or lower-precedence operator or by
11093 something that would normally terminate an expression such as semicolon.
11094 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11095 otherwise it is mandatory.  It is up to the caller to ensure that the
11096 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11097 the source of the code to be parsed and the lexical context for the
11098 expression.
11099
11100 The op tree representing the expression is returned.  If an optional
11101 expression is absent, a null pointer is returned, otherwise the pointer
11102 will be non-null.
11103
11104 If an error occurs in parsing or compilation, in most cases a valid op
11105 tree is returned anyway.  The error is reflected in the parser state,
11106 normally resulting in a single exception at the top level of parsing
11107 which covers all the compilation errors that occurred.  Some compilation
11108 errors, however, will throw an exception immediately.
11109
11110 =cut
11111 */
11112
11113 OP *
11114 Perl_parse_termexpr(pTHX_ U32 flags)
11115 {
11116     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11117 }
11118
11119 /*
11120 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11121
11122 Parse a Perl list expression.  This may contain operators of precedence
11123 down to the comma operator.  The expression must be followed (and thus
11124 terminated) either by a low-precedence logic operator such as C<or> or by
11125 something that would normally terminate an expression such as semicolon.
11126 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11127 otherwise it is mandatory.  It is up to the caller to ensure that the
11128 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11129 the source of the code to be parsed and the lexical context for the
11130 expression.
11131
11132 The op tree representing the expression is returned.  If an optional
11133 expression is absent, a null pointer is returned, otherwise the pointer
11134 will be non-null.
11135
11136 If an error occurs in parsing or compilation, in most cases a valid op
11137 tree is returned anyway.  The error is reflected in the parser state,
11138 normally resulting in a single exception at the top level of parsing
11139 which covers all the compilation errors that occurred.  Some compilation
11140 errors, however, will throw an exception immediately.
11141
11142 =cut
11143 */
11144
11145 OP *
11146 Perl_parse_listexpr(pTHX_ U32 flags)
11147 {
11148     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11149 }
11150
11151 /*
11152 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11153
11154 Parse a single complete Perl expression.  This allows the full
11155 expression grammar, including the lowest-precedence operators such
11156 as C<or>.  The expression must be followed (and thus terminated) by a
11157 token that an expression would normally be terminated by: end-of-file,
11158 closing bracketing punctuation, semicolon, or one of the keywords that
11159 signals a postfix expression-statement modifier.  If I<flags> includes
11160 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11161 mandatory.  It is up to the caller to ensure that the dynamic parser
11162 state (L</PL_parser> et al) is correctly set to reflect the source of
11163 the code to be parsed and the lexical context for the expression.
11164
11165 The op tree representing the expression is returned.  If an optional
11166 expression is absent, a null pointer is returned, otherwise the pointer
11167 will be non-null.
11168
11169 If an error occurs in parsing or compilation, in most cases a valid op
11170 tree is returned anyway.  The error is reflected in the parser state,
11171 normally resulting in a single exception at the top level of parsing
11172 which covers all the compilation errors that occurred.  Some compilation
11173 errors, however, will throw an exception immediately.
11174
11175 =cut
11176 */
11177
11178 OP *
11179 Perl_parse_fullexpr(pTHX_ U32 flags)
11180 {
11181     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11182 }
11183
11184 /*
11185 =for apidoc Amx|OP *|parse_block|U32 flags
11186
11187 Parse a single complete Perl code block.  This consists of an opening
11188 brace, a sequence of statements, and a closing brace.  The block
11189 constitutes a lexical scope, so C<my> variables and various compile-time
11190 effects can be contained within it.  It is up to the caller to ensure
11191 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11192 reflect the source of the code to be parsed and the lexical context for
11193 the statement.
11194
11195 The op tree representing the code block is returned.  This is always a
11196 real op, never a null pointer.  It will normally be a C<lineseq> list,
11197 including C<nextstate> or equivalent ops.  No ops to construct any kind
11198 of runtime scope are included by virtue of it being a block.
11199
11200 If an error occurs in parsing or compilation, in most cases a valid op
11201 tree (most likely null) is returned anyway.  The error is reflected in
11202 the parser state, normally resulting in a single exception at the top
11203 level of parsing which covers all the compilation errors that occurred.
11204 Some compilation errors, however, will throw an exception immediately.
11205
11206 The I<flags> parameter is reserved for future use, and must always
11207 be zero.
11208
11209 =cut
11210 */
11211
11212 OP *
11213 Perl_parse_block(pTHX_ U32 flags)
11214 {
11215     if (flags)
11216         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11217     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11218 }
11219
11220 /*
11221 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11222
11223 Parse a single unadorned Perl statement.  This may be a normal imperative
11224 statement or a declaration that has compile-time effect.  It does not
11225 include any label or other affixture.  It is up to the caller to ensure
11226 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11227 reflect the source of the code to be parsed and the lexical context for
11228 the statement.
11229
11230 The op tree representing the statement is returned.  This may be a
11231 null pointer if the statement is null, for example if it was actually
11232 a subroutine definition (which has compile-time side effects).  If not
11233 null, it will be ops directly implementing the statement, suitable to
11234 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11235 equivalent op (except for those embedded in a scope contained entirely
11236 within the statement).
11237
11238 If an error occurs in parsing or compilation, in most cases a valid op
11239 tree (most likely null) is returned anyway.  The error is reflected in
11240 the parser state, normally resulting in a single exception at the top
11241 level of parsing which covers all the compilation errors that occurred.
11242 Some compilation errors, however, will throw an exception immediately.
11243
11244 The I<flags> parameter is reserved for future use, and must always
11245 be zero.
11246
11247 =cut
11248 */
11249
11250 OP *
11251 Perl_parse_barestmt(pTHX_ U32 flags)
11252 {
11253     if (flags)
11254         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11255     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11256 }
11257
11258 /*
11259 =for apidoc Amx|SV *|parse_label|U32 flags
11260
11261 Parse a single label, possibly optional, of the type that may prefix a
11262 Perl statement.  It is up to the caller to ensure that the dynamic parser
11263 state (L</PL_parser> et al) is correctly set to reflect the source of
11264 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11265 label is optional, otherwise it is mandatory.
11266
11267 The name of the label is returned in the form of a fresh scalar.  If an
11268 optional label is absent, a null pointer is returned.
11269
11270 If an error occurs in parsing, which can only occur if the label is
11271 mandatory, a valid label is returned anyway.  The error is reflected in
11272 the parser state, normally resulting in a single exception at the top
11273 level of parsing which covers all the compilation errors that occurred.
11274
11275 =cut
11276 */
11277
11278 SV *
11279 Perl_parse_label(pTHX_ U32 flags)
11280 {
11281     if (flags & ~PARSE_OPTIONAL)
11282         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11283     if (PL_lex_state == LEX_KNOWNEXT) {
11284         PL_parser->yychar = yylex();
11285         if (PL_parser->yychar == LABEL) {
11286             char *lpv = pl_yylval.pval;
11287             STRLEN llen = strlen(lpv);
11288             SV *lsv;
11289             PL_parser->yychar = YYEMPTY;
11290             lsv = newSV_type(SVt_PV);
11291             SvPV_set(lsv, lpv);
11292             SvCUR_set(lsv, llen);
11293             SvLEN_set(lsv, llen+1);
11294             SvPOK_on(lsv);
11295             return lsv;
11296         } else {
11297             yyunlex();
11298             goto no_label;
11299         }
11300     } else {
11301         char *s, *t;
11302         U8 c;
11303         STRLEN wlen, bufptr_pos;
11304         lex_read_space(0);
11305         t = s = PL_bufptr;
11306         c = (U8)*s;
11307         if (!isIDFIRST_A(c))
11308             goto no_label;
11309         do {
11310             c = (U8)*++t;
11311         } while(isWORDCHAR_A(c));
11312         wlen = t - s;
11313         if (word_takes_any_delimeter(s, wlen))
11314             goto no_label;
11315         bufptr_pos = s - SvPVX(PL_linestr);
11316         PL_bufptr = t;
11317         lex_read_space(LEX_KEEP_PREVIOUS);
11318         t = PL_bufptr;
11319         s = SvPVX(PL_linestr) + bufptr_pos;
11320         if (t[0] == ':' && t[1] != ':') {
11321             PL_oldoldbufptr = PL_oldbufptr;
11322             PL_oldbufptr = s;
11323             PL_bufptr = t+1;
11324             return newSVpvn(s, wlen);
11325         } else {
11326             PL_bufptr = s;
11327             no_label:
11328             if (flags & PARSE_OPTIONAL) {
11329                 return NULL;
11330             } else {
11331                 qerror(Perl_mess(aTHX_ "Parse error"));
11332                 return newSVpvs("x");
11333             }
11334         }
11335     }
11336 }
11337
11338 /*
11339 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11340
11341 Parse a single complete Perl statement.  This may be a normal imperative
11342 statement or a declaration that has compile-time effect, and may include
11343 optional labels.  It is up to the caller to ensure that the dynamic
11344 parser state (L</PL_parser> et al) is correctly set to reflect the source
11345 of the code to be parsed and the lexical context for the statement.
11346
11347 The op tree representing the statement is returned.  This may be a
11348 null pointer if the statement is null, for example if it was actually
11349 a subroutine definition (which has compile-time side effects).  If not
11350 null, it will be the result of a L</newSTATEOP> call, normally including
11351 a C<nextstate> or equivalent op.
11352
11353 If an error occurs in parsing or compilation, in most cases a valid op
11354 tree (most likely null) is returned anyway.  The error is reflected in
11355 the parser state, normally resulting in a single exception at the top
11356 level of parsing which covers all the compilation errors that occurred.
11357 Some compilation errors, however, will throw an exception immediately.
11358
11359 The I<flags> parameter is reserved for future use, and must always
11360 be zero.
11361
11362 =cut
11363 */
11364
11365 OP *
11366 Perl_parse_fullstmt(pTHX_ U32 flags)
11367 {
11368     if (flags)
11369         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11370     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11371 }
11372
11373 /*
11374 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11375
11376 Parse a sequence of zero or more Perl statements.  These may be normal
11377 imperative statements, including optional labels, or declarations
11378 that have compile-time effect, or any mixture thereof.  The statement
11379 sequence ends when a closing brace or end-of-file is encountered in a
11380 place where a new statement could have validly started.  It is up to
11381 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11382 is correctly set to reflect the source of the code to be parsed and the
11383 lexical context for the statements.
11384
11385 The op tree representing the statement sequence is returned.  This may
11386 be a null pointer if the statements were all null, for example if there
11387 were no statements or if there were only subroutine definitions (which
11388 have compile-time side effects).  If not null, it will be a C<lineseq>
11389 list, normally including C<nextstate> or equivalent ops.
11390
11391 If an error occurs in parsing or compilation, in most cases a valid op
11392 tree is returned anyway.  The error is reflected in the parser state,
11393 normally resulting in a single exception at the top level of parsing
11394 which covers all the compilation errors that occurred.  Some compilation
11395 errors, however, will throw an exception immediately.
11396
11397 The I<flags> parameter is reserved for future use, and must always
11398 be zero.
11399
11400 =cut
11401 */
11402
11403 OP *
11404 Perl_parse_stmtseq(pTHX_ U32 flags)
11405 {
11406     OP *stmtseqop;
11407     I32 c;
11408     if (flags)
11409         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11410     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11411     c = lex_peek_unichar(0);
11412     if (c != -1 && c != /*{*/'}')
11413         qerror(Perl_mess(aTHX_ "Parse error"));
11414     return stmtseqop;
11415 }
11416
11417 void
11418 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11419 {
11420     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11421     deprecate("qw(...) as parentheses");
11422     force_next((4<<24)|')');
11423     if (qwlist->op_type == OP_STUB) {
11424         op_free(qwlist);
11425     }
11426     else {
11427         start_force(PL_curforce);
11428         NEXTVAL_NEXTTOKE.opval = qwlist;
11429         force_next(THING);
11430     }
11431     force_next((2<<24)|'(');
11432 }
11433
11434 /*
11435  * Local variables:
11436  * c-indentation-style: bsd
11437  * c-basic-offset: 4
11438  * indent-tabs-mode: t
11439  * End:
11440  *
11441  * ex: set ts=8 sts=4 sw=4 noet:
11442  */