toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
/* Shorthand for calling S_new_constant(): supplies the aTHX_ context
 * argument and wraps the third argument in STR_WITH_LEN, so the callee
 * receives one more argument than the caller writes.
 * NOTE(review): STR_WITH_LEN is defined outside this file; presumably it
 * expands a string literal to "pointer, length" -- confirm. */
#define new_constant(a,b,c,d,e,f,g)     \
        S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)

/* The yylval member of the parser currently pointed to by PL_parser. */
#define pl_yylval       (PL_parser->yylval)
  48
/* XXX temporary backwards compatibility */
/* Each PL_* name below expands to a member of the parser object that
 * PL_parser currently points to.  Because the expansion goes through
 * PL_parser on every use, the names follow whichever parser is current. */
#define PL_lex_brackets         (PL_parser->lex_brackets)
#define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
#define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
#define PL_lex_brackstack       (PL_parser->lex_brackstack)
#define PL_lex_casemods         (PL_parser->lex_casemods)
#define PL_lex_casestack        (PL_parser->lex_casestack)
#define PL_lex_defer            (PL_parser->lex_defer)
#define PL_lex_dojoin           (PL_parser->lex_dojoin)
#define PL_lex_expect           (PL_parser->lex_expect)
#define PL_lex_formbrack        (PL_parser->lex_formbrack)
#define PL_lex_inpat            (PL_parser->lex_inpat)
#define PL_lex_inwhat           (PL_parser->lex_inwhat)
#define PL_lex_op               (PL_parser->lex_op)
#define PL_lex_repl             (PL_parser->lex_repl)
#define PL_lex_starts           (PL_parser->lex_starts)
#define PL_lex_stuff            (PL_parser->lex_stuff)
#define PL_multi_start          (PL_parser->multi_start)
#define PL_multi_open           (PL_parser->multi_open)
#define PL_multi_close          (PL_parser->multi_close)
#define PL_pending_ident        (PL_parser->pending_ident)
#define PL_preambled            (PL_parser->preambled)
#define PL_sublex_info          (PL_parser->sublex_info)
#define PL_linestr              (PL_parser->linestr)
#define PL_expect               (PL_parser->expect)
#define PL_copline              (PL_parser->copline)
#define PL_bufptr               (PL_parser->bufptr)
#define PL_oldbufptr            (PL_parser->oldbufptr)
#define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
#define PL_linestart            (PL_parser->linestart)
#define PL_bufend               (PL_parser->bufend)
#define PL_last_uni             (PL_parser->last_uni)
#define PL_last_lop             (PL_parser->last_lop)
#define PL_last_lop_op          (PL_parser->last_lop_op)
#define PL_lex_state            (PL_parser->lex_state)
#define PL_rsfp                 (PL_parser->rsfp)
#define PL_rsfp_filters         (PL_parser->rsfp_filters)
#define PL_in_my                (PL_parser->in_my)
#define PL_in_my_stash          (PL_parser->in_my_stash)
#define PL_tokenbuf             (PL_parser->tokenbuf)
#define PL_multi_end            (PL_parser->multi_end)
#define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
/* This can't be done with embed.fnc, because struct yy_parser contains a
   member named pending_ident, which clashes with the generated #define  */
static int
S_pending_ident(pTHX);

/* Message text kept in one place; its users are not in this part of the
 * file. */
static const char ident_too_long[] = "Identifier too long";
 120
/* CURMAD(slot,sv): under PERL_MAD, when PL_madskills is set, passes sv to
 * curmad() for the given slot and then zeroes the caller's sv variable;
 * in other builds it expands to nothing.  Note that the MAD expansion is a
 * bare "if" statement, so take care when using it next to an "else".
 *
 * NEXTVAL_NEXTTOKE: the value slot of the pending-token entry, indexed by
 * PL_curforce under PERL_MAD and by PL_nexttoke otherwise. */
#ifdef PERL_MAD
#  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
#  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
#else
#  define CURMAD(slot,sv)
#  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
#endif

/* XENUMMASK covers the low six bits; XFAKEEOF and XFAKEBRACK are the two
 * bits above it.  NOTE(review): presumably these split a brace-stack or
 * expectation byte into an enum value plus two flags -- confirm at the
 * points of use, which are outside this part of the file. */
#define XENUMMASK  0x3f
#define XFAKEEOF   0x40
#define XFAKEBRACK 0x80

/* UTF: whether the source being lexed is to be treated as UTF-8.  With
 * USE_UTF8_SCRIPTS it is simply "not under IN_BYTES"; otherwise it is true
 * when the line buffer exists and DO_UTF8() holds for it, or when the
 * HINT_UTF8 bit is set in PL_hints. */
#ifdef USE_UTF8_SCRIPTS
#   define UTF (!IN_BYTES)
#else
#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#endif

/* The maximum number of characters preceding the unrecognized one to display */
#define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
/* LEX_* are values for PL_lex_state, the state of the lexer.
 * They are arranged oddly so that the guard on the switch statement
 * can get by with a single comparison (if the compiler is smart enough).
 *
 * The values are consecutive, from 0 (LEX_KNOWNEXT) up to 10 (LEX_NORMAL).
 */

/* #define LEX_NOTPARSING               11 is done in perl.h. */

#define LEX_NORMAL              10 /* normal code (ie not within "...")     */
#define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
#define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
#define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
#define LEX_INTERPSTART          6 /* expecting the start of a $var         */

                                   /* at end of code, eg "$x" followed by:  */
#define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
#define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */

#define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
                                        string or after \E, $foo, etc       */
#define LEX_INTERPCONST          2 /* NOT USED */
#define LEX_FORMLINE             1 /* expecting a format line               */
#define LEX_KNOWNEXT             0 /* next token known; just return it      */


#ifdef DEBUGGING
/* Printable names for the states above.  The entries are listed in
 * ascending LEX_* order, so entry i names the state whose value is i;
 * keep the two lists in step when adding a state.  LEX_NOTPARSING (11)
 * has no entry here. */
static const char* const lex_state_names[] = {
    "KNOWNEXT",
    "FORMLINE",
    "INTERPCONST",
    "INTERPCONCAT",
    "INTERPENDMAYBE",
    "INTERPEND",
    "INTERPSTART",
    "INTERPPUSH",
    "INTERPCASEMOD",
    "INTERPNORMAL",
    "NORMAL"
};
#endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
/* CLINE is a macro that ensures PL_copline has a sane value:
 * it lowers PL_copline to the line of PL_curcop when that line is the
 * smaller of the two, and leaves it unchanged otherwise.  The expression
 * evaluates to the resulting PL_copline. */

#ifdef CLINE
#undef CLINE
#endif
#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
/* Whitespace-skipping entry points.  Under PERL_MAD each name maps to its
 * own function (skipspace0, skipspace1, skipspace2), and PEEKSPACE is
 * skipspace2() with a zero second argument.  In other builds all four
 * collapse to plain skipspace(s), and the tsv argument of SKIPSPACE2 is
 * ignored. */
#ifdef PERL_MAD
#  define SKIPSPACE0(s) skipspace0(s)
#  define SKIPSPACE1(s) skipspace1(s)
#  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
#  define PEEKSPACE(s) skipspace2(s,0)
#else
#  define SKIPSPACE0(s) skipspace(s)
#  define SKIPSPACE1(s) skipspace(s)
#  define SKIPSPACE2(s,tsv) skipspace(s)
#  define PEEKSPACE(s) skipspace(s)
#endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 237  * Rop          : relational operator <= != gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
/* REPORT(retval) yields the token value; in DEBUGGING builds it is routed
 * through tokereport() together with the address of pl_yylval first. */
#ifdef DEBUGGING /* Serve -DT. */
#   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
#else
#   define REPORT(retval) (retval)
#endif

/* Every macro below executes a "return" from the function that uses it
 * and reads a variable named s from that function's scope, which it
 * stores into PL_bufptr.  Apart from TOKEN they also set PL_expect; the
 * ones taking f store it in pl_yylval (ival, or opval for FUN0OP) and
 * return a fixed token type.  Those wrapped in ao() pass the token
 * through that function before returning it.  TERM and FUN0OP also
 * evaluate CLINE. */
#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
#define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
#define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
#define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
#define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
#define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
#define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
/* This bit of chicanery makes a unary function followed by
 * a parenthesis into a function with one argument, highest precedence.
 * The UNIDOR macro is for unary functions that can be followed by the //
 * operator (such as C<shift // 0>).
 *
 * UNI2(f,x) records f in pl_yylval.ival and PL_last_lop_op, sets PL_expect
 * to x, stores the caller's s in PL_bufptr and PL_oldbufptr in PL_last_uni,
 * then returns FUNC1 if the next character is '(' -- either immediately or
 * after PEEKSPACE() -- and UNIOP otherwise.  It expands to a brace block
 * containing "return", and uses the caller's variable s.
 */
#define UNI2(f,x) { \
        pl_yylval.ival = f; \
        PL_expect = x; \
        PL_bufptr = s; \
        PL_last_uni = PL_oldbufptr; \
        PL_last_lop_op = f; \
        if (*s == '(') \
            return REPORT( (int)FUNC1 ); \
        s = PEEKSPACE(s); \
        return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
        }
#define UNI(f)    UNI2(f,XTERM)
#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 288
/* UNIBRACK(f) makes the same FUNC1-versus-UNIOP decision as UNI2: FUNC1
 * when a '(' follows, directly or after PEEKSPACE(), and UNIOP otherwise.
 * It records f in pl_yylval.ival and updates PL_bufptr and PL_last_uni,
 * but unlike UNI2 it leaves PL_expect and PL_last_lop_op untouched.  It
 * uses the caller's variable s and returns from the calling function. */
#define UNIBRACK(f) { \
        pl_yylval.ival = f; \
        PL_bufptr = s; \
        PL_last_uni = PL_oldbufptr; \
        if (*s == '(') \
            return REPORT( (int)FUNC1 ); \
        s = PEEKSPACE(s); \
        return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
        }
 298
/* grandfather return to old style */
/* OLDLOP(f) returns LSTOP from the calling function with f stored in
 * pl_yylval.ival, PL_expect set to XTERM and the caller's s stored in
 * PL_bufptr.  Before that, if PL_lex_allbrackets is zero and
 * PL_lex_fakeeof is above LEX_FAKEEOF_LOWLOGIC, PL_lex_fakeeof is lowered
 * to that level.  Unlike the other token macros, the return value is not
 * passed through REPORT(). */
#define OLDLOP(f) \
        do { \
            if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
                PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
            pl_yylval.ival = (f); \
            PL_expect = XTERM; \
            PL_bufptr = s; \
            return (int)LSTOP; \
        } while(0)
 309
 310 #ifdef DEBUGGING
 311
/* how to interpret the pl_yylval associated with the token */
enum token_type {
    TOKENTYPE_NONE,  /* nothing extra is printed for the token */
    TOKENTYPE_IVAL,  /* pl_yylval.ival is printed as a plain integer */
    TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
    TOKENTYPE_PVAL,  /* pl_yylval.pval is printed as a string */
    TOKENTYPE_OPVAL, /* pl_yylval.opval is an op (or NULL) */
    TOKENTYPE_GVVAL  /* treated like TOKENTYPE_NONE when reporting */
};
 321
 322 static struct debug_tokens {
 323     const int token;
 324     enum token_type type;
 325     const char *name;
 326 } const debug_tokens[] =
 327 {
 328     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 329     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 330     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 331     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 332     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 333     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 334     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 335     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 336     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 337     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 338     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 339     { DO,               TOKENTYPE_NONE,         "DO" },
 340     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 341     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 342     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 343     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 344     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 345     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 346     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 347     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 348     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 349     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 350     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 351     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 352     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 353     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 354     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 355     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 356     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 357     { IF,               TOKENTYPE_IVAL,         "IF" },
 358     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 359     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 360     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 361     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 362     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 363     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 364     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 365     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 366     { MY,               TOKENTYPE_IVAL,         "MY" },
 367     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 368     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 369     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 370     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 371     { OROR,             TOKENTYPE_NONE,         "OROR" },
 372     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 373     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 374     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 375     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 376     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 377     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 378     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 379     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 380     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 381     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 382     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 383     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 384     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 385     { SUB,              TOKENTYPE_NONE,         "SUB" },
 386     { THING,            TOKENTYPE_OPVAL,        "THING" },
 387     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 388     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 389     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 390     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 391     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 392     { USE,              TOKENTYPE_IVAL,         "USE" },
 393     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 394     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 395     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 396     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 397     { 0,                TOKENTYPE_NONE,         NULL }
 398 };
 399
 400 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 401
 402 STATIC int
 403 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 404 {
 405     dVAR;
 406
 407     PERL_ARGS_ASSERT_TOKEREPORT;
 408
 409     if (DEBUG_T_TEST) {
 410         const char *name = NULL;
 411         enum token_type type = TOKENTYPE_NONE;
 412         const struct debug_tokens *p;
 413         SV* const report = newSVpvs("<== ");
 414
 415         for (p = debug_tokens; p->token; p++) {
 416             if (p->token == (int)rv) {
 417                 name = p->name;
 418                 type = p->type;
 419                 break;
 420             }
 421         }
 422         if (name)
 423             Perl_sv_catpv(aTHX_ report, name);
 424         else if ((char)rv > ' ' && (char)rv < '~')
 425             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 426         else if (!rv)
 427             sv_catpvs(report, "EOF");
 428         else
 429             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 430         switch (type) {
 431         case TOKENTYPE_NONE:
 432         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 433             break;
 434         case TOKENTYPE_IVAL:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 436             break;
 437         case TOKENTYPE_OPNUM:
 438             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 439                                     PL_op_name[lvalp->ival]);
 440             break;
 441         case TOKENTYPE_PVAL:
 442             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 443             break;
 444         case TOKENTYPE_OPVAL:
 445             if (lvalp->opval) {
 446                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 447                                     PL_op_name[lvalp->opval->op_type]);
 448                 if (lvalp->opval->op_type == OP_CONST) {
 449                     Perl_sv_catpvf(aTHX_ report, " %s",
 450                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 451                 }
 452
 453             }
 454             else
 455                 sv_catpvs(report, "(opval=null)");
 456             break;
 457         }
 458         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 459     };
 460     return (int)rv;
 461 }
 462
 463
 464 /* print the buffer with suitable escapes */
 465
 466 STATIC void
 467 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 468 {
 469     SV* const tmp = newSVpvs("");
 470
 471     PERL_ARGS_ASSERT_PRINTBUF;
 472
 473     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 474     SvREFCNT_dec(tmp);
 475 }
 476
 477 #endif
 478
 479 static int
 480 S_deprecate_commaless_var_list(pTHX) {
 481     PL_expect = XTERM;
 482     deprecate("comma-less variable list");
 483     return REPORT(','); /* grandfather non-comma-format format */
 484 }
 485
 486 /*
 487  * S_ao
 488  *
 489  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 490  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 491  */
 492
 493 STATIC int
 494 S_ao(pTHX_ int toketype)
 495 {
 496     dVAR;
 497     if (*PL_bufptr == '=') {
 498         PL_bufptr++;
 499         if (toketype == ANDAND)
 500             pl_yylval.ival = OP_ANDASSIGN;
 501         else if (toketype == OROR)
 502             pl_yylval.ival = OP_ORASSIGN;
 503         else if (toketype == DORDOR)
 504             pl_yylval.ival = OP_DORASSIGN;
 505         toketype = ASSIGNOP;
 506     }
 507     return toketype;
 508 }
 509
 510 /*
 511  * S_no_op
 512  * When Perl expects an operator and finds something else, no_op
 513  * prints the warning.  It always prints "<something> found where
 514  * operator expected.  It prints "Missing semicolon on previous line?"
 515  * if the surprise occurs at the start of the line.  "do you need to
 516  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 517  * where the compiler doesn't know if foo is a method call or a function.
 518  * It prints "Missing operator before end of line" if there's nothing
 519  * after the missing operator, or "... before <...>" if there is something
 520  * after the missing operator.
 521  */
 522
 523 STATIC void
 524 S_no_op(pTHX_ const char *const what, char *s)
 525 {
 526     dVAR;
 527     char * const oldbp = PL_bufptr;
 528     const bool is_first = (PL_oldbufptr == PL_linestart);
 529
 530     PERL_ARGS_ASSERT_NO_OP;
 531
 532     if (!s)
 533         s = oldbp;
 534     else
 535         PL_bufptr = s;
 536     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 537     if (ckWARN_d(WARN_SYNTAX)) {
 538         if (is_first)
 539             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 540                     "\t(Missing semicolon on previous line?)\n");
 541         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 542             const char *t;
 543             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 544                 NOOP;
 545             if (t < PL_bufptr && isSPACE(*t))
 546                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 547                         "\t(Do you need to predeclare %.*s?)\n",
 548                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 549         }
 550         else {
 551             assert(s >= oldbp);
 552             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 553                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 554         }
 555     }
 556     PL_bufptr = oldbp;
 557 }
 558
 559 /*
 560  * S_missingterm
 561  * Complain about missing quote/regexp/heredoc terminator.
 562  * If it's called with NULL then it cauterizes the line buffer.
 563  * If we're in a delimited string and the delimiter is a control
 564  * character, it's reformatted into a two-char sequence like ^C.
 565  * This is fatal.
 566  */
 567
 568 STATIC void
 569 S_missingterm(pTHX_ char *s)
 570 {
 571     dVAR;
 572     char tmpbuf[3];
 573     char q;
 574     if (s) {
 575         char * const nl = strrchr(s,'\n');
 576         if (nl)
 577             *nl = '\0';
 578     }
 579     else if (isCNTRL(PL_multi_close)) {
 580         *tmpbuf = '^';
 581         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 582         tmpbuf[2] = '\0';
 583         s = tmpbuf;
 584     }
 585     else {
 586         *tmpbuf = (char)PL_multi_close;
 587         tmpbuf[1] = '\0';
 588         s = tmpbuf;
 589     }
 590     q = strchr(s,'"') ? '\'' : '"';
 591     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 592 }
 593
 594 /*
 595  * Check whether the named feature is enabled.
 596  */
 597 bool
 598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 599 {
 600     dVAR;
 601     HV * const hinthv = GvHV(PL_hintgv);
 602     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 603
 604     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 605
 606     if (namelen > MAX_FEATURE_LEN)
 607         return FALSE;
 608     memcpy(&he_name[8], name, namelen);
 609
 610     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 611 }
 612
 613 /*
 614  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 615  * utf16-to-utf8-reversed.
 616  */
 617
 618 #ifdef PERL_CR_FILTER
 619 static void
 620 strip_return(SV *sv)
 621 {
 622     register const char *s = SvPVX_const(sv);
 623     register const char * const e = s + SvCUR(sv);
 624
 625     PERL_ARGS_ASSERT_STRIP_RETURN;
 626
 627     /* outer loop optimized to do nothing if there are no CR-LFs */
 628     while (s < e) {
 629         if (*s++ == '\r' && *s == '\n') {
 630             /* hit a CR-LF, need to copy the rest */
 631             register char *d = s - 1;
 632             *d++ = *s++;
 633             while (s < e) {
 634                 if (*s == '\r' && s[1] == '\n')
 635                     s++;
 636                 *d++ = *s++;
 637             }
 638             SvCUR(sv) -= s - d;
 639             return;
 640         }
 641     }
 642 }
 643
 644 STATIC I32
 645 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 646 {
 647     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 648     if (count > 0 && !maxlen)
 649         strip_return(sv);
 650     return count;
 651 }
 652 #endif
 653
 654 /*
 655 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 656
 657 Creates and initialises a new lexer/parser state object, supplying
 658 a context in which to lex and parse from a new source of Perl code.
 659 A pointer to the new state object is placed in L</PL_parser>.  An entry
 660 is made on the save stack so that upon unwinding the new state object
 661 will be destroyed and the former value of L</PL_parser> will be restored.
 662 Nothing else need be done to clean up the parsing context.
 663
 664 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 665 non-null, provides a string (in SV form) containing code to be parsed.
 666 A copy of the string is made, so subsequent modification of I<line>
 667 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 668 from which code will be read to be parsed.  If both are non-null, the
 669 code in I<line> comes first and must consist of complete lines of input,
 670 and I<rsfp> supplies the remainder of the source.
 671
 672 The I<flags> parameter is reserved for future use, and must always
 673 be zero, except for one flag that is currently reserved for perl's internal
 674 use.
 675
 676 =cut
 677 */
 678
 679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 680    can share filters with the current parser. */
 681
 682 void
 683 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 684 {
 685     dVAR;
 686     const char *s = NULL;
 687     STRLEN len;
 688     yy_parser *parser, *oparser;
 689     if (flags && flags != LEX_START_SAME_FILTER)
 690         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 691
 692     /* create and initialise a parser */
 693
 694     Newxz(parser, 1, yy_parser);
 695     parser->old_parser = oparser = PL_parser;
 696     PL_parser = parser;
 697
 698     parser->stack = NULL;
 699     parser->ps = NULL;
 700     parser->stack_size = 0;
 701
 702     /* on scope exit, free this parser and restore any outer one */
 703     SAVEPARSER(parser);
 704     parser->saved_curcop = PL_curcop;
 705
 706     /* initialise lexer state */
 707
 708 #ifdef PERL_MAD
 709     parser->curforce = -1;
 710 #else
 711     parser->nexttoke = 0;
 712 #endif
 713     parser->error_count = oparser ? oparser->error_count : 0;
 714     parser->copline = NOLINE;
 715     parser->lex_state = LEX_NORMAL;
 716     parser->expect = XSTATE;
 717     parser->rsfp = rsfp;
 718     parser->rsfp_filters =
 719       !(flags & LEX_START_SAME_FILTER) || !oparser
 720         ? newAV()
 721         : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 722
 723     Newx(parser->lex_brackstack, 120, char);
 724     Newx(parser->lex_casestack, 12, char);
 725     *parser->lex_casestack = '\0';
 726
 727     if (line) {
 728         s = SvPV_const(line, len);
 729     } else {
 730         len = 0;
 731     }
 732
 733     if (!len) {
 734         parser->linestr = newSVpvs("\n;");
 735     } else {
 736         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 737         if (s[len-1] != ';')
 738             sv_catpvs(parser->linestr, "\n;");
 739     }
 740     parser->oldoldbufptr =
 741         parser->oldbufptr =
 742         parser->bufptr =
 743         parser->linestart = SvPVX(parser->linestr);
 744     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 745     parser->last_lop = parser->last_uni = NULL;
 746
 747     parser->in_pod = 0;
 748 }
 749
 750
 751 /* delete a parser object */
 752
 753 void
 754 Perl_parser_free(pTHX_  const yy_parser *parser)
 755 {
 756     PERL_ARGS_ASSERT_PARSER_FREE;
 757
 758     PL_curcop = parser->saved_curcop;
 759     SvREFCNT_dec(parser->linestr);
 760
 761     if (parser->rsfp == PerlIO_stdin())
 762         PerlIO_clearerr(parser->rsfp);
 763     else if (parser->rsfp && (!parser->old_parser ||
 764                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 765         PerlIO_close(parser->rsfp);
 766     SvREFCNT_dec(parser->rsfp_filters);
 767
 768     Safefree(parser->lex_brackstack);
 769     Safefree(parser->lex_casestack);
 770     PL_parser = parser->old_parser;
 771     Safefree(parser);
 772 }
 773
 774
 775 /*
 776 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 777
 778 Buffer scalar containing the chunk currently under consideration of the
 779 text currently being lexed.  This is always a plain string scalar (for
 780 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 781 normal scalar means; instead refer to the buffer directly by the pointer
 782 variables described below.
 783
 784 The lexer maintains various C<char*> pointers to things in the
 785 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 786 reallocated, all of these pointers must be updated.  Don't attempt to
 787 do this manually, but rather use L</lex_grow_linestr> if you need to
 788 reallocate the buffer.
 789
 790 The content of the text chunk in the buffer is commonly exactly one
 791 complete line of input, up to and including a newline terminator,
 792 but there are situations where it is otherwise.  The octets of the
 793 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 794 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 795 flag on this scalar, which may disagree with it.
 796
 797 For direct examination of the buffer, the variable
 798 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 799 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 800 of these pointers is usually preferable to examination of the scalar
 801 through normal scalar means.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 804
 805 Direct pointer to the end of the chunk of text currently being lexed, the
 806 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 807 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 808 always located at the end of the buffer, and does not count as part of
 809 the buffer's contents.
 810
 811 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 812
 813 Points to the current position of lexing inside the lexer buffer.
 814 Characters around this point may be freely examined, within
 815 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 816 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 817 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 818
 819 Lexing code (whether in the Perl core or not) moves this pointer past
 820 the characters that it consumes.  It is also expected to perform some
 821 bookkeeping whenever a newline character is consumed.  This movement
 822 can be more conveniently performed by the function L</lex_read_to>,
 823 which handles newlines appropriately.
 824
 825 Interpretation of the buffer's octets can be abstracted out by
 826 using the slightly higher-level functions L</lex_peek_unichar> and
 827 L</lex_read_unichar>.
 828
 829 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 830
 831 Points to the start of the current line inside the lexer buffer.
 832 This is useful for indicating at which column an error occurred, and
 833 not much else.  This must be updated by any lexing code that consumes
 834 a newline; the function L</lex_read_to> handles this detail.
 835
 836 =cut
 837 */
 838
 839 /*
 840 =for apidoc Amx|bool|lex_bufutf8
 841
 842 Indicates whether the octets in the lexer buffer
 843 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 844 of Unicode characters.  If not, they should be interpreted as Latin-1
 845 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 846
 847 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 848 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 849 encoding.
 850
 851 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 852 is significant, but not the whole story regarding the input character
 853 encoding.  Normally, when a file is being read, the scalar contains octets
 854 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 855 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 856 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 857 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 858 is in effect.  This logic may change in the future; use this function
 859 instead of implementing the logic yourself.
 860
 861 =cut
 862 */
 863
 864 bool
 865 Perl_lex_bufutf8(pTHX)
 866 {
 867     return UTF;
 868 }
 869
 870 /*
 871 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 872
 873 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 874 at least I<len> octets (including terminating NUL).  Returns a
 875 pointer to the reallocated buffer.  This is necessary before making
 876 any direct modification of the buffer that would increase its length.
 877 L</lex_stuff_pvn> provides a more convenient way to insert text into
 878 the buffer.
 879
 880 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 881 this function updates all of the lexer's variables that point directly
 882 into the buffer.
 883
 884 =cut
 885 */
 886
 887 char *
 888 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 889 {
 890     SV *linestr;
 891     char *buf;
 892     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 893     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 894     linestr = PL_parser->linestr;
 895     buf = SvPVX(linestr);
 896     if (len <= SvLEN(linestr))
 897         return buf;
 898     bufend_pos = PL_parser->bufend - buf;
 899     bufptr_pos = PL_parser->bufptr - buf;
 900     oldbufptr_pos = PL_parser->oldbufptr - buf;
 901     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 902     linestart_pos = PL_parser->linestart - buf;
 903     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 904     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 905     buf = sv_grow(linestr, len);
 906     PL_parser->bufend = buf + bufend_pos;
 907     PL_parser->bufptr = buf + bufptr_pos;
 908     PL_parser->oldbufptr = buf + oldbufptr_pos;
 909     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 910     PL_parser->linestart = buf + linestart_pos;
 911     if (PL_parser->last_uni)
 912         PL_parser->last_uni = buf + last_uni_pos;
 913     if (PL_parser->last_lop)
 914         PL_parser->last_lop = buf + last_lop_pos;
 915     return buf;
 916 }
 917
 918 /*
 919 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 920
 921 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 922 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 923 reallocating the buffer if necessary.  This means that lexing code that
 924 runs later will see the characters as if they had appeared in the input.
 925 It is not recommended to do this as part of normal parsing, and most
 926 uses of this facility run the risk of the inserted characters being
 927 interpreted in an unintended manner.
 928
 929 The string to be inserted is represented by I<len> octets starting
 930 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 931 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 932 The characters are recoded for the lexer buffer, according to how the
 933 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 934 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 935 function is more convenient.
 936
 937 =cut
 938 */
 939
 940 void
 941 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 942 {
 943     dVAR;
 944     char *bufptr;
 945     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 946     if (flags & ~(LEX_STUFF_UTF8))
 947         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 948     if (UTF) {
 949         if (flags & LEX_STUFF_UTF8) {
 950             goto plain_copy;
 951         } else {
 952             STRLEN highhalf = 0;
 953             const char *p, *e = pv+len;
 954             for (p = pv; p != e; p++)
 955                 highhalf += !!(((U8)*p) & 0x80);
 956             if (!highhalf)
 957                 goto plain_copy;
 958             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 959             bufptr = PL_parser->bufptr;
 960             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 961             SvCUR_set(PL_parser->linestr,
 962                 SvCUR(PL_parser->linestr) + len+highhalf);
 963             PL_parser->bufend += len+highhalf;
 964             for (p = pv; p != e; p++) {
 965                 U8 c = (U8)*p;
 966                 if (c & 0x80) {
 967                     *bufptr++ = (char)(0xc0 | (c >> 6));
 968                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 969                 } else {
 970                     *bufptr++ = (char)c;
 971                 }
 972             }
 973         }
 974     } else {
 975         if (flags & LEX_STUFF_UTF8) {
 976             STRLEN highhalf = 0;
 977             const char *p, *e = pv+len;
 978             for (p = pv; p != e; p++) {
 979                 U8 c = (U8)*p;
 980                 if (c >= 0xc4) {
 981                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 982                                 "non-Latin-1 character into Latin-1 input");
 983                 } else if (c >= 0xc2 && p+1 != e &&
 984                             (((U8)p[1]) & 0xc0) == 0x80) {
 985                     p++;
 986                     highhalf++;
 987                 } else if (c >= 0x80) {
 988                     /* malformed UTF-8 */
 989                     ENTER;
 990                     SAVESPTR(PL_warnhook);
 991                     PL_warnhook = PERL_WARNHOOK_FATAL;
 992                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 993                     LEAVE;
 994                 }
 995             }
 996             if (!highhalf)
 997                 goto plain_copy;
 998             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 999             bufptr = PL_parser->bufptr;
1000             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1001             SvCUR_set(PL_parser->linestr,
1002                 SvCUR(PL_parser->linestr) + len-highhalf);
1003             PL_parser->bufend += len-highhalf;
1004             for (p = pv; p != e; p++) {
1005                 U8 c = (U8)*p;
1006                 if (c & 0x80) {
1007                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008                     p++;
1009                 } else {
1010                     *bufptr++ = (char)c;
1011                 }
1012             }
1013         } else {
1014             plain_copy:
1015             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016             bufptr = PL_parser->bufptr;
1017             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1018             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1019             PL_parser->bufend += len;
1020             Copy(pv, bufptr, len, char);
1021         }
1022     }
1023 }
1024
1025 /*
1026 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030 reallocating the buffer if necessary.  This means that lexing code that
1031 runs later will see the characters as if they had appeared in the input.
1032 It is not recommended to do this as part of normal parsing, and most
1033 uses of this facility run the risk of the inserted characters being
1034 interpreted in an unintended manner.
1035
1036 The string to be inserted is represented by octets starting at I<pv>
1037 and continuing to the first nul.  These octets are interpreted as either
1038 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039 in I<flags>.  The characters are recoded for the lexer buffer, according
1040 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041 If it is not convenient to nul-terminate a string to be inserted, the
1042 L</lex_stuff_pvn> function is more appropriate.
1043
1044 =cut
1045 */
1046
1047 void
1048 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049 {
1050     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051     lex_stuff_pvn(pv, strlen(pv), flags);
1052 }
1053
1054 /*
1055 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059 reallocating the buffer if necessary.  This means that lexing code that
1060 runs later will see the characters as if they had appeared in the input.
1061 It is not recommended to do this as part of normal parsing, and most
1062 uses of this facility run the risk of the inserted characters being
1063 interpreted in an unintended manner.
1064
1065 The string to be inserted is the string value of I<sv>.  The characters
1066 are recoded for the lexer buffer, according to how the buffer is currently
1067 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1068 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069 need to construct a scalar.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076 {
1077     char *pv;
1078     STRLEN len;
1079     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080     if (flags)
1081         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082     pv = SvPV(sv, len);
1083     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084 }
1085
1086 /*
1087 =for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1091 This hides the discarded text from any lexing code that runs later,
1092 as if the text had never appeared.
1093
1094 This is not the normal way to consume lexed text.  For that, use
1095 L</lex_read_to>.
1096
1097 =cut
1098 */
1099
1100 void
1101 Perl_lex_unstuff(pTHX_ char *ptr)
1102 {
1103     char *buf, *bufend;
1104     STRLEN unstuff_len;
1105     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106     buf = PL_parser->bufptr;
1107     if (ptr < buf)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109     if (ptr == buf)
1110         return;
1111     bufend = PL_parser->bufend;
1112     if (ptr > bufend)
1113         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114     unstuff_len = ptr - buf;
1115     Move(ptr, buf, bufend+1-ptr, char);
1116     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117     PL_parser->bufend = bufend - unstuff_len;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_read_to|char *ptr
1122
1123 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125 performing the correct bookkeeping whenever a newline character is passed.
1126 This is the normal way to consume lexed text.
1127
1128 Interpretation of the buffer's octets can be abstracted out by
1129 using the slightly higher-level functions L</lex_peek_unichar> and
1130 L</lex_read_unichar>.
1131
1132 =cut
1133 */
1134
1135 void
1136 Perl_lex_read_to(pTHX_ char *ptr)
1137 {
1138     char *s;
1139     PERL_ARGS_ASSERT_LEX_READ_TO;
1140     s = PL_parser->bufptr;
1141     if (ptr < s || ptr > PL_parser->bufend)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143     for (; s != ptr; s++)
1144         if (*s == '\n') {
1145             CopLINE_inc(PL_curcop);
1146             PL_parser->linestart = s+1;
1147         }
1148     PL_parser->bufptr = ptr;
1149 }
1150
1151 /*
1152 =for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155 up to I<ptr>.  The remaining content of the buffer will be moved, and
1156 all pointers into the buffer updated appropriately.  I<ptr> must not
1157 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158 it is not permitted to discard text that has yet to be lexed.
1159
 1160 Normally it is not necessary to do this directly, because it suffices to
1161 use the implicit discarding behaviour of L</lex_next_chunk> and things
1162 based on it.  However, if a token stretches across multiple lines,
1163 and the lexing code has kept multiple lines of text in the buffer for
1164 that purpose, then after completion of the token it would be wise to
1165 explicitly discard the now-unneeded earlier lines, to avoid future
1166 multi-line tokens growing the buffer without bound.
1167
1168 =cut
1169 */
1170
1171 void
1172 Perl_lex_discard_to(pTHX_ char *ptr)
1173 {
1174     char *buf;
1175     STRLEN discard_len;
1176     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177     buf = SvPVX(PL_parser->linestr);
1178     if (ptr < buf)
1179         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180     if (ptr == buf)
1181         return;
1182     if (ptr > PL_parser->bufptr)
1183         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184     discard_len = ptr - buf;
1185     if (PL_parser->oldbufptr < ptr)
1186         PL_parser->oldbufptr = ptr;
1187     if (PL_parser->oldoldbufptr < ptr)
1188         PL_parser->oldoldbufptr = ptr;
1189     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190         PL_parser->last_uni = NULL;
1191     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192         PL_parser->last_lop = NULL;
1193     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195     PL_parser->bufend -= discard_len;
1196     PL_parser->bufptr -= discard_len;
1197     PL_parser->oldbufptr -= discard_len;
1198     PL_parser->oldoldbufptr -= discard_len;
1199     if (PL_parser->last_uni)
1200         PL_parser->last_uni -= discard_len;
1201     if (PL_parser->last_lop)
1202         PL_parser->last_lop -= discard_len;
1203 }
1204
1205 /*
1206 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208 Reads in the next chunk of text to be lexed, appending it to
1209 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1210 looked to the end of the current chunk and wants to know more.  It is
1211 usual, but not necessary, for lexing to have consumed the entirety of
1212 the current chunk at this time.
1213
1214 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215 chunk (i.e., the current chunk has been entirely consumed), normally the
1216 current chunk will be discarded at the same time that the new chunk is
1217 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218 will not be discarded.  If the current chunk has not been entirely
1219 consumed, then it will not be discarded regardless of the flag.
1220
1221 Returns true if some new text was added to the buffer, or false if the
1222 buffer has reached the end of the input text.
1223
1224 =cut
1225 */
1226
1227 #define LEX_FAKE_EOF 0x80000000
1228
1229 bool
1230 Perl_lex_next_chunk(pTHX_ U32 flags)
1231 {
1232     SV *linestr;
1233     char *buf;
1234     STRLEN old_bufend_pos, new_bufend_pos;
1235     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1237     bool got_some_for_debugger = 0;
1238     bool got_some;
1239     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1241     linestr = PL_parser->linestr;
1242     buf = SvPVX(linestr);
1243     if (!(flags & LEX_KEEP_PREVIOUS) &&
1244             PL_parser->bufptr == PL_parser->bufend) {
1245         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246         linestart_pos = 0;
1247         if (PL_parser->last_uni != PL_parser->bufend)
1248             PL_parser->last_uni = NULL;
1249         if (PL_parser->last_lop != PL_parser->bufend)
1250             PL_parser->last_lop = NULL;
1251         last_uni_pos = last_lop_pos = 0;
1252         *buf = 0;
1253         SvCUR(linestr) = 0;
1254     } else {
1255         old_bufend_pos = PL_parser->bufend - buf;
1256         bufptr_pos = PL_parser->bufptr - buf;
1257         oldbufptr_pos = PL_parser->oldbufptr - buf;
1258         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259         linestart_pos = PL_parser->linestart - buf;
1260         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262     }
1263     if (flags & LEX_FAKE_EOF) {
1264         goto eof;
1265     } else if (!PL_parser->rsfp) {
1266         got_some = 0;
1267     } else if (filter_gets(linestr, old_bufend_pos)) {
1268         got_some = 1;
1269         got_some_for_debugger = 1;
1270     } else {
1271         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1272             sv_setpvs(linestr, "");
1273         eof:
1274         /* End of real input.  Close filehandle (unless it was STDIN),
1275          * then add implicit termination.
1276          */
1277         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278             PerlIO_clearerr(PL_parser->rsfp);
1279         else if (PL_parser->rsfp)
1280             (void)PerlIO_close(PL_parser->rsfp);
1281         PL_parser->rsfp = NULL;
1282         PL_parser->in_pod = 0;
1283 #ifdef PERL_MAD
1284         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285             PL_faketokens = 1;
1286 #endif
1287         if (!PL_in_eval && PL_minus_p) {
1288             sv_catpvs(linestr,
1289                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290             PL_minus_n = PL_minus_p = 0;
1291         } else if (!PL_in_eval && PL_minus_n) {
1292             sv_catpvs(linestr, /*{*/";}");
1293             PL_minus_n = 0;
1294         } else
1295             sv_catpvs(linestr, ";");
1296         got_some = 1;
1297     }
1298     buf = SvPVX(linestr);
1299     new_bufend_pos = SvCUR(linestr);
1300     PL_parser->bufend = buf + new_bufend_pos;
1301     PL_parser->bufptr = buf + bufptr_pos;
1302     PL_parser->oldbufptr = buf + oldbufptr_pos;
1303     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304     PL_parser->linestart = buf + linestart_pos;
1305     if (PL_parser->last_uni)
1306         PL_parser->last_uni = buf + last_uni_pos;
1307     if (PL_parser->last_lop)
1308         PL_parser->last_lop = buf + last_lop_pos;
1309     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1310             PL_curstash != PL_debstash) {
1311         /* debugger active and we're not compiling the debugger code,
1312          * so store the line into the debugger's array of lines
1313          */
1314         update_debugger_info(NULL, buf+old_bufend_pos,
1315             new_bufend_pos-old_bufend_pos);
1316     }
1317     return got_some;
1318 }
1319
1320 /*
1321 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323 Looks ahead one (Unicode) character in the text currently being lexed.
1324 Returns the codepoint (unsigned integer value) of the next character,
1325 or -1 if lexing has reached the end of the input text.  To consume the
1326 peeked character, use L</lex_read_unichar>.
1327
1328 If the next character is in (or extends into) the next chunk of input
1329 text, the next chunk will be read in.  Normally the current chunk will be
1330 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331 then the current chunk will not be discarded.
1332
1333 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334 is encountered, an exception is generated.
1335
1336 =cut
1337 */
1338
1339 I32
1340 Perl_lex_peek_unichar(pTHX_ U32 flags)
1341 {
1342     dVAR;
1343     char *s, *bufend;
1344     if (flags & ~(LEX_KEEP_PREVIOUS))
1345         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346     s = PL_parser->bufptr;
1347     bufend = PL_parser->bufend;
1348     if (UTF) {
1349         U8 head;
1350         I32 unichar;
1351         STRLEN len, retlen;
1352         if (s == bufend) {
1353             if (!lex_next_chunk(flags))
1354                 return -1;
1355             s = PL_parser->bufptr;
1356             bufend = PL_parser->bufend;
1357         }
1358         head = (U8)*s;
1359         if (!(head & 0x80))
1360             return head;
1361         if (head & 0x40) {
1362             len = PL_utf8skip[head];
1363             while ((STRLEN)(bufend-s) < len) {
1364                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365                     break;
1366                 s = PL_parser->bufptr;
1367                 bufend = PL_parser->bufend;
1368             }
1369         }
1370         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371         if (retlen == (STRLEN)-1) {
1372             /* malformed UTF-8 */
1373             ENTER;
1374             SAVESPTR(PL_warnhook);
1375             PL_warnhook = PERL_WARNHOOK_FATAL;
1376             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377             LEAVE;
1378         }
1379         return unichar;
1380     } else {
1381         if (s == bufend) {
1382             if (!lex_next_chunk(flags))
1383                 return -1;
1384             s = PL_parser->bufptr;
1385         }
1386         return (U8)*s;
1387     }
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393 Reads the next (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the character read,
1395 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396 if lexing has reached the end of the input text.  To non-destructively
1397 examine the next character, use L</lex_peek_unichar> instead.
1398
1399 If the next character is in (or extends into) the next chunk of input
1400 text, the next chunk will be read in.  Normally the current chunk will be
1401 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402 then the current chunk will not be discarded.
1403
1404 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405 is encountered, an exception is generated.
1406
1407 =cut
1408 */
1409
1410 I32
1411 Perl_lex_read_unichar(pTHX_ U32 flags)
1412 {
1413     I32 c;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416     c = lex_peek_unichar(flags);
1417     if (c != -1) {
1418         if (c == '\n')
1419             CopLINE_inc(PL_curcop);
1420         if (UTF)
1421             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422         else
1423             ++(PL_parser->bufptr);
1424     }
1425     return c;
1426 }
1427
1428 /*
1429 =for apidoc Amx|void|lex_read_space|U32 flags
1430
1431 Reads optional spaces, in Perl style, in the text currently being
1432 lexed.  The spaces may include ordinary whitespace characters and
1433 Perl-style comments.  C<#line> directives are processed if encountered.
1434 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1435 at a non-space character (or the end of the input text).
1436
1437 If spaces extend into the next chunk of input text, the next chunk will
1438 be read in.  Normally the current chunk will be discarded at the same
1439 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1440 chunk will not be discarded.
1441
1442 =cut
1443 */
1444
1445 #define LEX_NO_NEXT_CHUNK 0x80000000
1446
1447 void
1448 Perl_lex_read_space(pTHX_ U32 flags)
1449 {
1450     char *s, *bufend;
1451     bool need_incline = 0;
1452     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1453         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1454 #ifdef PERL_MAD
1455     if (PL_skipwhite) {
1456         sv_free(PL_skipwhite);
1457         PL_skipwhite = NULL;
1458     }
1459     if (PL_madskills)
1460         PL_skipwhite = newSVpvs("");
1461 #endif /* PERL_MAD */
1462     s = PL_parser->bufptr;
1463     bufend = PL_parser->bufend;
1464     while (1) {
1465         char c = *s;
1466         if (c == '#') {
1467             do {
1468                 c = *++s;
1469             } while (!(c == '\n' || (c == 0 && s == bufend)));
1470         } else if (c == '\n') {
1471             s++;
1472             PL_parser->linestart = s;
1473             if (s == bufend)
1474                 need_incline = 1;
1475             else
1476                 incline(s);
1477         } else if (isSPACE(c)) {
1478             s++;
1479         } else if (c == 0 && s == bufend) {
1480             bool got_more;
1481 #ifdef PERL_MAD
1482             if (PL_madskills)
1483                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1484 #endif /* PERL_MAD */
1485             if (flags & LEX_NO_NEXT_CHUNK)
1486                 break;
1487             PL_parser->bufptr = s;
1488             CopLINE_inc(PL_curcop);
1489             got_more = lex_next_chunk(flags);
1490             CopLINE_dec(PL_curcop);
1491             s = PL_parser->bufptr;
1492             bufend = PL_parser->bufend;
1493             if (!got_more)
1494                 break;
1495             if (need_incline && PL_parser->rsfp) {
1496                 incline(s);
1497                 need_incline = 0;
1498             }
1499         } else {
1500             break;
1501         }
1502     }
1503 #ifdef PERL_MAD
1504     if (PL_madskills)
1505         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1506 #endif /* PERL_MAD */
1507     PL_parser->bufptr = s;
1508 }
1509
1510 /*
1511  * S_incline
1512  * This subroutine has nothing to do with tilting, whether at windmills
1513  * or pinball tables.  Its name is short for "increment line".  It
1514  * increments the current line number in CopLINE(PL_curcop) and checks
1515  * to see whether the line starts with a comment of the form
1516  *    # line 500 "foo.pm"
1517  * If so, it sets the current line number and file to the values in the comment.
1518  */
1519
1520 STATIC void
1521 S_incline(pTHX_ const char *s)
1522 {
1523     dVAR;
1524     const char *t;
1525     const char *n;
1526     const char *e;
1527     line_t line_num;
1528
1529     PERL_ARGS_ASSERT_INCLINE;
1530
1531     CopLINE_inc(PL_curcop);
1532     if (*s++ != '#')
1533         return;
1534     while (SPACE_OR_TAB(*s))
1535         s++;
1536     if (strnEQ(s, "line", 4))
1537         s += 4;
1538     else
1539         return;
1540     if (SPACE_OR_TAB(*s))
1541         s++;
1542     else
1543         return;
1544     while (SPACE_OR_TAB(*s))
1545         s++;
1546     if (!isDIGIT(*s))
1547         return;
1548
1549     n = s;
1550     while (isDIGIT(*s))
1551         s++;
1552     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1553         return;
1554     while (SPACE_OR_TAB(*s))
1555         s++;
1556     if (*s == '"' && (t = strchr(s+1, '"'))) {
1557         s++;
1558         e = t + 1;
1559     }
1560     else {
1561         t = s;
1562         while (!isSPACE(*t))
1563             t++;
1564         e = t;
1565     }
1566     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1567         e++;
1568     if (*e != '\n' && *e != '\0')
1569         return;         /* false alarm */
1570
1571     line_num = atoi(n)-1;
1572
1573     if (t - s > 0) {
1574         const STRLEN len = t - s;
1575         SV *const temp_sv = CopFILESV(PL_curcop);
1576         const char *cf;
1577         STRLEN tmplen;
1578
1579         if (temp_sv) {
1580             cf = SvPVX(temp_sv);
1581             tmplen = SvCUR(temp_sv);
1582         } else {
1583             cf = NULL;
1584             tmplen = 0;
1585         }
1586
1587         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1588             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1589              * to *{"::_<newfilename"} */
1590             /* However, the long form of evals is only turned on by the
1591                debugger - usually they're "(eval %lu)" */
1592             char smallbuf[128];
1593             char *tmpbuf;
1594             GV **gvp;
1595             STRLEN tmplen2 = len;
1596             if (tmplen + 2 <= sizeof smallbuf)
1597                 tmpbuf = smallbuf;
1598             else
1599                 Newx(tmpbuf, tmplen + 2, char);
1600             tmpbuf[0] = '_';
1601             tmpbuf[1] = '<';
1602             memcpy(tmpbuf + 2, cf, tmplen);
1603             tmplen += 2;
1604             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1605             if (gvp) {
1606                 char *tmpbuf2;
1607                 GV *gv2;
1608
1609                 if (tmplen2 + 2 <= sizeof smallbuf)
1610                     tmpbuf2 = smallbuf;
1611                 else
1612                     Newx(tmpbuf2, tmplen2 + 2, char);
1613
1614                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1615                     /* Either they malloc'd it, or we malloc'd it,
1616                        so no prefix is present in ours.  */
1617                     tmpbuf2[0] = '_';
1618                     tmpbuf2[1] = '<';
1619                 }
1620
1621                 memcpy(tmpbuf2 + 2, s, tmplen2);
1622                 tmplen2 += 2;
1623
1624                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1625                 if (!isGV(gv2)) {
1626                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1627                     /* adjust ${"::_<newfilename"} to store the new file name */
1628                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1629                     /* The line number may differ. If that is the case,
1630                        alias the saved lines that are in the array.
1631                        Otherwise alias the whole array. */
1632                     if (CopLINE(PL_curcop) == line_num) {
1633                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1634                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635                     }
1636                     else if (GvAV(*gvp)) {
1637                         AV * const av = GvAV(*gvp);
1638                         const I32 start = CopLINE(PL_curcop)+1;
1639                         I32 items = AvFILLp(av) - start;
1640                         if (items > 0) {
1641                             AV * const av2 = GvAVn(gv2);
1642                             SV **svp = AvARRAY(av) + start;
1643                             I32 l = (I32)line_num+1;
1644                             while (items--)
1645                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1646                         }
1647                     }
1648                 }
1649
1650                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1651             }
1652             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1653         }
1654         CopFILE_free(PL_curcop);
1655         CopFILE_setn(PL_curcop, s, len);
1656     }
1657     CopLINE_set(PL_curcop, line_num);
1658 }
1659
1660 #ifdef PERL_MAD
1661 /* skip space before PL_thistoken */
1662
1663 STATIC char *
1664 S_skipspace0(pTHX_ register char *s)
1665 {
1666     PERL_ARGS_ASSERT_SKIPSPACE0;
1667
1668     s = skipspace(s);
1669     if (!PL_madskills)
1670         return s;
1671     if (PL_skipwhite) {
1672         if (!PL_thiswhite)
1673             PL_thiswhite = newSVpvs("");
1674         sv_catsv(PL_thiswhite, PL_skipwhite);
1675         sv_free(PL_skipwhite);
1676         PL_skipwhite = 0;
1677     }
1678     PL_realtokenstart = s - SvPVX(PL_linestr);
1679     return s;
1680 }
1681
1682 /* skip space after PL_thistoken */
1683
1684 STATIC char *
1685 S_skipspace1(pTHX_ register char *s)
1686 {
1687     const char *start = s;
1688     I32 startoff = start - SvPVX(PL_linestr);
1689
1690     PERL_ARGS_ASSERT_SKIPSPACE1;
1691
1692     s = skipspace(s);
1693     if (!PL_madskills)
1694         return s;
1695     start = SvPVX(PL_linestr) + startoff;
1696     if (!PL_thistoken && PL_realtokenstart >= 0) {
1697         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698         PL_thistoken = newSVpvn(tstart, start - tstart);
1699     }
1700     PL_realtokenstart = -1;
1701     if (PL_skipwhite) {
1702         if (!PL_nextwhite)
1703             PL_nextwhite = newSVpvs("");
1704         sv_catsv(PL_nextwhite, PL_skipwhite);
1705         sv_free(PL_skipwhite);
1706         PL_skipwhite = 0;
1707     }
1708     return s;
1709 }
1710
1711 STATIC char *
1712 S_skipspace2(pTHX_ register char *s, SV **svp)
1713 {
1714     char *start;
1715     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1716     const I32 startoff = s - SvPVX(PL_linestr);
1717
1718     PERL_ARGS_ASSERT_SKIPSPACE2;
1719
1720     s = skipspace(s);
1721     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1722     if (!PL_madskills || !svp)
1723         return s;
1724     start = SvPVX(PL_linestr) + startoff;
1725     if (!PL_thistoken && PL_realtokenstart >= 0) {
1726         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1727         PL_thistoken = newSVpvn(tstart, start - tstart);
1728         PL_realtokenstart = -1;
1729     }
1730     if (PL_skipwhite) {
1731         if (!*svp)
1732             *svp = newSVpvs("");
1733         sv_setsv(*svp, PL_skipwhite);
1734         sv_free(PL_skipwhite);
1735         PL_skipwhite = 0;
1736     }
1737
1738     return s;
1739 }
1740 #endif
1741
1742 STATIC void
1743 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1744 {
1745     AV *av = CopFILEAVx(PL_curcop);
1746     if (av) {
1747         SV * const sv = newSV_type(SVt_PVMG);
1748         if (orig_sv)
1749             sv_setsv(sv, orig_sv);
1750         else
1751             sv_setpvn(sv, buf, len);
1752         (void)SvIOK_on(sv);
1753         SvIV_set(sv, 0);
1754         av_store(av, (I32)CopLINE(PL_curcop), sv);
1755     }
1756 }
1757
1758 /*
1759  * S_skipspace
1760  * Called to gobble the appropriate amount and type of whitespace.
1761  * Skips comments as well.
1762  */
1763
1764 STATIC char *
1765 S_skipspace(pTHX_ register char *s)
1766 {
1767 #ifdef PERL_MAD
1768     char *start = s;
1769 #endif /* PERL_MAD */
1770     PERL_ARGS_ASSERT_SKIPSPACE;
1771 #ifdef PERL_MAD
1772     if (PL_skipwhite) {
1773         sv_free(PL_skipwhite);
1774         PL_skipwhite = NULL;
1775     }
1776 #endif /* PERL_MAD */
1777     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1778         while (s < PL_bufend && SPACE_OR_TAB(*s))
1779             s++;
1780     } else {
1781         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782         PL_bufptr = s;
1783         lex_read_space(LEX_KEEP_PREVIOUS |
1784                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1785                     LEX_NO_NEXT_CHUNK : 0));
1786         s = PL_bufptr;
1787         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1788         if (PL_linestart > PL_bufptr)
1789             PL_bufptr = PL_linestart;
1790         return s;
1791     }
1792 #ifdef PERL_MAD
1793     if (PL_madskills)
1794         PL_skipwhite = newSVpvn(start, s-start);
1795 #endif /* PERL_MAD */
1796     return s;
1797 }
1798
1799 /*
1800  * S_check_uni
1801  * Check the unary operators to ensure there's no ambiguity in how they're
1802  * used.  An ambiguous piece of code would be:
1803  *     rand + 5
1804  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1805  * the +5 is its argument.
1806  */
1807
1808 STATIC void
1809 S_check_uni(pTHX)
1810 {
1811     dVAR;
1812     const char *s;
1813     const char *t;
1814
1815     if (PL_oldoldbufptr != PL_last_uni)
1816         return;
1817     while (isSPACE(*PL_last_uni))
1818         PL_last_uni++;
1819     s = PL_last_uni;
1820     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821         s++;
1822     if ((t = strchr(s, '(')) && t < PL_bufptr)
1823         return;
1824
1825     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1826                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1827                      (int)(s - PL_last_uni), PL_last_uni);
1828 }
1829
/*
 * LOP : macro to build a list operator.  Its behaviour has been replaced
 * with a subroutine, S_lop() for which LOP is just another name.
 * Note that it expands to a return statement and passes along a
 * variable named "s" that must exist at the point of use.
 */

#define LOP(f, x) return lop((f), (x), s)
1836
1837 /*
1838  * S_lop
1839  * Build a list operator (or something that might be one).  The rules:
1840  *  - if we have a next token, then it's a list operator [why?]
1841  *  - if the next thing is an opening paren, then it's a function
1842  *  - else it's a list operator
1843  */
1844
1845 STATIC I32
1846 S_lop(pTHX_ I32 f, int x, char *s)
1847 {
1848     dVAR;
1849
1850     PERL_ARGS_ASSERT_LOP;
1851
1852     pl_yylval.ival = f;
1853     CLINE;
1854     PL_expect = x;
1855     PL_bufptr = s;
1856     PL_last_lop = PL_oldbufptr;
1857     PL_last_lop_op = (OPCODE)f;
1858 #ifdef PERL_MAD
1859     if (PL_lasttoke)
1860         goto lstop;
1861 #else
1862     if (PL_nexttoke)
1863         goto lstop;
1864 #endif
1865     if (*s == '(')
1866         return REPORT(FUNC);
1867     s = PEEKSPACE(s);
1868     if (*s == '(')
1869         return REPORT(FUNC);
1870     else {
1871         lstop:
1872         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1873             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1874         return REPORT(LSTOP);
1875     }
1876 }
1877
1878 #ifdef PERL_MAD
1879  /*
1880  * S_start_force
1881  * Sets up for an eventual force_next().  start_force(0) basically does
1882  * an unshift, while start_force(-1) does a push.  yylex removes items
1883  * on the "pop" end.
1884  */
1885
1886 STATIC void
1887 S_start_force(pTHX_ int where)
1888 {
1889     int i;
1890
1891     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1892         where = PL_lasttoke;
1893     assert(PL_curforce < 0 || PL_curforce == where);
1894     if (PL_curforce != where) {
1895         for (i = PL_lasttoke; i > where; --i) {
1896             PL_nexttoke[i] = PL_nexttoke[i-1];
1897         }
1898         PL_lasttoke++;
1899     }
1900     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1901         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1902     PL_curforce = where;
1903     if (PL_nextwhite) {
1904         if (PL_madskills)
1905             curmad('^', newSVpvs(""));
1906         CURMAD('_', PL_nextwhite);
1907     }
1908 }
1909
1910 STATIC void
1911 S_curmad(pTHX_ char slot, SV *sv)
1912 {
1913     MADPROP **where;
1914
1915     if (!sv)
1916         return;
1917     if (PL_curforce < 0)
1918         where = &PL_thismad;
1919     else
1920         where = &PL_nexttoke[PL_curforce].next_mad;
1921
1922     if (PL_faketokens)
1923         sv_setpvs(sv, "");
1924     else {
1925         if (!IN_BYTES) {
1926             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927                 SvUTF8_on(sv);
1928             else if (PL_encoding) {
1929                 sv_recode_to_utf8(sv, PL_encoding);
1930             }
1931         }
1932     }
1933
1934     /* keep a slot open for the head of the list? */
1935     if (slot != '_' && *where && (*where)->mad_key == '^') {
1936         (*where)->mad_key = slot;
1937         sv_free(MUTABLE_SV(((*where)->mad_val)));
1938         (*where)->mad_val = (void*)sv;
1939     }
1940     else
1941         addmad(newMADsv(slot, sv), where, 0);
1942 }
1943 #else
1944 #  define start_force(where)    NOOP
1945 #  define curmad(slot, sv)      NOOP
1946 #endif
1947
1948 /*
1949  * S_force_next
1950  * When the lexer realizes it knows the next token (for instance,
1951  * it is reordering tokens for the parser) then it can call S_force_next
1952  * to know what token to return the next time the lexer is called.  Caller
1953  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1954  * and possibly PL_expect to ensure the lexer handles the token correctly.
1955  */
1956
1957 STATIC void
1958 S_force_next(pTHX_ I32 type)
1959 {
1960     dVAR;
1961 #ifdef DEBUGGING
1962     if (DEBUG_T_TEST) {
1963         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1964         tokereport(type, &NEXTVAL_NEXTTOKE);
1965     }
1966 #endif
1967 #ifdef PERL_MAD
1968     if (PL_curforce < 0)
1969         start_force(PL_lasttoke);
1970     PL_nexttoke[PL_curforce].next_type = type;
1971     if (PL_lex_state != LEX_KNOWNEXT)
1972         PL_lex_defer = PL_lex_state;
1973     PL_lex_state = LEX_KNOWNEXT;
1974     PL_lex_expect = PL_expect;
1975     PL_curforce = -1;
1976 #else
1977     PL_nexttype[PL_nexttoke] = type;
1978     PL_nexttoke++;
1979     if (PL_lex_state != LEX_KNOWNEXT) {
1980         PL_lex_defer = PL_lex_state;
1981         PL_lex_expect = PL_expect;
1982         PL_lex_state = LEX_KNOWNEXT;
1983     }
1984 #endif
1985 }
1986
1987 void
1988 Perl_yyunlex(pTHX)
1989 {
1990     int yyc = PL_parser->yychar;
1991     if (yyc != YYEMPTY) {
1992         if (yyc) {
1993             start_force(-1);
1994             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1996                 PL_lex_allbrackets--;
1997                 PL_lex_brackets--;
1998                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999             } else if (yyc == '('/*)*/) {
2000                 PL_lex_allbrackets--;
2001                 yyc |= (2<<24);
2002             }
2003             force_next(yyc);
2004         }
2005         PL_parser->yychar = YYEMPTY;
2006     }
2007 }
2008
2009 STATIC SV *
2010 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2011 {
2012     dVAR;
2013     SV * const sv = newSVpvn_utf8(start, len,
2014                                   !IN_BYTES
2015                                   && UTF
2016                                   && !is_ascii_string((const U8*)start, len)
2017                                   && is_utf8_string((const U8*)start, len));
2018     return sv;
2019 }
2020
2021 /*
2022  * S_force_word
2023  * When the lexer knows the next thing is a word (for instance, it has
2024  * just seen -> and it knows that the next char is a word char, then
2025  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2026  * lookahead.
2027  *
2028  * Arguments:
2029  *   char *start : buffer position (must be within PL_linestr)
2030  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2031  *   int check_keyword : if true, Perl checks to make sure the word isn't
2032  *       a keyword (do this if the word is a label, e.g. goto FOO)
2033  *   int allow_pack : if true, : characters will also be allowed (require,
2034  *       use, etc. do this)
2035  *   int allow_initial_tick : used by the "sub" lexer only.
2036  */
2037
2038 STATIC char *
2039 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2040 {
2041     dVAR;
2042     register char *s;
2043     STRLEN len;
2044
2045     PERL_ARGS_ASSERT_FORCE_WORD;
2046
2047     start = SKIPSPACE1(start);
2048     s = start;
2049     if (isIDFIRST_lazy_if(s,UTF) ||
2050         (allow_pack && *s == ':') ||
2051         (allow_initial_tick && *s == '\'') )
2052     {
2053         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2054         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2055             return start;
2056         start_force(PL_curforce);
2057         if (PL_madskills)
2058             curmad('X', newSVpvn(start,s-start));
2059         if (token == METHOD) {
2060             s = SKIPSPACE1(s);
2061             if (*s == '(')
2062                 PL_expect = XTERM;
2063             else {
2064                 PL_expect = XOPERATOR;
2065             }
2066         }
2067         if (PL_madskills)
2068             curmad('g', newSVpvs( "forced" ));
2069         NEXTVAL_NEXTTOKE.opval
2070             = (OP*)newSVOP(OP_CONST,0,
2071                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2072         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2073         force_next(token);
2074     }
2075     return s;
2076 }
2077
2078 /*
2079  * S_force_ident
2080  * Called when the lexer wants $foo *foo &foo etc, but the program
2081  * text only contains the "foo" portion.  The first argument is a pointer
2082  * to the "foo", and the second argument is the type symbol to prefix.
2083  * Forces the next token to be a "WORD".
2084  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2085  */
2086
2087 STATIC void
2088 S_force_ident(pTHX_ register const char *s, int kind)
2089 {
2090     dVAR;
2091
2092     PERL_ARGS_ASSERT_FORCE_IDENT;
2093
2094     if (*s) {
2095         const STRLEN len = strlen(s);
2096         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2097                                                                 UTF ? SVf_UTF8 : 0));
2098         start_force(PL_curforce);
2099         NEXTVAL_NEXTTOKE.opval = o;
2100         force_next(WORD);
2101         if (kind) {
2102             o->op_private = OPpCONST_ENTERED;
2103             /* XXX see note in pp_entereval() for why we forgo typo
2104                warnings if the symbol must be introduced in an eval.
2105                GSAR 96-10-12 */
2106             gv_fetchpvn_flags(s, len,
2107                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2108                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2109                               kind == '$' ? SVt_PV :
2110                               kind == '@' ? SVt_PVAV :
2111                               kind == '%' ? SVt_PVHV :
2112                               SVt_PVGV
2113                               );
2114         }
2115     }
2116 }
2117
2118 NV
2119 Perl_str_to_version(pTHX_ SV *sv)
2120 {
2121     NV retval = 0.0;
2122     NV nshift = 1.0;
2123     STRLEN len;
2124     const char *start = SvPV_const(sv,len);
2125     const char * const end = start + len;
2126     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2127
2128     PERL_ARGS_ASSERT_STR_TO_VERSION;
2129
2130     while (start < end) {
2131         STRLEN skip;
2132         UV n;
2133         if (utf)
2134             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2135         else {
2136             n = *(U8*)start;
2137             skip = 1;
2138         }
2139         retval += ((NV)n)/nshift;
2140         start += skip;
2141         nshift *= 1000;
2142     }
2143     return retval;
2144 }
2145
2146 /*
2147  * S_force_version
2148  * Forces the next token to be a version number.
2149  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2150  * and if "guessing" is TRUE, then no new token is created (and the caller
2151  * must use an alternative parsing method).
2152  */
2153
2154 STATIC char *
2155 S_force_version(pTHX_ char *s, int guessing)
2156 {
2157     dVAR;
2158     OP *version = NULL;
2159     char *d;
2160 #ifdef PERL_MAD
2161     I32 startoff = s - SvPVX(PL_linestr);
2162 #endif
2163
2164     PERL_ARGS_ASSERT_FORCE_VERSION;
2165
2166     s = SKIPSPACE1(s);
2167
2168     d = s;
2169     if (*d == 'v')
2170         d++;
2171     if (isDIGIT(*d)) {
2172         while (isDIGIT(*d) || *d == '_' || *d == '.')
2173             d++;
2174 #ifdef PERL_MAD
2175         if (PL_madskills) {
2176             start_force(PL_curforce);
2177             curmad('X', newSVpvn(s,d-s));
2178         }
2179 #endif
2180         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2181             SV *ver;
2182 #ifdef USE_LOCALE_NUMERIC
2183             char *loc = setlocale(LC_NUMERIC, "C");
2184 #endif
2185             s = scan_num(s, &pl_yylval);
2186 #ifdef USE_LOCALE_NUMERIC
2187             setlocale(LC_NUMERIC, loc);
2188 #endif
2189             version = pl_yylval.opval;
2190             ver = cSVOPx(version)->op_sv;
2191             if (SvPOK(ver) && !SvNIOK(ver)) {
2192                 SvUPGRADE(ver, SVt_PVNV);
2193                 SvNV_set(ver, str_to_version(ver));
2194                 SvNOK_on(ver);          /* hint that it is a version */
2195             }
2196         }
2197         else if (guessing) {
2198 #ifdef PERL_MAD
2199             if (PL_madskills) {
2200                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2201                 PL_nextwhite = 0;
2202                 s = SvPVX(PL_linestr) + startoff;
2203             }
2204 #endif
2205             return s;
2206         }
2207     }
2208
2209 #ifdef PERL_MAD
2210     if (PL_madskills && !version) {
2211         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2212         PL_nextwhite = 0;
2213         s = SvPVX(PL_linestr) + startoff;
2214     }
2215 #endif
2216     /* NOTE: The parser sees the package name and the VERSION swapped */
2217     start_force(PL_curforce);
2218     NEXTVAL_NEXTTOKE.opval = version;
2219     force_next(WORD);
2220
2221     return s;
2222 }
2223
2224 /*
2225  * S_force_strict_version
2226  * Forces the next token to be a version number using strict syntax rules.
2227  */
2228
2229 STATIC char *
2230 S_force_strict_version(pTHX_ char *s)
2231 {
2232     dVAR;
2233     OP *version = NULL;
2234 #ifdef PERL_MAD
2235     I32 startoff = s - SvPVX(PL_linestr);
2236 #endif
2237     const char *errstr = NULL;
2238
2239     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2240
2241     while (isSPACE(*s)) /* leading whitespace */
2242         s++;
2243
2244     if (is_STRICT_VERSION(s,&errstr)) {
2245         SV *ver = newSV(0);
2246         s = (char *)scan_version(s, ver, 0);
2247         version = newSVOP(OP_CONST, 0, ver);
2248     }
2249     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2250             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2251     {
2252         PL_bufptr = s;
2253         if (errstr)
2254             yyerror(errstr); /* version required */
2255         return s;
2256     }
2257
2258 #ifdef PERL_MAD
2259     if (PL_madskills && !version) {
2260         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2261         PL_nextwhite = 0;
2262         s = SvPVX(PL_linestr) + startoff;
2263     }
2264 #endif
2265     /* NOTE: The parser sees the package name and the VERSION swapped */
2266     start_force(PL_curforce);
2267     NEXTVAL_NEXTTOKE.opval = version;
2268     force_next(WORD);
2269
2270     return s;
2271 }
2272
2273 /*
2274  * S_tokeq
2275  * Tokenize a quoted string passed in as an SV.  It finds the next
2276  * chunk, up to end of string or a backslash.  It may make a new
2277  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2278  * turns \\ into \.
2279  */
2280
2281 STATIC SV *
2282 S_tokeq(pTHX_ SV *sv)
2283 {
2284     dVAR;
2285     register char *s;
2286     register char *send;
2287     register char *d;
2288     STRLEN len = 0;
2289     SV *pv = sv;
2290
2291     PERL_ARGS_ASSERT_TOKEQ;
2292
2293     if (!SvLEN(sv))
2294         goto finish;
2295
2296     s = SvPV_force(sv, len);
2297     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2298         goto finish;
2299     send = s + len;
2300     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2301     while (s < send && !(*s == '\\' && s[1] == '\\'))
2302         s++;
2303     if (s == send)
2304         goto finish;
2305     d = s;
2306     if ( PL_hints & HINT_NEW_STRING ) {
2307         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2308     }
2309     while (s < send) {
2310         if (*s == '\\') {
2311             if (s + 1 < send && (s[1] == '\\'))
2312                 s++;            /* all that, just for this */
2313         }
2314         *d++ = *s++;
2315     }
2316     *d = '\0';
2317     SvCUR_set(sv, d - SvPVX_const(sv));
2318   finish:
2319     if ( PL_hints & HINT_NEW_STRING )
2320        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2321     return sv;
2322 }
2323
2324 /*
2325  * Now come three functions related to double-quote context,
2326  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2327  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2328  * interact with PL_lex_state, and create fake ( ... ) argument lists
2329  * to handle functions and concatenation.
2330  * They assume that whoever calls them will be setting up a fake
2331  * join call, because each subthing puts a ',' after it.  This lets
2332  *   "lower \luPpEr"
2333  * become
2334  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2335  *
2336  * (I'm not sure whether the spurious commas at the end of lcfirst's
2337  * arguments and join's arguments are created or not).
2338  */
2339
2340 /*
2341  * S_sublex_start
2342  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2343  *
2344  * Pattern matching will set PL_lex_op to the pattern-matching op to
2345  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2346  *
2347  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2348  *
2349  * Everything else becomes a FUNC.
2350  *
2351  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2352  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2353  * call to S_sublex_push().
2354  */
2355
2356 STATIC I32
2357 S_sublex_start(pTHX)
2358 {
2359     dVAR;
2360     register const I32 op_type = pl_yylval.ival;
2361
2362     if (op_type == OP_NULL) {
2363         pl_yylval.opval = PL_lex_op;
2364         PL_lex_op = NULL;
2365         return THING;
2366     }
2367     if (op_type == OP_CONST || op_type == OP_READLINE) {
2368         SV *sv = tokeq(PL_lex_stuff);
2369
2370         if (SvTYPE(sv) == SVt_PVIV) {
2371             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2372             STRLEN len;
2373             const char * const p = SvPV_const(sv, len);
2374             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2375             SvREFCNT_dec(sv);
2376             sv = nsv;
2377         }
2378         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2379         PL_lex_stuff = NULL;
2380         /* Allow <FH> // "foo" */
2381         if (op_type == OP_READLINE)
2382             PL_expect = XTERMORDORDOR;
2383         return THING;
2384     }
2385     else if (op_type == OP_BACKTICK && PL_lex_op) {
2386         /* readpipe() vas overriden */
2387         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2388         pl_yylval.opval = PL_lex_op;
2389         PL_lex_op = NULL;
2390         PL_lex_stuff = NULL;
2391         return THING;
2392     }
2393
2394     PL_sublex_info.super_state = PL_lex_state;
2395     PL_sublex_info.sub_inwhat = (U16)op_type;
2396     PL_sublex_info.sub_op = PL_lex_op;
2397     PL_lex_state = LEX_INTERPPUSH;
2398
2399     PL_expect = XTERM;
2400     if (PL_lex_op) {
2401         pl_yylval.opval = PL_lex_op;
2402         PL_lex_op = NULL;
2403         return PMFUNC;
2404     }
2405     else
2406         return FUNC;
2407 }
2408
2409 /*
2410  * S_sublex_push
2411  * Create a new scope to save the lexing state.  The scope will be
2412  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2413  * to the uc, lc, etc. found before.
2414  * Sets PL_lex_state to LEX_INTERPCONCAT.
2415  */
2416
2417 STATIC I32
2418 S_sublex_push(pTHX)
2419 {
2420     dVAR;
2421     ENTER;
2422
2423     PL_lex_state = PL_sublex_info.super_state;
2424     SAVEBOOL(PL_lex_dojoin);
2425     SAVEI32(PL_lex_brackets);
2426     SAVEI32(PL_lex_allbrackets);
2427     SAVEI8(PL_lex_fakeeof);
2428     SAVEI32(PL_lex_casemods);
2429     SAVEI32(PL_lex_starts);
2430     SAVEI8(PL_lex_state);
2431     SAVEVPTR(PL_lex_inpat);
2432     SAVEI16(PL_lex_inwhat);
2433     SAVECOPLINE(PL_curcop);
2434     SAVEPPTR(PL_bufptr);
2435     SAVEPPTR(PL_bufend);
2436     SAVEPPTR(PL_oldbufptr);
2437     SAVEPPTR(PL_oldoldbufptr);
2438     SAVEPPTR(PL_last_lop);
2439     SAVEPPTR(PL_last_uni);
2440     SAVEPPTR(PL_linestart);
2441     SAVESPTR(PL_linestr);
2442     SAVEGENERICPV(PL_lex_brackstack);
2443     SAVEGENERICPV(PL_lex_casestack);
2444
2445     PL_linestr = PL_lex_stuff;
2446     PL_lex_stuff = NULL;
2447
2448     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2449         = SvPVX(PL_linestr);
2450     PL_bufend += SvCUR(PL_linestr);
2451     PL_last_lop = PL_last_uni = NULL;
2452     SAVEFREESV(PL_linestr);
2453
2454     PL_lex_dojoin = FALSE;
2455     PL_lex_brackets = 0;
2456     PL_lex_allbrackets = 0;
2457     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2458     Newx(PL_lex_brackstack, 120, char);
2459     Newx(PL_lex_casestack, 12, char);
2460     PL_lex_casemods = 0;
2461     *PL_lex_casestack = '\0';
2462     PL_lex_starts = 0;
2463     PL_lex_state = LEX_INTERPCONCAT;
2464     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2465
2466     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2467     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2468     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2469         PL_lex_inpat = PL_sublex_info.sub_op;
2470     else
2471         PL_lex_inpat = NULL;
2472
2473     return '(';
2474 }
2475
2476 /*
2477  * S_sublex_done
2478  * Restores lexer state after a S_sublex_push.
2479  */
2480
2481 STATIC I32
2482 S_sublex_done(pTHX)
2483 {
2484     dVAR;
2485     if (!PL_lex_starts++) {
2486         SV * const sv = newSVpvs("");
2487         if (SvUTF8(PL_linestr))
2488             SvUTF8_on(sv);
2489         PL_expect = XOPERATOR;
2490         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2491         return THING;
2492     }
2493
2494     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2495         PL_lex_state = LEX_INTERPCASEMOD;
2496         return yylex();
2497     }
2498
2499     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2500     assert(PL_lex_inwhat != OP_TRANSR);
2501     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2502         PL_linestr = PL_lex_repl;
2503         PL_lex_inpat = 0;
2504         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2505         PL_bufend += SvCUR(PL_linestr);
2506         PL_last_lop = PL_last_uni = NULL;
2507         SAVEFREESV(PL_linestr);
2508         PL_lex_dojoin = FALSE;
2509         PL_lex_brackets = 0;
2510         PL_lex_allbrackets = 0;
2511         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2512         PL_lex_casemods = 0;
2513         *PL_lex_casestack = '\0';
2514         PL_lex_starts = 0;
2515         if (SvEVALED(PL_lex_repl)) {
2516             PL_lex_state = LEX_INTERPNORMAL;
2517             PL_lex_starts++;
2518             /*  we don't clear PL_lex_repl here, so that we can check later
2519                 whether this is an evalled subst; that means we rely on the
2520                 logic to ensure sublex_done() is called again only via the
2521                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2522         }
2523         else {
2524             PL_lex_state = LEX_INTERPCONCAT;
2525             PL_lex_repl = NULL;
2526         }
2527         return ',';
2528     }
2529     else {
2530 #ifdef PERL_MAD
2531         if (PL_madskills) {
2532             if (PL_thiswhite) {
2533                 if (!PL_endwhite)
2534                     PL_endwhite = newSVpvs("");
2535                 sv_catsv(PL_endwhite, PL_thiswhite);
2536                 PL_thiswhite = 0;
2537             }
2538             if (PL_thistoken)
2539                 sv_setpvs(PL_thistoken,"");
2540             else
2541                 PL_realtokenstart = -1;
2542         }
2543 #endif
2544         LEAVE;
2545         PL_bufend = SvPVX(PL_linestr);
2546         PL_bufend += SvCUR(PL_linestr);
2547         PL_expect = XOPERATOR;
2548         PL_sublex_info.sub_inwhat = 0;
2549         return ')';
2550     }
2551 }
2552
2553 /*
2554   scan_const
2555
2556   Extracts a pattern, double-quoted string, or transliteration.  This
2557   is terrifying code.
2558
2559   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2560   processing a pattern (PL_lex_inpat is true), a transliteration
2561   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2562
2563   Returns a pointer to the character scanned up to.  If this has
2564   advanced from the start pointer supplied (i.e. if anything was
2565   successfully parsed), it will leave an OP for the substring scanned
2566   in pl_yylval.  The caller must intuit the reason for not parsing
2567   further by looking at the next characters herself.
2568
2569   In patterns:
2570     backslashes:
2571       constants: \N{NAME} only
2572       case and quoting: \U \Q \E
2573     stops on @ and $, but not for $ as tail anchor
2574
2575   In transliterations:
2576     characters are VERY literal, except for - not at the start or end
2577     of the string, which indicates a range. If the range is in bytes,
2578     scan_const expands the range to the full set of intermediate
2579     characters. If the range is in utf8, the hyphen is replaced with
2580     a certain range mark which will be handled by pmtrans() in op.c.
2581
2582   In double-quoted strings:
2583     backslashes:
2584       double-quoted style: \r and \n
2585       constants: \x31, etc.
2586       deprecated backrefs: \1 (in substitution replacements)
2587       case and quoting: \U \Q \E
2588     stops on @ and $
2589
2590   scan_const does *not* construct ops to handle interpolated strings.
2591   It stops processing as soon as it finds an embedded $ or @ variable
2592   and leaves it to the caller to work out what's going on.
2593
2594   embedded arrays (whether in pattern or not) could be:
2595       @foo, @::foo, @'foo, @{foo}, @$foo; plus @+, @- outside patterns.
2596
2597   $ in double-quoted strings must be the symbol of an embedded scalar.
2598
2599   $ in pattern could be $foo or could be tail anchor.  Assumption:
2600   it's a tail anchor if $ is the last thing in the string, or if it's
2601   followed by one of "()| \r\n\t"
2602
2603   \1 (backreferences) are turned into $1
2604
2605   The structure of the code is
2606       while (there's a character to process) {
2607           handle transliteration ranges
2608           skip regexp comments /(?#comment)/ and codes /(?{code})/
2609           skip #-initiated comments in //x patterns
2610           check for embedded arrays
2611           check for embedded scalars
2612           if (backslash) {
2613               deprecate \1 in substitution replacements
2614               handle string-changing backslashes \l \U \Q \E, etc.
2615               switch (what was escaped) {
2616                   handle \- in a transliteration (becomes a literal -)
2617                   if a pattern and not \N{, go treat as regular character
2618                   handle \132 and \o{24} (octal characters)
2619                   handle \x15 and \x{1234} (hex characters)
2620                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2621                   handle \cV (control characters)
2622                   handle printf-style backslashes (\f, \r, \n, etc)
2623               } (end switch)
2624               continue
2625           } (end if backslash)
2626           handle regular character
2627       } (end while character to read)
2628
2629 */
2630
2631 STATIC char *
2632 S_scan_const(pTHX_ char *start)
2633 {
2634     dVAR;
2635     register char *send = PL_bufend;            /* end of the constant */
2636     SV *sv = newSV(send - start);               /* sv for the constant.  See
2637                                                    note below on sizing. */
2638     register char *s = start;                   /* start of the constant */
2639     register char *d = SvPVX(sv);               /* destination for copies */
2640     bool dorange = FALSE;                       /* are we in a translit range? */
2641     bool didrange = FALSE;                      /* did we just finish a range? */
2642     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2643     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2644                                                    to be UTF8?  But, this can
2645                                                    show as true when the source
2646                                                    isn't utf8, as for example
2647                                                    when it is entirely composed
2648                                                    of hex constants */
2649
2650     /* Note on sizing:  The scanned constant is placed into sv, which is
2651      * initialized by newSV() assuming one byte of output for every byte of
2652      * input.  This routine expects newSV() to allocate an extra byte for a
2653      * trailing NUL, which this routine will append if it gets to the end of
2654      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2655      * CAPITAL LETTER A}), or more output than input if the constant ends up
2656      * recoded to utf8, but each time a construct is found that might increase
2657      * the needed size, SvGROW() is called.  Its size parameter each time is
2658      * based on the best guess estimate at the time, namely the length used so
2659      * far, plus the length the current construct will occupy, plus room for
2660      * the trailing NUL, plus one byte for every input byte still unscanned */
2661
2662     UV uv;
2663 #ifdef EBCDIC
2664     UV literal_endpoint = 0;
2665     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2666 #endif
2667
2668     PERL_ARGS_ASSERT_SCAN_CONST;
2669
2670     assert(PL_lex_inwhat != OP_TRANSR);
2671     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2672         /* If we are doing a trans and we know we want UTF8 set expectation */
2673         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2674         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2675     }
2676
2677
2678     while (s < send || dorange) {
2679
2680         /* get transliterations out of the way (they're most literal) */
2681         if (PL_lex_inwhat == OP_TRANS) {
2682             /* expand a range A-Z to the full set of characters.  AIE! */
2683             if (dorange) {
2684                 I32 i;                          /* current expanded character */
2685                 I32 min;                        /* first character in range */
2686                 I32 max;                        /* last character in range */
2687
2688 #ifdef EBCDIC
2689                 UV uvmax = 0;
2690 #endif
2691
2692                 if (has_utf8
2693 #ifdef EBCDIC
2694                     && !native_range
2695 #endif
2696                     ) {
2697                     char * const c = (char*)utf8_hop((U8*)d, -1);
2698                     char *e = d++;
2699                     while (e-- > c)
2700                         *(e + 1) = *e;
2701                     *c = (char)UTF_TO_NATIVE(0xff);
2702                     /* mark the range as done, and continue */
2703                     dorange = FALSE;
2704                     didrange = TRUE;
2705                     continue;
2706                 }
2707
2708                 i = d - SvPVX_const(sv);                /* remember current offset */
2709 #ifdef EBCDIC
2710                 SvGROW(sv,
2711                        SvLEN(sv) + (has_utf8 ?
2712                                     (512 - UTF_CONTINUATION_MARK +
2713                                      UNISKIP(0x100))
2714                                     : 256));
2715                 /* How many two-byte within 0..255: 128 in UTF-8,
2716                  * 96 in UTF-8-mod. */
2717 #else
2718                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2719 #endif
2720                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2721 #ifdef EBCDIC
2722                 if (has_utf8) {
2723                     int j;
2724                     for (j = 0; j <= 1; j++) {
2725                         char * const c = (char*)utf8_hop((U8*)d, -1);
2726                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2727                         if (j)
2728                             min = (U8)uv;
2729                         else if (uv < 256)
2730                             max = (U8)uv;
2731                         else {
2732                             max = (U8)0xff; /* only to \xff */
2733                             uvmax = uv; /* \x{100} to uvmax */
2734                         }
2735                         d = c; /* eat endpoint chars */
2736                      }
2737                 }
2738                else {
2739 #endif
2740                    d -= 2;              /* eat the first char and the - */
2741                    min = (U8)*d;        /* first char in range */
2742                    max = (U8)d[1];      /* last char in range  */
2743 #ifdef EBCDIC
2744                }
2745 #endif
2746
2747                 if (min > max) {
2748                     Perl_croak(aTHX_
2749                                "Invalid range \"%c-%c\" in transliteration operator",
2750                                (char)min, (char)max);
2751                 }
2752
2753 #ifdef EBCDIC
2754                 if (literal_endpoint == 2 &&
2755                     ((isLOWER(min) && isLOWER(max)) ||
2756                      (isUPPER(min) && isUPPER(max)))) {
2757                     if (isLOWER(min)) {
2758                         for (i = min; i <= max; i++)
2759                             if (isLOWER(i))
2760                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2761                     } else {
2762                         for (i = min; i <= max; i++)
2763                             if (isUPPER(i))
2764                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2765                     }
2766                 }
2767                 else
2768 #endif
2769                     for (i = min; i <= max; i++)
2770 #ifdef EBCDIC
2771                         if (has_utf8) {
2772                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2773                             if (UNI_IS_INVARIANT(ch))
2774                                 *d++ = (U8)i;
2775                             else {
2776                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2777                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2778                             }
2779                         }
2780                         else
2781 #endif
2782                             *d++ = (char)i;
2783
2784 #ifdef EBCDIC
2785                 if (uvmax) {
2786                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2787                     if (uvmax > 0x101)
2788                         *d++ = (char)UTF_TO_NATIVE(0xff);
2789                     if (uvmax > 0x100)
2790                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2791                 }
2792 #endif
2793
2794                 /* mark the range as done, and continue */
2795                 dorange = FALSE;
2796                 didrange = TRUE;
2797 #ifdef EBCDIC
2798                 literal_endpoint = 0;
2799 #endif
2800                 continue;
2801             }
2802
2803             /* range begins (ignore - as first or last char) */
2804             else if (*s == '-' && s+1 < send  && s != start) {
2805                 if (didrange) {
2806                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2807                 }
2808                 if (has_utf8
2809 #ifdef EBCDIC
2810                     && !native_range
2811 #endif
2812                     ) {
2813                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2814                     s++;
2815                     continue;
2816                 }
2817                 dorange = TRUE;
2818                 s++;
2819             }
2820             else {
2821                 didrange = FALSE;
2822 #ifdef EBCDIC
2823                 literal_endpoint = 0;
2824                 native_range = TRUE;
2825 #endif
2826             }
2827         }
2828
2829         /* if we get here, we're not doing a transliteration */
2830
2831         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2832            except for the last char, which will be done separately. */
2833         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2834             if (s[2] == '#') {
2835                 while (s+1 < send && *s != ')')
2836                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2837             }
2838             else if (s[2] == '{' /* This should match regcomp.c */
2839                     || (s[2] == '?' && s[3] == '{'))
2840             {
2841                 I32 count = 1;
2842                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2843                 char c;
2844
2845                 while (count && (c = *regparse)) {
2846                     if (c == '\\' && regparse[1])
2847                         regparse++;
2848                     else if (c == '{')
2849                         count++;
2850                     else if (c == '}')
2851                         count--;
2852                     regparse++;
2853                 }
2854                 if (*regparse != ')')
2855                     regparse--;         /* Leave one char for continuation. */
2856                 while (s < regparse)
2857                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2858             }
2859         }
2860
2861         /* likewise skip #-initiated comments in //x patterns */
2862         else if (*s == '#' && PL_lex_inpat &&
2863           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2864             while (s+1 < send && *s != '\n')
2865                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2866         }
2867
2868         /* check for embedded arrays
2869            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2870            */
2871         else if (*s == '@' && s[1]) {
2872             if (isALNUM_lazy_if(s+1,UTF))
2873                 break;
2874             if (strchr(":'{$", s[1]))
2875                 break;
2876             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2877                 break; /* in regexp, neither @+ nor @- are interpolated */
2878         }
2879
2880         /* check for embedded scalars.  only stop if we're sure it's a
2881            variable.
2882         */
2883         else if (*s == '$') {
2884             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2885                 break;
2886             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2887                 if (s[1] == '\\') {
2888                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2889                                    "Possible unintended interpolation of $\\ in regex");
2890                 }
2891                 break;          /* in regexp, $ might be tail anchor */
2892             }
2893         }
2894
2895         /* End of else if chain - OP_TRANS rejoin rest */
2896
2897         /* backslashes */
2898         if (*s == '\\' && s+1 < send) {
2899             char* e;    /* Can be used for ending '}', etc. */
2900
2901             s++;
2902
2903             /* warn on \1 - \9 in substitution replacements, but note that \11
2904              * is an octal; and \19 is \1 followed by '9' */
2905             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2906                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2907             {
2908                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2909                 *--s = '$';
2910                 break;
2911             }
2912
2913             /* string-change backslash escapes */
2914             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2915                 --s;
2916                 break;
2917             }
2918             /* In a pattern, process \N, but skip any other backslash escapes.
2919              * This is because we don't want to translate an escape sequence
2920              * into a meta symbol and have the regex compiler use the meta
2921              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2922              * in spite of this, we do have to process \N here while the proper
2923              * charnames handler is in scope.  See bugs #56444 and #62056.
2924              * There is a complication because \N in a pattern may also stand
2925              * for 'match a non-nl', and not mean a charname, in which case its
2926              * processing should be deferred to the regex compiler.  To be a
2927              * charname it must be followed immediately by a '{', and not look
2928              * like \N followed by a curly quantifier, i.e., not something like
2929              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2930              * quantifier */
2931             else if (PL_lex_inpat
2932                     && (*s != 'N'
2933                         || s[1] != '{'
2934                         || regcurly(s + 1)))
2935             {
2936                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2937                 goto default_action;
2938             }
2939
2940             switch (*s) {
2941
2942             /* quoted - in transliterations */
2943             case '-':
2944                 if (PL_lex_inwhat == OP_TRANS) {
2945                     *d++ = *s++;
2946                     continue;
2947                 }
2948                 /* FALL THROUGH */
2949             default:
2950                 {
2951                     if ((isALPHA(*s) || isDIGIT(*s)))
2952                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2953                                        "Unrecognized escape \\%c passed through",
2954                                        *s);
2955                     /* default action is to copy the quoted character */
2956                     goto default_action;
2957                 }
2958
2959             /* eg. \132 indicates the octal constant 0132 */
2960             case '0': case '1': case '2': case '3':
2961             case '4': case '5': case '6': case '7':
2962                 {
2963                     I32 flags = 0;
2964                     STRLEN len = 3;
2965                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2966                     s += len;
2967                 }
2968                 goto NUM_ESCAPE_INSERT;
2969
2970             /* eg. \o{24} indicates the octal constant \024 */
2971             case 'o':
2972                 {
2973                     STRLEN len;
2974                     const char* error;
2975
2976                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2977                     s += len;
2978                     if (! valid) {
2979                         yyerror(error);
2980                         continue;
2981                     }
2982                     goto NUM_ESCAPE_INSERT;
2983                 }
2984
2985             /* eg. \x24 indicates the hex constant 0x24 */
2986             case 'x':
2987                 ++s;
2988                 if (*s == '{') {
2989                     char* const e = strchr(s, '}');
2990                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2991                       PERL_SCAN_DISALLOW_PREFIX;
2992                     STRLEN len;
2993
2994                     ++s;
2995                     if (!e) {
2996                         yyerror("Missing right brace on \\x{}");
2997                         continue;
2998                     }
2999                     len = e - s;
3000                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3001                     s = e + 1;
3002                 }
3003                 else {
3004                     {
3005                         STRLEN len = 2;
3006                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3007                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3008                         s += len;
3009                     }
3010                 }
3011
3012               NUM_ESCAPE_INSERT:
3013                 /* Insert oct or hex escaped character.  There will always be
3014                  * enough room in sv since such escapes will be longer than any
3015                  * UTF-8 sequence they can end up as, except if they force us
3016                  * to recode the rest of the string into utf8 */
3017
3018                 /* Here uv is the ordinal of the next character being added in
3019                  * unicode (converted from native). */
3020                 if (!UNI_IS_INVARIANT(uv)) {
3021                     if (!has_utf8 && uv > 255) {
3022                         /* Might need to recode whatever we have accumulated so
3023                          * far if it contains any chars variant in utf8 or
3024                          * utf-ebcdic. */
3025
3026                         SvCUR_set(sv, d - SvPVX_const(sv));
3027                         SvPOK_on(sv);
3028                         *d = '\0';
3029                         /* See Note on sizing above.  */
3030                         sv_utf8_upgrade_flags_grow(sv,
3031                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3032                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3033                         d = SvPVX(sv) + SvCUR(sv);
3034                         has_utf8 = TRUE;
3035                     }
3036
3037                     if (has_utf8) {
3038                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3039                         if (PL_lex_inwhat == OP_TRANS &&
3040                             PL_sublex_info.sub_op) {
3041                             PL_sublex_info.sub_op->op_private |=
3042                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3043                                              : OPpTRANS_TO_UTF);
3044                         }
3045 #ifdef EBCDIC
3046                         if (uv > 255 && !dorange)
3047                             native_range = FALSE;
3048 #endif
3049                     }
3050                     else {
3051                         *d++ = (char)uv;
3052                     }
3053                 }
3054                 else {
3055                     *d++ = (char) uv;
3056                 }
3057                 continue;
3058
3059             case 'N':
3060                 /* In a non-pattern \N must be a named character, like \N{LATIN
3061                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3062                  * mean to match a non-newline.  For non-patterns, named
3063                  * characters are converted to their string equivalents. In
3064                  * patterns, named characters are not converted to their
3065                  * ultimate forms for the same reasons that other escapes
3066                  * aren't.  Instead, they are converted to the \N{U+...} form
3067                  * to get the value from the charnames that is in effect right
3068                  * now, while preserving the fact that it was a named character
3069                  * so that the regex compiler knows this */
3070
3071                 /* This section of code doesn't generally use the
3072                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3073                  * a close examination of this macro and determined it is a
3074                  * no-op except on utfebcdic variant characters.  Every
3075                  * character generated by this that would normally need to be
3076                  * enclosed by this macro is invariant, so the macro is not
3077                  * needed, and would complicate use of copy().  XXX There are
3078                  * other parts of this file where the macro is used
3079                  * inconsistently, but are saved by it being a no-op */
3080
3081                 /* The structure of this section of code (besides checking for
3082                  * errors and upgrading to utf8) is:
3083                  *  Further disambiguate between the two meanings of \N, and if
3084                  *      not a charname, go process it elsewhere
3085                  *  If of form \N{U+...}, pass it through if a pattern;
3086                  *      otherwise convert to utf8
3087                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3088                  *  pattern; otherwise convert to utf8 */
3089
3090                 /* Here, s points to the 'N'; the test below is guaranteed to
3091                  * succeed if we are being called on a pattern as we already
3092                  * know from a test above that the next character is a '{'.
3093                  * On a non-pattern \N must mean 'named sequence, which
3094                  * requires braces */
3095                 s++;
3096                 if (*s != '{') {
3097                     yyerror("Missing braces on \\N{}");
3098                     continue;
3099                 }
3100                 s++;
3101
3102                 /* If there is no matching '}', it is an error. */
3103                 if (! (e = strchr(s, '}'))) {
3104                     if (! PL_lex_inpat) {
3105                         yyerror("Missing right brace on \\N{}");
3106                     } else {
3107                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3108                     }
3109                     continue;
3110                 }
3111
3112                 /* Here it looks like a named character */
3113
3114                 if (PL_lex_inpat) {
3115
3116                     /* XXX This block is temporary code.  \N{} implies that the
3117                      * pattern is to have Unicode semantics, and therefore
3118                      * currently has to be encoded in utf8.  By putting it in
3119                      * utf8 now, we save a whole pass in the regular expression
3120                      * compiler.  Once that code is changed so Unicode
3121                      * semantics doesn't necessarily have to be in utf8, this
3122                      * block should be removed.  However, the code that parses
3123                      * the output of this would have to be changed to not
3124                      * necessarily expect utf8 */
3125                     if (!has_utf8) {
3126                         SvCUR_set(sv, d - SvPVX_const(sv));
3127                         SvPOK_on(sv);
3128                         *d = '\0';
3129                         /* See Note on sizing above.  */
3130                         sv_utf8_upgrade_flags_grow(sv,
3131                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3132                                         /* 5 = '\N{' + cur char + NUL */
3133                                         (STRLEN)(send - s) + 5);
3134                         d = SvPVX(sv) + SvCUR(sv);
3135                         has_utf8 = TRUE;
3136                     }
3137                 }
3138
3139                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3140                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3141                                 | PERL_SCAN_DISALLOW_PREFIX;
3142                     STRLEN len;
3143
3144                     /* For \N{U+...}, the '...' is a unicode value even on
3145                      * EBCDIC machines */
3146                     s += 2;         /* Skip to next char after the 'U+' */
3147                     len = e - s;
3148                     uv = grok_hex(s, &len, &flags, NULL);
3149                     if (len == 0 || len != (STRLEN)(e - s)) {
3150                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3151                         s = e + 1;
3152                         continue;
3153                     }
3154
3155                     if (PL_lex_inpat) {
3156
3157                         /* On non-EBCDIC platforms, pass through to the regex
3158                          * compiler unchanged.  The reason we evaluated the
3159                          * number above is to make sure there wasn't a syntax
3160                          * error.  But on EBCDIC we convert to native so
3161                          * downstream code can continue to assume it's native
3162                          */
3163                         s -= 5;     /* Include the '\N{U+' */
3164 #ifdef EBCDIC
3165                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3166                                                                and the \0 */
3167                                     "\\N{U+%X}",
3168                                     (unsigned int) UNI_TO_NATIVE(uv));
3169 #else
3170                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3171                         d += e - s + 1;
3172 #endif
3173                     }
3174                     else {  /* Not a pattern: convert the hex to string */
3175
3176                          /* If destination is not in utf8, unconditionally
3177                           * recode it to be so.  This is because \N{} implies
3178                           * Unicode semantics, and scalars have to be in utf8
3179                           * to guarantee those semantics */
3180                         if (! has_utf8) {
3181                             SvCUR_set(sv, d - SvPVX_const(sv));
3182                             SvPOK_on(sv);
3183                             *d = '\0';
3184                             /* See Note on sizing above.  */
3185                             sv_utf8_upgrade_flags_grow(
3186                                         sv,
3187                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3188                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3189                             d = SvPVX(sv) + SvCUR(sv);
3190                             has_utf8 = TRUE;
3191                         }
3192
3193                         /* Add the string to the output */
3194                         if (UNI_IS_INVARIANT(uv)) {
3195                             *d++ = (char) uv;
3196                         }
3197                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3198                     }
3199                 }
3200                 else { /* Here is \N{NAME} but not \N{U+...}. */
3201
3202                     SV *res;            /* result from charnames */
3203                     const char *str;    /* the string in 'res' */
3204                     STRLEN len;         /* its length */
3205
3206                     /* Get the value for NAME */
3207                     res = newSVpvn(s, e - s);
3208                     res = new_constant( NULL, 0, "charnames",
3209                                         /* includes all of: \N{...} */
3210                                         res, NULL, s - 3, e - s + 4 );
3211
3212                     /* Most likely res will be in utf8 already since the
3213                      * standard charnames uses pack U, but a custom translator
3214                      * can leave it otherwise, so make sure.  XXX This can be
3215                      * revisited to not have charnames use utf8 for characters
3216                      * that don't need it when regexes don't have to be in utf8
3217                      * for Unicode semantics.  If doing so, remember EBCDIC */
3218                     sv_utf8_upgrade(res);
3219                     str = SvPV_const(res, len);
3220
3221                     /* Don't accept malformed input */
3222                     if (! is_utf8_string((U8 *) str, len)) {
3223                         yyerror("Malformed UTF-8 returned by \\N");
3224                     }
3225                     else if (PL_lex_inpat) {
3226
3227                         if (! len) { /* The name resolved to an empty string */
3228                             Copy("\\N{}", d, 4, char);
3229                             d += 4;
3230                         }
3231                         else {
3232                             /* In order to not lose information for the regex
3233                             * compiler, pass the result in the specially made
3234                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3235                             * the code points in hex of each character
3236                             * returned by charnames */
3237
3238                             const char *str_end = str + len;
3239                             STRLEN char_length;     /* cur char's byte length */
3240                             STRLEN output_length;   /* and the number of bytes
3241                                                        after this is translated
3242                                                        into hex digits */
3243                             const STRLEN off = d - SvPVX_const(sv);
3244
3245                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3246                              * max('U+', '.'); and 1 for NUL */
3247                             char hex_string[2 * UTF8_MAXBYTES + 5];
3248
3249                             /* Get the first character of the result. */
3250                             U32 uv = utf8n_to_uvuni((U8 *) str,
3251                                                     len,
3252                                                     &char_length,
3253                                                     UTF8_ALLOW_ANYUV);
3254
3255                             /* The call to is_utf8_string() above hopefully
3256                              * guarantees that there won't be an error.  But
3257                              * it's easy here to make sure.  The function just
3258                              * above warns and returns 0 if invalid utf8, but
3259                              * it can also return 0 if the input is validly a
3260                              * NUL. Disambiguate */
3261                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3262                                 uv = UNICODE_REPLACEMENT;
3263                             }
3264
3265                             /* Convert first code point to hex, including the
3266                              * boiler plate before it.  For all these, we
3267                              * convert to native format so that downstream code
3268                              * can continue to assume the input is native */
3269                             output_length =
3270                                 my_snprintf(hex_string, sizeof(hex_string),
3271                                             "\\N{U+%X",
3272                                             (unsigned int) UNI_TO_NATIVE(uv));
3273
3274                             /* Make sure there is enough space to hold it */
3275                             d = off + SvGROW(sv, off
3276                                                  + output_length
3277                                                  + (STRLEN)(send - e)
3278                                                  + 2);  /* '}' + NUL */
3279                             /* And output it */
3280                             Copy(hex_string, d, output_length, char);
3281                             d += output_length;
3282
3283                             /* For each subsequent character, append dot and
3284                              * its ordinal in hex */
3285                             while ((str += char_length) < str_end) {
3286                                 const STRLEN off = d - SvPVX_const(sv);
3287                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3288                                                         str_end - str,
3289                                                         &char_length,
3290                                                         UTF8_ALLOW_ANYUV);
3291                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3292                                     uv = UNICODE_REPLACEMENT;
3293                                 }
3294
3295                                 output_length =
3296                                     my_snprintf(hex_string, sizeof(hex_string),
3297                                             ".%X",
3298                                             (unsigned int) UNI_TO_NATIVE(uv));
3299
3300                                 d = off + SvGROW(sv, off
3301                                                      + output_length
3302                                                      + (STRLEN)(send - e)
3303                                                      + 2);      /* '}' +  NUL */
3304                                 Copy(hex_string, d, output_length, char);
3305                                 d += output_length;
3306                             }
3307
3308                             *d++ = '}'; /* Done.  Add the trailing brace */
3309                         }
3310                     }
3311                     else { /* Here, not in a pattern.  Convert the name to a
3312                             * string. */
3313
3314                          /* If destination is not in utf8, unconditionally
3315                           * recode it to be so.  This is because \N{} implies
3316                           * Unicode semantics, and scalars have to be in utf8
3317                           * to guarantee those semantics */
3318                         if (! has_utf8) {
3319                             SvCUR_set(sv, d - SvPVX_const(sv));
3320                             SvPOK_on(sv);
3321                             *d = '\0';
3322                             /* See Note on sizing above.  */
3323                             sv_utf8_upgrade_flags_grow(sv,
3324                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3325                                                 len + (STRLEN)(send - s) + 1);
3326                             d = SvPVX(sv) + SvCUR(sv);
3327                             has_utf8 = TRUE;
3328                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3329
3330                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3331                              * set correctly here). */
3332                             const STRLEN off = d - SvPVX_const(sv);
3333                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3334                         }
3335                         Copy(str, d, len, char);
3336                         d += len;
3337                     }
3338                     SvREFCNT_dec(res);
3339
3340                     /* Deprecate non-approved name syntax */
3341                     if (ckWARN_d(WARN_DEPRECATED)) {
3342                         bool problematic = FALSE;
3343                         char* i = s;
3344
                        /* For non-utf8 input, look to see that the first
                         * character is an alpha, then loop through the rest
                         * checking that each is a continuation */
3348                         if (! this_utf8) {
3349                             if (! isALPHAU(*i)) problematic = TRUE;
3350                             else for (i = s + 1; i < e; i++) {
3351                                 if (isCHARNAME_CONT(*i)) continue;
3352                                 problematic = TRUE;
3353                                 break;
3354                             }
3355                         }
3356                         else {
3357                             /* Similarly for utf8.  For invariants can check
3358                              * directly.  We accept anything above the latin1
3359                              * range because it is immaterial to Perl if it is
3360                              * correct or not, and is expensive to check.  But
3361                              * it is fairly easy in the latin1 range to convert
3362                              * the variants into a single character and check
3363                              * those */
3364                             if (UTF8_IS_INVARIANT(*i)) {
3365                                 if (! isALPHAU(*i)) problematic = TRUE;
3366                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3367                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3368                                                                             *(i+1)))))
3369                                 {
3370                                     problematic = TRUE;
3371                                 }
3372                             }
3373                             if (! problematic) for (i = s + UTF8SKIP(s);
3374                                                     i < e;
3375                                                     i+= UTF8SKIP(i))
3376                             {
3377                                 if (UTF8_IS_INVARIANT(*i)) {
3378                                     if (isCHARNAME_CONT(*i)) continue;
3379                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3380                                     continue;
3381                                 } else if (isCHARNAME_CONT(
3382                                             UNI_TO_NATIVE(
3383                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3384                                 {
3385                                     continue;
3386                                 }
3387                                 problematic = TRUE;
3388                                 break;
3389                             }
3390                         }
3391                         if (problematic) {
3392                             /* The e-i passed to the final %.*s makes sure that
3393                              * should the trailing NUL be missing that this
3394                              * print won't run off the end of the string */
3395                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3396                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3397                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3398                         }
3399                     }
3400                 } /* End \N{NAME} */
3401 #ifdef EBCDIC
3402                 if (!dorange)
3403                     native_range = FALSE; /* \N{} is defined to be Unicode */
3404 #endif
3405                 s = e + 1;  /* Point to just after the '}' */
3406                 continue;
3407
3408             /* \c is a control character */
3409             case 'c':
3410                 s++;
3411                 if (s < send) {
3412                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3413                 }
3414                 else {
3415                     yyerror("Missing control char name in \\c");
3416                 }
3417                 continue;
3418
3419             /* printf-style backslashes, formfeeds, newlines, etc */
3420             case 'b':
3421                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3422                 break;
3423             case 'n':
3424                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3425                 break;
3426             case 'r':
3427                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3428                 break;
3429             case 'f':
3430                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3431                 break;
3432             case 't':
3433                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3434                 break;
3435             case 'e':
3436                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3437                 break;
3438             case 'a':
3439                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3440                 break;
3441             } /* end switch */
3442
3443             s++;
3444             continue;
3445         } /* end if (backslash) */
3446 #ifdef EBCDIC
3447         else
3448             literal_endpoint++;
3449 #endif
3450
3451     default_action:
3452         /* If we started with encoded form, or already know we want it,
3453            then encode the next character */
3454         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3455             STRLEN len  = 1;
3456
3457
3458             /* One might think that it is wasted effort in the case of the
3459              * source being utf8 (this_utf8 == TRUE) to take the next character
3460              * in the source, convert it to an unsigned value, and then convert
3461              * it back again.  But the source has not been validated here.  The
3462              * routine that does the conversion checks for errors like
3463              * malformed utf8 */
3464
3465             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3466             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3467             if (!has_utf8) {
3468                 SvCUR_set(sv, d - SvPVX_const(sv));
3469                 SvPOK_on(sv);
3470                 *d = '\0';
3471                 /* See Note on sizing above.  */
3472                 sv_utf8_upgrade_flags_grow(sv,
3473                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3474                                         need + (STRLEN)(send - s) + 1);
3475                 d = SvPVX(sv) + SvCUR(sv);
3476                 has_utf8 = TRUE;
3477             } else if (need > len) {
3478                 /* encoded value larger than old, may need extra space (NOTE:
3479                  * SvCUR() is not set correctly here).   See Note on sizing
3480                  * above.  */
3481                 const STRLEN off = d - SvPVX_const(sv);
3482                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3483             }
3484             s += len;
3485
3486             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3487 #ifdef EBCDIC
3488             if (uv > 255 && !dorange)
3489                 native_range = FALSE;
3490 #endif
3491         }
3492         else {
3493             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3494         }
3495     } /* while loop to process each character */
3496
3497     /* terminate the string and set up the sv */
3498     *d = '\0';
3499     SvCUR_set(sv, d - SvPVX_const(sv));
3500     if (SvCUR(sv) >= SvLEN(sv))
3501         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3502
3503     SvPOK_on(sv);
3504     if (PL_encoding && !has_utf8) {
3505         sv_recode_to_utf8(sv, PL_encoding);
3506         if (SvUTF8(sv))
3507             has_utf8 = TRUE;
3508     }
3509     if (has_utf8) {
3510         SvUTF8_on(sv);
3511         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3512             PL_sublex_info.sub_op->op_private |=
3513                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3514         }
3515     }
3516
3517     /* shrink the sv if we allocated more than we used */
3518     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3519         SvPV_shrink_to_cur(sv);
3520     }
3521
3522     /* return the substring (via pl_yylval) only if we parsed anything */
3523     if (s > PL_bufptr) {
3524         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3525             const char *const key = PL_lex_inpat ? "qr" : "q";
3526             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3527             const char *type;
3528             STRLEN typelen;
3529
3530             if (PL_lex_inwhat == OP_TRANS) {
3531                 type = "tr";
3532                 typelen = 2;
3533             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3534                 type = "s";
3535                 typelen = 1;
3536             } else  {
3537                 type = "qq";
3538                 typelen = 2;
3539             }
3540
3541             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3542                                 type, typelen);
3543         }
3544         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3545     } else
3546         SvREFCNT_dec(sv);
3547     return s;
3548 }
3549
3550 /* S_intuit_more
3551  * Returns TRUE if there's more to the expression (e.g., a subscript),
3552  * FALSE otherwise.
3553  *
3554  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3555  *
3556  * ->[ and ->{ return TRUE
3557  * { and [ outside a pattern are always subscripts, so return TRUE
3558  * if we're outside a pattern and it's not { or [, then return FALSE
3559  * if we're in a pattern and the first char is a {
3560  *   {4,5} (any digits around the comma) returns FALSE
3561  * if we're in a pattern and the first char is a [
3562  *   [] returns FALSE
3563  *   [SOMETHING] has a funky algorithm to decide whether it's a
3564  *      character class or not.  It has to deal with things like
3565  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3566  * anything else returns TRUE
3567  */
3568
3569 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3570
3571 STATIC int
3572 S_intuit_more(pTHX_ register char *s)
3573 {
3574     dVAR;
3575
3576     PERL_ARGS_ASSERT_INTUIT_MORE;
3577
3578     if (PL_lex_brackets)
3579         return TRUE;
3580     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3581         return TRUE;
3582     if (*s != '{' && *s != '[')
3583         return FALSE;
3584     if (!PL_lex_inpat)
3585         return TRUE;
3586
3587     /* In a pattern, so maybe we have {n,m}. */
3588     if (*s == '{') {
3589         if (regcurly(s)) {
3590             return FALSE;
3591         }
3592         return TRUE;
3593     }
3594
3595     /* On the other hand, maybe we have a character class */
3596
3597     s++;
3598     if (*s == ']' || *s == '^')
3599         return FALSE;
3600     else {
3601         /* this is terrifying, and it works */
3602         int weight = 2;         /* let's weigh the evidence */
3603         char seen[256];
3604         unsigned char un_char = 255, last_un_char;
3605         const char * const send = strchr(s,']');
3606         char tmpbuf[sizeof PL_tokenbuf * 4];
3607
3608         if (!send)              /* has to be an expression */
3609             return TRUE;
3610
3611         Zero(seen,256,char);
3612         if (*s == '$')
3613             weight -= 3;
3614         else if (isDIGIT(*s)) {
3615             if (s[1] != ']') {
3616                 if (isDIGIT(s[1]) && s[2] == ']')
3617                     weight -= 10;
3618             }
3619             else
3620                 weight -= 100;
3621         }
3622         for (; s < send; s++) {
3623             last_un_char = un_char;
3624             un_char = (unsigned char)*s;
3625             switch (*s) {
3626             case '@':
3627             case '&':
3628             case '$':
3629                 weight -= seen[un_char] * 10;
3630                 if (isALNUM_lazy_if(s+1,UTF)) {
3631                     int len;
3632                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3633                     len = (int)strlen(tmpbuf);
3634                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3635                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3636                         weight -= 100;
3637                     else
3638                         weight -= 10;
3639                 }
3640                 else if (*s == '$' && s[1] &&
3641                   strchr("[#!%*<>()-=",s[1])) {
3642                     if (/*{*/ strchr("])} =",s[2]))
3643                         weight -= 10;
3644                     else
3645                         weight -= 1;
3646                 }
3647                 break;
3648             case '\\':
3649                 un_char = 254;
3650                 if (s[1]) {
3651                     if (strchr("wds]",s[1]))
3652                         weight += 100;
3653                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3654                         weight += 1;
3655                     else if (strchr("rnftbxcav",s[1]))
3656                         weight += 40;
3657                     else if (isDIGIT(s[1])) {
3658                         weight += 40;
3659                         while (s[1] && isDIGIT(s[1]))
3660                             s++;
3661                     }
3662                 }
3663                 else
3664                     weight += 100;
3665                 break;
3666             case '-':
3667                 if (s[1] == '\\')
3668                     weight += 50;
3669                 if (strchr("aA01! ",last_un_char))
3670                     weight += 30;
3671                 if (strchr("zZ79~",s[1]))
3672                     weight += 30;
3673                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3674                     weight -= 5;        /* cope with negative subscript */
3675                 break;
3676             default:
3677                 if (!isALNUM(last_un_char)
3678                     && !(last_un_char == '$' || last_un_char == '@'
3679                          || last_un_char == '&')
3680                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3681                     char *d = tmpbuf;
3682                     while (isALPHA(*s))
3683                         *d++ = *s++;
3684                     *d = '\0';
3685                     if (keyword(tmpbuf, d - tmpbuf, 0))
3686                         weight -= 150;
3687                 }
3688                 if (un_char == last_un_char + 1)
3689                     weight += 5;
3690                 weight -= seen[un_char];
3691                 break;
3692             }
3693             seen[un_char]++;
3694         }
3695         if (weight >= 0)        /* probably a character class */
3696             return FALSE;
3697     }
3698
3699     return TRUE;
3700 }
3701
3702 /*
3703  * S_intuit_method
3704  *
3705  * Does all the checking to disambiguate
3706  *   foo bar
3707  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3708  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3709  *
3710  * First argument is the stuff after the first token, e.g. "bar".
3711  *
3712  * Not a method if bar is a filehandle.
3713  * Not a method if foo is a subroutine prototyped to take a filehandle.
3714  * Not a method if it's really "Foo $bar"
3715  * Method if it's "foo $bar"
3716  * Not a method if it's really "print foo $bar"
3717  * Method if it's really "foo package::" (interpreted as package->foo)
3718  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3719  * Not a method if bar is a filehandle or package, but is quoted with
3720  *   =>
3721  */
3722
3723 STATIC int
3724 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3725 {
3726     dVAR;
3727     char *s = start + (*start == '$');
3728     char tmpbuf[sizeof PL_tokenbuf];
3729     STRLEN len;
3730     GV* indirgv;
3731 #ifdef PERL_MAD
3732     int soff;
3733 #endif
3734
3735     PERL_ARGS_ASSERT_INTUIT_METHOD;
3736
3737     if (gv) {
3738         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3739             return 0;
3740         if (cv) {
3741             if (SvPOK(cv)) {
3742                 const char *proto = CvPROTO(cv);
3743                 if (proto) {
3744                     if (*proto == ';')
3745                         proto++;
3746                     if (*proto == '*')
3747                         return 0;
3748                 }
3749             }
3750         } else
3751             gv = NULL;
3752     }
3753     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3754     /* start is the beginning of the possible filehandle/object,
3755      * and s is the end of it
3756      * tmpbuf is a copy of it
3757      */
3758
3759     if (*start == '$') {
3760         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3761                 isUPPER(*PL_tokenbuf))
3762             return 0;
3763 #ifdef PERL_MAD
3764         len = start - SvPVX(PL_linestr);
3765 #endif
3766         s = PEEKSPACE(s);
3767 #ifdef PERL_MAD
3768         start = SvPVX(PL_linestr) + len;
3769 #endif
3770         PL_bufptr = start;
3771         PL_expect = XREF;
3772         return *s == '(' ? FUNCMETH : METHOD;
3773     }
3774     if (!keyword(tmpbuf, len, 0)) {
3775         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3776             len -= 2;
3777             tmpbuf[len] = '\0';
3778 #ifdef PERL_MAD
3779             soff = s - SvPVX(PL_linestr);
3780 #endif
3781             goto bare_package;
3782         }
3783         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3784         if (indirgv && GvCVu(indirgv))
3785             return 0;
3786         /* filehandle or package name makes it a method */
3787         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3788 #ifdef PERL_MAD
3789             soff = s - SvPVX(PL_linestr);
3790 #endif
3791             s = PEEKSPACE(s);
3792             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3793                 return 0;       /* no assumptions -- "=>" quotes bareword */
3794       bare_package:
3795             start_force(PL_curforce);
3796             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3797                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3798             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3799             if (PL_madskills)
3800                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3801                                                             ( UTF ? SVf_UTF8 : 0 )));
3802             PL_expect = XTERM;
3803             force_next(WORD);
3804             PL_bufptr = s;
3805 #ifdef PERL_MAD
3806             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3807 #endif
3808             return *s == '(' ? FUNCMETH : METHOD;
3809         }
3810     }
3811     return 0;
3812 }
3813
3814 /* Encoded script support. filter_add() effectively inserts a
3815  * 'pre-processing' function into the current source input stream.
3816  * Note that the filter function only applies to the current source file
3817  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3818  *
3819  * The datasv parameter (which may be NULL) can be used to pass
3820  * private data to this instance of the filter. The filter function
3821  * can recover the SV using the FILTER_DATA macro and use it to
3822  * store private buffers and state information.
3823  *
3824  * The supplied datasv parameter is upgraded to a PVIO type
3825  * and the IoDIRP/IoANY field is used to store the function pointer,
3826  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3827  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3828  * private use must be set using malloc'd pointers.
3829  */
3830
3831 SV *
3832 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3833 {
3834     dVAR;
3835     if (!funcp)
3836         return NULL;
3837
3838     if (!PL_parser)
3839         return NULL;
3840
3841     if (!PL_rsfp_filters)
3842         PL_rsfp_filters = newAV();
3843     if (!datasv)
3844         datasv = newSV(0);
3845     SvUPGRADE(datasv, SVt_PVIO);
3846     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3847     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3848     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3849                           FPTR2DPTR(void *, IoANY(datasv)),
3850                           SvPV_nolen(datasv)));
3851     av_unshift(PL_rsfp_filters, 1);
3852     av_store(PL_rsfp_filters, 0, datasv) ;
3853     return(datasv);
3854 }
3855
3856
3857 /* Delete most recently added instance of this filter function. */
3858 void
3859 Perl_filter_del(pTHX_ filter_t funcp)
3860 {
3861     dVAR;
3862     SV *datasv;
3863
3864     PERL_ARGS_ASSERT_FILTER_DEL;
3865
3866 #ifdef DEBUGGING
3867     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3868                           FPTR2DPTR(void*, funcp)));
3869 #endif
3870     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3871         return;
3872     /* if filter is on top of stack (usual case) just pop it off */
3873     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3874     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3875         sv_free(av_pop(PL_rsfp_filters));
3876
3877         return;
3878     }
3879     /* we need to search for the correct entry and clear it     */
3880     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3881 }
3882
3883
3884 /* Invoke the idxth filter function for the current rsfp.        */
3885 /* maxlen 0 = read one text line */
3886 I32
3887 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3888 {
3889     dVAR;
3890     filter_t funcp;
3891     SV *datasv = NULL;
3892     /* This API is bad. It should have been using unsigned int for maxlen.
3893        Not sure if we want to change the API, but if not we should sanity
3894        check the value here.  */
3895     const unsigned int correct_length
3896         = maxlen < 0 ?
3897 #ifdef PERL_MICRO
3898         0x7FFFFFFF
3899 #else
3900         INT_MAX
3901 #endif
3902         : maxlen;
3903
3904     PERL_ARGS_ASSERT_FILTER_READ;
3905
3906     if (!PL_parser || !PL_rsfp_filters)
3907         return -1;
3908     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3909         /* Provide a default input filter to make life easy.    */
3910         /* Note that we append to the line. This is handy.      */
3911         DEBUG_P(PerlIO_printf(Perl_debug_log,
3912                               "filter_read %d: from rsfp\n", idx));
3913         if (correct_length) {
3914             /* Want a block */
3915             int len ;
3916             const int old_len = SvCUR(buf_sv);
3917
3918             /* ensure buf_sv is large enough */
3919             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3920             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3921                                    correct_length)) <= 0) {
3922                 if (PerlIO_error(PL_rsfp))
3923                     return -1;          /* error */
3924                 else
3925                     return 0 ;          /* end of file */
3926             }
3927             SvCUR_set(buf_sv, old_len + len) ;
3928             SvPVX(buf_sv)[old_len + len] = '\0';
3929         } else {
3930             /* Want a line */
3931             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3932                 if (PerlIO_error(PL_rsfp))
3933                     return -1;          /* error */
3934                 else
3935                     return 0 ;          /* end of file */
3936             }
3937         }
3938         return SvCUR(buf_sv);
3939     }
3940     /* Skip this filter slot if filter has been deleted */
3941     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3942         DEBUG_P(PerlIO_printf(Perl_debug_log,
3943                               "filter_read %d: skipped (filter deleted)\n",
3944                               idx));
3945         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3946     }
3947     /* Get function pointer hidden within datasv        */
3948     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3949     DEBUG_P(PerlIO_printf(Perl_debug_log,
3950                           "filter_read %d: via function %p (%s)\n",
3951                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3952     /* Call function. The function is expected to       */
3953     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3954     /* Return: <0:error, =0:eof, >0:not eof             */
3955     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3956 }
3957
3958 STATIC char *
3959 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3960 {
3961     dVAR;
3962
3963     PERL_ARGS_ASSERT_FILTER_GETS;
3964
3965 #ifdef PERL_CR_FILTER
3966     if (!PL_rsfp_filters) {
3967         filter_add(S_cr_textfilter,NULL);
3968     }
3969 #endif
3970     if (PL_rsfp_filters) {
3971         if (!append)
3972             SvCUR_set(sv, 0);   /* start with empty line        */
3973         if (FILTER_READ(0, sv, 0) > 0)
3974             return ( SvPVX(sv) ) ;
3975         else
3976             return NULL ;
3977     }
3978     else
3979         return (sv_gets(sv, PL_rsfp, append));
3980 }
3981
3982 STATIC HV *
3983 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3984 {
3985     dVAR;
3986     GV *gv;
3987
3988     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3989
3990     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3991         return PL_curstash;
3992
3993     if (len > 2 &&
3994         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3995         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
3996     {
3997         return GvHV(gv);                        /* Foo:: */
3998     }
3999
4000     /* use constant CLASS => 'MyClass' */
4001     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4002     if (gv && GvCV(gv)) {
4003         SV * const sv = cv_const_sv(GvCV(gv));
4004         if (sv)
4005             pkgname = SvPV_const(sv, len);
4006     }
4007
4008     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4009 }
4010
4011 /*
4012  * S_readpipe_override
4013  * Check whether readpipe() is overridden, and generates the appropriate
4014  * optree, provided sublex_start() is called afterwards.
4015  */
4016 STATIC void
4017 S_readpipe_override(pTHX)
4018 {
4019     GV **gvp;
4020     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4021     pl_yylval.ival = OP_BACKTICK;
4022     if ((gv_readpipe
4023                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4024             ||
4025             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4026              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4027              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4028     {
4029         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4030             op_append_elem(OP_LIST,
4031                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4032                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4033     }
4034 }
4035
4036 #ifdef PERL_MAD
4037  /*
4038  * Perl_madlex
4039  * The intent of this yylex wrapper is to minimize the changes to the
4040  * tokener when we aren't interested in collecting madprops.  It remains
4041  * to be seen how successful this strategy will be...
      *
      * It calls yylex() for the real token and then hands the token text
      * and the surrounding material (PL_thistoken, PL_thiswhite,
      * PL_thisstuff, PL_thisopen, PL_thisclose, PL_endwhite) to CURMAD() /
      * addmad() as madprops.  For the token types listed in the first case
      * group of the switch below, the collected madprops are appended to
      * pl_yylval.opval and the type is returned at once; for every other
      * type pl_yylval is replaced by a TOKEN built with newTOKEN().
      *
      * Returns the token type.  A pending identifier is delegated to
      * S_pending_ident().  An EOF (0) from yylex() is returned as 0 only
      * when nothing is left to attach; otherwise it is reported as PEG.
4042  */
4043
4044 int
4045 Perl_madlex(pTHX)
4046 {
4047     int optype;
4048     char *s = PL_bufptr;
4049
4050     /* make sure PL_thiswhite is initialized */
4051     PL_thiswhite = 0;
4052     PL_thismad = 0;
4053
4054     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4055     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4056         return S_pending_ident(aTHX);
4057
4058     /* previous token ate up our whitespace? */
4059     if (!PL_lasttoke && PL_nextwhite) {
4060         PL_thiswhite = PL_nextwhite;
4061         PL_nextwhite = 0;
4062     }
4063
4064     /* isolate the token, and figure out where it is without whitespace */
4065     PL_realtokenstart = -1;
4066     PL_thistoken = 0;
4067     optype = yylex();
         /* s marks the end of the token text captured below */
4068     s = PL_bufptr;
4069     assert(PL_curforce < 0);
4070
4071     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4072         if (!PL_thistoken) {
                 /* no start offset recorded, or line number is 0: the token
                  * text is empty; otherwise it is the span of PL_linestr
                  * from PL_realtokenstart up to s */
4073             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4074                 PL_thistoken = newSVpvs("");
4075             else {
4076                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4077                 PL_thistoken = newSVpvn(tstart, s - tstart);
4078             }
4079         }
4080         if (PL_thismad) /* install head */
4081             CURMAD('X', PL_thistoken);
4082     }
4083
4084     /* last whitespace of a sublex? */
4085     if (optype == ')' && PL_endwhite) {
4086         CURMAD('X', PL_endwhite);
4087     }
4088
4089     if (!PL_thismad) {
4090
4091         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4092         if (!PL_thiswhite && !PL_endwhite && !optype) {
4093             sv_free(PL_thistoken);
4094             PL_thistoken = 0;
4095             return 0;
4096         }
4097
4098         /* put off final whitespace till peg */
4099         if (optype == ';' && !PL_rsfp) {
4100             PL_nextwhite = PL_thiswhite;
4101             PL_thiswhite = 0;
4102         }
             /* PL_thisopen is set: record it as madprop q and discard the
              * plain token text (presumably the opening delimiter of a
              * quote-like construct -- confirm against the scanners) */
4103         else if (PL_thisopen) {
4104             CURMAD('q', PL_thisopen);
4105             if (PL_thistoken)
4106                 sv_free(PL_thistoken);
4107             PL_thistoken = 0;
4108         }
4109         else {
4110             /* Store actual token text as madprop X */
4111             CURMAD('X', PL_thistoken);
4112         }
4113
4114         if (PL_thiswhite) {
4115             /* add preceding whitespace as madprop _ */
4116             CURMAD('_', PL_thiswhite);
4117         }
4118
4119         if (PL_thisstuff) {
4120             /* add quoted material as madprop = */
4121             CURMAD('=', PL_thisstuff);
4122         }
4123
4124         if (PL_thisclose) {
4125             /* add terminating quote as madprop Q */
4126             CURMAD('Q', PL_thisclose);
4127         }
4128     }
4129
4130     /* special processing based on optype */
4131
4132     switch (optype) {
4133
4134     /* opval doesn't need a TOKEN since it can already store mp */
4135     case WORD:
4136     case METHOD:
4137     case FUNCMETH:
4138     case THING:
4139     case PMFUNC:
4140     case PRIVATEREF:
4141     case FUNC0SUB:
4142     case UNIOPSUB:
4143     case LSTOPSUB:
4144         if (pl_yylval.opval)
4145             append_madprops(PL_thismad, pl_yylval.opval, 0);
4146         PL_thismad = 0;
4147         return optype;
4148
4149     /* fake EOF */
4150     case 0:
4151         optype = PEG;
4152         if (PL_endwhite) {
4153             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4154             PL_endwhite = 0;
4155         }
4156         break;
4157
4158     case ']':
4159     case '}':
4160         if (PL_faketokens)
4161             break;
4162         /* remember any fake bracket that lexer is about to discard */
4163         if (PL_lex_brackets == 1 &&
4164             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4165         {
                 /* skip blanks and tabs; if a '}' follows, save the blanks
                  * plus that '}' as a '#' madprop and leave PL_bufptr
                  * pointing at the '}' itself */
4166             s = PL_bufptr;
4167             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4168                 s++;
4169             if (*s == '}') {
4170                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4171                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4172                 PL_thiswhite = 0;
4173                 PL_bufptr = s - 1;
4174                 break;  /* don't bother looking for trailing comment */
4175             }
4176             else
4177                 s = PL_bufptr;
4178         }
4179         if (optype == ']')
4180             break;
4181         /* FALLTHROUGH */
4182
4183     /* attach a trailing comment to its statement instead of next token */
4184     case ';':
4185         if (PL_faketokens)
4186             break;
             /* only when the character just before PL_bufptr is the token
              * character itself */
4187         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4188             s = PL_bufptr;
4189             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4190                 s++;
                 /* blanks followed by a newline or '#': take the rest of
                  * the line, including its newline when one is found before
                  * PL_bufend, as a '#' madprop and move PL_bufptr past it */
4191             if (*s == '\n' || *s == '#') {
4192                 while (s < PL_bufend && *s != '\n')
4193                     s++;
4194                 if (s < PL_bufend)
4195                     s++;
4196                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4197                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4198                 PL_thiswhite = 0;
4199                 PL_bufptr = s;
4200             }
4201         }
4202         break;
4203
4204     /* pval */
4205     case LABEL:
4206         break;
4207
4208     /* ival */
4209     default:
4210         break;
4211
4212     }
4213
4214     /* Create new token struct.  Note: opvals return early above. */
4215     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4216     PL_thismad = 0;
4217     return optype;
4218 }
4219 #endif
4220
4221 STATIC char *
4222 S_tokenize_use(pTHX_ int is_use, char *s) {
4223     dVAR;
4224
4225     PERL_ARGS_ASSERT_TOKENIZE_USE;
4226
4227     if (PL_expect != XSTATE)
4228         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4229                     is_use ? "use" : "no"));
4230     s = SKIPSPACE1(s);
4231     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4232         s = force_version(s, TRUE);
4233         if (*s == ';' || *s == '}'
4234                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4235             start_force(PL_curforce);
4236             NEXTVAL_NEXTTOKE.opval = NULL;
4237             force_next(WORD);
4238         }
4239         else if (*s == 'v') {
4240             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4241             s = force_version(s, FALSE);
4242         }
4243     }
4244     else {
4245         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4246         s = force_version(s, FALSE);
4247     }
4248     pl_yylval.ival = is_use;
4249     return s;
4250 }
4251 #ifdef DEBUGGING
         /* Printable names for PL_expect values, without the leading "X".
          * Perl_yylex indexes this table with PL_expect in its DEBUG_T
          * trace line.
          * NOTE(review): the order presumably has to match the declaration
          * order of the expectation values (XOPERATOR, XTERM, ...) -- confirm
          * against their definition before adding or reordering entries. */
4252     static const char* const exp_name[] =
4253         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4254           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4255         };
4256 #endif
4257
#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
/*
 * S_word_takes_any_delimeter
 * Tell whether the word at p is one of the operators m, s, y, q, tr, qq,
 * qw, qx or qr.
 *
 *   p    start of the word; p[0 .. len-1] must be readable
 *   len  length of the word in bytes
 *
 * Returns true for exactly those nine spellings and false for anything
 * else, including words containing a NUL byte.
 */
STATIC bool
S_word_takes_any_delimeter(char *p, STRLEN len)
{
    /* memchr() over the exact number of letters is used instead of
     * strchr(): strchr() also matches the literal's terminating NUL, which
     * would accept a word whose tested byte is '\0'. */
    if (len == 1)
        return memchr("msyq", p[0], sizeof("msyq") - 1) != NULL;

    if (len == 2) {
        if (p[0] == 't')
            return p[1] == 'r';
        if (p[0] == 'q')
            return memchr("qwxr", p[1], sizeof("qwxr") - 1) != NULL;
    }

    return 0;
}
4267
4268 /*
4269   yylex
4270
4271   Works out what to call the token just pulled out of the input
4272   stream.  The yacc parser takes care of taking the ops we return and
4273   stitching them into a tree.
4274
4275   Returns:
4276     PRIVATEREF
4277
4278   Structure:
4279       if read an identifier
4280           if we're in a my declaration
4281               croak if they tried to say my($foo::bar)
4282               build the ops for a my() declaration
4283           if it's an access to a my() variable
4284               are we in a sort block?
4285                   croak if my($a); $a <=> $b
4286               build ops for access to a my() variable
4287           if in a dq string, and they've said @foo and we can't find @foo
4288               croak
4289           build ops for a bareword
4290       if we already built the token before, use it.
4291 */
4292
4293
4294 #ifdef __SC__
4295 #pragma segment Perl_yylex
4296 #endif
4297 int
4298 Perl_yylex(pTHX)
4299 {
4300     dVAR;
4301     register char *s = PL_bufptr;
4302     register char *d;
4303     STRLEN len;
4304     bool bof = FALSE;
4305     U32 fake_eof = 0;
4306
4307     /* orig_keyword, gvp, and gv are initialized here because
4308      * jump to the label just_a_word_zero can bypass their
4309      * initialization later. */
4310     I32 orig_keyword = 0;
4311     GV *gv = NULL;
4312     GV **gvp = NULL;
4313
4314     DEBUG_T( {
4315         SV* tmp = newSVpvs("");
4316         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4317             (IV)CopLINE(PL_curcop),
4318             lex_state_names[PL_lex_state],
4319             exp_name[PL_expect],
4320             pv_display(tmp, s, strlen(s), 0, 60));
4321         SvREFCNT_dec(tmp);
4322     } );
4323     /* check if there's an identifier for us to look at */
4324     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4325         return REPORT(S_pending_ident(aTHX));
4326
4327     /* no identifier pending identification */
4328
4329     switch (PL_lex_state) {
4330 #ifdef COMMENTARY
4331     case LEX_NORMAL:            /* Some compilers will produce faster */
4332     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4333         break;
4334 #endif
4335
4336     /* when we've already built the next token, just pull it out of the queue */
4337     case LEX_KNOWNEXT:
4338 #ifdef PERL_MAD
4339         PL_lasttoke--;
4340         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4341         if (PL_madskills) {
4342             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4343             PL_nexttoke[PL_lasttoke].next_mad = 0;
4344             if (PL_thismad && PL_thismad->mad_key == '_') {
4345                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4346                 PL_thismad->mad_val = 0;
4347                 mad_free(PL_thismad);
4348                 PL_thismad = 0;
4349             }
4350         }
4351         if (!PL_lasttoke) {
4352             PL_lex_state = PL_lex_defer;
4353             PL_expect = PL_lex_expect;
4354             PL_lex_defer = LEX_NORMAL;
4355             if (!PL_nexttoke[PL_lasttoke].next_type)
4356                 return yylex();
4357         }
4358 #else
4359         PL_nexttoke--;
4360         pl_yylval = PL_nextval[PL_nexttoke];
4361         if (!PL_nexttoke) {
4362             PL_lex_state = PL_lex_defer;
4363             PL_expect = PL_lex_expect;
4364             PL_lex_defer = LEX_NORMAL;
4365         }
4366 #endif
4367         {
4368             I32 next_type;
4369 #ifdef PERL_MAD
4370             next_type = PL_nexttoke[PL_lasttoke].next_type;
4371 #else
4372             next_type = PL_nexttype[PL_nexttoke];
4373 #endif
4374             if (next_type & (7<<24)) {
4375                 if (next_type & (1<<24)) {
4376                     if (PL_lex_brackets > 100)
4377                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4378                     PL_lex_brackstack[PL_lex_brackets++] =
4379                         (char) ((next_type >> 16) & 0xff);
4380                 }
4381                 if (next_type & (2<<24))
4382                     PL_lex_allbrackets++;
4383                 if (next_type & (4<<24))
4384                     PL_lex_allbrackets--;
4385                 next_type &= 0xffff;
4386             }
4387 #ifdef PERL_MAD
4388             /* FIXME - can these be merged?  */
4389             return next_type;
4390 #else
4391             return REPORT(next_type);
4392 #endif
4393         }
4394
4395     /* interpolated case modifiers like \L \U, including \Q and \E.
4396        when we get here, PL_bufptr is at the \
4397     */
4398     case LEX_INTERPCASEMOD:
4399 #ifdef DEBUGGING
4400         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4401             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4402 #endif
4403         /* handle \E or end of string */
4404         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4405             /* if at a \E */
4406             if (PL_lex_casemods) {
4407                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4408                 PL_lex_casestack[PL_lex_casemods] = '\0';
4409
4410                 if (PL_bufptr != PL_bufend
4411                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4412                     PL_bufptr += 2;
4413                     PL_lex_state = LEX_INTERPCONCAT;
4414 #ifdef PERL_MAD
4415                     if (PL_madskills)
4416                         PL_thistoken = newSVpvs("\\E");
4417 #endif
4418                 }
4419                 PL_lex_allbrackets--;
4420                 return REPORT(')');
4421             }
4422 #ifdef PERL_MAD
4423             while (PL_bufptr != PL_bufend &&
4424               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4425                 if (!PL_thiswhite)
4426                     PL_thiswhite = newSVpvs("");
4427                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4428                 PL_bufptr += 2;
4429             }
4430 #else
4431             if (PL_bufptr != PL_bufend)
4432                 PL_bufptr += 2;
4433 #endif
4434             PL_lex_state = LEX_INTERPCONCAT;
4435             return yylex();
4436         }
4437         else {
4438             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4439               "### Saw case modifier\n"); });
4440             s = PL_bufptr + 1;
4441             if (s[1] == '\\' && s[2] == 'E') {
4442 #ifdef PERL_MAD
4443                 if (!PL_thiswhite)
4444                     PL_thiswhite = newSVpvs("");
4445                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4446 #endif
4447                 PL_bufptr = s + 3;
4448                 PL_lex_state = LEX_INTERPCONCAT;
4449                 return yylex();
4450             }
4451             else {
4452                 I32 tmp;
4453                 if (!PL_madskills) /* when just compiling don't need correct */
4454                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4455                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4456                 if ((*s == 'L' || *s == 'U') &&
4457                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4458                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4459                     PL_lex_allbrackets--;
4460                     return REPORT(')');
4461                 }
4462                 if (PL_lex_casemods > 10)
4463                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4464                 PL_lex_casestack[PL_lex_casemods++] = *s;
4465                 PL_lex_casestack[PL_lex_casemods] = '\0';
4466                 PL_lex_state = LEX_INTERPCONCAT;
4467                 start_force(PL_curforce);
4468                 NEXTVAL_NEXTTOKE.ival = 0;
4469                 force_next((2<<24)|'(');
4470                 start_force(PL_curforce);
4471                 if (*s == 'l')
4472                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4473                 else if (*s == 'u')
4474                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4475                 else if (*s == 'L')
4476                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4477                 else if (*s == 'U')
4478                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4479                 else if (*s == 'Q')
4480                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4481                 else
4482                     Perl_croak(aTHX_ "panic: yylex");
4483                 if (PL_madskills) {
4484                     SV* const tmpsv = newSVpvs("\\ ");
4485                     /* replace the space with the character we want to escape
4486                      */
4487                     SvPVX(tmpsv)[1] = *s;
4488                     curmad('_', tmpsv);
4489                 }
4490                 PL_bufptr = s + 1;
4491             }
4492             force_next(FUNC);
4493             if (PL_lex_starts) {
4494                 s = PL_bufptr;
4495                 PL_lex_starts = 0;
4496 #ifdef PERL_MAD
4497                 if (PL_madskills) {
4498                     if (PL_thistoken)
4499                         sv_free(PL_thistoken);
4500                     PL_thistoken = newSVpvs("");
4501                 }
4502 #endif
4503                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4504                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4505                     OPERATOR(',');
4506                 else
4507                     Aop(OP_CONCAT);
4508             }
4509             else
4510                 return yylex();
4511         }
4512
4513     case LEX_INTERPPUSH:
4514         return REPORT(sublex_push());
4515
4516     case LEX_INTERPSTART:
4517         if (PL_bufptr == PL_bufend)
4518             return REPORT(sublex_done());
4519         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4520               "### Interpolated variable\n"); });
4521         PL_expect = XTERM;
4522         PL_lex_dojoin = (*PL_bufptr == '@');
4523         PL_lex_state = LEX_INTERPNORMAL;
4524         if (PL_lex_dojoin) {
4525             start_force(PL_curforce);
4526             NEXTVAL_NEXTTOKE.ival = 0;
4527             force_next(',');
4528             start_force(PL_curforce);
4529             force_ident("\"", '$');
4530             start_force(PL_curforce);
4531             NEXTVAL_NEXTTOKE.ival = 0;
4532             force_next('$');
4533             start_force(PL_curforce);
4534             NEXTVAL_NEXTTOKE.ival = 0;
4535             force_next((2<<24)|'(');
4536             start_force(PL_curforce);
4537             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4538             force_next(FUNC);
4539         }
4540         if (PL_lex_starts++) {
4541             s = PL_bufptr;
4542 #ifdef PERL_MAD
4543             if (PL_madskills) {
4544                 if (PL_thistoken)
4545                     sv_free(PL_thistoken);
4546                 PL_thistoken = newSVpvs("");
4547             }
4548 #endif
4549             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4550             if (!PL_lex_casemods && PL_lex_inpat)
4551                 OPERATOR(',');
4552             else
4553                 Aop(OP_CONCAT);
4554         }
4555         return yylex();
4556
4557     case LEX_INTERPENDMAYBE:
4558         if (intuit_more(PL_bufptr)) {
4559             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4560             break;
4561         }
4562         /* FALL THROUGH */
4563
4564     case LEX_INTERPEND:
4565         if (PL_lex_dojoin) {
4566             PL_lex_dojoin = FALSE;
4567             PL_lex_state = LEX_INTERPCONCAT;
4568 #ifdef PERL_MAD
4569             if (PL_madskills) {
4570                 if (PL_thistoken)
4571                     sv_free(PL_thistoken);
4572                 PL_thistoken = newSVpvs("");
4573             }
4574 #endif
4575             PL_lex_allbrackets--;
4576             return REPORT(')');
4577         }
4578         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4579             && SvEVALED(PL_lex_repl))
4580         {
4581             if (PL_bufptr != PL_bufend)
4582                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4583             PL_lex_repl = NULL;
4584         }
4585         /* FALLTHROUGH */
4586     case LEX_INTERPCONCAT:
4587 #ifdef DEBUGGING
4588         if (PL_lex_brackets)
4589             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4590 #endif
4591         if (PL_bufptr == PL_bufend)
4592             return REPORT(sublex_done());
4593
4594         if (SvIVX(PL_linestr) == '\'') {
4595             SV *sv = newSVsv(PL_linestr);
4596             if (!PL_lex_inpat)
4597                 sv = tokeq(sv);
4598             else if ( PL_hints & HINT_NEW_RE )
4599                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4600             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4601             s = PL_bufend;
4602         }
4603         else {
4604             s = scan_const(PL_bufptr);
4605             if (*s == '\\')
4606                 PL_lex_state = LEX_INTERPCASEMOD;
4607             else
4608                 PL_lex_state = LEX_INTERPSTART;
4609         }
4610
4611         if (s != PL_bufptr) {
4612             start_force(PL_curforce);
4613             if (PL_madskills) {
4614                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4615             }
4616             NEXTVAL_NEXTTOKE = pl_yylval;
4617             PL_expect = XTERM;
4618             force_next(THING);
4619             if (PL_lex_starts++) {
4620 #ifdef PERL_MAD
4621                 if (PL_madskills) {
4622                     if (PL_thistoken)
4623                         sv_free(PL_thistoken);
4624                     PL_thistoken = newSVpvs("");
4625                 }
4626 #endif
4627                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4628                 if (!PL_lex_casemods && PL_lex_inpat)
4629                     OPERATOR(',');
4630                 else
4631                     Aop(OP_CONCAT);
4632             }
4633             else {
4634                 PL_bufptr = s;
4635                 return yylex();
4636             }
4637         }
4638
4639         return yylex();
4640     case LEX_FORMLINE:
4641         PL_lex_state = LEX_NORMAL;
4642         s = scan_formline(PL_bufptr);
4643         if (!PL_lex_formbrack)
4644             goto rightbracket;
4645         OPERATOR(';');
4646     }
4647
4648     s = PL_bufptr;
4649     PL_oldoldbufptr = PL_oldbufptr;
4650     PL_oldbufptr = s;
4651
4652   retry:
4653 #ifdef PERL_MAD
4654     if (PL_thistoken) {
4655         sv_free(PL_thistoken);
4656         PL_thistoken = 0;
4657     }
4658     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4659 #endif
4660     switch (*s) {
4661     default:
4662         if (isIDFIRST_lazy_if(s,UTF))
4663             goto keylookup;
4664         {
4665         unsigned char c = *s;
4666         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4667         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4668             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4669         } else {
4670             d = PL_linestart;
4671         }
4672         *s = '\0';
4673         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4674     }
4675     case 4:
4676     case 26:
4677         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4678     case 0:
4679 #ifdef PERL_MAD
4680         if (PL_madskills)
4681             PL_faketokens = 0;
4682 #endif
4683         if (!PL_rsfp) {
4684             PL_last_uni = 0;
4685             PL_last_lop = 0;
4686             if (PL_lex_brackets &&
4687                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4688                 yyerror((const char *)
4689                         (PL_lex_formbrack
4690                          ? "Format not terminated"
4691                          : "Missing right curly or square bracket"));
4692             }
4693             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4694                         "### Tokener got EOF\n");
4695             } );
4696             TOKEN(0);
4697         }
4698         if (s++ < PL_bufend)
4699             goto retry;                 /* ignore stray nulls */
4700         PL_last_uni = 0;
4701         PL_last_lop = 0;
4702         if (!PL_in_eval && !PL_preambled) {
4703             PL_preambled = TRUE;
4704 #ifdef PERL_MAD
4705             if (PL_madskills)
4706                 PL_faketokens = 1;
4707 #endif
4708             if (PL_perldb) {
4709                 /* Generate a string of Perl code to load the debugger.
4710                  * If PERL5DB is set, it will return the contents of that,
4711                  * otherwise a compile-time require of perl5db.pl.  */
4712
4713                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4714
4715                 if (pdb) {
4716                     sv_setpv(PL_linestr, pdb);
4717                     sv_catpvs(PL_linestr,";");
4718                 } else {
4719                     SETERRNO(0,SS_NORMAL);
4720                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4721                 }
4722             } else
4723                 sv_setpvs(PL_linestr,"");
4724             if (PL_preambleav) {
4725                 SV **svp = AvARRAY(PL_preambleav);
4726                 SV **const end = svp + AvFILLp(PL_preambleav);
4727                 while(svp <= end) {
4728                     sv_catsv(PL_linestr, *svp);
4729                     ++svp;
4730                     sv_catpvs(PL_linestr, ";");
4731                 }
4732                 sv_free(MUTABLE_SV(PL_preambleav));
4733                 PL_preambleav = NULL;
4734             }
4735             if (PL_minus_E)
4736                 sv_catpvs(PL_linestr,
4737                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4738             if (PL_minus_n || PL_minus_p) {
4739                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4740                 if (PL_minus_l)
4741                     sv_catpvs(PL_linestr,"chomp;");
4742                 if (PL_minus_a) {
4743                     if (PL_minus_F) {
4744                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4745                              || *PL_splitstr == '"')
4746                               && strchr(PL_splitstr + 1, *PL_splitstr))
4747                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4748                         else {
4749                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4750                                bytes can be used as quoting characters.  :-) */
4751                             const char *splits = PL_splitstr;
4752                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4753                             do {
4754                                 /* Need to \ \s  */
4755                                 if (*splits == '\\')
4756                                     sv_catpvn(PL_linestr, splits, 1);
4757                                 sv_catpvn(PL_linestr, splits, 1);
4758                             } while (*splits++);
4759                             /* This loop will embed the trailing NUL of
4760                                PL_linestr as the last thing it does before
4761                                terminating.  */
4762                             sv_catpvs(PL_linestr, ");");
4763                         }
4764                     }
4765                     else
4766                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4767                 }
4768             }
4769             sv_catpvs(PL_linestr, "\n");
4770             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4771             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4772             PL_last_lop = PL_last_uni = NULL;
4773             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4774                 update_debugger_info(PL_linestr, NULL, 0);
4775             goto retry;
4776         }
4777         do {
4778             fake_eof = 0;
4779             bof = PL_rsfp ? TRUE : FALSE;
4780             if (0) {
4781               fake_eof:
4782                 fake_eof = LEX_FAKE_EOF;
4783             }
4784             PL_bufptr = PL_bufend;
4785             CopLINE_inc(PL_curcop);
4786             if (!lex_next_chunk(fake_eof)) {
4787                 CopLINE_dec(PL_curcop);
4788                 s = PL_bufptr;
4789                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4790             }
4791             CopLINE_dec(PL_curcop);
4792 #ifdef PERL_MAD
4793             if (!PL_rsfp)
4794                 PL_realtokenstart = -1;
4795 #endif
4796             s = PL_bufptr;
4797             /* If it looks like the start of a BOM or raw UTF-16,
4798              * check if it in fact is. */
4799             if (bof && PL_rsfp &&
4800                      (*s == 0 ||
4801                       *(U8*)s == 0xEF ||
4802                       *(U8*)s >= 0xFE ||
4803                       s[1] == 0)) {
4804                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4805                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4806 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4807                 /* offset may include swallowed CR */
4808                 if (!bof)
4809                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4810 #endif
4811                 if (bof) {
4812                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4813                     s = swallow_bom((U8*)s);
4814                 }
4815             }
4816             if (PL_parser->in_pod) {
4817                 /* Incest with pod. */
4818 #ifdef PERL_MAD
4819                 if (PL_madskills)
4820                     sv_catsv(PL_thiswhite, PL_linestr);
4821 #endif
4822                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4823                     sv_setpvs(PL_linestr, "");
4824                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4825                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4826                     PL_last_lop = PL_last_uni = NULL;
4827                     PL_parser->in_pod = 0;
4828                 }
4829             }
4830             if (PL_rsfp)
4831                 incline(s);
4832         } while (PL_parser->in_pod);
4833         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4834         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4835         PL_last_lop = PL_last_uni = NULL;
4836         if (CopLINE(PL_curcop) == 1) {
4837             while (s < PL_bufend && isSPACE(*s))
4838                 s++;
4839             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4840                 s++;
4841 #ifdef PERL_MAD
4842             if (PL_madskills)
4843                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4844 #endif
4845             d = NULL;
4846             if (!PL_in_eval) {
4847                 if (*s == '#' && *(s+1) == '!')
4848                     d = s + 2;
4849 #ifdef ALTERNATE_SHEBANG
4850                 else {
4851                     static char const as[] = ALTERNATE_SHEBANG;
4852                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4853                         d = s + (sizeof(as) - 1);
4854                 }
4855 #endif /* ALTERNATE_SHEBANG */
4856             }
4857             if (d) {
4858                 char *ipath;
4859                 char *ipathend;
4860
4861                 while (isSPACE(*d))
4862                     d++;
4863                 ipath = d;
4864                 while (*d && !isSPACE(*d))
4865                     d++;
4866                 ipathend = d;
4867
4868 #ifdef ARG_ZERO_IS_SCRIPT
4869                 if (ipathend > ipath) {
4870                     /*
4871                      * HP-UX (at least) sets argv[0] to the script name,
4872                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4873                      * at least, set argv[0] to the basename of the Perl
4874                      * interpreter. So, having found "#!", we'll set it right.
4875                      */
4876                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4877                                                     SVt_PV)); /* $^X */
4878                     assert(SvPOK(x) || SvGMAGICAL(x));
4879                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4880                         sv_setpvn(x, ipath, ipathend - ipath);
4881                         SvSETMAGIC(x);
4882                     }
4883                     else {
4884                         STRLEN blen;
4885                         STRLEN llen;
4886                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4887                         const char * const lstart = SvPV_const(x,llen);
4888                         if (llen < blen) {
4889                             bstart += blen - llen;
4890                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4891                                 sv_setpvn(x, ipath, ipathend - ipath);
4892                                 SvSETMAGIC(x);
4893                             }
4894                         }
4895                     }
4896                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4897                 }
4898 #endif /* ARG_ZERO_IS_SCRIPT */
4899
4900                 /*
4901                  * Look for options.
4902                  */
4903                 d = instr(s,"perl -");
4904                 if (!d) {
4905                     d = instr(s,"perl");
4906 #if defined(DOSISH)
4907                     /* avoid getting into infinite loops when shebang
4908                      * line contains "Perl" rather than "perl" */
4909                     if (!d) {
4910                         for (d = ipathend-4; d >= ipath; --d) {
4911                             if ((*d == 'p' || *d == 'P')
4912                                 && !ibcmp(d, "perl", 4))
4913                             {
4914                                 break;
4915                             }
4916                         }
4917                         if (d < ipath)
4918                             d = NULL;
4919                     }
4920 #endif
4921                 }
4922 #ifdef ALTERNATE_SHEBANG
4923                 /*
4924                  * If the ALTERNATE_SHEBANG on this system starts with a
4925                  * character that can be part of a Perl expression, then if
4926                  * we see it but not "perl", we're probably looking at the
4927                  * start of Perl code, not a request to hand off to some
4928                  * other interpreter.  Similarly, if "perl" is there, but
4929                  * not in the first 'word' of the line, we assume the line
4930                  * contains the start of the Perl program.
4931                  */
4932                 if (d && *s != '#') {
4933                     const char *c = ipath;
4934                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4935                         c++;
4936                     if (c < d)
4937                         d = NULL;       /* "perl" not in first word; ignore */
4938                     else
4939                         *s = '#';       /* Don't try to parse shebang line */
4940                 }
4941 #endif /* ALTERNATE_SHEBANG */
4942                 if (!d &&
4943                     *s == '#' &&
4944                     ipathend > ipath &&
4945                     !PL_minus_c &&
4946                     !instr(s,"indir") &&
4947                     instr(PL_origargv[0],"perl"))
4948                 {
4949                     dVAR;
4950                     char **newargv;
4951
4952                     *ipathend = '\0';
4953                     s = ipathend + 1;
4954                     while (s < PL_bufend && isSPACE(*s))
4955                         s++;
4956                     if (s < PL_bufend) {
4957                         Newx(newargv,PL_origargc+3,char*);
4958                         newargv[1] = s;
4959                         while (s < PL_bufend && !isSPACE(*s))
4960                             s++;
4961                         *s = '\0';
4962                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4963                     }
4964                     else
4965                         newargv = PL_origargv;
4966                     newargv[0] = ipath;
4967                     PERL_FPU_PRE_EXEC
4968                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4969                     PERL_FPU_POST_EXEC
4970                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4971                 }
4972                 if (d) {
4973                     while (*d && !isSPACE(*d))
4974                         d++;
4975                     while (SPACE_OR_TAB(*d))
4976                         d++;
4977
4978                     if (*d++ == '-') {
4979                         const bool switches_done = PL_doswitches;
4980                         const U32 oldpdb = PL_perldb;
4981                         const bool oldn = PL_minus_n;
4982                         const bool oldp = PL_minus_p;
4983                         const char *d1 = d;
4984
4985                         do {
4986                             bool baduni = FALSE;
4987                             if (*d1 == 'C') {
4988                                 const char *d2 = d1 + 1;
4989                                 if (parse_unicode_opts((const char **)&d2)
4990                                     != PL_unicode)
4991                                     baduni = TRUE;
4992                             }
4993                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4994                                 const char * const m = d1;
4995                                 while (*d1 && !isSPACE(*d1))
4996                                     d1++;
4997                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4998                                       (int)(d1 - m), m);
4999                             }
5000                             d1 = moreswitches(d1);
5001                         } while (d1);
5002                         if (PL_doswitches && !switches_done) {
5003                             int argc = PL_origargc;
5004                             char **argv = PL_origargv;
5005                             do {
5006                                 argc--,argv++;
5007                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5008                             init_argv_symbols(argc,argv);
5009                         }
5010                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5011                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5012                               /* if we have already added "LINE: while (<>) {",
5013                                  we must not do it again */
5014                         {
5015                             sv_setpvs(PL_linestr, "");
5016                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5017                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5018                             PL_last_lop = PL_last_uni = NULL;
5019                             PL_preambled = FALSE;
5020                             if (PERLDB_LINE || PERLDB_SAVESRC)
5021                                 (void)gv_fetchfile(PL_origfilename);
5022                             goto retry;
5023                         }
5024                     }
5025                 }
5026             }
5027         }
5028         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5029             PL_bufptr = s;
5030             PL_lex_state = LEX_FORMLINE;
5031             return yylex();
5032         }
5033         goto retry;
5034     case '\r':
5035 #ifdef PERL_STRICT_CR
5036         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5037         Perl_croak(aTHX_
5038       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5039 #endif
5040     case ' ': case '\t': case '\f': case 013:
5041 #ifdef PERL_MAD
5042         PL_realtokenstart = -1;
5043         if (!PL_thiswhite)
5044             PL_thiswhite = newSVpvs("");
5045         sv_catpvn(PL_thiswhite, s, 1);
5046 #endif
5047         s++;
5048         goto retry;
5049     case '#':
5050     case '\n':
5051 #ifdef PERL_MAD
5052         PL_realtokenstart = -1;
5053         if (PL_madskills)
5054             PL_faketokens = 0;
5055 #endif
5056         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5057             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5058                 /* handle eval qq[#line 1 "foo"\n ...] */
5059                 CopLINE_dec(PL_curcop);
5060                 incline(s);
5061             }
5062             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5063                 s = SKIPSPACE0(s);
5064                 if (!PL_in_eval || PL_rsfp)
5065                     incline(s);
5066             }
5067             else {
5068                 d = s;
5069                 while (d < PL_bufend && *d != '\n')
5070                     d++;
5071                 if (d < PL_bufend)
5072                     d++;
5073                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5074                   Perl_croak(aTHX_ "panic: input overflow");
5075 #ifdef PERL_MAD
5076                 if (PL_madskills)
5077                     PL_thiswhite = newSVpvn(s, d - s);
5078 #endif
5079                 s = d;
5080                 incline(s);
5081             }
5082             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5083                 PL_bufptr = s;
5084                 PL_lex_state = LEX_FORMLINE;
5085                 return yylex();
5086             }
5087         }
5088         else {
5089 #ifdef PERL_MAD
5090             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5091                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5092                     PL_faketokens = 0;
5093                     s = SKIPSPACE0(s);
5094                     TOKEN(PEG); /* make sure any #! line is accessible */
5095                 }
5096                 s = SKIPSPACE0(s);
5097             }
5098             else {
5099 /*              if (PL_madskills && PL_lex_formbrack) { */
5100                     d = s;
5101                     while (d < PL_bufend && *d != '\n')
5102                         d++;
5103                     if (d < PL_bufend)
5104                         d++;
5105                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5106                       Perl_croak(aTHX_ "panic: input overflow");
5107                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5108                         if (!PL_thiswhite)
5109                             PL_thiswhite = newSVpvs("");
5110                         if (CopLINE(PL_curcop) == 1) {
5111                             sv_setpvs(PL_thiswhite, "");
5112                             PL_faketokens = 0;
5113                         }
5114                         sv_catpvn(PL_thiswhite, s, d - s);
5115                     }
5116                     s = d;
5117 /*              }
5118                 *s = '\0';
5119                 PL_bufend = s; */
5120             }
5121 #else
5122             *s = '\0';
5123             PL_bufend = s;
5124 #endif
5125         }
5126         goto retry;
5127     case '-':
5128         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5129             I32 ftst = 0;
5130             char tmp;
5131
5132             s++;
5133             PL_bufptr = s;
5134             tmp = *s++;
5135
5136             while (s < PL_bufend && SPACE_OR_TAB(*s))
5137                 s++;
5138
5139             if (strnEQ(s,"=>",2)) {
5140                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5141                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5142                 OPERATOR('-');          /* unary minus */
5143             }
5144             PL_last_uni = PL_oldbufptr;
5145             switch (tmp) {
5146             case 'r': ftst = OP_FTEREAD;        break;
5147             case 'w': ftst = OP_FTEWRITE;       break;
5148             case 'x': ftst = OP_FTEEXEC;        break;
5149             case 'o': ftst = OP_FTEOWNED;       break;
5150             case 'R': ftst = OP_FTRREAD;        break;
5151             case 'W': ftst = OP_FTRWRITE;       break;
5152             case 'X': ftst = OP_FTREXEC;        break;
5153             case 'O': ftst = OP_FTROWNED;       break;
5154             case 'e': ftst = OP_FTIS;           break;
5155             case 'z': ftst = OP_FTZERO;         break;
5156             case 's': ftst = OP_FTSIZE;         break;
5157             case 'f': ftst = OP_FTFILE;         break;
5158             case 'd': ftst = OP_FTDIR;          break;
5159             case 'l': ftst = OP_FTLINK;         break;
5160             case 'p': ftst = OP_FTPIPE;         break;
5161             case 'S': ftst = OP_FTSOCK;         break;
5162             case 'u': ftst = OP_FTSUID;         break;
5163             case 'g': ftst = OP_FTSGID;         break;
5164             case 'k': ftst = OP_FTSVTX;         break;
5165             case 'b': ftst = OP_FTBLK;          break;
5166             case 'c': ftst = OP_FTCHR;          break;
5167             case 't': ftst = OP_FTTTY;          break;
5168             case 'T': ftst = OP_FTTEXT;         break;
5169             case 'B': ftst = OP_FTBINARY;       break;
5170             case 'M': case 'A': case 'C':
5171                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5172                 switch (tmp) {
5173                 case 'M': ftst = OP_FTMTIME;    break;
5174                 case 'A': ftst = OP_FTATIME;    break;
5175                 case 'C': ftst = OP_FTCTIME;    break;
5176                 default:                        break;
5177                 }
5178                 break;
5179             default:
5180                 break;
5181             }
5182             if (ftst) {
5183                 PL_last_lop_op = (OPCODE)ftst;
5184                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5185                         "### Saw file test %c\n", (int)tmp);
5186                 } );
5187                 FTST(ftst);
5188             }
5189             else {
5190                 /* Assume it was a minus followed by a one-letter named
5191                  * subroutine call (or a -bareword), then. */
5192                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5193                         "### '-%c' looked like a file test but was not\n",
5194                         (int) tmp);
5195                 } );
5196                 s = --PL_bufptr;
5197             }
5198         }
5199         {
5200             const char tmp = *s++;
5201             if (*s == tmp) {
5202                 s++;
5203                 if (PL_expect == XOPERATOR)
5204                     TERM(POSTDEC);
5205                 else
5206                     OPERATOR(PREDEC);
5207             }
5208             else if (*s == '>') {
5209                 s++;
5210                 s = SKIPSPACE1(s);
5211                 if (isIDFIRST_lazy_if(s,UTF)) {
5212                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5213                     TOKEN(ARROW);
5214                 }
5215                 else if (*s == '$')
5216                     OPERATOR(ARROW);
5217                 else
5218                     TERM(ARROW);
5219             }
5220             if (PL_expect == XOPERATOR) {
5221                 if (*s == '=' && !PL_lex_allbrackets &&
5222                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5223                     s--;
5224                     TOKEN(0);
5225                 }
5226                 Aop(OP_SUBTRACT);
5227             }
5228             else {
5229                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5230                     check_uni();
5231                 OPERATOR('-');          /* unary minus */
5232             }
5233         }
5234
5235     case '+':
5236         {
5237             const char tmp = *s++;
5238             if (*s == tmp) {
5239                 s++;
5240                 if (PL_expect == XOPERATOR)
5241                     TERM(POSTINC);
5242                 else
5243                     OPERATOR(PREINC);
5244             }
5245             if (PL_expect == XOPERATOR) {
5246                 if (*s == '=' && !PL_lex_allbrackets &&
5247                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5248                     s--;
5249                     TOKEN(0);
5250                 }
5251                 Aop(OP_ADD);
5252             }
5253             else {
5254                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5255                     check_uni();
5256                 OPERATOR('+');
5257             }
5258         }
5259
5260     case '*':
5261         if (PL_expect != XOPERATOR) {
5262             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5263             PL_expect = XOPERATOR;
5264             force_ident(PL_tokenbuf, '*');
5265             if (!*PL_tokenbuf)
5266                 PREREF('*');
5267             TERM('*');
5268         }
5269         s++;
5270         if (*s == '*') {
5271             s++;
5272             if (*s == '=' && !PL_lex_allbrackets &&
5273                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5274                 s -= 2;
5275                 TOKEN(0);
5276             }
5277             PWop(OP_POW);
5278         }
5279         if (*s == '=' && !PL_lex_allbrackets &&
5280                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5281             s--;
5282             TOKEN(0);
5283         }
5284         Mop(OP_MULTIPLY);
5285
5286     case '%':
5287         if (PL_expect == XOPERATOR) {
5288             if (s[1] == '=' && !PL_lex_allbrackets &&
5289                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5290                 TOKEN(0);
5291             ++s;
5292             Mop(OP_MODULO);
5293         }
5294         PL_tokenbuf[0] = '%';
5295         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5296                 sizeof PL_tokenbuf - 1, FALSE);
5297         if (!PL_tokenbuf[1]) {
5298             PREREF('%');
5299         }
5300         PL_pending_ident = '%';
5301         TERM('%');
5302
5303     case '^':
5304         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5305                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5306             TOKEN(0);
5307         s++;
5308         BOop(OP_BIT_XOR);
5309     case '[':
5310         if (PL_lex_brackets > 100)
5311             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5312         PL_lex_brackstack[PL_lex_brackets++] = 0;
5313         PL_lex_allbrackets++;
5314         {
5315             const char tmp = *s++;
5316             OPERATOR(tmp);
5317         }
5318     case '~':
5319         if (s[1] == '~'
5320             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5321         {
5322             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5323                 TOKEN(0);
5324             s += 2;
5325             Eop(OP_SMARTMATCH);
5326         }
5327         s++;
5328         OPERATOR('~');
5329     case ',':
5330         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5331             TOKEN(0);
5332         s++;
5333         OPERATOR(',');
5334     case ':':
5335         if (s[1] == ':') {
5336             len = 0;
5337             goto just_a_word_zero_gv;
5338         }
5339         s++;
5340         switch (PL_expect) {
5341             OP *attrs;
5342 #ifdef PERL_MAD
5343             I32 stuffstart;
5344 #endif
5345         case XOPERATOR:
5346             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5347                 break;
5348             PL_bufptr = s;      /* update in case we back off */
5349             if (*s == '=') {
5350                 Perl_croak(aTHX_
5351                            "Use of := for an empty attribute list is not allowed");
5352             }
5353             goto grabattrs;
5354         case XATTRBLOCK:
5355             PL_expect = XBLOCK;
5356             goto grabattrs;
5357         case XATTRTERM:
5358             PL_expect = XTERMBLOCK;
5359          grabattrs:
5360 #ifdef PERL_MAD
5361             stuffstart = s - SvPVX(PL_linestr) - 1;
5362 #endif
5363             s = PEEKSPACE(s);
5364             attrs = NULL;
5365             while (isIDFIRST_lazy_if(s,UTF)) {
5366                 I32 tmp;
5367                 SV *sv;
5368                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5369                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5370                     if (tmp < 0) tmp = -tmp;
5371                     switch (tmp) {
5372                     case KEY_or:
5373                     case KEY_and:
5374                     case KEY_for:
5375                     case KEY_foreach:
5376                     case KEY_unless:
5377                     case KEY_if:
5378                     case KEY_while:
5379                     case KEY_until:
5380                         goto got_attrs;
5381                     default:
5382                         break;
5383                     }
5384                 }
5385                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5386                 if (*d == '(') {
5387                     d = scan_str(d,TRUE,TRUE);
5388                     if (!d) {
5389                         /* MUST advance bufptr here to avoid bogus
5390                            "at end of line" context messages from yyerror().
5391                          */
5392                         PL_bufptr = s + len;
5393                         yyerror("Unterminated attribute parameter in attribute list");
5394                         if (attrs)
5395                             op_free(attrs);
5396                         sv_free(sv);
5397                         return REPORT(0);       /* EOF indicator */
5398                     }
5399                 }
5400                 if (PL_lex_stuff) {
5401                     sv_catsv(sv, PL_lex_stuff);
5402                     attrs = op_append_elem(OP_LIST, attrs,
5403                                         newSVOP(OP_CONST, 0, sv));
5404                     SvREFCNT_dec(PL_lex_stuff);
5405                     PL_lex_stuff = NULL;
5406                 }
5407                 else {
5408                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5409                         sv_free(sv);
5410                         if (PL_in_my == KEY_our) {
5411                             deprecate(":unique");
5412                         }
5413                         else
5414                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5415                     }
5416
5417                     /* NOTE: any CV attrs applied here need to be part of
5418                        the CVf_BUILTIN_ATTRS define in cv.h! */
5419                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5420                         sv_free(sv);
5421                         CvLVALUE_on(PL_compcv);
5422                     }
5423                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5424                         sv_free(sv);
5425                         deprecate(":locked");
5426                     }
5427                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5428                         sv_free(sv);
5429                         CvMETHOD_on(PL_compcv);
5430                     }
5431                     /* After we've set the flags, it could be argued that
5432                        we don't need to do the attributes.pm-based setting
5433                        process, and shouldn't bother appending recognized
5434                        flags.  To experiment with that, uncomment the
5435                        following "else".  (Note that's already been
5436                        uncommented.  That keeps the above-applied built-in
5437                        attributes from being intercepted (and possibly
5438                        rejected) by a package's attribute routines, but is
5439                        justified by the performance win for the common case
5440                        of applying only built-in attributes.) */
5441                     else
5442                         attrs = op_append_elem(OP_LIST, attrs,
5443                                             newSVOP(OP_CONST, 0,
5444                                                     sv));
5445                 }
5446                 s = PEEKSPACE(d);
5447                 if (*s == ':' && s[1] != ':')
5448                     s = PEEKSPACE(s+1);
5449                 else if (s == d)
5450                     break;      /* require real whitespace or :'s */
5451                 /* XXX losing whitespace on sequential attributes here */
5452             }
5453             {
5454                 const char tmp
5455                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5456                 if (*s != ';' && *s != '}' && *s != tmp
5457                     && (tmp != '=' || *s != ')')) {
5458                     const char q = ((*s == '\'') ? '"' : '\'');
5459                     /* If here for an expression, and parsed no attrs, back
5460                        off. */
5461                     if (tmp == '=' && !attrs) {
5462                         s = PL_bufptr;
5463                         break;
5464                     }
5465                     /* MUST advance bufptr here to avoid bogus "at end of line"
5466                        context messages from yyerror().
5467                     */
5468                     PL_bufptr = s;
5469                     yyerror( (const char *)
5470                              (*s
5471                               ? Perl_form(aTHX_ "Invalid separator character "
5472                                           "%c%c%c in attribute list", q, *s, q)
5473                               : "Unterminated attribute list" ) );
5474                     if (attrs)
5475                         op_free(attrs);
5476                     OPERATOR(':');
5477                 }
5478             }
5479         got_attrs:
5480             if (attrs) {
5481                 start_force(PL_curforce);
5482                 NEXTVAL_NEXTTOKE.opval = attrs;
5483                 CURMAD('_', PL_nextwhite);
5484                 force_next(THING);
5485             }
5486 #ifdef PERL_MAD
5487             if (PL_madskills) {
5488                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5489                                      (s - SvPVX(PL_linestr)) - stuffstart);
5490             }
5491 #endif
5492             TOKEN(COLONATTR);
5493         }
5494         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5495             s--;
5496             TOKEN(0);
5497         }
5498         PL_lex_allbrackets--;
5499         OPERATOR(':');
5500     case '(':
5501         s++;
5502         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5503             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5504         else
5505             PL_expect = XTERM;
5506         s = SKIPSPACE1(s);
5507         PL_lex_allbrackets++;
5508         TOKEN('(');
5509     case ';':
5510         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5511             TOKEN(0);
5512         CLINE;
5513         s++;
5514         OPERATOR(';');
5515     case ')':
5516         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5517             TOKEN(0);
5518         s++;
5519         PL_lex_allbrackets--;
5520         s = SKIPSPACE1(s);
5521         if (*s == '{')
5522             PREBLOCK(')');
5523         TERM(')');
5524     case ']':
5525         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5526             TOKEN(0);
5527         s++;
5528         if (PL_lex_brackets <= 0)
5529             yyerror("Unmatched right square bracket");
5530         else
5531             --PL_lex_brackets;
5532         PL_lex_allbrackets--;
5533         if (PL_lex_state == LEX_INTERPNORMAL) {
5534             if (PL_lex_brackets == 0) {
5535                 if (*s == '-' && s[1] == '>')
5536                     PL_lex_state = LEX_INTERPENDMAYBE;
5537                 else if (*s != '[' && *s != '{')
5538                     PL_lex_state = LEX_INTERPEND;
5539             }
5540         }
5541         TERM(']');
5542     case '{':
5543       leftbracket:
             /* Left curly bracket (case '=' also jumps here through the
              * leftbracket label).  The switch on PL_expect pushes onto
              * PL_lex_brackstack the value that case '}' later pops back
              * into PL_expect, and decides between issuing HASHBRACK and
              * breaking out of the switch to issue a plain '{' token whose
              * value is the current line number. */
5544         s++;
             /* NOTE(review): the stack is only resized once more than 100
              * levels are in use, so the initial allocation (made elsewhere)
              * must hold at least 101 entries -- confirm. */
5545         if (PL_lex_brackets > 100) {
5546             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5547         }
5548         switch (PL_expect) {
5549         case XTERM:
                 /* While PL_lex_formbrack is set, back up over the brace and
                  * issue DO through PRETERMBLOCK instead. */
5550             if (PL_lex_formbrack) {
5551                 s--;
5552                 PRETERMBLOCK(DO);
5553             }
5554             if (PL_oldoldbufptr == PL_last_lop)
5555                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5556             else
5557                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5558             PL_lex_allbrackets++;
5559             OPERATOR(HASHBRACK);
5560         case XOPERATOR:
                 /* Look ahead for an optional '-' followed by a bare word
                  * and then '}' (blanks allowed between them).  If found,
                  * the word is forced as a WORD token, with the '-' forced
                  * as well when present. */
5561             while (s < PL_bufend && SPACE_OR_TAB(*s))
5562                 s++;
5563             d = s;
5564             PL_tokenbuf[0] = '\0';
5565             if (d < PL_bufend && *d == '-') {
5566                 PL_tokenbuf[0] = '-';
5567                 d++;
5568                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5569                     d++;
5570             }
5571             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5572                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5573                               FALSE, &len);
5574                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5575                     d++;
5576                 if (*d == '}') {
                         /* minus is 1 when a leading '-' was recorded, so
                          * s + minus skips that '-' before the word. */
5577                     const char minus = (PL_tokenbuf[0] == '-');
5578                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5579                     if (minus)
5580                         force_next('-');
5581                 }
5582             }
5583             /* FALL THROUGH */
5584         case XATTRBLOCK:
5585         case XBLOCK:
5586             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5587             PL_lex_allbrackets++;
5588             PL_expect = XSTATE;
5589             break;
5590         case XATTRTERM:
5591         case XTERMBLOCK:
5592             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5593             PL_lex_allbrackets++;
5594             PL_expect = XSTATE;
5595             break;
5596         default: {
                     /* Any other expectation: the brace is ambiguous.  The
                      * code below peeks at the first term inside the curlies
                      * (using t as a read-only cursor) to guess between an
                      * anonymous hash and a block. */
5597                 const char *t;
5598                 if (PL_oldoldbufptr == PL_last_lop)
5599                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5600                 else
5601                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5602                 PL_lex_allbrackets++;
5603                 s = SKIPSPACE1(s);
                     /* Empty curlies: an anon hash, except for an empty
                      * ${} while interpolating, which is reported as a
                      * syntax error. */
5604                 if (*s == '}') {
5605                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5606                         PL_expect = XTERM;
5607                         /* This hack is to get the ${} in the message. */
5608                         PL_bufptr = s+1;
5609                         yyerror("syntax error");
5610                         break;
5611                     }
5612                     OPERATOR(HASHBRACK);
5613                 }
5614                 /* This hack serves to disambiguate a pair of curlies
5615                  * as being a block or an anon hash.  Normally, expectation
5616                  * determines that, but in cases where we're not in a
5617                  * position to expect anything in particular (like inside
5618                  * eval"") we have to resolve the ambiguity.  This code
5619                  * covers the case where the first term in the curlies is a
5620                  * quoted string.  Most other cases need to be explicitly
5621                  * disambiguated by prepending a "+" before the opening
5622                  * curly in order to force resolution as an anon hash.
5623                  *
5624                  * XXX should probably propagate the outer expectation
5625                  * into eval"" to rely less on this hack, but that could
5626                  * potentially break current behavior of eval"".
5627                  * GSAR 97-07-21
5628                  */
5629                 t = s;
5630                 if (*s == '\'' || *s == '"' || *s == '`') {
5631                     /* common case: get past first string, handling escapes */
5632                     for (t++; t < PL_bufend && *t != *s;)
5633                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5634                             t++;
5635                     t++;
5636                 }
5637                 else if (*s == 'q') {
                         /* 'q', 'qq' or 'qx' followed by a non-word
                          * character is taken as a quote-like construct;
                          * otherwise it is skipped as an ordinary word. */
5638                     if (++t < PL_bufend
5639                         && (!isALNUM(*t)
5640                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5641                                 && !isALNUM(*t))))
5642                     {
5643                         /* skip q//-like construct */
5644                         const char *tmps;
5645                         char open, close, term;
5646                         I32 brackets = 1;
5647
5648                         while (t < PL_bufend && isSPACE(*t))
5649                             t++;
5650                         /* check for q => */
5651                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5652                             OPERATOR(HASHBRACK);
5653                         }
                             /* Map an opening delimiter to its closing
                              * partner: in the lookup string each closer
                              * sits five characters after its opener (and
                              * each closer maps to itself). */
5654                         term = *t;
5655                         open = term;
5656                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5657                             term = tmps[5];
5658                         close = term;
                             /* Same delimiter at both ends: stop at the
                              * first unescaped occurrence.  Paired
                              * delimiters: track nesting depth in
                              * `brackets`. */
5659                         if (open == close)
5660                             for (t++; t < PL_bufend; t++) {
5661                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5662                                     t++;
5663                                 else if (*t == open)
5664                                     break;
5665                             }
5666                         else {
5667                             for (t++; t < PL_bufend; t++) {
5668                                 if (*t == '\\' && t+1 < PL_bufend)
5669                                     t++;
5670                                 else if (*t == close && --brackets <= 0)
5671                                     break;
5672                                 else if (*t == open)
5673                                     brackets++;
5674                             }
5675                         }
5676                         t++;
5677                     }
5678                     else
5679                         /* skip plain q word */
5680                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5681                              t += UTF8SKIP(t);
5682                 }
5683                 else if (isALNUM_lazy_if(t,UTF)) {
5684                     t += UTF8SKIP(t);
5685                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5686                          t += UTF8SKIP(t);
5687                 }
5688                 while (t < PL_bufend && isSPACE(*t))
5689                     t++;
5690                 /* if comma follows first term, call it an anon hash */
5691                 /* XXX it could be a comma expression with loop modifiers */
                     /* A comma only counts when the first term starts with
                      * 'q' or with something other than a lower-case
                      * letter; "=>" always counts. */
5692                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5693                                    || (*t == '=' && t[1] == '>')))
5694                     OPERATOR(HASHBRACK);
                     /* Not an anon hash.  Under XREF a term is expected
                      * next; otherwise the entry just pushed is rewritten
                      * to XSTATE and a statement is expected. */
5695                 if (PL_expect == XREF)
5696                     PL_expect = XTERM;
5697                 else {
5698                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5699                     PL_expect = XSTATE;
5700                 }
5701             }
5702             break;
5703         }
             /* Reached only by the arms that break: issue a plain '{' whose
              * value is the current line number. */
5704         pl_yylval.ival = CopLINE(PL_curcop);
5705         if (isSPACE(*s) || *s == '#')
5706             PL_copline = NOLINE;   /* invalidate current command line number */
5707         TOKEN('{');
5708     case '}':
             /* Right curly bracket.  If the innermost entry on
              * PL_lex_brackstack is XFAKEEOF, token 0 is issued and the '}'
              * is left unconsumed.  The rightbracket label is also the
              * target of a goto in case '.'. */
5709         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5710             TOKEN(0);
5711       rightbracket:
5712         s++;
             /* Pop the expectation saved when the matching '{' was lexed,
              * or report the imbalance. */
5713         if (PL_lex_brackets <= 0)
5714             yyerror("Unmatched right curly bracket");
5715         else
5716             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5717         PL_lex_allbrackets--;
5718         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5719             PL_lex_formbrack = 0;
5720         if (PL_lex_state == LEX_INTERPNORMAL) {
5721             if (PL_lex_brackets == 0) {
                     /* A popped expectation carrying XFAKEBRACK produces no
                      * token: the flag is masked off, the state moves to
                      * LEX_INTERPEND and lexing resumes via a recursive
                      * yylex() call. */
5722                 if (PL_expect & XFAKEBRACK) {
5723                     PL_expect &= XENUMMASK;
5724                     PL_lex_state = LEX_INTERPEND;
5725                     PL_bufptr = s;
5726 #if 0
5727                     if (PL_madskills) {
5728                         if (!PL_thiswhite)
5729                             PL_thiswhite = newSVpvs("");
5730                         sv_catpvs(PL_thiswhite,"}");
5731                     }
5732 #endif
5733                     return yylex();     /* ignore fake brackets */
5734                 }
                     /* Same end-of-interpolation test as in case ']'. */
5735                 if (*s == '-' && s[1] == '>')
5736                     PL_lex_state = LEX_INTERPENDMAYBE;
5737                 else if (*s != '[' && *s != '{')
5738                     PL_lex_state = LEX_INTERPEND;
5739             }
5740         }
5741         if (PL_expect & XFAKEBRACK) {
5742             PL_expect &= XENUMMASK;
5743             PL_bufptr = s;
5744             return yylex();             /* ignore fake brackets */
5745         }
             /* Real bracket: '}' is handed to force_next() and a ';' token
              * is issued from here. */
5746         start_force(PL_curforce);
5747         if (PL_madskills) {
5748             curmad('X', newSVpvn(s-1,1));
5749             CURMAD('_', PL_thiswhite);
5750         }
5751         force_next('}');
5752 #ifdef PERL_MAD
5753         if (!PL_thistoken)
5754             PL_thistoken = newSVpvs("");
5755 #endif
5756         TOKEN(';');
5757     case '&':
             /* Ampersand: "&&" (ANDAND), a binary bit-and when an operator
              * is expected, or otherwise the '&' sigil in front of an
              * identifier or reference. */
5758         s++;
5759         if (*s++ == '&') {
                 /* `s` is now past "&&"; a following '=' selects the
                  * assignment threshold.  On the fake-EOF path `s` is moved
                  * back to the first '&' before token 0 is issued. */
5760             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5761                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5762                 s -= 2;
5763                 TOKEN(0);
5764             }
5765             AOPERATOR(ANDAND);
5766         }
             /* Single '&': undo the extra increment so `s` sits just past
              * it. */
5767         s--;
5768         if (PL_expect == XOPERATOR) {
                 /* An '&' immediately followed by an identifier at the very
                  * start of a line draws the PL_warn_nosemi warning; the
                  * line number is decremented around the call and restored
                  * afterwards. */
5769             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5770                 && isIDFIRST_lazy_if(s,UTF))
5771             {
5772                 CopLINE_dec(PL_curcop);
5773                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5774                 CopLINE_inc(PL_curcop);
5775             }
5776             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5777                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5778                 s--;
5779                 TOKEN(0);
5780             }
5781             BAop(OP_BIT_AND);
5782         }
5783
             /* Not an operator: scan from the '&' itself (s - 1).  A
              * non-empty PL_tokenbuf is forced as an identifier; an empty
              * one goes through PREREF. */
5784         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5785         if (*PL_tokenbuf) {
5786             PL_expect = XOPERATOR;
5787             force_ident(PL_tokenbuf, '&');
5788         }
5789         else
5790             PREREF('&');
5791         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5792         TERM('&');
5793
5794     case '|':
             /* Vertical bar: "||" (OROR) or a single '|' (bit-or).  In both
              * forms the fake-EOF test uses the assignment threshold when an
              * '=' follows, and rewinds `s` to the first '|' before issuing
              * token 0. */
5795         s++;
5796         if (*s++ == '|') {
5797             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5798                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5799                 s -= 2;
5800                 TOKEN(0);
5801             }
5802             AOPERATOR(OROR);
5803         }
             /* Single '|': undo the extra increment. */
5804         s--;
5805         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5806                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5807             s--;
5808             TOKEN(0);
5809         }
5810         BOop(OP_BIT_OR);
5811     case '=':
             /* Equals sign.  Handles, in order: "==", "=>" (issued as ','),
              * "=~", the reversed-operator warning, a '=' at line start
              * followed by a letter in statement position (the rest of the
              * buffer is skipped as pod), a '=' followed only by blanks
              * while PL_lex_brackets < PL_lex_formbrack (treated as an
              * opening bracket), and finally plain assignment. */
5812         s++;
5813         {
                 /* tmp is the character after '='; `s` now points one past
                  * it. */
5814             const char tmp = *s++;
5815             if (tmp == '=') {
5816                 if (!PL_lex_allbrackets &&
5817                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5818                     s -= 2;
5819                     TOKEN(0);
5820                 }
5821                 Eop(OP_EQ);
5822             }
5823             if (tmp == '>') {
5824                 if (!PL_lex_allbrackets &&
5825                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5826                     s -= 2;
5827                     TOKEN(0);
5828                 }
5829                 OPERATOR(',');
5830             }
5831             if (tmp == '~')
5832                 PMop(OP_MATCH);
                 /* "=+ ", "=- " and the like, followed by whitespace, look
                  * like a mistyped "+=", "-=", ... */
5833             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5834                 && strchr("+-*/%.^&|<",tmp))
5835                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5836                             "Reversed %c= operator",(int)tmp);
                 /* Un-consume tmp: `s` points just past the '=' again. */
5837             s--;
5838             if (PL_expect == XSTATE && isALPHA(tmp) &&
5839                 (s == PL_linestart+1 || s[-2] == '\n') )
5840                 {
                         /* With PL_in_eval set and no PL_rsfp, the whole
                          * text is already in the buffer: walk it line by
                          * line, counting lines with incline(), and resume
                          * after the line starting with "=cut" (or at the
                          * end of the buffer if there is none). */
5841                     if (PL_in_eval && !PL_rsfp) {
5842                         d = PL_bufend;
5843                         while (s < d) {
5844                             if (*s++ == '\n') {
5845                                 incline(s);
5846                                 if (strnEQ(s,"=cut",4)) {
5847                                     s = strchr(s,'\n');
5848                                     if (s)
5849                                         s++;
5850                                     else
5851                                         s = d;
5852                                     incline(s);
5853                                     goto retry;
5854                                 }
5855                             }
5856                         }
5857                         goto retry;
5858                     }
5859 #ifdef PERL_MAD
5860                     if (PL_madskills) {
5861                         if (!PL_thiswhite)
5862                             PL_thiswhite = newSVpvs("");
5863                         sv_catpvn(PL_thiswhite, PL_linestart,
5864                                   PL_bufend - PL_linestart);
5865                     }
5866 #endif
                         /* Otherwise drop the rest of the current buffer and
                          * set the in_pod flag before retrying. */
5867                     s = PL_bufend;
5868                     PL_parser->in_pod = 1;
5869                     goto retry;
5870                 }
5871         }
             /* While the bracket depth is below PL_lex_formbrack, a '='
              * followed only by blanks (and CR unless PERL_STRICT_CR) up to
              * a newline or '#' is re-lexed as a left bracket expecting a
              * block.  NOTE(review): presumably the '=' that ends a format
              * declaration line -- confirm. */
5872         if (PL_lex_brackets < PL_lex_formbrack) {
5873             const char *t = s;
5874 #ifdef PERL_STRICT_CR
5875             while (SPACE_OR_TAB(*t))
5876 #else
5877             while (SPACE_OR_TAB(*t) || *t == '\r')
5878 #endif
5879                 t++;
5880             if (*t == '\n' || *t == '#') {
                     /* leftbracket begins with s++, hence the s-- here. */
5881                 s--;
5882                 PL_expect = XBLOCK;
5883                 goto leftbracket;
5884             }
5885         }
5886         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5887             s--;
5888             TOKEN(0);
5889         }
5890         pl_yylval.ival = 0;
5891         OPERATOR(ASSIGNOP);
5892     case '!':
             /* Exclamation mark: "!=" (OP_NE), "!~" (OP_NOT through PMop),
              * or a plain '!'. */
5893         s++;
5894         {
                 /* tmp is the character after '!'; `s` points one past
                  * it. */
5895             const char tmp = *s++;
5896             if (tmp == '=') {
5897                 /* was this !=~ where !~ was meant?
5898                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5899
5900                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5901                     const char *t = s+1;
5902
5903                     while (t < PL_bufend && isSPACE(*t))
5904                         ++t;
5905
5906                     if (*t == '/' || *t == '?' ||
5907                         ((*t == 'm' || *t == 's' || *t == 'y')
5908                          && !isALNUM(t[1])) ||
5909                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5910                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5911                                     "!=~ should be !~");
5912                 }
5913                 if (!PL_lex_allbrackets &&
5914                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5915                     s -= 2;
5916                     TOKEN(0);
5917                 }
5918                 Eop(OP_NE);
5919             }
5920             if (tmp == '~')
5921                 PMop(OP_NOT);
5922         }
             /* Plain '!': give back the character read into tmp. */
5923         s--;
5924         OPERATOR('!');
5925     case '<':
             /* Less-than sign.  When no operator is expected it starts a
              * here-document ("<<", via scan_heredoc) or an input symbol
              * (via scan_inputsymbol).  Otherwise it is one of "<<", "<=>",
              * "<=" or "<". */
5926         if (PL_expect != XOPERATOR) {
5927             if (s[1] != '<' && !strchr(s,'>'))
5928                 check_uni();
5929             if (s[1] == '<')
5930                 s = scan_heredoc(s);
5931             else
5932                 s = scan_inputsymbol(s);
5933             TERM(sublex_start());
5934         }
5935         s++;
5936         {
5937             char tmp = *s++;
5938             if (tmp == '<') {
                     /* Only "<<=" is subject to the fake-EOF test here. */
5939                 if (*s == '=' && !PL_lex_allbrackets &&
5940                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5941                     s -= 2;
5942                     TOKEN(0);
5943                 }
5944                 SHop(OP_LEFT_SHIFT);
5945             }
5946             if (tmp == '=') {
5947                 tmp = *s++;
5948                 if (tmp == '>') {
                         /* "<=>": three characters consumed, so rewind by
                          * three on the fake-EOF path. */
5949                     if (!PL_lex_allbrackets &&
5950                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5951                         s -= 3;
5952                         TOKEN(0);
5953                     }
5954                     Eop(OP_NCMP);
5955                 }
                     /* "<=": give back the third character first. */
5956                 s--;
5957                 if (!PL_lex_allbrackets &&
5958                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5959                     s -= 2;
5960                     TOKEN(0);
5961                 }
5962                 Rop(OP_LE);
5963             }
5964         }
             /* Plain '<': give back the second character. */
5965         s--;
5966         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5967             s--;
5968             TOKEN(0);
5969         }
5970         Rop(OP_LT);
5971     case '>':
             /* Greater-than sign: ">>" (right shift), ">=" or ">".  Each
              * fake-EOF path rewinds `s` to the first '>' before issuing
              * token 0. */
5972         s++;
5973         {
5974             const char tmp = *s++;
5975             if (tmp == '>') {
                     /* Only ">>=" is subject to the fake-EOF test here. */
5976                 if (*s == '=' && !PL_lex_allbrackets &&
5977                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5978                     s -= 2;
5979                     TOKEN(0);
5980                 }
5981                 SHop(OP_RIGHT_SHIFT);
5982             }
5983             else if (tmp == '=') {
5984                 if (!PL_lex_allbrackets &&
5985                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5986                     s -= 2;
5987                     TOKEN(0);
5988                 }
5989                 Rop(OP_GE);
5990             }
5991         }
             /* Plain '>': give back the second character. */
5992         s--;
5993         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5994             s--;
5995             TOKEN(0);
5996         }
5997         Rop(OP_GT);
5998
5999     case '$':
             /* Dollar sign: the "$#" form (DOLSHARP) or a scalar variable.
              * scan_ident() copies the name into PL_tokenbuf after a sigil
              * placed in PL_tokenbuf[0]; that sigil is later switched to
              * '@' or '%' when a '[' or '{' subscript follows.  The tail of
              * the case chooses PL_expect for the token that follows. */
6000         CLINE;
6001
6002         if (PL_expect == XOPERATOR) {
6003             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6004                 return deprecate_commaless_var_list();
6005             }
6006         }
6007
             /* "$#" followed by an identifier start or one of "{$:+-@".
              * NOTE(review): strchr() also matches the string's terminating
              * NUL, so s[2] == '\0' takes this branch too -- confirm that
              * is intended. */
6008         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6009             PL_tokenbuf[0] = '@';
6010             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6011                            sizeof PL_tokenbuf - 1, FALSE);
6012             if (PL_expect == XOPERATOR)
6013                 no_op("Array length", s);
6014             if (!PL_tokenbuf[1])
6015                 PREREF(DOLSHARP);
6016             PL_expect = XOPERATOR;
6017             PL_pending_ident = '#';
6018             TOKEN(DOLSHARP);
6019         }
6020
6021         PL_tokenbuf[0] = '$';
6022         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6023                        sizeof PL_tokenbuf - 1, FALSE);
6024         if (PL_expect == XOPERATOR)
6025             no_op("Scalar", s);
             /* No name was scanned: a bare '$'. */
6026         if (!PL_tokenbuf[1]) {
6027             if (s == PL_bufend)
6028                 yyerror("Final $ should be \\$ or $name");
6029             PREREF('$');
6030         }
6031
6032         d = s;
6033         {
                 /* tmp is the character right after the identifier, sampled
                  * before any whitespace is skipped. */
6034             const char tmp = *s;
6035             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6036                 s = SKIPSPACE1(s);
6037
6038             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6039                 && intuit_more(s)) {
6040                 if (*s == '[') {
6041                     PL_tokenbuf[0] = '@';
                         /* Warn when the subscript looks like a list of
                          * words/scalars separated by a comma. */
6042                     if (ckWARN(WARN_SYNTAX)) {
6043                         char *t = s+1;
6044
6045                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6046                             t++;
6047                         if (*t++ == ',') {
6048                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6049                             while (t < PL_bufend && *t != ']')
6050                                 t++;
6051                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6052                                         "Multidimensional syntax %.*s not supported",
6053                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6054                         }
6055                     }
6056                 }
6057                 else if (*s == '{') {
6058                     char *t;
6059                     PL_tokenbuf[0] = '%';
                         /* For $SIG{...} = word; where `word` names an
                          * existing sub (get_cvn_flags), warn that the word
                          * needs quoting. */
6060                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6061                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6062                         {
6063                             char tmpbuf[sizeof PL_tokenbuf];
6064                             do {
6065                                 t++;
6066                             } while (isSPACE(*t));
6067                             if (isIDFIRST_lazy_if(t,UTF)) {
6068                                 STRLEN len;
6069                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6070                                               &len);
6071                                 while (isSPACE(*t))
6072                                     t++;
6073                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6074                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6075                                                 "You need to quote \"%s\"",
6076                                                 tmpbuf);
6077                             }
6078                         }
6079                 }
6080             }
6081
                 /* Default: an operator follows.  The chain below switches
                  * to XTERM only in state LEX_NORMAL, when whitespace
                  * followed the variable, the previous list operator sits
                  * at PL_oldoldbufptr and is not OP_GREPSTART, and the next
                  * characters look like the start of a term. */
6082             PL_expect = XOPERATOR;
6083             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6084                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6085                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6086                     PL_expect = XOPERATOR;
6087                 else if (strchr("$@\"'`q", *s))
6088                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6089                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6090                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6091                 else if (isIDFIRST_lazy_if(s,UTF)) {
6092                     char tmpbuf[sizeof PL_tokenbuf];
6093                     int t2;
6094                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6095                     if ((t2 = keyword(tmpbuf, len, 0))) {
6096                         /* binary operators exclude handle interpretations */
6097                         switch (t2) {
6098                         case -KEY_x:
6099                         case -KEY_eq:
6100                         case -KEY_ne:
6101                         case -KEY_gt:
6102                         case -KEY_lt:
6103                         case -KEY_ge:
6104                         case -KEY_le:
6105                         case -KEY_cmp:
6106                             break;
6107                         default:
6108                             PL_expect = XTERM;  /* e.g. print $fh length() */
6109                             break;
6110                         }
6111                     }
6112                     else {
6113                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6114                     }
6115                 }
6116                 else if (isDIGIT(*s))
6117                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6118                 else if (*s == '.' && isDIGIT(s[1]))
6119                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6120                 else if ((*s == '?' || *s == '-' || *s == '+')
6121                          && !isSPACE(s[1]) && s[1] != '=')
6122                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6123                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6124                          && s[1] != '/')
6125                     PL_expect = XTERM;          /* e.g. print $fh /.../
6126                                                    XXX except DORDOR operator
6127                                                 */
6128                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6129                          && s[2] != '=')
6130                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6131             }
6132         }
6133         PL_pending_ident = '$';
6134         TOKEN('$');
6135
6136     case '@':
             /* At sign: an array variable.  The name is scanned into
              * PL_tokenbuf after an '@' sigil; an empty name goes through
              * PREREF.  The sigil becomes '%' when a '{' subscript
              * follows. */
6137         if (PL_expect == XOPERATOR)
6138             no_op("Array", s);
6139         PL_tokenbuf[0] = '@';
6140         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6141         if (!PL_tokenbuf[1]) {
6142             PREREF('@');
6143         }
6144         if (PL_lex_state == LEX_NORMAL)
6145             s = SKIPSPACE1(s);
6146         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6147             if (*s == '{')
6148                 PL_tokenbuf[0] = '%';
6149
6150             /* Warn about @ where they meant $. */
6151             if (*s == '[' || *s == '{') {
6152                 if (ckWARN(WARN_SYNTAX)) {
                         /* Skip a subscript made only of word characters,
                          * blanks, tabs and the characters $ # + - ' "; the
                          * warning fires only when a closing bracket comes
                          * straight after. */
6153                     const char *t = s + 1;
6154                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6155                         t++;
6156                     if (*t == '}' || *t == ']') {
6157                         t++;
6158                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6159                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6160                             "Scalar value %.*s better written as $%.*s",
6161                             (int)(t-PL_bufptr), PL_bufptr,
6162                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6163                     }
6164                 }
6165             }
6166         }
6167         PL_pending_ident = '@';
6168         TERM('@');
6169
6170      case '/':                  /* may be division, defined-or, or pattern */
              /* Under XTERMORDORDOR a "//" is the DORDOR operator; on the
               * fake-EOF path token 0 is issued before `s` has been
               * advanced.  Any other '/' continues into the '?' arm
               * below. */
6171         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6172             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6173                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6174                 TOKEN(0);
6175             s += 2;
6176             AOPERATOR(DORDOR);
6177         }
6178      case '?':                  /* may either be conditional or pattern */
              /* Shared by '/' and '?'.  When an operator is expected this is
               * '?', "//" or '/'; otherwise the character opens a pattern
               * match scanned by scan_pat(). */
6179         if (PL_expect == XOPERATOR) {
6180              char tmp = *s++;
6181              if(tmp == '?') {
6182                 if (!PL_lex_allbrackets &&
6183                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6184                     s--;
6185                     TOKEN(0);
6186                 }
                     /* The '?' is counted as an open bracket in
                      * PL_lex_allbrackets. */
6187                 PL_lex_allbrackets++;
6188                 OPERATOR('?');
6189              }
6190              else {
                      /* tmp was '/'; read the character after it. */
6191                  tmp = *s++;
6192                  if(tmp == '/') {
6193                      /* A // operator. */
6194                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6195                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6196                                             LEX_FAKEEOF_LOGIC)) {
6197                         s -= 2;
6198                         TOKEN(0);
6199                     }
6200                     AOPERATOR(DORDOR);
6201                  }
6202                  else {
                          /* Plain division: give back the look-ahead
                           * character; only "/=" is subject to the fake-EOF
                           * test. */
6203                      s--;
6204                      if (*s == '=' && !PL_lex_allbrackets &&
6205                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6206                          s--;
6207                          TOKEN(0);
6208                      }
6209                      Mop(OP_DIVIDE);
6210                  }
6211              }
6212          }
6213          else {
6214              /* Disable warning on "study /blah/" */
6215              if (PL_oldoldbufptr == PL_last_uni
6216               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6217                   || memNE(PL_last_uni, "study", 5)
6218                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6219               ))
6220                  check_uni();
6221              if (*s == '?')
6222                  deprecate("?PATTERN? without explicit operator");
6223              s = scan_pat(s,OP_MATCH);
6224              TERM(sublex_start());
6225          }
6226
6227     case '.':
             /* Dot.  In order: a lone '.' on its own line while the bracket
              * depth equals PL_lex_formbrack (handled as a right bracket),
              * "..." in statement position (YADAYADA), ".." / "..."
              * (DOTDOT), '.' (concatenation), and finally a number that
              * starts with '.', which falls through to the digit arm. */
6228         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6229 #ifdef PERL_STRICT_CR
6230             && s[1] == '\n'
6231 #else
6232             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6233 #endif
6234             && (s == PL_linestart || s[-1] == '\n') )
6235         {
6236             PL_lex_formbrack = 0;
6237             PL_expect = XSTATE;
6238             goto rightbracket;
6239         }
6240         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6241             s += 3;
6242             OPERATOR(YADAYADA);
6243         }
6244         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
                 /* tmp is the '.' itself; a second '.' makes a range
                  * operator, and a third sets OPf_SPECIAL in the token
                  * value. */
6245             char tmp = *s++;
6246             if (*s == tmp) {
6247                 if (!PL_lex_allbrackets &&
6248                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6249                     s--;
6250                     TOKEN(0);
6251                 }
6252                 s++;
6253                 if (*s == tmp) {
6254                     s++;
6255                     pl_yylval.ival = OPf_SPECIAL;
6256                 }
6257                 else
6258                     pl_yylval.ival = 0;
6259                 OPERATOR(DOTDOT);
6260             }
                 /* Single '.': only ".=" is subject to the fake-EOF test. */
6261             if (*s == '=' && !PL_lex_allbrackets &&
6262                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6263                 s--;
6264                 TOKEN(0);
6265             }
6266             Aop(OP_CONCAT);
6267         }
6268         /* FALL THROUGH */
6269     case '0': case '1': case '2': case '3': case '4':
6270     case '5': case '6': case '7': case '8': case '9':
             /* Numeric literal: scan_num() fills pl_yylval and returns the
              * position after the number. */
6271         s = scan_num(s, &pl_yylval);
6272         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6273         if (PL_expect == XOPERATOR)
6274             no_op("Number",s);
6275         TERM(THING);
6276
6277     case '\'':
             /* Single-quoted string.  scan_str() returns the position after
              * the closing quote, or NULL when no terminator was found (see
              * the !s test below).  The token is an OP_CONST handed to
              * sublex_start(). */
6278         s = scan_str(s,!!PL_madskills,FALSE);
             /* s may still be NULL here (it is only checked further down),
              * so the trace line is skipped rather than formatting a NULL
              * pointer with %s. */
6279         DEBUG_T( { if (s) printbuf("### Saw string before %s\n", s); } );
6280         if (PL_expect == XOPERATOR) {
6281             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6282                 return deprecate_commaless_var_list();
6283             }
6284             else
6285                 no_op("String",s);
6286         }
6287         if (!s)
6288             missingterm(NULL);
6289         pl_yylval.ival = OP_CONST;
6290         TERM(sublex_start());
6291
6292     case '"':
6293         s = scan_str(s,!!PL_madskills,FALSE);
6294         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6295         if (PL_expect == XOPERATOR) {
6296             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6297                 return deprecate_commaless_var_list();
6298             }
6299             else
6300                 no_op("String",s);
6301         }
6302         if (!s)
6303             missingterm(NULL);
6304         pl_yylval.ival = OP_CONST;
6305         /* FIXME. I think that this can be const if char *d is replaced by
6306            more localised variables.  */
6307         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6308             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6309                 pl_yylval.ival = OP_STRINGIFY;
6310                 break;
6311             }
6312         }
6313         TERM(sublex_start());
6314
6315     case '`':
6316         s = scan_str(s,!!PL_madskills,FALSE);
6317         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6318         if (PL_expect == XOPERATOR)
6319             no_op("Backticks",s);
6320         if (!s)
6321             missingterm(NULL);
6322         readpipe_override();
6323         TERM(sublex_start());
6324
6325     case '\\':
6326         s++;
6327         if (PL_lex_inwhat && isDIGIT(*s))
6328             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6329                            *s, *s);
6330         if (PL_expect == XOPERATOR)
6331             no_op("Backslash",s);
6332         OPERATOR(REFGEN);
6333
6334     case 'v':
6335         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6336             char *start = s + 2;
6337             while (isDIGIT(*start) || *start == '_')
6338                 start++;
6339             if (*start == '.' && isDIGIT(start[1])) {
6340                 s = scan_num(s, &pl_yylval);
6341                 TERM(THING);
6342             }
6343             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6344             else if (!isALPHA(*start) && (PL_expect == XTERM
6345                         || PL_expect == XREF || PL_expect == XSTATE
6346                         || PL_expect == XTERMORDORDOR)) {
6347                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6348                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6349                 if (!gv) {
6350                     s = scan_num(s, &pl_yylval);
6351                     TERM(THING);
6352                 }
6353             }
6354         }
6355         goto keylookup;
6356     case 'x':
6357         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6358             s++;
6359             Mop(OP_REPEAT);
6360         }
6361         goto keylookup;
6362
6363     case '_':
6364     case 'a': case 'A':
6365     case 'b': case 'B':
6366     case 'c': case 'C':
6367     case 'd': case 'D':
6368     case 'e': case 'E':
6369     case 'f': case 'F':
6370     case 'g': case 'G':
6371     case 'h': case 'H':
6372     case 'i': case 'I':
6373     case 'j': case 'J':
6374     case 'k': case 'K':
6375     case 'l': case 'L':
6376     case 'm': case 'M':
6377     case 'n': case 'N':
6378     case 'o': case 'O':
6379     case 'p': case 'P':
6380     case 'q': case 'Q':
6381     case 'r': case 'R':
6382     case 's': case 'S':
6383     case 't': case 'T':
6384     case 'u': case 'U':
6385               case 'V':
6386     case 'w': case 'W':
6387               case 'X':
6388     case 'y': case 'Y':
6389     case 'z': case 'Z':
6390
6391       keylookup: {
6392         bool anydelim;
6393         I32 tmp;
6394
6395         orig_keyword = 0;
6396         gv = NULL;
6397         gvp = NULL;
6398
6399         PL_bufptr = s;
6400         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6401
6402         /* Some keywords can be followed by any delimiter, including ':' */
6403         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6404
6405         /* x::* is just a word, unless x is "CORE" */
6406         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6407             goto just_a_word;
6408
6409         d = s;
6410         while (d < PL_bufend && isSPACE(*d))
6411                 d++;    /* no comments skipped here, or s### is misparsed */
6412
6413         /* Is this a word before a => operator? */
6414         if (*d == '=' && d[1] == '>') {
6415             CLINE;
6416             pl_yylval.opval
6417                 = (OP*)newSVOP(OP_CONST, 0,
6418                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6419             pl_yylval.opval->op_private = OPpCONST_BARE;
6420             TERM(WORD);
6421         }
6422
6423         /* Check for plugged-in keyword */
6424         {
6425             OP *o;
6426             int result;
6427             char *saved_bufptr = PL_bufptr;
6428             PL_bufptr = s;
6429             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6430             s = PL_bufptr;
6431             if (result == KEYWORD_PLUGIN_DECLINE) {
6432                 /* not a plugged-in keyword */
6433                 PL_bufptr = saved_bufptr;
6434             } else if (result == KEYWORD_PLUGIN_STMT) {
6435                 pl_yylval.opval = o;
6436                 CLINE;
6437                 PL_expect = XSTATE;
6438                 return REPORT(PLUGSTMT);
6439             } else if (result == KEYWORD_PLUGIN_EXPR) {
6440                 pl_yylval.opval = o;
6441                 CLINE;
6442                 PL_expect = XOPERATOR;
6443                 return REPORT(PLUGEXPR);
6444             } else {
6445                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6446                                         PL_tokenbuf);
6447             }
6448         }
6449
6450         /* Check for built-in keyword */
6451         tmp = keyword(PL_tokenbuf, len, 0);
6452
6453         /* Is this a label? */
6454         if (!anydelim && PL_expect == XSTATE
6455               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6456             s = d + 1;
6457             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6458             CLINE;
6459             TOKEN(LABEL);
6460         }
6461
6462         if (tmp < 0) {                  /* second-class keyword? */
6463             GV *ogv = NULL;     /* override (winner) */
6464             GV *hgv = NULL;     /* hidden (loser) */
6465             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6466                 CV *cv;
6467                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6468                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6469                     (cv = GvCVu(gv)))
6470                 {
6471                     if (GvIMPORTED_CV(gv))
6472                         ogv = gv;
6473                     else if (! CvMETHOD(cv))
6474                         hgv = gv;
6475                 }
6476                 if (!ogv &&
6477                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6478                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6479                     (gv = *gvp) && isGV_with_GP(gv) &&
6480                     GvCVu(gv) && GvIMPORTED_CV(gv))
6481                 {
6482                     ogv = gv;
6483                 }
6484             }
6485             if (ogv) {
6486                 orig_keyword = tmp;
6487                 tmp = 0;                /* overridden by import or by GLOBAL */
6488             }
6489             else if (gv && !gvp
6490                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6491                      && GvCVu(gv))
6492             {
6493                 tmp = 0;                /* any sub overrides "weak" keyword */
6494             }
6495             else {                      /* no override */
6496                 tmp = -tmp;
6497                 if (tmp == KEY_dump) {
6498                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6499                                    "dump() better written as CORE::dump()");
6500                 }
6501                 gv = NULL;
6502                 gvp = 0;
6503                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6504                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6505                                    "Ambiguous call resolved as CORE::%s(), "
6506                                    "qualify as such or use &",
6507                                    GvENAME(hgv));
6508             }
6509         }
6510
6511       reserved_word:
6512         switch (tmp) {
6513
6514         default:                        /* not a keyword */
6515             /* Trade off - by using this evil construction we can pull the
6516                variable gv into the block labelled keylookup. If not, then
6517                we have to give it function scope so that the goto from the
6518                earlier ':' case doesn't bypass the initialisation.  */
6519             if (0) {
6520             just_a_word_zero_gv:
6521                 gv = NULL;
6522                 gvp = NULL;
6523                 orig_keyword = 0;
6524             }
6525           just_a_word: {
6526                 SV *sv;
6527                 int pkgname = 0;
6528                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6529                 OP *rv2cv_op;
6530                 CV *cv;
6531 #ifdef PERL_MAD
6532                 SV *nextPL_nextwhite = 0;
6533 #endif
6534
6535
6536                 /* Get the rest if it looks like a package qualifier */
6537
6538                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6539                     STRLEN morelen;
6540                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6541                                   TRUE, &morelen);
6542                     if (!morelen)
6543                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6544                                 *s == '\'' ? "'" : "::");
6545                     len += morelen;
6546                     pkgname = 1;
6547                 }
6548
6549                 if (PL_expect == XOPERATOR) {
6550                     if (PL_bufptr == PL_linestart) {
6551                         CopLINE_dec(PL_curcop);
6552                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6553                         CopLINE_inc(PL_curcop);
6554                     }
6555                     else
6556                         no_op("Bareword",s);
6557                 }
6558
6559                 /* Look for a subroutine with this name in current package,
6560                    unless name is "Foo::", in which case Foo is a bareword
6561                    (and a package name). */
6562
6563                 if (len > 2 && !PL_madskills &&
6564                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6565                 {
6566                     if (ckWARN(WARN_BAREWORD)
6567                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6568                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6569                             "Bareword \"%s\" refers to nonexistent package",
6570                              PL_tokenbuf);
6571                     len -= 2;
6572                     PL_tokenbuf[len] = '\0';
6573                     gv = NULL;
6574                     gvp = 0;
6575                 }
6576                 else {
6577                     if (!gv) {
6578                         /* Mustn't actually add anything to a symbol table.
6579                            But also don't want to "initialise" any placeholder
6580                            constants that might already be there into full
6581                            blown PVGVs with attached PVCV.  */
6582                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6583                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6584                                                SVt_PVCV);
6585                     }
6586                     len = 0;
6587                 }
6588
6589                 /* if we saw a global override before, get the right name */
6590
6591                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6592                     len ? len : strlen(PL_tokenbuf));
6593                 if (gvp) {
6594                     SV * const tmp_sv = sv;
6595                     sv = newSVpvs("CORE::GLOBAL::");
6596                     sv_catsv(sv, tmp_sv);
6597                     SvREFCNT_dec(tmp_sv);
6598                 }
6599
6600 #ifdef PERL_MAD
6601                 if (PL_madskills && !PL_thistoken) {
6602                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6603                     PL_thistoken = newSVpvn(start,s - start);
6604                     PL_realtokenstart = s - SvPVX(PL_linestr);
6605                 }
6606 #endif
6607
6608                 /* Presume this is going to be a bareword of some sort. */
6609                 CLINE;
6610                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6611                 pl_yylval.opval->op_private = OPpCONST_BARE;
6612
6613                 /* And if "Foo::", then that's what it certainly is. */
6614                 if (len)
6615                     goto safe_bareword;
6616
6617                 {
6618                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6619                     const_op->op_private = OPpCONST_BARE;
6620                     rv2cv_op = newCVREF(0, const_op);
6621                 }
6622                 cv = rv2cv_op_cv(rv2cv_op, 0);
6623
6624                 /* See if it's the indirect object for a list operator. */
6625
6626                 if (PL_oldoldbufptr &&
6627                     PL_oldoldbufptr < PL_bufptr &&
6628                     (PL_oldoldbufptr == PL_last_lop
6629                      || PL_oldoldbufptr == PL_last_uni) &&
6630                     /* NO SKIPSPACE BEFORE HERE! */
6631                     (PL_expect == XREF ||
6632                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6633                 {
6634                     bool immediate_paren = *s == '(';
6635
6636                     /* (Now we can afford to cross potential line boundary.) */
6637                     s = SKIPSPACE2(s,nextPL_nextwhite);
6638 #ifdef PERL_MAD
6639                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6640 #endif
6641
6642                     /* Two barewords in a row may indicate method call. */
6643
6644                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6645                         (tmp = intuit_method(s, gv, cv))) {
6646                         op_free(rv2cv_op);
6647                         if (tmp == METHOD && !PL_lex_allbrackets &&
6648                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6649                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6650                         return REPORT(tmp);
6651                     }
6652
6653                     /* If not a declared subroutine, it's an indirect object. */
6654                     /* (But it's an indir obj regardless for sort.) */
6655                     /* Also, if "_" follows a filetest operator, it's a bareword */
6656
6657                     if (
6658                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6659                          (!cv &&
6660                         (PL_last_lop_op != OP_MAPSTART &&
6661                          PL_last_lop_op != OP_GREPSTART))))
6662                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6663                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6664                        )
6665                     {
6666                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6667                         goto bareword;
6668                     }
6669                 }
6670
6671                 PL_expect = XOPERATOR;
6672 #ifdef PERL_MAD
6673                 if (isSPACE(*s))
6674                     s = SKIPSPACE2(s,nextPL_nextwhite);
6675                 PL_nextwhite = nextPL_nextwhite;
6676 #else
6677                 s = skipspace(s);
6678 #endif
6679
6680                 /* Is this a word before a => operator? */
6681                 if (*s == '=' && s[1] == '>' && !pkgname) {
6682                     op_free(rv2cv_op);
6683                     CLINE;
6684                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6685                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6686                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6687                     TERM(WORD);
6688                 }
6689
6690                 /* If followed by a paren, it's certainly a subroutine. */
6691                 if (*s == '(') {
6692                     CLINE;
6693                     if (cv) {
6694                         d = s + 1;
6695                         while (SPACE_OR_TAB(*d))
6696                             d++;
6697                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6698                             s = d + 1;
6699                             goto its_constant;
6700                         }
6701                     }
6702 #ifdef PERL_MAD
6703                     if (PL_madskills) {
6704                         PL_nextwhite = PL_thiswhite;
6705                         PL_thiswhite = 0;
6706                     }
6707                     start_force(PL_curforce);
6708 #endif
6709                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6710                     PL_expect = XOPERATOR;
6711 #ifdef PERL_MAD
6712                     if (PL_madskills) {
6713                         PL_nextwhite = nextPL_nextwhite;
6714                         curmad('X', PL_thistoken);
6715                         PL_thistoken = newSVpvs("");
6716                     }
6717 #endif
6718                     op_free(rv2cv_op);
6719                     force_next(WORD);
6720                     pl_yylval.ival = 0;
6721                     TOKEN('&');
6722                 }
6723
6724                 /* If followed by var or block, call it a method (unless sub) */
6725
6726                 if ((*s == '$' || *s == '{') && !cv) {
6727                     op_free(rv2cv_op);
6728                     PL_last_lop = PL_oldbufptr;
6729                     PL_last_lop_op = OP_METHOD;
6730                     if (!PL_lex_allbrackets &&
6731                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6732                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6733                     PREBLOCK(METHOD);
6734                 }
6735
6736                 /* If followed by a bareword, see if it looks like indir obj. */
6737
6738                 if (!orig_keyword
6739                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6740                         && (tmp = intuit_method(s, gv, cv))) {
6741                     op_free(rv2cv_op);
6742                     if (tmp == METHOD && !PL_lex_allbrackets &&
6743                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6744                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6745                     return REPORT(tmp);
6746                 }
6747
6748                 /* Not a method, so call it a subroutine (if defined) */
6749
6750                 if (cv) {
6751                     if (lastchar == '-')
6752                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6753                                          "Ambiguous use of -%s resolved as -&%s()",
6754                                          PL_tokenbuf, PL_tokenbuf);
6755                     /* Check for a constant sub */
6756                     if ((sv = cv_const_sv(cv))) {
6757                   its_constant:
6758                         op_free(rv2cv_op);
6759                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6760                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6761                         pl_yylval.opval->op_private = 0;
6762                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6763                         TOKEN(WORD);
6764                     }
6765
6766                     op_free(pl_yylval.opval);
6767                     pl_yylval.opval = rv2cv_op;
6768                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6769                     PL_last_lop = PL_oldbufptr;
6770                     PL_last_lop_op = OP_ENTERSUB;
6771                     /* Is there a prototype? */
6772                     if (
6773 #ifdef PERL_MAD
6774                         cv &&
6775 #endif
6776                         SvPOK(cv))
6777                     {
6778                         STRLEN protolen = CvPROTOLEN(cv);
6779                         const char *proto = CvPROTO(cv);
6780                         if (!protolen)
6781                             TERM(FUNC0SUB);
6782                         while (*proto == ';')
6783                             proto++;
6784                         if (
6785                             (
6786                                 (
6787                                     *proto == '$' || *proto == '_'
6788                                  || *proto == '*' || *proto == '+'
6789                                 )
6790                              && proto[1] == '\0'
6791                             )
6792                          || (
6793                              *proto == '\\' && proto[1] && proto[2] == '\0'
6794                             )
6795                         )
6796                             OPERATOR(UNIOPSUB);
6797                         if (*proto == '\\' && proto[1] == '[') {
6798                             const char *p = proto + 2;
6799                             while(*p && *p != ']')
6800                                 ++p;
6801                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6802                         }
6803                         if (*proto == '&' && *s == '{') {
6804                             if (PL_curstash)
6805                                 sv_setpvs(PL_subname, "__ANON__");
6806                             else
6807                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6808                             if (!PL_lex_allbrackets &&
6809                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6810                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6811                             PREBLOCK(LSTOPSUB);
6812                         }
6813                     }
6814 #ifdef PERL_MAD
6815                     {
6816                         if (PL_madskills) {
6817                             PL_nextwhite = PL_thiswhite;
6818                             PL_thiswhite = 0;
6819                         }
6820                         start_force(PL_curforce);
6821                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6822                         PL_expect = XTERM;
6823                         if (PL_madskills) {
6824                             PL_nextwhite = nextPL_nextwhite;
6825                             curmad('X', PL_thistoken);
6826                             PL_thistoken = newSVpvs("");
6827                         }
6828                         force_next(WORD);
6829                         if (!PL_lex_allbrackets &&
6830                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6831                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6832                         TOKEN(NOAMP);
6833                     }
6834                 }
6835
6836                 /* Guess harder when madskills require "best effort". */
6837                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6838                     int probable_sub = 0;
6839                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6840                         probable_sub = 1;
6841                     else if (isALPHA(*s)) {
6842                         char tmpbuf[1024];
6843                         STRLEN tmplen;
6844                         d = s;
6845                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6846                         if (!keyword(tmpbuf, tmplen, 0))
6847                             probable_sub = 1;
6848                         else {
6849                             while (d < PL_bufend && isSPACE(*d))
6850                                 d++;
6851                             if (*d == '=' && d[1] == '>')
6852                                 probable_sub = 1;
6853                         }
6854                     }
6855                     if (probable_sub) {
6856                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6857                                         SVt_PVCV);
6858                         op_free(pl_yylval.opval);
6859                         pl_yylval.opval = rv2cv_op;
6860                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6861                         PL_last_lop = PL_oldbufptr;
6862                         PL_last_lop_op = OP_ENTERSUB;
6863                         PL_nextwhite = PL_thiswhite;
6864                         PL_thiswhite = 0;
6865                         start_force(PL_curforce);
6866                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6867                         PL_expect = XTERM;
6868                         PL_nextwhite = nextPL_nextwhite;
6869                         curmad('X', PL_thistoken);
6870                         PL_thistoken = newSVpvs("");
6871                         force_next(WORD);
6872                         if (!PL_lex_allbrackets &&
6873                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6874                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6875                         TOKEN(NOAMP);
6876                     }
6877 #else
6878                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6879                     PL_expect = XTERM;
6880                     force_next(WORD);
6881                     if (!PL_lex_allbrackets &&
6882                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6883                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6884                     TOKEN(NOAMP);
6885 #endif
6886                 }
6887
6888                 /* Call it a bare word */
6889
6890                 if (PL_hints & HINT_STRICT_SUBS)
6891                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6892                 else {
6893                 bareword:
6894                     /* after "print" and similar functions (corresponding to
6895                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6896                      * a filehandle should be subject to "strict subs".
6897                      * Likewise for the optional indirect-object argument to system
6898                      * or exec, which can't be a bareword */
6899                     if ((PL_last_lop_op == OP_PRINT
6900                             || PL_last_lop_op == OP_PRTF
6901                             || PL_last_lop_op == OP_SAY
6902                             || PL_last_lop_op == OP_SYSTEM
6903                             || PL_last_lop_op == OP_EXEC)
6904                             && (PL_hints & HINT_STRICT_SUBS))
6905                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6906                     if (lastchar != '-') {
6907                         if (ckWARN(WARN_RESERVED)) {
6908                             d = PL_tokenbuf;
6909                             while (isLOWER(*d))
6910                                 d++;
6911                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6912                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6913                                        PL_tokenbuf);
6914                         }
6915                     }
6916                 }
6917                 op_free(rv2cv_op);
6918
6919             safe_bareword:
6920                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6921                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6922                                      "Operator or semicolon missing before %c%s",
6923                                      lastchar, PL_tokenbuf);
6924                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6925                                      "Ambiguous use of %c resolved as operator %c",
6926                                      lastchar, lastchar);
6927                 }
6928                 TOKEN(WORD);
6929             }
6930
6931         case KEY___FILE__:
6932             FUN0OP(
6933                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6934             );
6935
6936         case KEY___LINE__:
6937             FUN0OP(
6938                 (OP*)newSVOP(OP_CONST, 0,
6939                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6940             );
6941
6942         case KEY___PACKAGE__:
6943             FUN0OP(
6944                 (OP*)newSVOP(OP_CONST, 0,
6945                                         (PL_curstash
6946                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6947                                          : &PL_sv_undef))
6948             );
6949
6950         case KEY___DATA__:
6951         case KEY___END__: {
6952             GV *gv;
6953             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6954                 const char *pname = "main";
6955                 STRLEN plen = 4;
6956                 U32 putf8 = 0;
6957                 if (PL_tokenbuf[2] == 'D')
6958                 {
6959                     HV * const stash =
6960                         PL_curstash ? PL_curstash : PL_defstash;
6961                     pname = HvNAME_get(stash);
6962                     plen  = HvNAMELEN (stash);
6963                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
6964                 }
6965                 gv = gv_fetchpvn_flags(
6966                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
6967                         plen+6, GV_ADD|putf8, SVt_PVIO
6968                 );
6969                 GvMULTI_on(gv);
6970                 if (!GvIO(gv))
6971                     GvIOp(gv) = newIO();
6972                 IoIFP(GvIOp(gv)) = PL_rsfp;
6973 #if defined(HAS_FCNTL) && defined(F_SETFD)
6974                 {
6975                     const int fd = PerlIO_fileno(PL_rsfp);
6976                     fcntl(fd,F_SETFD,fd >= 3);
6977                 }
6978 #endif
6979                 /* Mark this internal pseudo-handle as clean */
6980                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6981                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6982                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6983                 else
6984                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6985 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6986                 /* if the script was opened in binmode, we need to revert
6987                  * it to text mode for compatibility; but only if it has CRs
6988                  * XXX this is a questionable hack at best. */
6989                 if (PL_bufend-PL_bufptr > 2
6990                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6991                 {
6992                     Off_t loc = 0;
6993                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6994                         loc = PerlIO_tell(PL_rsfp);
6995                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6996                     }
6997 #ifdef NETWARE
6998                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6999 #else
7000                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7001 #endif  /* NETWARE */
7002                         if (loc > 0)
7003                             PerlIO_seek(PL_rsfp, loc, 0);
7004                     }
7005                 }
7006 #endif
7007 #ifdef PERLIO_LAYERS
7008                 if (!IN_BYTES) {
7009                     if (UTF)
7010                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7011                     else if (PL_encoding) {
7012                         SV *name;
7013                         dSP;
7014                         ENTER;
7015                         SAVETMPS;
7016                         PUSHMARK(sp);
7017                         EXTEND(SP, 1);
7018                         XPUSHs(PL_encoding);
7019                         PUTBACK;
7020                         call_method("name", G_SCALAR);
7021                         SPAGAIN;
7022                         name = POPs;
7023                         PUTBACK;
7024                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7025                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7026                                                       SVfARG(name)));
7027                         FREETMPS;
7028                         LEAVE;
7029                     }
7030                 }
7031 #endif
7032 #ifdef PERL_MAD
7033                 if (PL_madskills) {
7034                     if (PL_realtokenstart >= 0) {
7035                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7036                         if (!PL_endwhite)
7037                             PL_endwhite = newSVpvs("");
7038                         sv_catsv(PL_endwhite, PL_thiswhite);
7039                         PL_thiswhite = 0;
7040                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7041                         PL_realtokenstart = -1;
7042                     }
7043                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7044                            != NULL) ;
7045                 }
7046 #endif
7047                 PL_rsfp = NULL;
7048             }
7049             goto fake_eof;
7050         }
7051
7052         case KEY_AUTOLOAD:
7053         case KEY_DESTROY:
7054         case KEY_BEGIN:
7055         case KEY_UNITCHECK:
7056         case KEY_CHECK:
7057         case KEY_INIT:
7058         case KEY_END:
7059             if (PL_expect == XSTATE) {
7060                 s = PL_bufptr;
7061                 goto really_sub;
7062             }
7063             goto just_a_word;
7064
7065         case KEY_CORE:
7066             if (*s == ':' && s[1] == ':') {
7067                 s += 2;
7068                 d = s;
7069                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7070                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7071                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7072                 if (tmp < 0)
7073                     tmp = -tmp;
7074                 else if (tmp == KEY_require || tmp == KEY_do
7075                       || tmp == KEY_glob)
7076                     /* that's a way to remember we saw "CORE::" */
7077                     orig_keyword = tmp;
7078                 goto reserved_word;
7079             }
7080             goto just_a_word;
7081
7082         case KEY_abs:
7083             UNI(OP_ABS);
7084
7085         case KEY_alarm:
7086             UNI(OP_ALARM);
7087
7088         case KEY_accept:
7089             LOP(OP_ACCEPT,XTERM);
7090
7091         case KEY_and:
7092             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7093                 return REPORT(0);
7094             OPERATOR(ANDOP);
7095
7096         case KEY_atan2:
7097             LOP(OP_ATAN2,XTERM);
7098
7099         case KEY_bind:
7100             LOP(OP_BIND,XTERM);
7101
7102         case KEY_binmode:
7103             LOP(OP_BINMODE,XTERM);
7104
7105         case KEY_bless:
7106             LOP(OP_BLESS,XTERM);
7107
7108         case KEY_break:
7109             FUN0(OP_BREAK);
7110
7111         case KEY_chop:
7112             UNI(OP_CHOP);
7113
7114         case KEY_continue:
7115                     /* We have to disambiguate the two senses of
7116                       "continue". If the next token is a '{' then
7117                       treat it as the start of a continue block;
7118                       otherwise treat it as a control operator.
7119                      */
7120                     s = skipspace(s);
7121                     if (*s == '{')
7122             PREBLOCK(CONTINUE);
7123                     else
7124                         FUN0(OP_CONTINUE);
7125
7126         case KEY_chdir:
7127             /* may use HOME */
7128             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7129             UNI(OP_CHDIR);
7130
7131         case KEY_close:
7132             UNI(OP_CLOSE);
7133
7134         case KEY_closedir:
7135             UNI(OP_CLOSEDIR);
7136
7137         case KEY_cmp:
7138             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7139                 return REPORT(0);
7140             Eop(OP_SCMP);
7141
7142         case KEY_caller:
7143             UNI(OP_CALLER);
7144
7145         case KEY_crypt:
7146 #ifdef FCRYPT
7147             if (!PL_cryptseen) {
7148                 PL_cryptseen = TRUE;
7149                 init_des();
7150             }
7151 #endif
7152             LOP(OP_CRYPT,XTERM);
7153
7154         case KEY_chmod:
7155             LOP(OP_CHMOD,XTERM);
7156
7157         case KEY_chown:
7158             LOP(OP_CHOWN,XTERM);
7159
7160         case KEY_connect:
7161             LOP(OP_CONNECT,XTERM);
7162
7163         case KEY_chr:
7164             UNI(OP_CHR);
7165
7166         case KEY_cos:
7167             UNI(OP_COS);
7168
7169         case KEY_chroot:
7170             UNI(OP_CHROOT);
7171
7172         case KEY_default:
7173             PREBLOCK(DEFAULT);
7174
7175         case KEY_do:
7176             s = SKIPSPACE1(s);
7177             if (*s == '{')
7178                 PRETERMBLOCK(DO);
7179             if (*s != '\'')
7180                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7181             if (orig_keyword == KEY_do) {
7182                 orig_keyword = 0;
7183                 pl_yylval.ival = 1;
7184             }
7185             else
7186                 pl_yylval.ival = 0;
7187             OPERATOR(DO);
7188
7189         case KEY_die:
7190             PL_hints |= HINT_BLOCK_SCOPE;
7191             LOP(OP_DIE,XTERM);
7192
7193         case KEY_defined:
7194             UNI(OP_DEFINED);
7195
7196         case KEY_delete:
7197             UNI(OP_DELETE);
7198
7199         case KEY_dbmopen:
7200             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7201                               STR_WITH_LEN("NDBM_File::"),
7202                               STR_WITH_LEN("DB_File::"),
7203                               STR_WITH_LEN("GDBM_File::"),
7204                               STR_WITH_LEN("SDBM_File::"),
7205                               STR_WITH_LEN("ODBM_File::"),
7206                               NULL);
7207             LOP(OP_DBMOPEN,XTERM);
7208
7209         case KEY_dbmclose:
7210             UNI(OP_DBMCLOSE);
7211
7212         case KEY_dump:
7213             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7214             LOOPX(OP_DUMP);
7215
7216         case KEY_else:
7217             PREBLOCK(ELSE);
7218
7219         case KEY_elsif:
7220             pl_yylval.ival = CopLINE(PL_curcop);
7221             OPERATOR(ELSIF);
7222
7223         case KEY_eq:
7224             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7225                 return REPORT(0);
7226             Eop(OP_SEQ);
7227
7228         case KEY_exists:
7229             UNI(OP_EXISTS);
7230
7231         case KEY_exit:
7232             if (PL_madskills)
7233                 UNI(OP_INT);
7234             UNI(OP_EXIT);
7235
7236         case KEY_eval:
7237             s = SKIPSPACE1(s);
7238             if (*s == '{') { /* block eval */
7239                 PL_expect = XTERMBLOCK;
7240                 UNIBRACK(OP_ENTERTRY);
7241             }
7242             else { /* string eval */
7243                 PL_expect = XTERM;
7244                 UNIBRACK(OP_ENTEREVAL);
7245             }
7246
7247         case KEY_eof:
7248             UNI(OP_EOF);
7249
7250         case KEY_exp:
7251             UNI(OP_EXP);
7252
7253         case KEY_each:
7254             UNI(OP_EACH);
7255
7256         case KEY_exec:
7257             LOP(OP_EXEC,XREF);
7258
7259         case KEY_endhostent:
7260             FUN0(OP_EHOSTENT);
7261
7262         case KEY_endnetent:
7263             FUN0(OP_ENETENT);
7264
7265         case KEY_endservent:
7266             FUN0(OP_ESERVENT);
7267
7268         case KEY_endprotoent:
7269             FUN0(OP_EPROTOENT);
7270
7271         case KEY_endpwent:
7272             FUN0(OP_EPWENT);
7273
7274         case KEY_endgrent:
7275             FUN0(OP_EGRENT);
7276
7277         case KEY_for:
7278         case KEY_foreach:
7279             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7280                 return REPORT(0);
7281             pl_yylval.ival = CopLINE(PL_curcop);
7282             s = SKIPSPACE1(s);
7283             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7284                 char *p = s;
7285 #ifdef PERL_MAD
7286                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7287 #endif
7288
7289                 if ((PL_bufend - p) >= 3 &&
7290                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7291                     p += 2;
7292                 else if ((PL_bufend - p) >= 4 &&
7293                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7294                     p += 3;
7295                 p = PEEKSPACE(p);
7296                 if (isIDFIRST_lazy_if(p,UTF)) {
7297                     p = scan_ident(p, PL_bufend,
7298                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7299                     p = PEEKSPACE(p);
7300                 }
7301                 if (*p != '$')
7302                     Perl_croak(aTHX_ "Missing $ on loop variable");
7303 #ifdef PERL_MAD
7304                 s = SvPVX(PL_linestr) + soff;
7305 #endif
7306             }
7307             OPERATOR(FOR);
7308
7309         case KEY_formline:
7310             LOP(OP_FORMLINE,XTERM);
7311
7312         case KEY_fork:
7313             FUN0(OP_FORK);
7314
7315         case KEY_fcntl:
7316             LOP(OP_FCNTL,XTERM);
7317
7318         case KEY_fileno:
7319             UNI(OP_FILENO);
7320
7321         case KEY_flock:
7322             LOP(OP_FLOCK,XTERM);
7323
7324         case KEY_gt:
7325             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7326                 return REPORT(0);
7327             Rop(OP_SGT);
7328
7329         case KEY_ge:
7330             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7331                 return REPORT(0);
7332             Rop(OP_SGE);
7333
7334         case KEY_grep:
7335             LOP(OP_GREPSTART, XREF);
7336
7337         case KEY_goto:
7338             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7339             LOOPX(OP_GOTO);
7340
7341         case KEY_gmtime:
7342             UNI(OP_GMTIME);
7343
7344         case KEY_getc:
7345             UNIDOR(OP_GETC);
7346
7347         case KEY_getppid:
7348             FUN0(OP_GETPPID);
7349
7350         case KEY_getpgrp:
7351             UNI(OP_GETPGRP);
7352
7353         case KEY_getpriority:
7354             LOP(OP_GETPRIORITY,XTERM);
7355
7356         case KEY_getprotobyname:
7357             UNI(OP_GPBYNAME);
7358
7359         case KEY_getprotobynumber:
7360             LOP(OP_GPBYNUMBER,XTERM);
7361
7362         case KEY_getprotoent:
7363             FUN0(OP_GPROTOENT);
7364
7365         case KEY_getpwent:
7366             FUN0(OP_GPWENT);
7367
7368         case KEY_getpwnam:
7369             UNI(OP_GPWNAM);
7370
7371         case KEY_getpwuid:
7372             UNI(OP_GPWUID);
7373
7374         case KEY_getpeername:
7375             UNI(OP_GETPEERNAME);
7376
7377         case KEY_gethostbyname:
7378             UNI(OP_GHBYNAME);
7379
7380         case KEY_gethostbyaddr:
7381             LOP(OP_GHBYADDR,XTERM);
7382
7383         case KEY_gethostent:
7384             FUN0(OP_GHOSTENT);
7385
7386         case KEY_getnetbyname:
7387             UNI(OP_GNBYNAME);
7388
7389         case KEY_getnetbyaddr:
7390             LOP(OP_GNBYADDR,XTERM);
7391
7392         case KEY_getnetent:
7393             FUN0(OP_GNETENT);
7394
7395         case KEY_getservbyname:
7396             LOP(OP_GSBYNAME,XTERM);
7397
7398         case KEY_getservbyport:
7399             LOP(OP_GSBYPORT,XTERM);
7400
7401         case KEY_getservent:
7402             FUN0(OP_GSERVENT);
7403
7404         case KEY_getsockname:
7405             UNI(OP_GETSOCKNAME);
7406
7407         case KEY_getsockopt:
7408             LOP(OP_GSOCKOPT,XTERM);
7409
7410         case KEY_getgrent:
7411             FUN0(OP_GGRENT);
7412
7413         case KEY_getgrnam:
7414             UNI(OP_GGRNAM);
7415
7416         case KEY_getgrgid:
7417             UNI(OP_GGRGID);
7418
7419         case KEY_getlogin:
7420             FUN0(OP_GETLOGIN);
7421
7422         case KEY_given:
7423             pl_yylval.ival = CopLINE(PL_curcop);
7424             OPERATOR(GIVEN);
7425
7426         case KEY_glob:
7427             LOP(
7428              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7429              XTERM
7430             );
7431
7432         case KEY_hex:
7433             UNI(OP_HEX);
7434
7435         case KEY_if:
7436             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7437                 return REPORT(0);
7438             pl_yylval.ival = CopLINE(PL_curcop);
7439             OPERATOR(IF);
7440
7441         case KEY_index:
7442             LOP(OP_INDEX,XTERM);
7443
7444         case KEY_int:
7445             UNI(OP_INT);
7446
7447         case KEY_ioctl:
7448             LOP(OP_IOCTL,XTERM);
7449
7450         case KEY_join:
7451             LOP(OP_JOIN,XTERM);
7452
7453         case KEY_keys:
7454             UNI(OP_KEYS);
7455
7456         case KEY_kill:
7457             LOP(OP_KILL,XTERM);
7458
7459         case KEY_last:
7460             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7461             LOOPX(OP_LAST);
7462
7463         case KEY_lc:
7464             UNI(OP_LC);
7465
7466         case KEY_lcfirst:
7467             UNI(OP_LCFIRST);
7468
7469         case KEY_local:
7470             pl_yylval.ival = 0;
7471             OPERATOR(LOCAL);
7472
7473         case KEY_length:
7474             UNI(OP_LENGTH);
7475
7476         case KEY_lt:
7477             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7478                 return REPORT(0);
7479             Rop(OP_SLT);
7480
7481         case KEY_le:
7482             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7483                 return REPORT(0);
7484             Rop(OP_SLE);
7485
7486         case KEY_localtime:
7487             UNI(OP_LOCALTIME);
7488
7489         case KEY_log:
7490             UNI(OP_LOG);
7491
7492         case KEY_link:
7493             LOP(OP_LINK,XTERM);
7494
7495         case KEY_listen:
7496             LOP(OP_LISTEN,XTERM);
7497
7498         case KEY_lock:
7499             UNI(OP_LOCK);
7500
7501         case KEY_lstat:
7502             UNI(OP_LSTAT);
7503
7504         case KEY_m:
7505             s = scan_pat(s,OP_MATCH);
7506             TERM(sublex_start());
7507
7508         case KEY_map:
7509             LOP(OP_MAPSTART, XREF);
7510
7511         case KEY_mkdir:
7512             LOP(OP_MKDIR,XTERM);
7513
7514         case KEY_msgctl:
7515             LOP(OP_MSGCTL,XTERM);
7516
7517         case KEY_msgget:
7518             LOP(OP_MSGGET,XTERM);
7519
7520         case KEY_msgrcv:
7521             LOP(OP_MSGRCV,XTERM);
7522
7523         case KEY_msgsnd:
7524             LOP(OP_MSGSND,XTERM);
7525
7526         case KEY_our:
7527         case KEY_my:
7528         case KEY_state:
7529             PL_in_my = (U16)tmp;
7530             s = SKIPSPACE1(s);
7531             if (isIDFIRST_lazy_if(s,UTF)) {
7532 #ifdef PERL_MAD
7533                 char* start = s;
7534 #endif
7535                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7536                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7537                     goto really_sub;
7538                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7539                 if (!PL_in_my_stash) {
7540                     char tmpbuf[1024];
7541                     PL_bufptr = s;
7542                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7543                     yyerror(tmpbuf);
7544                 }
7545 #ifdef PERL_MAD
7546                 if (PL_madskills) {     /* just add type to declarator token */
7547                     sv_catsv(PL_thistoken, PL_nextwhite);
7548                     PL_nextwhite = 0;
7549                     sv_catpvn(PL_thistoken, start, s - start);
7550                 }
7551 #endif
7552             }
7553             pl_yylval.ival = 1;
7554             OPERATOR(MY);
7555
7556         case KEY_next:
7557             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7558             LOOPX(OP_NEXT);
7559
7560         case KEY_ne:
7561             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7562                 return REPORT(0);
7563             Eop(OP_SNE);
7564
7565         case KEY_no:
7566             s = tokenize_use(0, s);
7567             OPERATOR(USE);
7568
7569         case KEY_not:
7570             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7571                 FUN1(OP_NOT);
7572             else {
7573                 if (!PL_lex_allbrackets &&
7574                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7575                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7576                 OPERATOR(NOTOP);
7577             }
7578
7579         case KEY_open:
7580             s = SKIPSPACE1(s);
7581             if (isIDFIRST_lazy_if(s,UTF)) {
7582                 const char *t;
7583                 for (d = s; isALNUM_lazy_if(d,UTF);)
7584                     d++;
7585                 for (t=d; isSPACE(*t);)
7586                     t++;
7587                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7588                     /* [perl #16184] */
7589                     && !(t[0] == '=' && t[1] == '>')
7590                 ) {
7591                     int parms_len = (int)(d-s);
7592                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7593                            "Precedence problem: open %.*s should be open(%.*s)",
7594                             parms_len, s, parms_len, s);
7595                 }
7596             }
7597             LOP(OP_OPEN,XTERM);
7598
7599         case KEY_or:
7600             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7601                 return REPORT(0);
7602             pl_yylval.ival = OP_OR;
7603             OPERATOR(OROP);
7604
7605         case KEY_ord:
7606             UNI(OP_ORD);
7607
7608         case KEY_oct:
7609             UNI(OP_OCT);
7610
7611         case KEY_opendir:
7612             LOP(OP_OPEN_DIR,XTERM);
7613
7614         case KEY_print:
7615             checkcomma(s,PL_tokenbuf,"filehandle");
7616             LOP(OP_PRINT,XREF);
7617
7618         case KEY_printf:
7619             checkcomma(s,PL_tokenbuf,"filehandle");
7620             LOP(OP_PRTF,XREF);
7621
7622         case KEY_prototype:
7623             UNI(OP_PROTOTYPE);
7624
7625         case KEY_push:
7626             LOP(OP_PUSH,XTERM);
7627
7628         case KEY_pop:
7629             UNIDOR(OP_POP);
7630
7631         case KEY_pos:
7632             UNIDOR(OP_POS);
7633
7634         case KEY_pack:
7635             LOP(OP_PACK,XTERM);
7636
7637         case KEY_package:
7638             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7639             s = SKIPSPACE1(s);
7640             s = force_strict_version(s);
7641             PL_lex_expect = XBLOCK;
7642             OPERATOR(PACKAGE);
7643
7644         case KEY_pipe:
7645             LOP(OP_PIPE_OP,XTERM);
7646
7647         case KEY_q:
7648             s = scan_str(s,!!PL_madskills,FALSE);
7649             if (!s)
7650                 missingterm(NULL);
7651             pl_yylval.ival = OP_CONST;
7652             TERM(sublex_start());
7653
7654         case KEY_quotemeta:
7655             UNI(OP_QUOTEMETA);
7656
7657         case KEY_qw: {
7658             OP *words = NULL;
7659             s = scan_str(s,!!PL_madskills,FALSE);
7660             if (!s)
7661                 missingterm(NULL);
7662             PL_expect = XOPERATOR;
7663             if (SvCUR(PL_lex_stuff)) {
7664                 int warned_comma = !ckWARN(WARN_QW);
7665                 int warned_comment = warned_comma;
7666                 d = SvPV_force(PL_lex_stuff, len);
7667                 while (len) {
7668                     for (; isSPACE(*d) && len; --len, ++d)
7669                         /**/;
7670                     if (len) {
7671                         SV *sv;
7672                         const char *b = d;
7673                         if (!warned_comma || !warned_comment) {
7674                             for (; !isSPACE(*d) && len; --len, ++d) {
7675                                 if (!warned_comma && *d == ',') {
7676                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7677                                         "Possible attempt to separate words with commas");
7678                                     ++warned_comma;
7679                                 }
7680                                 else if (!warned_comment && *d == '#') {
7681                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7682                                         "Possible attempt to put comments in qw() list");
7683                                     ++warned_comment;
7684                                 }
7685                             }
7686                         }
7687                         else {
7688                             for (; !isSPACE(*d) && len; --len, ++d)
7689                                 /**/;
7690                         }
7691                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7692                         words = op_append_elem(OP_LIST, words,
7693                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7694                     }
7695                 }
7696             }
7697             if (!words)
7698                 words = newNULLLIST();
7699             if (PL_lex_stuff) {
7700                 SvREFCNT_dec(PL_lex_stuff);
7701                 PL_lex_stuff = NULL;
7702             }
7703             PL_expect = XOPERATOR;
7704             pl_yylval.opval = sawparens(words);
7705             TOKEN(QWLIST);
7706         }
7707
7708         case KEY_qq:
7709             s = scan_str(s,!!PL_madskills,FALSE);
7710             if (!s)
7711                 missingterm(NULL);
7712             pl_yylval.ival = OP_STRINGIFY;
7713             if (SvIVX(PL_lex_stuff) == '\'')
7714                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7715             TERM(sublex_start());
7716
7717         case KEY_qr:
7718             s = scan_pat(s,OP_QR);
7719             TERM(sublex_start());
7720
7721         case KEY_qx:
7722             s = scan_str(s,!!PL_madskills,FALSE);
7723             if (!s)
7724                 missingterm(NULL);
7725             readpipe_override();
7726             TERM(sublex_start());
7727
7728         case KEY_return:
7729             OLDLOP(OP_RETURN);
7730
7731         case KEY_require:
7732             s = SKIPSPACE1(s);
7733             if (isDIGIT(*s)) {
7734                 s = force_version(s, FALSE);
7735             }
7736             else if (*s != 'v' || !isDIGIT(s[1])
7737                     || (s = force_version(s, TRUE), *s == 'v'))
7738             {
7739                 *PL_tokenbuf = '\0';
7740                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7741                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7742                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7743                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7744                 else if (*s == '<')
7745                     yyerror("<> should be quotes");
7746             }
7747             if (orig_keyword == KEY_require) {
7748                 orig_keyword = 0;
7749                 pl_yylval.ival = 1;
7750             }
7751             else
7752                 pl_yylval.ival = 0;
7753             PL_expect = XTERM;
7754             PL_bufptr = s;
7755             PL_last_uni = PL_oldbufptr;
7756             PL_last_lop_op = OP_REQUIRE;
7757             s = skipspace(s);
7758             return REPORT( (int)REQUIRE );
7759
7760         case KEY_reset:
7761             UNI(OP_RESET);
7762
7763         case KEY_redo:
7764             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7765             LOOPX(OP_REDO);
7766
7767         case KEY_rename:
7768             LOP(OP_RENAME,XTERM);
7769
7770         case KEY_rand:
7771             UNI(OP_RAND);
7772
7773         case KEY_rmdir:
7774             UNI(OP_RMDIR);
7775
7776         case KEY_rindex:
7777             LOP(OP_RINDEX,XTERM);
7778
7779         case KEY_read:
7780             LOP(OP_READ,XTERM);
7781
7782         case KEY_readdir:
7783             UNI(OP_READDIR);
7784
7785         case KEY_readline:
7786             UNIDOR(OP_READLINE);
7787
7788         case KEY_readpipe:
7789             UNIDOR(OP_BACKTICK);
7790
7791         case KEY_rewinddir:
7792             UNI(OP_REWINDDIR);
7793
7794         case KEY_recv:
7795             LOP(OP_RECV,XTERM);
7796
7797         case KEY_reverse:
7798             LOP(OP_REVERSE,XTERM);
7799
7800         case KEY_readlink:
7801             UNIDOR(OP_READLINK);
7802
7803         case KEY_ref:
7804             UNI(OP_REF);
7805
7806         case KEY_s:
7807             s = scan_subst(s);
7808             if (pl_yylval.opval)
7809                 TERM(sublex_start());
7810             else
7811                 TOKEN(1);       /* force error */
7812
7813         case KEY_say:
7814             checkcomma(s,PL_tokenbuf,"filehandle");
7815             LOP(OP_SAY,XREF);
7816
7817         case KEY_chomp:
7818             UNI(OP_CHOMP);
7819
7820         case KEY_scalar:
7821             UNI(OP_SCALAR);
7822
7823         case KEY_select:
7824             LOP(OP_SELECT,XTERM);
7825
7826         case KEY_seek:
7827             LOP(OP_SEEK,XTERM);
7828
7829         case KEY_semctl:
7830             LOP(OP_SEMCTL,XTERM);
7831
7832         case KEY_semget:
7833             LOP(OP_SEMGET,XTERM);
7834
7835         case KEY_semop:
7836             LOP(OP_SEMOP,XTERM);
7837
7838         case KEY_send:
7839             LOP(OP_SEND,XTERM);
7840
7841         case KEY_setpgrp:
7842             LOP(OP_SETPGRP,XTERM);
7843
7844         case KEY_setpriority:
7845             LOP(OP_SETPRIORITY,XTERM);
7846
7847         case KEY_sethostent:
7848             UNI(OP_SHOSTENT);
7849
7850         case KEY_setnetent:
7851             UNI(OP_SNETENT);
7852
7853         case KEY_setservent:
7854             UNI(OP_SSERVENT);
7855
7856         case KEY_setprotoent:
7857             UNI(OP_SPROTOENT);
7858
7859         case KEY_setpwent:
7860             FUN0(OP_SPWENT);
7861
7862         case KEY_setgrent:
7863             FUN0(OP_SGRENT);
7864
7865         case KEY_seekdir:
7866             LOP(OP_SEEKDIR,XTERM);
7867
7868         case KEY_setsockopt:
7869             LOP(OP_SSOCKOPT,XTERM);
7870
7871         case KEY_shift:
7872             UNIDOR(OP_SHIFT);
7873
7874         case KEY_shmctl:
7875             LOP(OP_SHMCTL,XTERM);
7876
7877         case KEY_shmget:
7878             LOP(OP_SHMGET,XTERM);
7879
7880         case KEY_shmread:
7881             LOP(OP_SHMREAD,XTERM);
7882
7883         case KEY_shmwrite:
7884             LOP(OP_SHMWRITE,XTERM);
7885
7886         case KEY_shutdown:
7887             LOP(OP_SHUTDOWN,XTERM);
7888
7889         case KEY_sin:
7890             UNI(OP_SIN);
7891
7892         case KEY_sleep:
7893             UNI(OP_SLEEP);
7894
7895         case KEY_socket:
7896             LOP(OP_SOCKET,XTERM);
7897
7898         case KEY_socketpair:
7899             LOP(OP_SOCKPAIR,XTERM);
7900
7901         case KEY_sort:
7902             checkcomma(s,PL_tokenbuf,"subroutine name");
7903             s = SKIPSPACE1(s);
7904             if (*s == ';' || *s == ')')         /* probably a close */
7905                 Perl_croak(aTHX_ "sort is now a reserved word");
7906             PL_expect = XTERM;
7907             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7908             LOP(OP_SORT,XREF);
7909
7910         case KEY_split:
7911             LOP(OP_SPLIT,XTERM);
7912
7913         case KEY_sprintf:
7914             LOP(OP_SPRINTF,XTERM);
7915
7916         case KEY_splice:
7917             LOP(OP_SPLICE,XTERM);
7918
7919         case KEY_sqrt:
7920             UNI(OP_SQRT);
7921
7922         case KEY_srand:
7923             UNI(OP_SRAND);
7924
7925         case KEY_stat:
7926             UNI(OP_STAT);
7927
7928         case KEY_study:
7929             UNI(OP_STUDY);
7930
7931         case KEY_substr:
7932             LOP(OP_SUBSTR,XTERM);
7933
7934         case KEY_format:
7935         case KEY_sub:
7936           really_sub:
7937             {
7938                 char tmpbuf[sizeof PL_tokenbuf];
7939                 SSize_t tboffset = 0;
7940                 expectation attrful;
7941                 bool have_name, have_proto;
7942                 const int key = tmp;
7943
7944 #ifdef PERL_MAD
7945                 SV *tmpwhite = 0;
7946
7947                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7948                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
7949                 PL_thistoken = 0;
7950
7951                 d = s;
7952                 s = SKIPSPACE2(s,tmpwhite);
7953 #else
7954                 s = skipspace(s);
7955 #endif
7956
7957                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7958                     (*s == ':' && s[1] == ':'))
7959                 {
7960 #ifdef PERL_MAD
7961                     SV *nametoke = NULL;
7962 #endif
7963
7964                     PL_expect = XBLOCK;
7965                     attrful = XATTRBLOCK;
7966                     /* remember buffer pos'n for later force_word */
7967                     tboffset = s - PL_oldbufptr;
7968                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7969 #ifdef PERL_MAD
7970                     if (PL_madskills)
7971                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
7972 #endif
7973                     if (memchr(tmpbuf, ':', len))
7974                         sv_setpvn(PL_subname, tmpbuf, len);
7975                     else {
7976                         sv_setsv(PL_subname,PL_curstname);
7977                         sv_catpvs(PL_subname,"::");
7978                         sv_catpvn(PL_subname,tmpbuf,len);
7979                     }
7980                     if (SvUTF8(PL_linestr))
7981                         SvUTF8_on(PL_subname);
7982                     have_name = TRUE;
7983
7984 #ifdef PERL_MAD
7985
7986                     start_force(0);
7987                     CURMAD('X', nametoke);
7988                     CURMAD('_', tmpwhite);
7989                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7990                                       FALSE, TRUE, TRUE);
7991
7992                     s = SKIPSPACE2(d,tmpwhite);
7993 #else
7994                     s = skipspace(d);
7995 #endif
7996                 }
7997                 else {
7998                     if (key == KEY_my)
7999                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
8000                     PL_expect = XTERMBLOCK;
8001                     attrful = XATTRTERM;
8002                     sv_setpvs(PL_subname,"?");
8003                     have_name = FALSE;
8004                 }
8005
8006                 if (key == KEY_format) {
8007                     if (*s == '=')
8008                         PL_lex_formbrack = PL_lex_brackets + 1;
8009 #ifdef PERL_MAD
8010                     PL_thistoken = subtoken;
8011                     s = d;
8012 #else
8013                     if (have_name)
8014                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8015                                           FALSE, TRUE, TRUE);
8016 #endif
8017                     OPERATOR(FORMAT);
8018                 }
8019
8020                 /* Look for a prototype */
8021                 if (*s == '(') {
8022                     char *p;
8023                     bool bad_proto = FALSE;
8024                     bool in_brackets = FALSE;
8025                     char greedy_proto = ' ';
8026                     bool proto_after_greedy_proto = FALSE;
8027                     bool must_be_last = FALSE;
8028                     bool underscore = FALSE;
8029                     bool seen_underscore = FALSE;
8030                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8031                     STRLEN tmplen;
8032
8033                     s = scan_str(s,!!PL_madskills,FALSE);
8034                     if (!s)
8035                         Perl_croak(aTHX_ "Prototype not terminated");
8036                     /* strip spaces and check for bad characters */
8037                     d = SvPV(PL_lex_stuff, tmplen);
8038                     tmp = 0;
8039                     for (p = d; tmplen; tmplen--, ++p) {
8040                         if (!isSPACE(*p)) {
8041                             d[tmp++] = *p;
8042
8043                             if (warnillegalproto) {
8044                                 if (must_be_last)
8045                                     proto_after_greedy_proto = TRUE;
8046                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8047                                     bad_proto = TRUE;
8048                                 }
8049                                 else {
8050                                     if ( underscore ) {
8051                                         if ( *p != ';' )
8052                                             bad_proto = TRUE;
8053                                         underscore = FALSE;
8054                                     }
8055                                     if ( *p == '[' ) {
8056                                         in_brackets = TRUE;
8057                                     }
8058                                     else if ( *p == ']' ) {
8059                                         in_brackets = FALSE;
8060                                     }
8061                                     else if ( (*p == '@' || *p == '%') &&
8062                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8063                                          !in_brackets ) {
8064                                         must_be_last = TRUE;
8065                                         greedy_proto = *p;
8066                                     }
8067                                     else if ( *p == '_' ) {
8068                                         underscore = seen_underscore = TRUE;
8069                                     }
8070                                 }
8071                             }
8072                         }
8073                     }
8074                     d[tmp] = '\0';
8075                     if (proto_after_greedy_proto)
8076                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8077                                     "Prototype after '%c' for %"SVf" : %s",
8078                                     greedy_proto, SVfARG(PL_subname), d);
8079                     if (bad_proto) {
8080                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8081                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8082                                     "Illegal character %sin prototype for %"SVf" : %s",
8083                                     seen_underscore ? "after '_' " : "",
8084                                     SVfARG(PL_subname),
8085                                     sv_uni_display(dsv,
8086                                          newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8087                                          tmp, UNI_DISPLAY_ISPRINT));
8088                     }
8089                     SvCUR_set(PL_lex_stuff, tmp);
8090                     have_proto = TRUE;
8091
8092 #ifdef PERL_MAD
8093                     start_force(0);
8094                     CURMAD('q', PL_thisopen);
8095                     CURMAD('_', tmpwhite);
8096                     CURMAD('=', PL_thisstuff);
8097                     CURMAD('Q', PL_thisclose);
8098                     NEXTVAL_NEXTTOKE.opval =
8099                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8100                     PL_lex_stuff = NULL;
8101                     force_next(THING);
8102
8103                     s = SKIPSPACE2(s,tmpwhite);
8104 #else
8105                     s = skipspace(s);
8106 #endif
8107                 }
8108                 else
8109                     have_proto = FALSE;
8110
8111                 if (*s == ':' && s[1] != ':')
8112                     PL_expect = attrful;
8113                 else if (*s != '{' && key == KEY_sub) {
8114                     if (!have_name)
8115                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8116                     else if (*s != ';' && *s != '}')
8117                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8118                 }
8119
8120 #ifdef PERL_MAD
8121                 start_force(0);
8122                 if (tmpwhite) {
8123                     if (PL_madskills)
8124                         curmad('^', newSVpvs(""));
8125                     CURMAD('_', tmpwhite);
8126                 }
8127                 force_next(0);
8128
8129                 PL_thistoken = subtoken;
8130 #else
8131                 if (have_proto) {
8132                     NEXTVAL_NEXTTOKE.opval =
8133                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8134                     PL_lex_stuff = NULL;
8135                     force_next(THING);
8136                 }
8137 #endif
8138                 if (!have_name) {
8139                     if (PL_curstash)
8140                         sv_setpvs(PL_subname, "__ANON__");
8141                     else
8142                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8143                     TOKEN(ANONSUB);
8144                 }
8145 #ifndef PERL_MAD
8146                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8147                                   FALSE, TRUE, TRUE);
8148 #endif
8149                 if (key == KEY_my)
8150                     TOKEN(MYSUB);
8151                 TOKEN(SUB);
8152             }
8153
8154         case KEY_system:
8155             LOP(OP_SYSTEM,XREF);
8156
8157         case KEY_symlink:
8158             LOP(OP_SYMLINK,XTERM);
8159
8160         case KEY_syscall:
8161             LOP(OP_SYSCALL,XTERM);
8162
8163         case KEY_sysopen:
8164             LOP(OP_SYSOPEN,XTERM);
8165
8166         case KEY_sysseek:
8167             LOP(OP_SYSSEEK,XTERM);
8168
8169         case KEY_sysread:
8170             LOP(OP_SYSREAD,XTERM);
8171
8172         case KEY_syswrite:
8173             LOP(OP_SYSWRITE,XTERM);
8174
8175         case KEY_tr:
8176             s = scan_trans(s);
8177             TERM(sublex_start());
8178
8179         case KEY_tell:
8180             UNI(OP_TELL);
8181
8182         case KEY_telldir:
8183             UNI(OP_TELLDIR);
8184
8185         case KEY_tie:
8186             LOP(OP_TIE,XTERM);
8187
8188         case KEY_tied:
8189             UNI(OP_TIED);
8190
8191         case KEY_time:
8192             FUN0(OP_TIME);
8193
8194         case KEY_times:
8195             FUN0(OP_TMS);
8196
8197         case KEY_truncate:
8198             LOP(OP_TRUNCATE,XTERM);
8199
8200         case KEY_uc:
8201             UNI(OP_UC);
8202
8203         case KEY_ucfirst:
8204             UNI(OP_UCFIRST);
8205
8206         case KEY_untie:
8207             UNI(OP_UNTIE);
8208
8209         case KEY_until:
8210             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8211                 return REPORT(0);
8212             pl_yylval.ival = CopLINE(PL_curcop);
8213             OPERATOR(UNTIL);
8214
8215         case KEY_unless:
8216             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8217                 return REPORT(0);
8218             pl_yylval.ival = CopLINE(PL_curcop);
8219             OPERATOR(UNLESS);
8220
8221         case KEY_unlink:
8222             LOP(OP_UNLINK,XTERM);
8223
8224         case KEY_undef:
8225             UNIDOR(OP_UNDEF);
8226
8227         case KEY_unpack:
8228             LOP(OP_UNPACK,XTERM);
8229
8230         case KEY_utime:
8231             LOP(OP_UTIME,XTERM);
8232
8233         case KEY_umask:
8234             UNIDOR(OP_UMASK);
8235
8236         case KEY_unshift:
8237             LOP(OP_UNSHIFT,XTERM);
8238
8239         case KEY_use:
8240             s = tokenize_use(1, s);
8241             OPERATOR(USE);
8242
8243         case KEY_values:
8244             UNI(OP_VALUES);
8245
8246         case KEY_vec:
8247             LOP(OP_VEC,XTERM);
8248
8249         case KEY_when:
8250             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8251                 return REPORT(0);
8252             pl_yylval.ival = CopLINE(PL_curcop);
8253             OPERATOR(WHEN);
8254
8255         case KEY_while:
8256             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8257                 return REPORT(0);
8258             pl_yylval.ival = CopLINE(PL_curcop);
8259             OPERATOR(WHILE);
8260
8261         case KEY_warn:
8262             PL_hints |= HINT_BLOCK_SCOPE;
8263             LOP(OP_WARN,XTERM);
8264
8265         case KEY_wait:
8266             FUN0(OP_WAIT);
8267
8268         case KEY_waitpid:
8269             LOP(OP_WAITPID,XTERM);
8270
8271         case KEY_wantarray:
8272             FUN0(OP_WANTARRAY);
8273
8274         case KEY_write:
8275 #ifdef EBCDIC
8276         {
8277             char ctl_l[2];
8278             ctl_l[0] = toCTRL('L');
8279             ctl_l[1] = '\0';
8280             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8281         }
8282 #else
8283             /* Make sure $^L is defined */
8284             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8285 #endif
8286             UNI(OP_ENTERWRITE);
8287
8288         case KEY_x:
8289             if (PL_expect == XOPERATOR) {
8290                 if (*s == '=' && !PL_lex_allbrackets &&
8291                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8292                     return REPORT(0);
8293                 Mop(OP_REPEAT);
8294             }
8295             check_uni();
8296             goto just_a_word;
8297
8298         case KEY_xor:
8299             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8300                 return REPORT(0);
8301             pl_yylval.ival = OP_XOR;
8302             OPERATOR(OROP);
8303
8304         case KEY_y:
8305             s = scan_trans(s);
8306             TERM(sublex_start());
8307         }
8308     }}
8309 }
8310 #ifdef __SC__
8311 #pragma segment Main
8312 #endif
8313
8314 static int
8315 S_pending_ident(pTHX)
8316 {
8317     dVAR;
8318     register char *d;
8319     PADOFFSET tmp = 0;
8320     /* pit holds the identifier we read and pending_ident is reset */
8321     char pit = PL_pending_ident;
8322     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8323     /* All routes through this function want to know if there is a colon.  */
8324     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8325     PL_pending_ident = 0;
8326
8327     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8328     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8329           "### Pending identifier '%s'\n", PL_tokenbuf); });
8330
8331     /* if we're in a my(), we can't allow dynamics here.
8332        $foo'bar has already been turned into $foo::bar, so
8333        just check for colons.
8334
8335        if it's a legal name, the OP is a PADANY.
8336     */
8337     if (PL_in_my) {
8338         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8339             if (has_colon)
8340                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8341                                   "variable %s in \"our\"",
8342                                   PL_tokenbuf));
8343             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8344         }
8345         else {
8346             if (has_colon)
8347                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8348                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8349
8350             pl_yylval.opval = newOP(OP_PADANY, 0);
8351             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8352                                                         UTF ? SVf_UTF8 : 0);
8353             return PRIVATEREF;
8354         }
8355     }
8356
8357     /*
8358        build the ops for accesses to a my() variable.
8359
8360        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8361        then used in a comparison.  This catches most, but not
8362        all cases.  For instance, it catches
8363            sort { my($a); $a <=> $b }
8364        but not
8365            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8366        (although why you'd do that is anyone's guess).
8367     */
8368
8369     if (!has_colon) {
8370         if (!PL_in_my)
8371             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8372                                     UTF ? SVf_UTF8 : 0);
8373         if (tmp != NOT_IN_PAD) {
8374             /* might be an "our" variable" */
8375             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8376                 /* build ops for a bareword */
8377                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8378                 HEK * const stashname = HvNAME_HEK(stash);
8379                 SV *  const sym = newSVhek(stashname);
8380                 sv_catpvs(sym, "::");
8381                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8382                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8383                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8384                 gv_fetchsv(sym,
8385                     (PL_in_eval
8386                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8387                         : GV_ADDMULTI
8388                     ),
8389                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8390                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8391                      : SVt_PVHV));
8392                 return WORD;
8393             }
8394
8395             /* if it's a sort block and they're naming $a or $b */
8396             if (PL_last_lop_op == OP_SORT &&
8397                 PL_tokenbuf[0] == '$' &&
8398                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8399                 && !PL_tokenbuf[2])
8400             {
8401                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8402                      d < PL_bufend && *d != '\n';
8403                      d++)
8404                 {
8405                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8406                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8407                               PL_tokenbuf);
8408                     }
8409                 }
8410             }
8411
8412             pl_yylval.opval = newOP(OP_PADANY, 0);
8413             pl_yylval.opval->op_targ = tmp;
8414             return PRIVATEREF;
8415         }
8416     }
8417
8418     /*
8419        Whine if they've said @foo in a doublequoted string,
8420        and @foo isn't a variable we can find in the symbol
8421        table.
8422     */
8423     if (ckWARN(WARN_AMBIGUOUS) &&
8424         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8425         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8426                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8427         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8428                 /* DO NOT warn for @- and @+ */
8429                 && !( PL_tokenbuf[2] == '\0' &&
8430                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8431            )
8432         {
8433             /* Downgraded from fatal to warning 20000522 mjd */
8434             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8435                         "Possible unintended interpolation of %s in string",
8436                         PL_tokenbuf);
8437         }
8438     }
8439
8440     /* build ops for a bareword */
8441     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8442                                                       tokenbuf_len - 1,
8443                                                       UTF ? SVf_UTF8 : 0 ));
8444     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8445     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8446                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8447                      | ( UTF ? SVf_UTF8 : 0 ),
8448                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8449                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8450                       : SVt_PVHV));
8451     return WORD;
8452 }
8453
8454 STATIC void
8455 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8456 {
8457     dVAR;
8458
8459     PERL_ARGS_ASSERT_CHECKCOMMA;
8460
8461     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8462         if (ckWARN(WARN_SYNTAX)) {
8463             int level = 1;
8464             const char *w;
8465             for (w = s+2; *w && level; w++) {
8466                 if (*w == '(')
8467                     ++level;
8468                 else if (*w == ')')
8469                     --level;
8470             }
8471             while (isSPACE(*w))
8472                 ++w;
8473             /* the list of chars below is for end of statements or
8474              * block / parens, boolean operators (&&, ||, //) and branch
8475              * constructs (or, and, if, until, unless, while, err, for).
8476              * Not a very solid hack... */
8477             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8478                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8479                             "%s (...) interpreted as function",name);
8480         }
8481     }
8482     while (s < PL_bufend && isSPACE(*s))
8483         s++;
8484     if (*s == '(')
8485         s++;
8486     while (s < PL_bufend && isSPACE(*s))
8487         s++;
8488     if (isIDFIRST_lazy_if(s,UTF)) {
8489         const char * const w = s++;
8490         while (isALNUM_lazy_if(s,UTF))
8491             s++;
8492         while (s < PL_bufend && isSPACE(*s))
8493             s++;
8494         if (*s == ',') {
8495             GV* gv;
8496             if (keyword(w, s - w, 0))
8497                 return;
8498
8499             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8500             if (gv && GvCVu(gv))
8501                 return;
8502             Perl_croak(aTHX_ "No comma allowed after %s", what);
8503         }
8504     }
8505 }
8506
8507 /* Either returns sv, or mortalizes sv and returns a new SV*.
8508    Best used as sv=new_constant(..., sv, ...).
8509    If s, pv are NULL, calls subroutine with one argument,
8510    and type is used with error messages only. */
8511
8512 STATIC SV *
8513 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8514                SV *sv, SV *pv, const char *type, STRLEN typelen)
8515 {
8516     dVAR; dSP;
8517     HV * const table = GvHV(PL_hintgv);          /* ^H */
8518     SV *res;
8519     SV **cvp;
8520     SV *cv, *typesv;
8521     const char *why1 = "", *why2 = "", *why3 = "";
8522
8523     PERL_ARGS_ASSERT_NEW_CONSTANT;
8524
8525     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8526         SV *msg;
8527
8528         why2 = (const char *)
8529             (strEQ(key,"charnames")
8530              ? "(possibly a missing \"use charnames ...\")"
8531              : "");
8532         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8533                             (type ? type: "undef"), why2);
8534
8535         /* This is convoluted and evil ("goto considered harmful")
8536          * but I do not understand the intricacies of all the different
8537          * failure modes of %^H in here.  The goal here is to make
8538          * the most probable error message user-friendly. --jhi */
8539
8540         goto msgdone;
8541
8542     report:
8543         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8544                             (type ? type: "undef"), why1, why2, why3);
8545     msgdone:
8546         yyerror(SvPVX_const(msg));
8547         SvREFCNT_dec(msg);
8548         return sv;
8549     }
8550
8551     /* charnames doesn't work well if there have been errors found */
8552     if (PL_error_count > 0 && strEQ(key,"charnames"))
8553         return &PL_sv_undef;
8554
8555     cvp = hv_fetch(table, key, keylen, FALSE);
8556     if (!cvp || !SvOK(*cvp)) {
8557         why1 = "$^H{";
8558         why2 = key;
8559         why3 = "} is not defined";
8560         goto report;
8561     }
8562     sv_2mortal(sv);                     /* Parent created it permanently */
8563     cv = *cvp;
8564     if (!pv && s)
8565         pv = newSVpvn_flags(s, len, SVs_TEMP);
8566     if (type && pv)
8567         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8568     else
8569         typesv = &PL_sv_undef;
8570
8571     PUSHSTACKi(PERLSI_OVERLOAD);
8572     ENTER ;
8573     SAVETMPS;
8574
8575     PUSHMARK(SP) ;
8576     EXTEND(sp, 3);
8577     if (pv)
8578         PUSHs(pv);
8579     PUSHs(sv);
8580     if (pv)
8581         PUSHs(typesv);
8582     PUTBACK;
8583     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8584
8585     SPAGAIN ;
8586
8587     /* Check the eval first */
8588     if (!PL_in_eval && SvTRUE(ERRSV)) {
8589         sv_catpvs(ERRSV, "Propagated");
8590         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8591         (void)POPs;
8592         res = SvREFCNT_inc_simple(sv);
8593     }
8594     else {
8595         res = POPs;
8596         SvREFCNT_inc_simple_void(res);
8597     }
8598
8599     PUTBACK ;
8600     FREETMPS ;
8601     LEAVE ;
8602     POPSTACK;
8603
8604     if (!SvOK(res)) {
8605         why1 = "Call to &{$^H{";
8606         why2 = key;
8607         why3 = "}} did not return a defined value";
8608         sv = res;
8609         goto report;
8610     }
8611
8612     return res;
8613 }
8614
8615 /* Returns a NUL terminated string, with the length of the string written to
8616    *slp
8617    */
8618 STATIC char *
8619 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8620 {
8621     dVAR;
8622     register char *d = dest;
8623     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8624
8625     PERL_ARGS_ASSERT_SCAN_WORD;
8626
8627     for (;;) {
8628         if (d >= e)
8629             Perl_croak(aTHX_ ident_too_long);
8630         if (isALNUM(*s))        /* UTF handled below */
8631             *d++ = *s++;
8632         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8633             *d++ = ':';
8634             *d++ = ':';
8635             s++;
8636         }
8637         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8638             *d++ = *s++;
8639             *d++ = *s++;
8640         }
8641         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8642             char *t = s + UTF8SKIP(s);
8643             size_t len;
8644             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8645                 t += UTF8SKIP(t);
8646             len = t - s;
8647             if (d + len > e)
8648                 Perl_croak(aTHX_ ident_too_long);
8649             Copy(s, d, len, char);
8650             d += len;
8651             s = t;
8652         }
8653         else {
8654             *d = '\0';
8655             *slp = d - dest;
8656             return s;
8657         }
8658     }
8659 }
8660
8661 STATIC char *
8662 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8663 {
8664     dVAR;
8665     char *bracket = NULL;
8666     char funny = *s++;
8667     register char *d = dest;
8668     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8669
8670     PERL_ARGS_ASSERT_SCAN_IDENT;
8671
8672     if (isSPACE(*s))
8673         s = PEEKSPACE(s);
8674     if (isDIGIT(*s)) {
8675         while (isDIGIT(*s)) {
8676             if (d >= e)
8677                 Perl_croak(aTHX_ ident_too_long);
8678             *d++ = *s++;
8679         }
8680     }
8681     else {
8682         for (;;) {
8683             if (d >= e)
8684                 Perl_croak(aTHX_ ident_too_long);
8685             if (isALNUM(*s))    /* UTF handled below */
8686                 *d++ = *s++;
8687             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8688                 *d++ = ':';
8689                 *d++ = ':';
8690                 s++;
8691             }
8692             else if (*s == ':' && s[1] == ':') {
8693                 *d++ = *s++;
8694                 *d++ = *s++;
8695             }
8696             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8697                 char *t = s + UTF8SKIP(s);
8698                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8699                     t += UTF8SKIP(t);
8700                 if (d + (t - s) > e)
8701                     Perl_croak(aTHX_ ident_too_long);
8702                 Copy(s, d, t - s, char);
8703                 d += t - s;
8704                 s = t;
8705             }
8706             else
8707                 break;
8708         }
8709     }
8710     *d = '\0';
8711     d = dest;
8712     if (*d) {
8713         if (PL_lex_state != LEX_NORMAL)
8714             PL_lex_state = LEX_INTERPENDMAYBE;
8715         return s;
8716     }
8717     if (*s == '$' && s[1] &&
8718         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8719     {
8720         return s;
8721     }
8722     if (*s == '{') {
8723         bracket = s;
8724         s++;
8725     }
8726     else if (ck_uni)
8727         check_uni();
8728     if (s < send) {
8729         if (UTF) {
8730             const STRLEN skip = UTF8SKIP(s);
8731             STRLEN i;
8732             d[skip] = '\0';
8733             for ( i = 0; i < skip; i++ )
8734                 d[i] = *s++;
8735         }
8736         else {
8737             *d = *s++;
8738             d[1] = '\0';
8739         }
8740     }
8741     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8742         *d = toCTRL(*s);
8743         s++;
8744     }
8745     if (bracket) {
8746         if (isSPACE(s[-1])) {
8747             while (s < send) {
8748                 const char ch = *s++;
8749                 if (!SPACE_OR_TAB(ch)) {
8750                     *d = ch;
8751                     break;
8752                 }
8753             }
8754         }
8755         if (isIDFIRST_lazy_if(d,UTF)) {
8756             d += UTF8SKIP(d);
8757             if (UTF) {
8758                 char *end = s;
8759                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8760                     end += UTF8SKIP(end);
8761                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8762                         end += UTF8SKIP(end);
8763                 }
8764                 Copy(s, d, end - s, char);
8765                 d += end - s;
8766                 s = end;
8767             }
8768             else {
8769                 while ((isALNUM(*s) || *s == ':') && d < e)
8770                     *d++ = *s++;
8771                 if (d >= e)
8772                     Perl_croak(aTHX_ ident_too_long);
8773             }
8774             *d = '\0';
8775             while (s < send && SPACE_OR_TAB(*s))
8776                 s++;
8777             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8778                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8779                     const char * const brack =
8780                         (const char *)
8781                         ((*s == '[') ? "[...]" : "{...}");
8782    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8783                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8784                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8785                         funny, dest, brack, funny, dest, brack);
8786                 }
8787                 bracket++;
8788                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8789                 PL_lex_allbrackets++;
8790                 return s;
8791             }
8792         }
8793         /* Handle extended ${^Foo} variables
8794          * 1999-02-27 mjd-perl-patch@plover.com */
8795         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8796                  && isALNUM(*s))
8797         {
8798             d++;
8799             while (isALNUM(*s) && d < e) {
8800                 *d++ = *s++;
8801             }
8802             if (d >= e)
8803                 Perl_croak(aTHX_ ident_too_long);
8804             *d = '\0';
8805         }
8806         if (*s == '}') {
8807             s++;
8808             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8809                 PL_lex_state = LEX_INTERPEND;
8810                 PL_expect = XREF;
8811             }
8812             if (PL_lex_state == LEX_NORMAL) {
8813                 if (ckWARN(WARN_AMBIGUOUS) &&
8814                     (keyword(dest, d - dest, 0)
8815                      || get_cvn_flags(dest, d - dest, 0)))
8816                 {
8817                     if (funny == '#')
8818                         funny = '@';
8819                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8820                         "Ambiguous use of %c{%s} resolved to %c%s",
8821                         funny, dest, funny, dest);
8822                 }
8823             }
8824         }
8825         else {
8826             s = bracket;                /* let the parser handle it */
8827             *dest = '\0';
8828         }
8829     }
8830     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8831         PL_lex_state = LEX_INTERPEND;
8832     return s;
8833 }
8834
8835 static bool
8836 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8837
8838     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8839      * the parse starting at 's', based on the subset that are valid in this
8840      * context input to this routine in 'valid_flags'. Advances s.  Returns
8841      * TRUE if the input was a valid flag, so the next char may be as well;
8842      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8843      * current regex.  This routine will set it to any charset modifier found.
8844      * The caller shouldn't change it.  This way, another charset modifier
8845      * encountered in the parse can be detected as an error, as we have decided
8846      * allow only one */
8847
8848     const char c = **s;
8849
8850     if (! strchr(valid_flags, c)) {
8851         if (isALNUM(c)) {
8852             goto deprecate;
8853         }
8854         return FALSE;
8855     }
8856
8857     switch (c) {
8858
8859         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8860         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8861         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8862         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8863         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8864         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8865         case LOCALE_PAT_MOD:
8866
8867             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8868              * can't be regex modifiers.
8869              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8870              * much as easily done.  s///lei, for example, has to mean regex
8871              * modifiers if it's not an error (as does any word character
8872              * following the 'e').  Otherwise, we resolve to the backwards-
8873              * compatible, but less likely 's/// le ...', i.e. as meaning
8874              * less-than-or-equal.  The reason it's not likely is that s//
8875              * returns a number for code in the field (/r returns a string, but
8876              * that wasn't added until the 5.13 series), and so '<=' should be
8877              * used for comparing, not 'le'. */
8878             if (*((*s) + 1) == 't') {
8879                 goto deprecate;
8880             }
8881             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8882
8883                 /* 'e' is valid only for substitutes, s///e.  If it is not
8884                  * valid in the current context, then 'm//le' must mean the
8885                  * comparison operator, so use the regular deprecation message.
8886                  */
8887                 if (! strchr(valid_flags, 'e')) {
8888                     goto deprecate;
8889                 }
8890                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8891                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8892                 return FALSE;
8893             }
8894             if (*charset) {
8895                 goto multiple_charsets;
8896             }
8897             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8898             *charset = c;
8899             break;
8900         case UNICODE_PAT_MOD:
8901             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8902              * 'n' means they can't be regex modifiers */
8903             if (*((*s) + 1) == 'n') {
8904                 goto deprecate;
8905             }
8906             if (*charset) {
8907                 goto multiple_charsets;
8908             }
8909             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8910             *charset = c;
8911             break;
8912         case ASCII_RESTRICT_PAT_MOD:
8913             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8914              * can't be regex modifiers */
8915             if (*((*s) + 1) == 'n') {
8916                 goto deprecate;
8917             }
8918
8919             if (! *charset) {
8920                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8921             }
8922             else {
8923
8924                 /* Error if previous modifier wasn't an 'a', but if it was, see
8925                  * if, and accept, a second occurrence (only) */
8926                 if (*charset != 'a'
8927                     || get_regex_charset(*pmfl)
8928                         != REGEX_ASCII_RESTRICTED_CHARSET)
8929                 {
8930                         goto multiple_charsets;
8931                 }
8932                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8933             }
8934             *charset = c;
8935             break;
8936         case DEPENDS_PAT_MOD:
8937             if (*charset) {
8938                 goto multiple_charsets;
8939             }
8940             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8941             *charset = c;
8942             break;
8943     }
8944
8945     (*s)++;
8946     return TRUE;
8947
8948     deprecate:
8949         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8950             "Having no space between pattern and following word is deprecated");
8951         return FALSE;
8952
8953     multiple_charsets:
8954         if (*charset != c) {
8955             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8956         }
8957         else if (c == 'a') {
8958             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8959         }
8960         else {
8961             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8962         }
8963
8964         /* Pretend that it worked, so will continue processing before dieing */
8965         (*s)++;
8966         return TRUE;
8967 }
8968
8969 STATIC char *
8970 S_scan_pat(pTHX_ char *start, I32 type)
8971 {
8972     dVAR;
8973     PMOP *pm;
8974     char *s = scan_str(start,!!PL_madskills,FALSE);
8975     const char * const valid_flags =
8976         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8977     char charset = '\0';    /* character set modifier */
8978 #ifdef PERL_MAD
8979     char *modstart;
8980 #endif
8981
8982     PERL_ARGS_ASSERT_SCAN_PAT;
8983
8984     if (!s) {
8985         const char * const delimiter = skipspace(start);
8986         Perl_croak(aTHX_
8987                    (const char *)
8988                    (*delimiter == '?'
8989                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8990                     : "Search pattern not terminated" ));
8991     }
8992
8993     pm = (PMOP*)newPMOP(type, 0);
8994     if (PL_multi_open == '?') {
8995         /* This is the only point in the code that sets PMf_ONCE:  */
8996         pm->op_pmflags |= PMf_ONCE;
8997
8998         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8999            allows us to restrict the list needed by reset to just the ??
9000            matches.  */
9001         assert(type != OP_TRANS);
9002         if (PL_curstash) {
9003             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9004             U32 elements;
9005             if (!mg) {
9006                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9007                                  0);
9008             }
9009             elements = mg->mg_len / sizeof(PMOP**);
9010             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9011             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9012             mg->mg_len = elements * sizeof(PMOP**);
9013             PmopSTASH_set(pm,PL_curstash);
9014         }
9015     }
9016 #ifdef PERL_MAD
9017     modstart = s;
9018 #endif
9019     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9020 #ifdef PERL_MAD
9021     if (PL_madskills && modstart != s) {
9022         SV* tmptoken = newSVpvn(modstart, s - modstart);
9023         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9024     }
9025 #endif
9026     /* issue a warning if /c is specified,but /g is not */
9027     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9028     {
9029         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9030                        "Use of /c modifier is meaningless without /g" );
9031     }
9032
9033     PL_lex_op = (OP*)pm;
9034     pl_yylval.ival = OP_MATCH;
9035     return s;
9036 }
9037
9038 STATIC char *
9039 S_scan_subst(pTHX_ char *start)
9040 {
9041     dVAR;
9042     char *s;
9043     register PMOP *pm;
9044     I32 first_start;
9045     I32 es = 0;
9046     char charset = '\0';    /* character set modifier */
9047 #ifdef PERL_MAD
9048     char *modstart;
9049 #endif
9050
9051     PERL_ARGS_ASSERT_SCAN_SUBST;
9052
9053     pl_yylval.ival = OP_NULL;
9054
9055     s = scan_str(start,!!PL_madskills,FALSE);
9056
9057     if (!s)
9058         Perl_croak(aTHX_ "Substitution pattern not terminated");
9059
9060     if (s[-1] == PL_multi_open)
9061         s--;
9062 #ifdef PERL_MAD
9063     if (PL_madskills) {
9064         CURMAD('q', PL_thisopen);
9065         CURMAD('_', PL_thiswhite);
9066         CURMAD('E', PL_thisstuff);
9067         CURMAD('Q', PL_thisclose);
9068         PL_realtokenstart = s - SvPVX(PL_linestr);
9069     }
9070 #endif
9071
9072     first_start = PL_multi_start;
9073     s = scan_str(s,!!PL_madskills,FALSE);
9074     if (!s) {
9075         if (PL_lex_stuff) {
9076             SvREFCNT_dec(PL_lex_stuff);
9077             PL_lex_stuff = NULL;
9078         }
9079         Perl_croak(aTHX_ "Substitution replacement not terminated");
9080     }
9081     PL_multi_start = first_start;       /* so whole substitution is taken together */
9082
9083     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9084
9085 #ifdef PERL_MAD
9086     if (PL_madskills) {
9087         CURMAD('z', PL_thisopen);
9088         CURMAD('R', PL_thisstuff);
9089         CURMAD('Z', PL_thisclose);
9090     }
9091     modstart = s;
9092 #endif
9093
9094     while (*s) {
9095         if (*s == EXEC_PAT_MOD) {
9096             s++;
9097             es++;
9098         }
9099         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9100         {
9101             break;
9102         }
9103     }
9104
9105 #ifdef PERL_MAD
9106     if (PL_madskills) {
9107         if (modstart != s)
9108             curmad('m', newSVpvn(modstart, s - modstart));
9109         append_madprops(PL_thismad, (OP*)pm, 0);
9110         PL_thismad = 0;
9111     }
9112 #endif
9113     if ((pm->op_pmflags & PMf_CONTINUE)) {
9114         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9115     }
9116
9117     if (es) {
9118         SV * const repl = newSVpvs("");
9119
9120         PL_sublex_info.super_bufptr = s;
9121         PL_sublex_info.super_bufend = PL_bufend;
9122         PL_multi_end = 0;
9123         pm->op_pmflags |= PMf_EVAL;
9124         while (es-- > 0) {
9125             if (es)
9126                 sv_catpvs(repl, "eval ");
9127             else
9128                 sv_catpvs(repl, "do ");
9129         }
9130         sv_catpvs(repl, "{");
9131         sv_catsv(repl, PL_lex_repl);
9132         if (strchr(SvPVX(PL_lex_repl), '#'))
9133             sv_catpvs(repl, "\n");
9134         sv_catpvs(repl, "}");
9135         SvEVALED_on(repl);
9136         SvREFCNT_dec(PL_lex_repl);
9137         PL_lex_repl = repl;
9138     }
9139
9140     PL_lex_op = (OP*)pm;
9141     pl_yylval.ival = OP_SUBST;
9142     return s;
9143 }
9144
9145 STATIC char *
9146 S_scan_trans(pTHX_ char *start)
9147 {
9148     dVAR;
9149     register char* s;
9150     OP *o;
9151     short *tbl;
9152     U8 squash;
9153     U8 del;
9154     U8 complement;
9155     bool nondestruct = 0;
9156 #ifdef PERL_MAD
9157     char *modstart;
9158 #endif
9159
9160     PERL_ARGS_ASSERT_SCAN_TRANS;
9161
9162     pl_yylval.ival = OP_NULL;
9163
9164     s = scan_str(start,!!PL_madskills,FALSE);
9165     if (!s)
9166         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9167
9168     if (s[-1] == PL_multi_open)
9169         s--;
9170 #ifdef PERL_MAD
9171     if (PL_madskills) {
9172         CURMAD('q', PL_thisopen);
9173         CURMAD('_', PL_thiswhite);
9174         CURMAD('E', PL_thisstuff);
9175         CURMAD('Q', PL_thisclose);
9176         PL_realtokenstart = s - SvPVX(PL_linestr);
9177     }
9178 #endif
9179
9180     s = scan_str(s,!!PL_madskills,FALSE);
9181     if (!s) {
9182         if (PL_lex_stuff) {
9183             SvREFCNT_dec(PL_lex_stuff);
9184             PL_lex_stuff = NULL;
9185         }
9186         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9187     }
9188     if (PL_madskills) {
9189         CURMAD('z', PL_thisopen);
9190         CURMAD('R', PL_thisstuff);
9191         CURMAD('Z', PL_thisclose);
9192     }
9193
9194     complement = del = squash = 0;
9195 #ifdef PERL_MAD
9196     modstart = s;
9197 #endif
9198     while (1) {
9199         switch (*s) {
9200         case 'c':
9201             complement = OPpTRANS_COMPLEMENT;
9202             break;
9203         case 'd':
9204             del = OPpTRANS_DELETE;
9205             break;
9206         case 's':
9207             squash = OPpTRANS_SQUASH;
9208             break;
9209         case 'r':
9210             nondestruct = 1;
9211             break;
9212         default:
9213             goto no_more;
9214         }
9215         s++;
9216     }
9217   no_more:
9218
9219     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9220     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9221     o->op_private &= ~OPpTRANS_ALL;
9222     o->op_private |= del|squash|complement|
9223       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9224       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9225
9226     PL_lex_op = o;
9227     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9228
9229 #ifdef PERL_MAD
9230     if (PL_madskills) {
9231         if (modstart != s)
9232             curmad('m', newSVpvn(modstart, s - modstart));
9233         append_madprops(PL_thismad, o, 0);
9234         PL_thismad = 0;
9235     }
9236 #endif
9237
9238     return s;
9239 }
9240
/* scan_heredoc
   takes: current position in the input buffer.  The first two characters
          are stepped over unexamined (presumably the "<<" that introduced
          the here-document -- confirm against the caller).
   returns: position to continue reading from
   side-effects: PL_tokenbuf receives the terminator line, PL_lex_stuff
        receives the body text, pl_yylval.ival receives the op type
        (OP_SCALAR by default, OP_CONST for a single-quoted or
        backslashed delimiter, OP_BACKTICK for a backticked one),
        PL_multi_start/PL_multi_end receive line numbers, and PL_linestr
        (with PL_bufend and friends) may be rewritten or refilled.

   Croaks if the delimiter does not fit in PL_tokenbuf, and calls
   missingterm() if the terminator line is never found.

   The body is looked for in one of three places:
     - in the buffer saved in PL_sublex_info (super_bufptr/super_bufend)
       when PL_lex_inwhat is OP_SUBST inside an eval with no PL_rsfp;
     - further along in the current buffer when 'outer' is false;
     - in lines fetched with lex_next_chunk() when 'outer' is true.
*/
9241 STATIC char *
9242 S_scan_heredoc(pTHX_ register char *s)
9243 {
9244     dVAR;
9245     SV *herewas;
9246     I32 op_type = OP_SCALAR;
9247     I32 len;
9248     SV *tmpstr;
9249     char term;
9250     const char *found_newline;
9251     register char *d;
9252     register char *e;
9253     char *peek;
/* 'outer' is true when PL_rsfp is set and PL_lex_inwhat is not OP_SCALAR;
   it selects line-by-line reading via lex_next_chunk() further down */
9254     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9255 #ifdef PERL_MAD
9256     I32 stuffstart = s - SvPVX(PL_linestr);
9257     char *tstart;
9258
9259     PL_realtokenstart = -1;
9260 #endif
9261
9262     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9263
/* Build the terminator in PL_tokenbuf.  When not 'outer' it is given a
   leading newline, so the in-buffer searches below only match it at the
   start of a line. */
9264     s += 2;
9265     d = PL_tokenbuf;
9266     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9267     if (!outer)
9268         *d++ = '\n';
9269     peek = s;
9270     while (SPACE_OR_TAB(*peek))
9271         peek++;
/* Quoted delimiter (spaces or tabs may precede the quote): the quote
   character becomes 'term' and everything up to its match is copied */
9272     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9273         s = peek;
9274         term = *s++;
9275         s = delimcpy(d, e, s, PL_bufend, term, &len);
9276         d += len;
9277         if (s < PL_bufend)
9278             s++;
9279     }
/* Bare delimiter: a leading backslash makes it behave as single-quoted,
   otherwise as double-quoted.  Characters beyond the buffer's capacity are
   consumed but not stored; the length check below then croaks. */
9280     else {
9281         if (*s == '\\')
9282             s++, term = '\'';
9283         else
9284             term = '"';
9285         if (!isALNUM_lazy_if(s,UTF))
9286             deprecate("bare << to mean <<\"\"");
9287         for (; isALNUM_lazy_if(s,UTF); s++) {
9288             if (d < e)
9289                 *d++ = *s;
9290         }
9291     }
9292     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9293         Perl_croak(aTHX_ "Delimiter for here document is too long");
/* Terminate with a newline; 'len' counts the whole terminator including
   the leading newline, if one was stored */
9294     *d++ = '\n';
9295     *d = '\0';
9296     len = d - PL_tokenbuf;
9297
9298 #ifdef PERL_MAD
9299     if (PL_madskills) {
9300         tstart = PL_tokenbuf + !outer;
9301         PL_thisclose = newSVpvn(tstart, len - !outer);
9302         tstart = SvPVX(PL_linestr) + stuffstart;
9303         PL_thisopen = newSVpvn(tstart, s - tstart);
9304         stuffstart = s - SvPVX(PL_linestr);
9305     }
9306 #endif
/* Unless PERL_STRICT_CR is defined, rewrite the rest of the buffer in
   place from the first '\r' onwards: "\r\n" and a lone "\r" become "\n",
   and "\n\r" collapses to "\n".  The buffer can only shrink, so PL_bufend
   and SvCUR are updated afterwards. */
9307 #ifndef PERL_STRICT_CR
9308     d = strchr(s, '\r');
9309     if (d) {
9310         char * const olds = s;
9311         s = d;
9312         while (s < PL_bufend) {
9313             if (*s == '\r') {
9314                 *d++ = '\n';
9315                 if (*++s == '\n')
9316                     s++;
9317             }
9318             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9319                 *d++ = *s++;
9320                 s++;
9321             }
9322             else
9323                 *d++ = *s++;
9324         }
9325         *d = '\0';
9326         PL_bufend = d;
9327         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9328         s = olds;
9329     }
9330 #endif
9331 #ifdef PERL_MAD
9332     found_newline = 0;
9333 #endif
/* 'herewas' saves the text that follows the delimiter so that it can be
   put back once the body has been taken out: the whole rest of the buffer
   when 'outer' or when no newline is left, otherwise just the remainder of
   the current line, starting one character before 's' */
9334     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9335         herewas = newSVpvn(s,PL_bufend-s);
9336     }
9337     else {
9338 #ifdef PERL_MAD
9339         herewas = newSVpvn(s-1,found_newline-s+1);
9340 #else
9341         s--;
9342         herewas = newSVpvn(s,found_newline-s);
9343 #endif
9344     }
9345 #ifdef PERL_MAD
9346     if (PL_madskills) {
9347         tstart = SvPVX(PL_linestr) + stuffstart;
9348         if (PL_thisstuff)
9349             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9350         else
9351             PL_thisstuff = newSVpvn(tstart, s - tstart);
9352     }
9353 #endif
/* Step over the text that was just saved */
9354     s += SvCUR(herewas);
9355
9356 #ifdef PERL_MAD
9357     stuffstart = s - SvPVX(PL_linestr);
9358
9359     if (found_newline)
9360         s--;
9361 #endif
9362
/* 'tmpstr' collects the body.  Its IV slot is set to -1 for the
   single-quoted form and to '\\' for the backtick form; the op type is
   chosen at the same time. */
9363     tmpstr = newSV_type(SVt_PVIV);
9364     SvGROW(tmpstr, 80);
9365     if (term == '\'') {
9366         op_type = OP_CONST;
9367         SvIV_set(tmpstr, -1);
9368     }
9369     else if (term == '`') {
9370         op_type = OP_BACKTICK;
9371         SvIV_set(tmpstr, '\\');
9372     }
9373
9374     CLINE;
9375     PL_multi_start = CopLINE(PL_curcop);
9376     PL_multi_open = PL_multi_close = '<';
/* From here on 'term' is the first character of the terminator, used as a
   cheap test before the full memNE()/memEQ() comparison */
9377     term = *PL_tokenbuf;
/* Case 1: the body is looked for in the buffer recorded in PL_sublex_info,
   starting at the first newline after super_bufptr.  The body goes to
   'tmpstr', and that buffer is rewritten in place with the body and the
   terminator cut out. */
9378     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9379         char * const bufptr = PL_sublex_info.super_bufptr;
9380         char * const bufend = PL_sublex_info.super_bufend;
9381         char * const olds = s - SvCUR(herewas);
9382         s = strchr(bufptr, '\n');
9383         if (!s)
9384             s = bufend;
9385         d = s;
9386         while (s < bufend &&
9387           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9388             if (*s++ == '\n')
9389                 CopLINE_inc(PL_curcop);
9390         }
9391         if (s >= bufend) {
9392             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9393             missingterm(PL_tokenbuf);
9394         }
9395         sv_setpvn(herewas,bufptr,d-bufptr+1);
9396         sv_setpvn(tmpstr,d+1,s-d);
9397         s += len - 1;
9398         sv_catpvn(herewas,s,bufend-s);
9399         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9400
9401         s = olds;
9402         goto retval;
9403     }
/* Case 2: the body is further along in the current buffer.  Search forward
   for the terminator, copy what lies in between to 'tmpstr', then make
   PL_linestr hold the saved rest-of-line followed by whatever comes after
   the terminator, and restart the buffer pointers at its beginning. */
9404     else if (!outer) {
9405         d = s;
9406         while (s < PL_bufend &&
9407           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9408             if (*s++ == '\n')
9409                 CopLINE_inc(PL_curcop);
9410         }
9411         if (s >= PL_bufend) {
9412             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9413             missingterm(PL_tokenbuf);
9414         }
9415         sv_setpvn(tmpstr,d+1,s-d);
9416 #ifdef PERL_MAD
9417         if (PL_madskills) {
9418             if (PL_thisstuff)
9419                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9420             else
9421                 PL_thisstuff = newSVpvn(d + 1, s - d);
9422             stuffstart = s - SvPVX(PL_linestr);
9423         }
9424 #endif
9425         s += len - 1;
9426         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9427
9428         sv_catpvn(herewas,s,PL_bufend-s);
9429         sv_setsv(PL_linestr,herewas);
9430         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9431         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9432         PL_last_lop = PL_last_uni = NULL;
9433     }
9434     else
9435         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
/* Case 3, and the fall-through from case 2 whenever 's' is at or past
   PL_bufend: fetch further input with lex_next_chunk().  That is only
   attempted when 'outer'; otherwise running out of buffer is reported as a
   missing terminator. */
9436     while (s >= PL_bufend) {    /* multiple line string? */
9437 #ifdef PERL_MAD
9438         if (PL_madskills) {
9439             tstart = SvPVX(PL_linestr) + stuffstart;
9440             if (PL_thisstuff)
9441                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9442             else
9443                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9444         }
9445 #endif
9446         PL_bufptr = s;
9447         CopLINE_inc(PL_curcop);
9448         if (!outer || !lex_next_chunk(0)) {
9449             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9450             missingterm(PL_tokenbuf);
9451         }
9452         CopLINE_dec(PL_curcop);
9453         s = PL_bufptr;
9454 #ifdef PERL_MAD
9455         stuffstart = s - SvPVX(PL_linestr);
9456 #endif
9457         CopLINE_inc(PL_curcop);
9458         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9459         PL_last_lop = PL_last_uni = NULL;
/* Unless PERL_STRICT_CR is defined, turn a trailing "\r\n", "\n\r" or
   lone "\r" at the end of the buffer into a single "\n" */
9460 #ifndef PERL_STRICT_CR
9461         if (PL_bufend - PL_linestart >= 2) {
9462             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9463                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9464             {
9465                 PL_bufend[-2] = '\n';
9466                 PL_bufend--;
9467                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9468             }
9469             else if (PL_bufend[-1] == '\r')
9470                 PL_bufend[-1] = '\n';
9471         }
9472         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9473             PL_bufend[-1] = '\n';
9474 #endif
/* Terminator found at 's': overwrite the buffer's final character with a
   space, append the saved text after it, and leave 's' on that space so
   that the s++ after the loop lands on the restored text.  Otherwise the
   buffer contents are body text and are added to 'tmpstr'. */
9475         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9476             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9477             *(SvPVX(PL_linestr) + off ) = ' ';
9478             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9479             sv_catsv(PL_linestr,herewas);
9480             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9481             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9482         }
9483         else {
9484             s = PL_bufend;
9485             sv_catsv(tmpstr,PL_linestr);
9486         }
9487     }
9488     s++;
/* Common exit: record the last line, give back unused string space, free
   the saved text, sort out the UTF-8 flag or encoding of the body, and
   hand the result over through PL_lex_stuff and pl_yylval */
9489 retval:
9490     PL_multi_end = CopLINE(PL_curcop);
9491     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9492         SvPV_shrink_to_cur(tmpstr);
9493     }
9494     SvREFCNT_dec(herewas);
9495     if (!IN_BYTES) {
9496         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9497             SvUTF8_on(tmpstr);
9498         else if (PL_encoding)
9499             sv_recode_to_utf8(tmpstr, PL_encoding);
9500     }
9501     PL_lex_stuff = tmpstr;
9502     pl_yylval.ival = op_type;
9503     return s;
9504 }
9505
9506 /* scan_inputsymbol
9507    takes: current position in input buffer
9508    returns: new position in input buffer
9509    side-effects: pl_yylval and lex_op are set.
9510
9511    This code handles:
9512
9513    <>           read from ARGV
9514    <FH>         read from filehandle
9515    <pkg::FH>    read from package qualified filehandle
9516    <pkg'FH>     read from package qualified filehandle
9517    <$fh>        read from filehandle in $fh
9518    <*.h>        filename glob
9519
   For the filehandle forms the finished op tree is left in PL_lex_op and
   pl_yylval.ival is set to OP_NULL.  For a glob, pl_yylval.ival is set to
   OP_GLOB and the text is extracted by scan_str().

   Croaks on an over-long or unterminated <> and on an unterminated glob.
9520 */
9521
9522 STATIC char *
9523 S_scan_inputsymbol(pTHX_ char *start)
9524 {
9525     dVAR;
9526     register char *s = start;           /* current position in buffer */
9527     char *end;
9528     I32 len;
9529     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9530     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9531
9532     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9533
/* The operator must close on the current line: scan no further than the
   next newline, or the end of the buffer if there is none */
9534     end = strchr(s, '\n');
9535     if (!end)
9536         end = PL_bufend;
9537     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9538
9539     /* die if we didn't have space for the contents of the <>,
9540        or if it didn't end, or if we see a newline
9541     */
9542
9543     if (len >= (I32)sizeof PL_tokenbuf)
9544         Perl_croak(aTHX_ "Excessively long <> operator");
9545     if (s >= end)
9546         Perl_croak(aTHX_ "Unterminated <> operator");
9547
/* step over the closing '>' */
9548     s++;
9549
9550     /* check for <$fh>
9551        Remember, only scalar variables are interpreted as filehandles by
9552        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9553        treated as a glob() call.
9554        This code makes use of the fact that except for the $ at the front,
9555        a scalar variable and a filehandle look the same.
9556     */
9557     if (*d == '$' && d[1]) d++;
9558
9559     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9560     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9561         d += UTF ? UTF8SKIP(d) : 1;
9562
9563     /* If we've tried to read what we allow filehandles to look like, and
9564        there's still text left, then it must be a glob() and not a getline.
9565        Use scan_str to pull out the stuff between the <> and treat it
9566        as nothing more than a string.
9567     */
9568
9569     if (d - PL_tokenbuf != len) {
9570         pl_yylval.ival = OP_GLOB;
9571         s = scan_str(start,!!PL_madskills,FALSE);
9572         if (!s)
9573            Perl_croak(aTHX_ "Glob not terminated");
9574         return s;
9575     }
9576     else {
9577         bool readline_overriden = FALSE;
9578         GV *gv_readline;
9579         GV **gvp;
9580         /* we're in a filehandle read situation */
9581         d = PL_tokenbuf;
9582
9583         /* turn <> into <ARGV> (5 bytes, so the trailing NUL is copied too) */
9584         if (!len)
9585             Copy("ARGV",d,5,char);
9586
9587         /* Check whether readline() is overridden: either the "readline"
              found by gv_fetchpvs() has an imported sub, or the "readline"
              entry in PL_globalstash does */
9588         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9589         if ((gv_readline
9590                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9591                 ||
9592                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9593                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9594                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9595             readline_overriden = TRUE;
9596
9597         /* if <$fh>, create the ops to turn the variable into a
9598            filehandle
9599         */
9600         if (*d == '$') {
9601             /* try to find it in the pad for this block, otherwise
9602                build symbol table ops
9603             */
9604             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9605             if (tmp != NOT_IN_PAD) {
                    /* an "our" variable: build its package-qualified name
                       and treat it as a symbol table lookup below */
9606                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9607                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9608                     HEK * const stashname = HvNAME_HEK(stash);
9609                     SV * const sym = sv_2mortal(newSVhek(stashname));
9610                     sv_catpvs(sym, "::");
9611                     sv_catpv(sym, d+1);
9612                     d = SvPVX(sym);
9613                     goto intro_sym;
9614                 }
                    /* otherwise read through the pad variable itself */
9615                 else {
9616                     OP * const o = newOP(OP_PADSV, 0);
9617                     o->op_targ = tmp;
9618                     PL_lex_op = readline_overriden
9619                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9620                                 op_append_elem(OP_LIST, o,
9621                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9622                         : (OP*)newUNOP(OP_READLINE, 0, o);
9623                 }
9624             }
9625             else {
9626                 GV *gv;
                    /* not in the pad: skip the '$' and look the name up as
                       a glob */
9627                 ++d;
9628 intro_sym:
9629                 gv = gv_fetchpv(d,
9630                                 (PL_in_eval
9631                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9632                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9633                                 SVt_PV);
9634                 PL_lex_op = readline_overriden
9635                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9636                             op_append_elem(OP_LIST,
9637                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9638                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9639                     : (OP*)newUNOP(OP_READLINE, 0,
9640                             newUNOP(OP_RV2SV, 0,
9641                                 newGVOP(OP_GV, 0, gv)));
9642             }
                /* OPf_SPECIAL is set only on the built-in readline op,
                   never on the call to an overriding sub */
9643             if (!readline_overriden)
9644                 PL_lex_op->op_flags |= OPf_SPECIAL;
9645             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9646             pl_yylval.ival = OP_NULL;
9647         }
9648
9649         /* If it's none of the above, it must be a literal filehandle
9650            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9651         else {
9652             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9653             PL_lex_op = readline_overriden
9654                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9655                         op_append_elem(OP_LIST,
9656                             newGVOP(OP_GV, 0, gv),
9657                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9658                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9659             pl_yylval.ival = OP_NULL;
9660         }
9661     }
9662
9663     return s;
9664 }
9665
9666
9667 /* scan_str
9668    takes: start position in buffer
9669           keep_quoted preserve \ on the embedded delimiter(s)
9670           keep_delims preserve the delimiters around the string
9671    returns: position to continue reading from buffer
9672    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9673         updates the read buffer.
9674
9675    This subroutine pulls a string out of the input.  It is called for:
9676         q               single quotes           q(literal text)
9677         '               single quotes           'literal text'
9678         qq              double quotes           qq(interpolate $here please)
9679         "               double quotes           "interpolate $here please"
9680         qx              backticks               qx(/bin/ls -l)
9681         `               backticks               `/bin/ls -l`
9682         qw              quote words             @EXPORT_OK = qw( func() $spam )
9683         m//             regexp match            m/this/
9684         s///            regexp substitute       s/this/that/
9685         tr///           string transliterate    tr/this/that/
9686         y///            string transliterate    y/this/that/
9687         ($*@)           sub prototypes          sub foo ($)
9688         (stuff)         sub attr parameters     sub foo : attr(stuff)
9689         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9690
9691    In most of these cases (all but <>, patterns and transliterate)
9692    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9693    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9694    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9695    calls scan_str().
9696
9697    It skips whitespace before the string starts, and treats the first
9698    character as the delimiter.  If the delimiter is one of ([{< then
9699    the corresponding "close" character )]}> is used as the closing
9700    delimiter.  It allows quoting of delimiters, and if the string has
9701    balanced delimiters ([{<>}]) it allows nesting.
9702
9703    On success, the SV with the resulting string is put into lex_stuff or,
9704    if that is already non-NULL, into lex_repl. The second case occurs only
9705    when parsing the RHS of the special constructs s/// and tr/// (y///).
9706    For convenience, the terminating delimiter character is stuffed into
9707    SvIVX of the SV.
9708 */
9709
9710 STATIC char *
9711 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9712 {
         /* Scan one delimited string starting at 'start' and copy its contents
          * into a freshly created SV.
          *
          *   start        where scanning begins; leading whitespace is skipped
          *                and the next character is taken as the opening
          *                delimiter
          *   keep_quoted  if false, a backslash that escapes a delimiter is
          *                dropped from the copy (only the delimiter is kept);
          *                if true the backslash is copied as well
          *   keep_delims  if true, the opening and closing delimiters are
          *                appended to the SV along with the contents
          *
          * Returns the buffer position just past the closing delimiter.  If
          * lex_next_chunk() fails before the closing delimiter is seen, the
          * partial SV is freed, the cop line is reset to PL_multi_start and
          * NULL is returned.
          *
          * Besides storing the SV in PL_lex_stuff (or PL_lex_repl when
          * PL_lex_stuff is already set), this sets PL_multi_start,
          * PL_multi_open, PL_multi_close and PL_multi_end.
          */
9713     dVAR;
9714     SV *sv;                             /* scalar value: string */
9715     const char *tmps;                   /* temp string, used for delimiter matching */
9716     register char *s = start;           /* current position in the buffer */
9717     register char term;                 /* terminating character */
9718     register char *to;                  /* current position in the sv's data */
9719     I32 brackets = 1;                   /* bracket nesting level */
9720     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9721     I32 termcode;                       /* terminating char. code */
9722     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9723     STRLEN termlen;                     /* length of terminating string */
9724     int last_off = 0;                   /* last position for nesting bracket */
9725 #ifdef PERL_MAD
9726     int stuffstart;
9727     char *tstart;
9728 #endif
9729
9730     PERL_ARGS_ASSERT_SCAN_STR;
9731
9732     /* skip space before the delimiter */
9733     if (isSPACE(*s)) {
9734         s = PEEKSPACE(s);
9735     }
9736
9737 #ifdef PERL_MAD
9738     if (PL_realtokenstart >= 0) {
9739         stuffstart = PL_realtokenstart;
9740         PL_realtokenstart = -1;
9741     }
9742     else
9743         stuffstart = start - SvPVX(PL_linestr);
9744 #endif
9745     /* mark where we are, in case we need to report errors */
9746     CLINE;
9747
9748     /* after skipping whitespace, the next character is the terminator */
9749     term = *s;
9750     if (!UTF) {
9751         termcode = termstr[0] = term;
9752         termlen = 1;
9753     }
9754     else {
9755         termcode = utf8_to_uvchr((U8*)s, &termlen);
9756         Copy(s, termstr, termlen, U8);
9757         if (!UTF8_IS_INVARIANT(term))
9758             has_utf8 = TRUE;
9759     }
9760
9761     /* mark where we are */
9762     PL_multi_start = CopLINE(PL_curcop);
9763     PL_multi_open = term;
9764
9765     /* find corresponding closing delimiter */
         /* The lookup string is laid out so that the character five places
            after any match is the wanted closer: an opening bracket maps to
            its partner, while a closing bracket or a space maps to itself.
            'term' is tested first because strchr() would otherwise match the
            string's terminating NUL.  PL_multi_open keeps the original
            character. */
9766     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9767         termcode = termstr[0] = term = tmps[5];
9768
9769     PL_multi_close = term;
9770
9771     /* create a new SV to hold the contents.  79 is the SV's initial length.
9772        What a random number. */
         /* NOTE(review): SvGROW() below is passed 80, not 79 -- presumably 79
            characters plus the trailing NUL; confirm. */
9773     sv = newSV_type(SVt_PVIV);
9774     SvGROW(sv, 80);
9775     SvIV_set(sv, termcode);
9776     (void)SvPOK_only(sv);               /* validate pointer */
9777
9778     /* move past delimiter and try to read a complete string */
9779     if (keep_delims)
9780         sv_catpvn(sv, s, termlen);
9781     s += termlen;
9782 #ifdef PERL_MAD
9783     tstart = SvPVX(PL_linestr) + stuffstart;
9784     if (!PL_thisopen && !keep_delims) {
9785         PL_thisopen = newSVpvn(tstart, s - tstart);
9786         stuffstart = s - SvPVX(PL_linestr);
9787     }
9788 #endif
9789     for (;;) {
             /* When an encoding is in effect and the source is not UTF-8 the
                copying is delegated to sv_cat_decode(), which is handed the
                terminator string.  Judging by how its results are used below,
                it appends to sv up to and including the terminator and
                advances 'offset' -- confirm against sv_cat_decode(). */
9790         if (PL_encoding && !UTF) {
9791             bool cont = TRUE;
9792
9793             while (cont) {
9794                 int offset = s - SvPVX_const(PL_linestr);
9795                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9796                                            &offset, (char*)termstr, termlen);
9797                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9798                 char * const svlast = SvEND(sv) - 1;
9799
9800                 for (; s < ns; s++) {
9801                     if (*s == '\n' && !PL_rsfp)
9802                         CopLINE_inc(PL_curcop);
9803                 }
9804                 if (!found)
9805                     goto read_more_line;
9806                 else {
9807                     /* handle quoted delimiters */
9808                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9809                         const char *t;
9810                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9811                             t--;
                             /* t now sits just before the run of backslashes
                                that precedes the terminator; an odd run length
                                means the terminator itself was escaped, so
                                keep scanning */
9812                         if ((svlast-1 - t) % 2) {
9813                             if (!keep_quoted) {
9814                                 *(svlast-1) = term;
9815                                 *svlast = '\0';
9816                                 SvCUR_set(sv, SvCUR(sv) - 1);
9817                             }
9818                             continue;
9819                         }
9820                     }
9821                     if (PL_multi_open == PL_multi_close) {
9822                         cont = FALSE;
9823                     }
9824                     else {
9825                         const char *t;
9826                         char *w;
9827                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9828                             /* At here, all closes are "was quoted" one,
9829                                so we don't check PL_multi_close. */
9830                             if (*t == '\\') {
9831                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9832                                     t++;
9833                                 else
9834                                     *w++ = *t++;
9835                             }
9836                             else if (*t == PL_multi_open)
9837                                 brackets++;
9838
9839                             *w = *t;
9840                         }
9841                         if (w < t) {
9842                             *w++ = term;
9843                             *w = '\0';
9844                             SvCUR_set(sv, w - SvPVX_const(sv));
9845                         }
9846                         last_off = w - SvPVX(sv);
9847                         if (--brackets <= 0)
9848                             cont = FALSE;
9849                     }
9850                 }
9851             }
                 /* the terminator is the last character in sv at this point;
                    drop it unless the caller asked for the delimiters */
9852             if (!keep_delims) {
9853                 SvCUR_set(sv, SvCUR(sv) - 1);
9854                 *SvEND(sv) = '\0';
9855             }
9856             break;
9857         }
9858
9859         /* extend sv if need be */
9860         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9861         /* set 'to' to the next character in the sv's string */
9862         to = SvPVX(sv)+SvCUR(sv);
9863
9864         /* if open delimiter is the close delimiter read unbridled */
9865         if (PL_multi_open == PL_multi_close) {
9866             for (; s < PL_bufend; s++,to++) {
9867                 /* embedded newlines increment the current line number */
9868                 if (*s == '\n' && !PL_rsfp)
9869                     CopLINE_inc(PL_curcop);
9870                 /* handle quoted delimiters */
9871                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9872                     if (!keep_quoted && s[1] == term)
9873                         s++;
9874                 /* any other quotes are simply copied straight through */
9875                     else
9876                         *to++ = *s++;
9877                 }
9878                 /* terminate when run out of buffer (the for() condition), or
9879                    have found the terminator */
9880                 else if (*s == term) {
9881                     if (termlen == 1)
9882                         break;
                         /* multi-byte terminator: the first byte matched, so
                            compare the whole sequence without reading past
                            the end of the buffer */
9883                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9884                         break;
9885                 }
9886                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9887                     has_utf8 = TRUE;
9888                 *to = *s;
9889             }
9890         }
9891
9892         /* if the terminator isn't the same as the start character (e.g.,
9893            matched brackets), we have to allow more in the quoting, and
9894            be prepared for nested brackets.
9895         */
9896         else {
9897             /* read until we run out of string, or we find the terminator */
9898             for (; s < PL_bufend; s++,to++) {
9899                 /* embedded newlines increment the line count */
9900                 if (*s == '\n' && !PL_rsfp)
9901                     CopLINE_inc(PL_curcop);
9902                 /* backslashes can escape the open or closing characters */
9903                 if (*s == '\\' && s+1 < PL_bufend) {
9904                     if (!keep_quoted &&
9905                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9906                         s++;
9907                     else
9908                         *to++ = *s++;
9909                 }
9910                 /* allow nested opens and closes */
9911                 else if (*s == PL_multi_close && --brackets <= 0)
9912                     break;
9913                 else if (*s == PL_multi_open)
9914                     brackets++;
9915                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9916                     has_utf8 = TRUE;
9917                 *to = *s;
9918             }
9919         }
9920         /* terminate the copied string and update the sv's end-of-string */
9921         *to = '\0';
9922         SvCUR_set(sv, to - SvPVX_const(sv));
9923
9924         /*
9925          * this next chunk reads more into the buffer if we're not done yet
9926          */
9927
             /* the copy loops above only stop short of PL_bufend when they
                break on the terminator, so s < PL_bufend means it was found */
9928         if (s < PL_bufend)
9929             break;              /* handle case where we are done yet :-) */
9930
9931 #ifndef PERL_STRICT_CR
             /* normalise the end of what has been copied so far: a trailing
                CR LF or LF CR pair becomes a single LF, and a lone trailing
                CR becomes LF */
9932         if (to - SvPVX_const(sv) >= 2) {
9933             if ((to[-2] == '\r' && to[-1] == '\n') ||
9934                 (to[-2] == '\n' && to[-1] == '\r'))
9935             {
9936                 to[-2] = '\n';
9937                 to--;
9938                 SvCUR_set(sv, to - SvPVX_const(sv));
9939             }
9940             else if (to[-1] == '\r')
9941                 to[-1] = '\n';
9942         }
9943         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9944             to[-1] = '\n';
9945 #endif
9946
9947      read_more_line:
9948         /* if we're out of file, or a read fails, bail and reset the current
9949            line marker so we can report where the unterminated string began
9950         */
9951 #ifdef PERL_MAD
9952         if (PL_madskills) {
9953             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9954             if (PL_thisstuff)
9955                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9956             else
9957                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9958         }
9959 #endif
9960         CopLINE_inc(PL_curcop);
9961         PL_bufptr = PL_bufend;
9962         if (!lex_next_chunk(0)) {
9963             sv_free(sv);
9964             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9965             return NULL;
9966         }
9967         s = PL_bufptr;
9968 #ifdef PERL_MAD
9969         stuffstart = 0;
9970 #endif
9971     }
9972
9973     /* at this point, we have successfully read the delimited string */
9974
         /* In the non-encoding path s still points at the closing delimiter,
            so it is optionally copied and then stepped over here.  The
            encoding path above has already advanced s past it. */
9975     if (!PL_encoding || UTF) {
9976 #ifdef PERL_MAD
9977         if (PL_madskills) {
9978             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9979             const int len = s - tstart;
9980             if (PL_thisstuff)
9981                 sv_catpvn(PL_thisstuff, tstart, len);
9982             else
9983                 PL_thisstuff = newSVpvn(tstart, len);
9984             if (!PL_thisclose && !keep_delims)
9985                 PL_thisclose = newSVpvn(s,termlen);
9986         }
9987 #endif
9988
9989         if (keep_delims)
9990             sv_catpvn(sv, s, termlen);
9991         s += termlen;
9992     }
9993 #ifdef PERL_MAD
9994     else {
9995         if (PL_madskills) {
9996             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9997             const int len = s - tstart - termlen;
9998             if (PL_thisstuff)
9999                 sv_catpvn(PL_thisstuff, tstart, len);
10000             else
10001                 PL_thisstuff = newSVpvn(tstart, len);
10002             if (!PL_thisclose && !keep_delims)
10003                 PL_thisclose = newSVpvn(s - termlen,termlen);
10004         }
10005     }
10006 #endif
10007     if (has_utf8 || PL_encoding)
10008         SvUTF8_on(sv);
10009
10010     PL_multi_end = CopLINE(PL_curcop);
10011
10012     /* if we allocated too much space, give some back */
10013     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10014         SvLEN_set(sv, SvCUR(sv) + 1);
10015         SvPV_renew(sv, SvLEN(sv));
10016     }
10017
10018     /* decide whether this is the first or second quoted string we've read
10019        for this op
10020     */
10021
10022     if (PL_lex_stuff)
10023         PL_lex_repl = sv;
10024     else
10025         PL_lex_stuff = sv;
10026     return s;
10027 }
10028
10029 /*
10030   scan_num
10031   takes: pointer to position in buffer, and the YYSTYPE to fill in
10032   returns: pointer to new position in buffer
10033   side-effects: builds an OP_CONST op for the constant in lvalp->opval
        (lvalp->opval is set to NULL if no SV was produced)
10034
10035   Read a number in any of the formats that Perl accepts:
10036
10037   \d(_?\d)*(\.(\d(_?\d)*)?)?([Ee][\+\-]?(\d(_?\d)*))?   12 12.34 12.
10038   \.\d(_?\d)*([Ee][\+\-]?(\d(_?\d)*))?                  .34
10039   0b[01](_?[01])*
10040   0[0-7](_?[0-7])*
10041   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10042
        Input starting with 'v', or a decimal whose fractional part is
        followed by another '.' and a digit, is handed to scan_vstring()
        instead.  Any other leading character croaks with "panic: scan_num".

10043   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10044   thing it reads (decimal numbers only; the 0x/0b/octal path accumulates
        the value directly).
10045
10046   If it reads a number without a decimal point or an exponent, it will
10047   try converting the number to an integer and see if it can do so
10048   without loss of precision.
10049 */
10050
10051 char *
10052 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10053 {
10054     dVAR;
10055     register const char *s = start;     /* current position in buffer */
10056     register char *d;                   /* destination in temp buffer */
10057     register char *e;                   /* end of temp buffer */
10058     NV nv;                              /* number read, as a double */
10059     SV *sv = NULL;                      /* place to put the converted number */
10060     bool floatit;                       /* boolean: int or float? */
10061     const char *lastub = NULL;          /* position of last underbar */
10062     static char const number_too_long[] = "Number too long";
10063
10064     PERL_ARGS_ASSERT_SCAN_NUM;
10065
10066     /* We use the first character to decide what type of number this is */
10067
10068     switch (*s) {
10069     default:
10070       Perl_croak(aTHX_ "panic: scan_num");
10071
10072     /* if it starts with a 0, it could be an octal number, a decimal in
10073        0.13 disguise, or a hexadecimal number, or a binary number. */
10074     case '0':
10075         {
10076           /* variables:
10077              u          holds the "number so far"
10078              shift      the power of 2 of the base
10079                         (hex == 4, octal == 3, binary == 1)
10080              overflowed was the number more than we can hold?
10081
10082              Shift is used when we add a digit.  It also serves as an "are
10083              we in octal/hex/binary?" indicator to disallow hex characters
10084              when in octal mode.
10085            */
10086             NV n = 0.0;
10087             UV u = 0;
10088             I32 shift;
10089             bool overflowed = FALSE;
10090             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
                  /* the tables below are indexed by shift (1, 3 or 4), which
                     is why slots 0 and 2 hold placeholder values */
10091             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10092             static const char* const bases[5] =
10093               { "", "binary", "", "octal", "hexadecimal" };
10094             static const char* const Bases[5] =
10095               { "", "Binary", "", "Octal", "Hexadecimal" };
10096             static const char* const maxima[5] =
10097               { "",
10098                 "0b11111111111111111111111111111111",
10099                 "",
10100                 "037777777777",
10101                 "0xffffffff" };
10102             const char *base, *Base, *max;
10103
10104             /* check for hex */
10105             if (s[1] == 'x' || s[1] == 'X') {
10106                 shift = 4;
10107                 s += 2;
10108                 just_zero = FALSE;
10109             } else if (s[1] == 'b' || s[1] == 'B') {
10110                 shift = 1;
10111                 s += 2;
10112                 just_zero = FALSE;
10113             }
10114             /* check for a decimal in disguise */
10115             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10116                 goto decimal;
10117             /* so it must be octal */
10118             else {
10119                 shift = 3;
10120                 s++;
10121             }
10122
10123             if (*s == '_') {
10124                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10125                                "Misplaced _ in number");
10126                lastub = s++;
10127             }
10128
10129             base = bases[shift];
10130             Base = Bases[shift];
10131             max  = maxima[shift];
10132
10133             /* read the rest of the number */
10134             for (;;) {
10135                 /* x is used in the overflow test,
10136                    b is the digit we're adding on. */
10137                 UV x, b;
10138
10139                 switch (*s) {
10140
10141                 /* if we don't mention it, we're done */
10142                 default:
10143                     goto out;
10144
10145                 /* _ are ignored -- but warned about if consecutive */
10146                 case '_':
10147                     if (lastub && s == lastub + 1)
10148                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10149                                        "Misplaced _ in number");
10150                     lastub = s++;
10151                     break;
10152
10153                 /* 8 and 9 are not octal */
10154                 case '8': case '9':
10155                     if (shift == 3)
10156                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10157                     /* FALL THROUGH */
10158
10159                 /* octal digits */
10160                 case '2': case '3': case '4':
10161                 case '5': case '6': case '7':
10162                     if (shift == 1)
10163                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10164                     /* FALL THROUGH */
10165
10166                 case '0': case '1':
10167                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10168                     goto digit;
10169
10170                 /* hex digits */
10171                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10172                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10173                     /* make sure they said 0x */
10174                     if (shift != 4)
10175                         goto out;
                          /* in ASCII the low three bits of 'a'..'f' and
                             'A'..'F' are 1..6, so adding 9 yields 10..15 */
10176                     b = (*s++ & 7) + 9;
10177
10178                     /* Prepare to put the digit we have onto the end
10179                        of the number so far.  We check for overflows.
10180                     */
10181
10182                   digit:
10183                     just_zero = FALSE;
10184                     if (!overflowed) {
10185                         x = u << shift; /* make room for the digit */
10186
                              /* if shifting back does not give u again, high
                                 bits were lost; from then on the value is
                                 accumulated in the NV n instead (this switch
                                 is skipped while HINT_NEW_BINARY is set) */
10187                         if ((x >> shift) != u
10188                             && !(PL_hints & HINT_NEW_BINARY)) {
10189                             overflowed = TRUE;
10190                             n = (NV) u;
10191                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10192                                              "Integer overflow in %s number",
10193                                              base);
10194                         } else
10195                             u = x | b;          /* add the digit to the end */
10196                     }
10197                     if (overflowed) {
10198                         n *= nvshift[shift];
10199                         /* If an NV has not enough bits in its
10200                          * mantissa to represent an UV this summing of
10201                          * small low-order numbers is a waste of time
10202                          * (because the NV cannot preserve the
10203                          * low-order bits anyway): we could just
10204                          * remember when did we overflow and in the
10205                          * end just multiply n by the right
10206                          * amount. */
10207                         n += (NV) b;
10208                     }
10209                     break;
10210                 }
10211             }
10212
10213           /* if we get here, we had success: make a scalar value from
10214              the number.
10215           */
10216           out:
10217
10218             /* final misplaced underbar check */
10219             if (s[-1] == '_') {
10220                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10221             }
10222
10223             if (overflowed) {
10224                 if (n > 4294967295.0)
10225                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10226                                    "%s number > %s non-portable",
10227                                    Base, max);
10228                 sv = newSVnv(n);
10229             }
10230             else {
10231 #if UVSIZE > 4
10232                 if (u > 0xffffffff)
10233                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10234                                    "%s number > %s non-portable",
10235                                    Base, max);
10236 #endif
10237                 sv = newSVuv(u);
10238             }
                  /* a bare "0" (just_zero still set) is offered to the
                     "integer" constant handler; everything else on this path
                     goes to the "binary" handler when that hint is set */
10239             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10240                 sv = new_constant(start, s - start, "integer",
10241                                   sv, NULL, NULL, 0);
10242             else if (PL_hints & HINT_NEW_BINARY)
10243                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10244         }
10245         break;
10246
10247     /*
10248       handle decimal numbers.
10249       we're also sent here when we read a 0 as the first digit
10250     */
10251     case '1': case '2': case '3': case '4': case '5':
10252     case '6': case '7': case '8': case '9': case '.':
10253       decimal:
10254         d = PL_tokenbuf;
10255         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10256         floatit = FALSE;
10257
10258         /* read next group of digits and _ and copy into d */
10259         while (isDIGIT(*s) || *s == '_') {
10260             /* skip underscores, checking for misplaced ones
10261                if -w is on
10262             */
10263             if (*s == '_') {
10264                 if (lastub && s == lastub + 1)
10265                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10266                                    "Misplaced _ in number");
10267                 lastub = s++;
10268             }
10269             else {
10270                 /* check for end of fixed-length buffer */
10271                 if (d >= e)
10272                     Perl_croak(aTHX_ number_too_long);
10273                 /* if we're ok, copy the character */
10274                 *d++ = *s++;
10275             }
10276         }
10277
10278         /* final misplaced underbar check */
10279         if (lastub && s == lastub + 1) {
10280             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10281         }
10282
10283         /* read a decimal portion if there is one.  avoid
10284            3..5 being interpreted as the number 3. followed
10285            by .5
10286         */
10287         if (*s == '.' && s[1] != '.') {
10288             floatit = TRUE;
10289             *d++ = *s++;
10290
10291             if (*s == '_') {
10292                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10293                                "Misplaced _ in number");
10294                 lastub = s;
10295             }
10296
10297             /* copy, ignoring underbars, until we run out of digits.
10298             */
10299             for (; isDIGIT(*s) || *s == '_'; s++) {
10300                 /* fixed length buffer check */
10301                 if (d >= e)
10302                     Perl_croak(aTHX_ number_too_long);
10303                 if (*s == '_') {
10304                    if (lastub && s == lastub + 1)
10305                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10306                                       "Misplaced _ in number");
10307                    lastub = s;
10308                 }
10309                 else
10310                     *d++ = *s;
10311             }
10312             /* fractional part ending in underbar? */
10313             if (s[-1] == '_') {
10314                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10315                                "Misplaced _ in number");
10316             }
10317             if (*s == '.' && isDIGIT(s[1])) {
10318                 /* oops, it's really a v-string, but without the "v" */
10319                 s = start;
10320                 goto vstring;
10321             }
10322         }
10323
10324         /* read exponent part, if present */
10325         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10326             floatit = TRUE;
10327             s++;
10328
10329             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10330             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10331
10332             /* stray preinitial _ */
10333             if (*s == '_') {
10334                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10335                                "Misplaced _ in number");
10336                 lastub = s++;
10337             }
10338
10339             /* allow positive or negative exponent */
10340             if (*s == '+' || *s == '-')
10341                 *d++ = *s++;
10342
10343             /* stray initial _ */
10344             if (*s == '_') {
10345                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10346                                "Misplaced _ in number");
10347                 lastub = s++;
10348             }
10349
10350             /* read digits of exponent */
10351             while (isDIGIT(*s) || *s == '_') {
10352                 if (isDIGIT(*s)) {
10353                     if (d >= e)
10354                         Perl_croak(aTHX_ number_too_long);
10355                     *d++ = *s++;
10356                 }
10357                 else {
                         /* warn for a doubled underscore, or for one that is
                            not followed by a digit or another underscore
                            (i.e. a trailing one) */
10358                    if (((lastub && s == lastub + 1) ||
10359                         (!isDIGIT(s[1]) && s[1] != '_')))
10360                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10361                                       "Misplaced _ in number");
10362                    lastub = s++;
10363                 }
10364             }
10365         }
10366
10367
10368         /*
10369            We try to do an integer conversion first if no characters
10370            indicating "float" have been found.
10371          */
10372
10373         if (!floatit) {
10374             UV uv;
10375             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10376
                  /* only the two exact flag combinations tested below produce
                     an integer SV; any other result from grok_number(), or a
                     negative value below IV_MIN, falls back to the float
                     path */
10377             if (flags == IS_NUMBER_IN_UV) {
10378               if (uv <= IV_MAX)
10379                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10380               else
10381                 sv = newSVuv(uv);
10382             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10383               if (uv <= (UV) IV_MIN)
10384                 sv = newSViv(-(IV)uv);
10385               else
10386                 floatit = TRUE;
10387             } else
10388               floatit = TRUE;
10389         }
10390         if (floatit) {
10391             /* terminate the string */
10392             *d = '\0';
10393             nv = Atof(PL_tokenbuf);
10394             sv = newSVnv(nv);
10395         }
10396
              /* hand the cleaned-up text in PL_tokenbuf (underscores removed,
                 exponent marker lower-cased) to the matching constant handler
                 when the corresponding hint is set */
10397         if ( floatit
10398              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10399             const char *const key = floatit ? "float" : "integer";
10400             const STRLEN keylen = floatit ? 5 : 7;
10401             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10402                                 key, keylen, sv, NULL, NULL, 0);
10403         }
10404         break;
10405
10406     /* if it starts with a v, it could be a v-string */
10407     case 'v':
10408 vstring:
10409                 sv = newSV(5); /* preallocate storage space */
10410                 s = scan_vstring(s, PL_bufend, sv);
10411         break;
10412     }
10413
10414     /* make the op for the constant and return */
10415
10416     if (sv)
10417         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10418     else
10419         lvalp->opval = NULL;
10420
10421     return (char *)s;
10422 }
10423
10424 STATIC char *
10425 S_scan_formline(pTHX_ register char *s)
10426 {
          /* Gather lines of a format body, starting at s, into one string
           * ('stuff') and queue the tokens for it.
           *
           * Lines are appended until one containing '@' or '^' has been added
           * (needargs), until a line consisting of '.' followed only by
           * spaces/tabs (and CRs unless PERL_STRICT_CR) ends the format
           * (eofmt), or until input runs out.  Lines starting with '#' are
           * not appended.
           *
           * If anything was gathered, an OP_CONST holding it and an
           * OP_FORMLINE LSTOP are forced as the next tokens (plus a ',' when
           * arguments are needed).  Otherwise 'stuff' is released,
           * PL_lex_formbrack is cleared if the terminating '.' was seen, and
           * PL_bufptr is set to s.
           *
           * Returns the updated buffer position.
           */
10427     dVAR;
10428     register char *eol;
10429     register char *t;
10430     SV * const stuff = newSVpvs("");
10431     bool needargs = FALSE;
10432     bool eofmt = FALSE;
10433 #ifdef PERL_MAD
10434     char *tokenstart = s;
10435     SV* savewhite = NULL;
10436
10437     if (PL_madskills) {
10438         savewhite = PL_thiswhite;
10439         PL_thiswhite = 0;
10440     }
10441 #endif
10442
10443     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10444
10445     while (!needargs) {
              /* a '.' followed only by blanks up to the newline (or the end
                 of the buffer) terminates the format */
10446         if (*s == '.') {
10447             t = s+1;
10448 #ifdef PERL_STRICT_CR
10449             while (SPACE_OR_TAB(*t))
10450                 t++;
10451 #else
10452             while (SPACE_OR_TAB(*t) || *t == '\r')
10453                 t++;
10454 #endif
10455             if (*t == '\n' || t == PL_bufend) {
10456                 eofmt = TRUE;
10457                 break;
10458             }
10459         }
              /* find the end of the current line: inside an eval with no
                 file handle the buffer may hold several lines, so look for
                 the next newline (eol ends up just past it, or at PL_bufend
                 if there is none); otherwise take the whole of PL_linestr */
10460         if (PL_in_eval && !PL_rsfp) {
10461             eol = (char *) memchr(s,'\n',PL_bufend-s);
10462             if (!eol++)
10463                 eol = PL_bufend;
10464         }
10465         else
10466             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10467         if (*s != '#') {
10468             for (t = s; t < eol; t++) {
10469                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10470                     needargs = FALSE;
10471                     goto enough;        /* ~~ must be first line in formline */
10472                 }
10473                 if (*t == '@' || *t == '^')
10474                     needargs = TRUE;
10475             }
10476             if (eol > s) {
10477                 sv_catpvn(stuff, s, eol-s);
10478 #ifndef PERL_STRICT_CR
                      /* turn a CR LF line ending just appended to 'stuff'
                         into a plain LF */
10479                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10480                     char *end = SvPVX(stuff) + SvCUR(stuff);
10481                     end[-2] = '\n';
10482                     end[-1] = '\0';
10483                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10484                 }
10485 #endif
10486             }
10487             else
10488               break;
10489         }
10490         s = (char*)eol;
10491         if (PL_rsfp) {
10492             bool got_some;
10493 #ifdef PERL_MAD
10494             if (PL_madskills) {
10495                 if (PL_thistoken)
10496                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10497                 else
10498                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10499             }
10500 #endif
10501             PL_bufptr = PL_bufend;
                  /* the cop line is bumped only for the duration of the read
                     and restored straight afterwards */
10502             CopLINE_inc(PL_curcop);
10503             got_some = lex_next_chunk(0);
10504             CopLINE_dec(PL_curcop);
10505             s = PL_bufptr;
10506 #ifdef PERL_MAD
10507             tokenstart = PL_bufptr;
10508 #endif
10509             if (!got_some)
10510                 break;
10511         }
10512         incline(s);
10513     }
10514   enough:
10515     if (SvCUR(stuff)) {
10516         PL_expect = XTERM;
              /* NOTE(review): tokens are forced in the order ',', THING,
                 LSTOP; presumably forced tokens are handed back
                 last-in-first-out, i.e. LSTOP THING ',' -- confirm against
                 force_next() */
10517         if (needargs) {
10518             PL_lex_state = LEX_NORMAL;
10519             start_force(PL_curforce);
10520             NEXTVAL_NEXTTOKE.ival = 0;
10521             force_next(',');
10522         }
10523         else
10524             PL_lex_state = LEX_FORMLINE;
10525         if (!IN_BYTES) {
10526             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10527                 SvUTF8_on(stuff);
10528             else if (PL_encoding)
10529                 sv_recode_to_utf8(stuff, PL_encoding);
10530         }
10531         start_force(PL_curforce);
10532         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10533         force_next(THING);
10534         start_force(PL_curforce);
10535         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10536         force_next(LSTOP);
10537     }
10538     else {
              /* nothing was gathered: release the empty SV and, if the
                 closing '.' was seen, leave format mode */
10539         SvREFCNT_dec(stuff);
10540         if (eofmt)
10541             PL_lex_formbrack = 0;
10542         PL_bufptr = s;
10543     }
10544 #ifdef PERL_MAD
10545     if (PL_madskills) {
10546         if (PL_thistoken)
10547             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10548         else
10549             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10550         PL_thiswhite = savewhite;
10551     }
10552 #endif
10553     return s;
10554 }
10555
      /*
       * Prepare PL_compcv for compiling a new subroutine, or a format when
       * is_format is true.
       *
       * PL_subline, PL_subname and PL_compcv are saved (SAVEI32 / save_item /
       * SAVESPTR) before being overwritten.  A new CV of type SVt_PVFM or
       * SVt_PVCV then becomes PL_compcv, gets the caller's flags ORed into
       * CvFLAGS and a new padlist, and records the previous PL_compcv as its
       * CvOUTSIDE.
       *
       * Returns the value PL_savestack_ix had on entry.
       */
10556 I32
10557 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10558 {
10559     dVAR;
10560     const I32 entry_ix = PL_savestack_ix;
10561     CV * const enclosing_cv = PL_compcv;
10562
10563     if (enclosing_cv) {
10564         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10565     }
10566     SAVEI32(PL_subline);
10567     save_item(PL_subname);
10568     SAVESPTR(PL_compcv);
10569
10570     PL_subline = CopLINE(PL_curcop);
10571     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10572     CvFLAGS(PL_compcv) |= flags;
10573
10574     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10575     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10576     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(enclosing_cv));
10577
10578     return entry_ix;
10579 }
10580
10581 #ifdef __SC__
10582 #pragma segment Perl_yylex
10583 #endif
      /*
       * Report s through yyerror() with EVAL_WARNONLY set in PL_in_eval, so
       * that yyerror() takes its warning branch instead of queueing an error.
       * The bit is cleared again afterwards regardless of its previous state.
       * Always returns 0.
       */
10584 static int
10585 S_yywarn(pTHX_ const char *const s)
10586 {
10587     dVAR;
10588
10589     PERL_ARGS_ASSERT_YYWARN;
10590
10591     PL_in_eval = PL_in_eval | EVAL_WARNONLY;
10592     (void)yyerror(s);
10593     PL_in_eval = PL_in_eval & ~EVAL_WARNONLY;
10594     return 0;
10595 }
10596
      /*
       * Report a syntax error whose text starts with s (or a syntax warning
       * when EVAL_WARNONLY is set in PL_in_eval).
       *
       * The message is completed with " at FILE line N, " followed by either
       * a quoted excerpt of recently lexed source (near "...") or a short
       * description of the position ("at EOF", "next char X", ...).  It is
       * then handed to Perl_ck_warner_d() or qerror().  When PL_error_count
       * is 10 or more afterwards, the function croaks with
       * "... has too many errors."
       *
       * Side effects visible in this function: on non-NetWare builds
       * PL_oldoldbufptr or PL_oldbufptr itself is advanced past leading
       * whitespace, PL_multi_end is reset to 0 after the runaway-string hint
       * has been emitted, and PL_in_my / PL_in_my_stash are cleared before
       * returning.
       *
       * Always returns 0.
       */
10597 int
10598 Perl_yyerror(pTHX_ const char *const s)
10599 {
10600     dVAR;
10601     const char *where = NULL;
10602     const char *context = NULL;
10603     int contlen = -1;
10604     SV *msg;
10605     int yychar  = PL_parser->yychar;
10606
10607     PERL_ARGS_ASSERT_YYERROR;
10608
          /* Decide how to describe the error position: exactly one of
             'where' (a fixed phrase) or 'context'/'contlen' (a source
             excerpt) is filled in by the chain below.  */
10609     if (!yychar || (yychar == ';' && !PL_rsfp))
10610         where = "at EOF";
          /* Preferred excerpt: from PL_oldoldbufptr up to PL_bufptr, used
             only when that span is non-empty, shorter than 200 bytes and all
             three buffer pointers are distinct.  */
10611     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10612       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10613       PL_oldbufptr != PL_bufptr) {
10614         /*
10615                 Only for NetWare:
10616                 The code below is removed for NetWare because it abends/crashes on NetWare
10617                 when the script has error such as not having the closing quotes like:
10618                     if ($var eq "value)
10619                 Checking of white spaces is anyway done in NetWare code.
10620         */
10621 #ifndef NETWARE
              /* NOTE(review): the loop is not bounded by PL_bufptr, so an
                 all-whitespace span would presumably leave contlen
                 negative -- confirm whether that can occur.  */
10622         while (isSPACE(*PL_oldoldbufptr))
10623             PL_oldoldbufptr++;
10624 #endif
10625         context = PL_oldoldbufptr;
10626         contlen = PL_bufptr - PL_oldoldbufptr;
10627     }
          /* Fallback excerpt: from PL_oldbufptr up to PL_bufptr, with the
             same non-empty / shorter-than-200 restrictions.  */
10628     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10629       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10630         /*
10631                 Only for NetWare:
10632                 The code below is removed for NetWare because it abends/crashes on NetWare
10633                 when the script has error such as not having the closing quotes like:
10634                     if ($var eq "value)
10635                 Checking of white spaces is anyway done in NetWare code.
10636         */
10637 #ifndef NETWARE
10638         while (isSPACE(*PL_oldbufptr))
10639             PL_oldbufptr++;
10640 #endif
10641         context = PL_oldbufptr;
10642         contlen = PL_bufptr - PL_oldbufptr;
10643     }
          /* No usable excerpt: describe the lookahead token instead.  */
10644     else if (yychar > 255)
10645         where = "next token ???";
10646     else if (yychar == -2) { /* YYEMPTY */
10647         if (PL_lex_state == LEX_NORMAL ||
10648            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10649             where = "at end of line";
10650         else if (PL_lex_inpat)
10651             where = "within pattern";
10652         else
10653             where = "within string";
10654     }
10655     else {
              /* Single-character token: show it as ^X when below 32, as
                 itself when isPRINT_LC() accepts it, otherwise as a
                 three-digit octal escape.  The text lives in a temporary SV
                 (SVs_TEMP).  */
10656         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10657         if (yychar < 32)
10658             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10659         else if (isPRINT_LC(yychar)) {
10660             const char string = yychar;
10661             sv_catpvn(where_sv, &string, 1);
10662         }
10663         else
10664             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10665         where = SvPVX_const(where_sv);
10666     }
          /* Assemble the message in a mortal SV: caller's text, file and
             line, then the excerpt or the position phrase.  */
10667     msg = sv_2mortal(newSVpv(s, 0));
10668     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10669         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10670     if (context)
10671         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10672     else
10673         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
          /* Add the runaway-string hint when a multi-line construct was
             recorded (PL_multi_start < PL_multi_end) and the current line is
             at most one line past its end; PL_multi_end is then zeroed.  */
10674     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10675         Perl_sv_catpvf(aTHX_ msg,
10676         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10677                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10678         PL_multi_end = 0;
10679     }
          /* Warning-only mode emits a WARN_SYNTAX warning; otherwise the
             message is passed to qerror().  */
10680     if (PL_in_eval & EVAL_WARNONLY) {
10681         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10682     }
10683     else
10684         qerror(msg);
          /* Give up once ten errors have been counted; inside an eval with
             a non-empty ERRSV its text is prepended to the final message.  */
10685     if (PL_error_count >= 10) {
10686         if (PL_in_eval && SvCUR(ERRSV))
10687             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10688                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10689         else
10690             Perl_croak(aTHX_ "%s has too many errors.\n",
10691             OutCopFILE(PL_curcop));
10692     }
10693     PL_in_my = 0;
10694     PL_in_my_stash = NULL;
10695     return 0;
10696 }
10697 #ifdef __SC__
10698 #pragma segment Main
10699 #endif
10700
10701 STATIC char*
10702 S_swallow_bom(pTHX_ U8 *s)
10703 {
10704     dVAR;
10705     const STRLEN slen = SvCUR(PL_linestr);
10706
10707     PERL_ARGS_ASSERT_SWALLOW_BOM;
10708
10709     switch (s[0]) {
10710     case 0xFF:
10711         if (s[1] == 0xFE) {
10712             /* UTF-16 little-endian? (or UTF-32LE?) */
10713             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10714                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10715 #ifndef PERL_NO_UTF16_FILTER
10716             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10717             s += 2;
10718             if (PL_bufend > (char*)s) {
10719                 s = add_utf16_textfilter(s, TRUE);
10720             }
10721 #else
10722             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10723 #endif
10724         }
10725         break;
10726     case 0xFE:
10727         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10728 #ifndef PERL_NO_UTF16_FILTER
10729             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10730             s += 2;
10731             if (PL_bufend > (char *)s) {
10732                 s = add_utf16_textfilter(s, FALSE);
10733             }
10734 #else
10735             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10736 #endif
10737         }
10738         break;
10739     case 0xEF:
10740         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10741             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10742             s += 3;                      /* UTF-8 */
10743         }
10744         break;
10745     case 0:
10746         if (slen > 3) {
10747              if (s[1] == 0) {
10748                   if (s[2] == 0xFE && s[3] == 0xFF) {
10749                        /* UTF-32 big-endian */
10750                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10751                   }
10752              }
10753              else if (s[2] == 0 && s[3] != 0) {
10754                   /* Leading bytes
10755                    * 00 xx 00 xx
10756                    * are a good indicator of UTF-16BE. */
10757 #ifndef PERL_NO_UTF16_FILTER
10758                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10759                   s = add_utf16_textfilter(s, FALSE);
10760 #else
10761                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10762 #endif
10763              }
10764         }
10765 #ifdef EBCDIC
10766     case 0xDD:
10767         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10768             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10769             s += 4;                      /* UTF-8 */
10770         }
10771         break;
10772 #endif
10773
10774     default:
10775          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10776                   /* Leading bytes
10777                    * xx 00 xx 00
10778                    * are a good indicator of UTF-16LE. */
10779 #ifndef PERL_NO_UTF16_FILTER
10780               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10781               s = add_utf16_textfilter(s, TRUE);
10782 #else
10783               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10784 #endif
10785          }
10786     }
10787     return (char*)s;
10788 }
10789
10790
10791 #ifndef PERL_NO_UTF16_FILTER
      /*
       * Source-filter callback that turns UTF-16 input into UTF-8, one line
       * per call.  Raw octets are pulled from the next filter with
       * FILTER_READ(idx + 1, ...); the decoded line is appended to sv.
       *
       * State is kept in the filter's own SV (FILTER_DATA(idx)):
       *   - its string buffer holds decoded UTF-8 not yet handed out;
       *   - its IoTOP_GV slot holds an SV with UTF-16 octets not yet decoded;
       *   - IoPAGE holds the stored read status (negative after an error,
       *     0 once end of input has been reported);
       *   - IoLINES selects utf16_to_utf8_reversed() over utf16_to_utf8().
       *
       * Croaks when maxlen is non-zero (block mode is not implemented) or
       * when called again after an error status was stored.
       *
       * Returns the negative status of a failed FILTER_READ; otherwise 1 if
       * anything was appended to sv and 0 if nothing was.
       */
10792 static I32
10793 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10794 {
10795     dVAR;
10796     SV *const filter = FILTER_DATA(idx);
10797     /* We re-use this each time round, throwing the contents away before we
10798        return.  */
10799     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10800     SV *const utf8_buffer = filter;
10801     IV status = IoPAGE(filter);
10802     const bool reverse = cBOOL(IoLINES(filter));
10803     I32 retval;
10804
10805     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10806
10807     /* As we're automatically added, at the lowest level, and hence only called
10808        from this file, we can be sure that we're not called in block mode. Hence
10809        don't bother writing code to deal with block mode.  */
10810     if (maxlen) {
10811         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10812     }
10813     if (status < 0) {
10814         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10815     }
10816     DEBUG_P(PerlIO_printf(Perl_debug_log,
10817                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10818                           FPTR2DPTR(void *, S_utf16_textfilter),
10819                           reverse ? 'l' : 'b', idx, maxlen, status,
10820                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10821
          /* Each pass either hands out a line from the UTF-8 buffer and
             leaves the loop, or reads and decodes more UTF-16 and retries.  */
10822     while (1) {
10823         STRLEN chars;
10824         STRLEN have;
10825         I32 newlen;
10826         U8 *end;
10827         /* First, look in our buffer of existing UTF-8 data:  */
10828         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10829
10830         if (nl) {
                  /* Include the newline itself in what is handed out.  */
10831             ++nl;
10832         } else if (status == 0) {
10833             /* EOF */
                  /* No newline left and no more input: hand out whatever
                     remains (possibly nothing) and store the EOF status.  */
10834             IoPAGE(filter) = 0;
10835             nl = SvEND(utf8_buffer);
10836         }
10837         if (nl) {
10838             STRLEN got = nl - SvPVX(utf8_buffer);
10839             /* Did we have anything to append?  */
10840             retval = got != 0;
10841             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10842             /* Everything else in this code works just fine if SVp_POK isn't
10843                set.  This, however, needs it, and we need it to work, else
10844                we loop infinitely because the buffer is never consumed.  */
10845             sv_chop(utf8_buffer, nl);
10846             break;
10847         }
10848
10849         /* OK, not a complete line there, so need to read some more UTF-16.
10850            Read an extra octet if the buffer currently has an odd number. */
10851         while (1) {
10852             if (status <= 0)
10853                 break;
10854             if (SvCUR(utf16_buffer) >= 2) {
10855                 /* Location of the high octet of the last complete code point.
10856                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10857                    *coupled* with all the benefits of partial reads and
10858                    endianness.  */
10859                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10860                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10861
                      /* Stop reading unless the last complete 16-bit unit
                         has a high octet in 0xd8..0xdb, i.e. is the first
                         half of a surrogate pair.  */
10862                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10863                     break;
10864                 }
10865
10866                 /* We have the first half of a surrogate. Read more.  */
10867                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10868             }
10869
10870             status = FILTER_READ(idx + 1, utf16_buffer,
10871                                  160 + (SvCUR(utf16_buffer) & 1));
10872             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10873             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10874             if (status < 0) {
10875                 /* Error */
                      /* Store the status so that the next call hits the
                         "called after error" panic above.  */
10876                 IoPAGE(filter) = status;
10877                 return status;
10878             }
10879         }
10880
              /* Decode all complete 16-bit units, appending to the UTF-8
                 buffer.  The buffer is grown by 3 bytes per unit plus one
                 for the NUL stored at *end below.  */
10881         chars = SvCUR(utf16_buffer) >> 1;
10882         have = SvCUR(utf8_buffer);
10883         SvGROW(utf8_buffer, have + chars * 3 + 1);
10884
10885         if (reverse) {
10886             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10887                                          (U8*)SvPVX_const(utf8_buffer) + have,
10888                                          chars * 2, &newlen);
10889         } else {
10890             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10891                                 (U8*)SvPVX_const(utf8_buffer) + have,
10892                                 chars * 2, &newlen);
10893         }
10894         SvCUR_set(utf8_buffer, have + newlen);
10895         *end = '\0';
10896
10897         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10898            it's private to us, and utf16_to_utf8{,reversed} take a
10899            (pointer,length) pair, rather than a NUL-terminated string.  */
              /* Keep an odd trailing octet, moved to the front, for the
                 next round; otherwise the UTF-16 buffer is emptied.  */
10900         if(SvCUR(utf16_buffer) & 1) {
10901             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10902             SvCUR_set(utf16_buffer, 1);
10903         } else {
10904             SvCUR_set(utf16_buffer, 0);
10905         }
10906     }
10907     DEBUG_P(PerlIO_printf(Perl_debug_log,
10908                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10909                           status,
10910                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10911     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10912     return retval;
10913 }
10914
      /*
       * Install S_utf16_textfilter as a source filter and restart lexing on
       * its output.
       *
       * The bytes from s up to PL_bufend are copied into a new SV stored in
       * the filter's IoTOP_GV slot, the filter's own string is set to "",
       * IoLINES records 'reversed' and IoPAGE is set to 1.  PL_linestr is
       * then emptied, refilled with FILTER_READ(0, ...) and flagged UTF-8,
       * and PL_bufend is moved to its new end.
       *
       * Returns the start of PL_linestr's buffer.
       */
10915 static U8 *
10916 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10917 {
10918     SV * const filter_sv = filter_add(S_utf16_textfilter, NULL);
10919
10920     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10921
10922     /* Hand the bytes still sitting in the line buffer over to the filter. */
10923     IoTOP_GV(filter_sv) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10924     sv_setpvs(filter_sv, "");
10925     IoPAGE(filter_sv) = 1; /* Not EOF */
10926     IoLINES(filter_sv) = reversed;
10927
10928     /* A valid pointer has to be returned whatever happens, so the result
10929        of the read is deliberately discarded; the line buffer is flagged
10930        UTF-8 whether or not anything was read.  */
10931     SvCUR_set(PL_linestr, 0);
10932     (void)FILTER_READ(0, PL_linestr, 0);
10933     SvUTF8_on(PL_linestr);
10934
10935     PL_bufend = SvEND(PL_linestr);
10936     return (U8*)SvPVX(PL_linestr);
10937 }
10938 #endif
10939
10940 /*
10941 Returns a pointer to the next character after the parsed
10942 vstring, as well as updating the passed in sv.
10943
10944 Function must be called like
10945
10946         sv = newSV(5);
10947         s = scan_vstring(s,e,sv);
10948
10949 where s and e are the start and end of the string.
10950 The sv should already be large enough to store the vstring
10951 passed in, for performance reasons.
10952
10953 */
10954
10955 char *
10956 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10957 {
10958     dVAR;
          /* pos scans ahead; start remembers where the literal began so the
             original text can be attached as vstring magic at the end.  */
10959     const char *pos = s;
10960     const char *start = s;
10961
10962     PERL_ARGS_ASSERT_SCAN_VSTRING;
10963
10964     if (*pos == 'v') pos++;  /* get past 'v' */
          /* Skip the first component: digits and underscores.  */
10965     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10966         pos++;
          /* NOTE(review): *pos is read here and at the isALPHA() test below
             without checking pos < e; presumably the buffer has a readable
             terminator at e -- confirm against callers.  */
10967     if ( *pos != '.') {
10968         /* this may not be a v-string if followed by => */
10969         const char *next = pos;
10970         while (next < e && isSPACE(*next))
10971             ++next;
10972         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10973             /* return string not v-string */
                  /* sv receives the raw text from s up to pos (a leading
                     'v' included); no magic is attached.  */
10974             sv_setpvn(sv,(char *)s,pos-s);
10975             return (char *)pos;
10976         }
10977     }
10978
          /* When the component is followed by a letter nothing is stored in
             sv and the original s is returned.  */
10979     if (!isALPHA(*pos)) {
10980         U8 tmpbuf[UTF8_MAXBYTES+1];
10981
10982         if (*s == 'v')
10983             s++;  /* get past 'v' */
10984
10985         sv_setpvs(sv, "");
10986
              /* One iteration per dot-separated component: s marks its
                 first character and pos the character just past it.  */
10987         for (;;) {
10988             /* this is atoi() that tolerates underscores */
10989             U8 *tmpend;
10990             UV rev = 0;
10991             const char *end = pos;
10992             UV mult = 1;
                  /* Accumulate the value from the last digit backwards,
                     skipping underscores; a sum that wraps around triggers
                     the WARN_OVERFLOW warning.  */
10993             while (--end >= s) {
10994                 if (*end != '_') {
10995                     const UV orev = rev;
10996                     rev += (*end - '0') * mult;
10997                     mult *= 10;
10998                     if (orev > rev)
10999                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11000                                          "Integer overflow in decimal number");
11001                 }
11002             }
11003 #ifdef EBCDIC
11004             if (rev > 0x7FFFFFFF)
11005                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11006 #endif
11007             /* Append native character for the rev point */
11008             tmpend = uvchr_to_utf8(tmpbuf, rev);
11009             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11010             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11011                  SvUTF8_on(sv);
                  /* Continue only when a '.' is directly followed by a
                     digit; otherwise the v-string ends at pos.  */
11012             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11013                  s = ++pos;
11014             else {
11015                  s = pos;
11016                  break;
11017             }
11018             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11019                  pos++;
11020         }
              /* Attach the literal's source text (start up to pos) to sv as
                 PERL_MAGIC_vstring magic.  */
11021         SvPOK_on(sv);
11022         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11023         SvRMAGICAL_on(sv);
11024     }
11025     return (char *)s;
11026 }
11027
      /*
       * Keyword plugin that never handles a keyword: all three arguments are
       * ignored and KEYWORD_PLUGIN_DECLINE is returned.
       */
11028 int
11029 Perl_keyword_plugin_standard(pTHX_
11030         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11031 {
11032     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11033     PERL_UNUSED_CONTEXT;
11034     PERL_UNUSED_ARG(op_ptr);
11035     PERL_UNUSED_ARG(keyword_len);
11036     PERL_UNUSED_ARG(keyword_ptr);
11037     return KEYWORD_PLUGIN_DECLINE;
11038 }
11039
      /*
       * Run yyparse() on grammar 'gramtype' with 'fakeeof' installed as
       * PL_lex_fakeeof.
       *
       * Before parsing, PL_lex_brackets, PL_lex_allbrackets and
       * PL_lex_fakeeof are saved with SAVEI32/SAVEI8, an XFAKEEOF entry is
       * pushed on PL_lex_brackstack (resized first when more than 100
       * entries are in use) and PL_lex_allbrackets is zeroed.  There is no
       * ENTER/LEAVE pair here; presumably the caller provides the scope.
       *
       * "Parse error" is queued through qerror() when yyparse() returns
       * non-zero and PL_parser->error_count is still zero.
       */
11040 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11041 static void
11042 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11043 {
11044     SAVEI32(PL_lex_brackets);
11045     if (PL_lex_brackets > 100) {
11046         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11047     }
11048     PL_lex_brackstack[PL_lex_brackets] = XFAKEEOF;
11049     PL_lex_brackets++;
11050     SAVEI32(PL_lex_allbrackets);
11051     PL_lex_allbrackets = 0;
11052     SAVEI8(PL_lex_fakeeof);
11053     PL_lex_fakeeof = (U8)fakeeof;
          if (yyparse(gramtype)) {
              /* Only queue an error when none has been counted so far. */
              if (!PL_parser->error_count)
                  qerror(Perl_mess(aTHX_ "Parse error"));
          }
11054 }
11055
      /*
       * Wrap parse_recdescent() in an ENTER/LEAVE pair and return whatever
       * the parse left in PL_eval_root.  PL_eval_root is saved with
       * SAVEVPTR and set to NULL before the parse, so NULL is returned when
       * the parse stores nothing there.
       */
11056 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11057 static OP *
11058 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11059 {
11060     OP *parsed_root;
11061
11062     ENTER;
11063     SAVEVPTR(PL_eval_root);
11064     PL_eval_root = NULL;
11065     parse_recdescent(gramtype, fakeeof);
11066     parsed_root = PL_eval_root;   /* fetch the result before LEAVE */
11067     LEAVE;
          return parsed_root;
11068 }
11069
      /*
       * Parse one expression with grammar GRAMEXPR, using 'fakeeof' as the
       * fake-EOF level.  'flags' may contain only PARSE_OPTIONAL; any other
       * bit croaks.
       *
       * Returns the parsed op tree.  When nothing was parsed, NULL is
       * returned if PARSE_OPTIONAL is set; otherwise "Parse error" is queued
       * (unless an error has already been counted) and a new OP_NULL op is
       * returned.
       */
11070 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11071 static OP *
11072 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11073 {
11074     OP *parsed;
11075     if ((flags & ~PARSE_OPTIONAL) != 0)
11076         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11077     parsed = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11078     if (parsed || (flags & PARSE_OPTIONAL))
11079         return parsed;
11080     /* A mandatory expression is missing: make sure an error is queued and
11081        hand back a null op so the caller still gets a non-null tree.  */
11082     if (!PL_parser->error_count)
11083         qerror(Perl_mess(aTHX_ "Parse error"));
          return newOP(OP_NULL, 0);
11084 }
11085
11086 /*
11087 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11088
11089 Parse a Perl arithmetic expression.  This may contain operators of precedence
11090 down to the bit shift operators.  The expression must be followed (and thus
11091 terminated) either by a comparison or lower-precedence operator or by
11092 something that would normally terminate an expression such as semicolon.
11093 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11094 otherwise it is mandatory.  It is up to the caller to ensure that the
11095 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11096 the source of the code to be parsed and the lexical context for the
11097 expression.
11098
11099 The op tree representing the expression is returned.  If an optional
11100 expression is absent, a null pointer is returned, otherwise the pointer
11101 will be non-null.
11102
11103 If an error occurs in parsing or compilation, in most cases a valid op
11104 tree is returned anyway.  The error is reflected in the parser state,
11105 normally resulting in a single exception at the top level of parsing
11106 which covers all the compilation errors that occurred.  Some compilation
11107 errors, however, will throw an exception immediately.
11108
11109 =cut
11110 */
11111
11112 OP *
11113 Perl_parse_arithexpr(pTHX_ U32 flags)
11114 {
11115     OP * const arith_op = parse_expr(LEX_FAKEEOF_COMPARE, flags);
          return arith_op;
11116 }
11117
11118 /*
11119 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11120
11121 Parse a Perl term expression.  This may contain operators of precedence
11122 down to the assignment operators.  The expression must be followed (and thus
11123 terminated) either by a comma or lower-precedence operator or by
11124 something that would normally terminate an expression such as semicolon.
11125 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11126 otherwise it is mandatory.  It is up to the caller to ensure that the
11127 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11128 the source of the code to be parsed and the lexical context for the
11129 expression.
11130
11131 The op tree representing the expression is returned.  If an optional
11132 expression is absent, a null pointer is returned, otherwise the pointer
11133 will be non-null.
11134
11135 If an error occurs in parsing or compilation, in most cases a valid op
11136 tree is returned anyway.  The error is reflected in the parser state,
11137 normally resulting in a single exception at the top level of parsing
11138 which covers all the compilation errors that occurred.  Some compilation
11139 errors, however, will throw an exception immediately.
11140
11141 =cut
11142 */
11143
11144 OP *
11145 Perl_parse_termexpr(pTHX_ U32 flags)
11146 {
11147     OP * const term_op = parse_expr(LEX_FAKEEOF_COMMA, flags);
          return term_op;
11148 }
11149
11150 /*
11151 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11152
11153 Parse a Perl list expression.  This may contain operators of precedence
11154 down to the comma operator.  The expression must be followed (and thus
11155 terminated) either by a low-precedence logic operator such as C<or> or by
11156 something that would normally terminate an expression such as semicolon.
11157 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11158 otherwise it is mandatory.  It is up to the caller to ensure that the
11159 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11160 the source of the code to be parsed and the lexical context for the
11161 expression.
11162
11163 The op tree representing the expression is returned.  If an optional
11164 expression is absent, a null pointer is returned, otherwise the pointer
11165 will be non-null.
11166
11167 If an error occurs in parsing or compilation, in most cases a valid op
11168 tree is returned anyway.  The error is reflected in the parser state,
11169 normally resulting in a single exception at the top level of parsing
11170 which covers all the compilation errors that occurred.  Some compilation
11171 errors, however, will throw an exception immediately.
11172
11173 =cut
11174 */
11175
11176 OP *
11177 Perl_parse_listexpr(pTHX_ U32 flags)
11178 {
11179     OP * const list_op = parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
          return list_op;
11180 }
11181
11182 /*
11183 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11184
11185 Parse a single complete Perl expression.  This allows the full
11186 expression grammar, including the lowest-precedence operators such
11187 as C<or>.  The expression must be followed (and thus terminated) by a
11188 token that an expression would normally be terminated by: end-of-file,
11189 closing bracketing punctuation, semicolon, or one of the keywords that
11190 signals a postfix expression-statement modifier.  If I<flags> includes
11191 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11192 mandatory.  It is up to the caller to ensure that the dynamic parser
11193 state (L</PL_parser> et al) is correctly set to reflect the source of
11194 the code to be parsed and the lexical context for the expression.
11195
11196 The op tree representing the expression is returned.  If an optional
11197 expression is absent, a null pointer is returned, otherwise the pointer
11198 will be non-null.
11199
11200 If an error occurs in parsing or compilation, in most cases a valid op
11201 tree is returned anyway.  The error is reflected in the parser state,
11202 normally resulting in a single exception at the top level of parsing
11203 which covers all the compilation errors that occurred.  Some compilation
11204 errors, however, will throw an exception immediately.
11205
11206 =cut
11207 */
11208
11209 OP *
11210 Perl_parse_fullexpr(pTHX_ U32 flags)
11211 {
11212     OP * const full_op = parse_expr(LEX_FAKEEOF_NONEXPR, flags);
          return full_op;
11213 }
11214
11215 /*
11216 =for apidoc Amx|OP *|parse_block|U32 flags
11217
11218 Parse a single complete Perl code block.  This consists of an opening
11219 brace, a sequence of statements, and a closing brace.  The block
11220 constitutes a lexical scope, so C<my> variables and various compile-time
11221 effects can be contained within it.  It is up to the caller to ensure
11222 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11223 reflect the source of the code to be parsed and the lexical context for
11224 the statement.
11225
11226 The op tree representing the code block is returned.  This is always a
11227 real op, never a null pointer.  It will normally be a C<lineseq> list,
11228 including C<nextstate> or equivalent ops.  No ops to construct any kind
11229 of runtime scope are included by virtue of it being a block.
11230
11231 If an error occurs in parsing or compilation, in most cases a valid op
11232 tree (most likely null) is returned anyway.  The error is reflected in
11233 the parser state, normally resulting in a single exception at the top
11234 level of parsing which covers all the compilation errors that occurred.
11235 Some compilation errors, however, will throw an exception immediately.
11236
11237 The I<flags> parameter is reserved for future use, and must always
11238 be zero.
11239
11240 =cut
11241 */
11242
11243 OP *
11244 Perl_parse_block(pTHX_ U32 flags)
11245 {
11246     /* No flag bits are defined for this entry point. */
11247     if (flags != 0) {
11248         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
          }
          return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11249 }
11250
11251 /*
11252 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11253
11254 Parse a single unadorned Perl statement.  This may be a normal imperative
11255 statement or a declaration that has compile-time effect.  It does not
11256 include any label or other affixture.  It is up to the caller to ensure
11257 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11258 reflect the source of the code to be parsed and the lexical context for
11259 the statement.
11260
11261 The op tree representing the statement is returned.  This may be a
11262 null pointer if the statement is null, for example if it was actually
11263 a subroutine definition (which has compile-time side effects).  If not
11264 null, it will be ops directly implementing the statement, suitable to
11265 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11266 equivalent op (except for those embedded in a scope contained entirely
11267 within the statement).
11268
11269 If an error occurs in parsing or compilation, in most cases a valid op
11270 tree (most likely null) is returned anyway.  The error is reflected in
11271 the parser state, normally resulting in a single exception at the top
11272 level of parsing which covers all the compilation errors that occurred.
11273 Some compilation errors, however, will throw an exception immediately.
11274
11275 The I<flags> parameter is reserved for future use, and must always
11276 be zero.
11277
11278 =cut
11279 */
11280
11281 OP *
11282 Perl_parse_barestmt(pTHX_ U32 flags)
11283 {
11284     /* No flag bits are defined for this entry point. */
11285     if (flags != 0) {
11286         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
          }
          return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11287 }
11288
11289 /*
11290 =for apidoc Amx|SV *|parse_label|U32 flags
11291
11292 Parse a single label, possibly optional, of the type that may prefix a
11293 Perl statement.  It is up to the caller to ensure that the dynamic parser
11294 state (L</PL_parser> et al) is correctly set to reflect the source of
11295 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11296 label is optional, otherwise it is mandatory.
11297
11298 The name of the label is returned in the form of a fresh scalar.  If an
11299 optional label is absent, a null pointer is returned.
11300
11301 If an error occurs in parsing, which can only occur if the label is
11302 mandatory, a valid label is returned anyway.  The error is reflected in
11303 the parser state, normally resulting in a single exception at the top
11304 level of parsing which covers all the compilation errors that occurred.
11305
11306 =cut
11307 */
11308
11309 SV *
11310 Perl_parse_label(pTHX_ U32 flags)
11311 {
11312     if (flags & ~PARSE_OPTIONAL)
11313         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11314     if (PL_lex_state == LEX_KNOWNEXT) {
11315         PL_parser->yychar = yylex();
11316         if (PL_parser->yychar == LABEL) {
11317             char *lpv = pl_yylval.pval;
11318             STRLEN llen = strlen(lpv);
11319             SV *lsv;
11320             PL_parser->yychar = YYEMPTY;
11321             lsv = newSV_type(SVt_PV);
11322             SvPV_set(lsv, lpv);
11323             SvCUR_set(lsv, llen);
11324             SvLEN_set(lsv, llen+1);
11325             SvPOK_on(lsv);
11326             return lsv;
11327         } else {
11328             yyunlex();
11329             goto no_label;
11330         }
11331     } else {
11332         char *s, *t;
11333         U8 c;
11334         STRLEN wlen, bufptr_pos;
11335         lex_read_space(0);
11336         t = s = PL_bufptr;
11337         c = (U8)*s;
11338         if (!isIDFIRST_A(c))
11339             goto no_label;
11340         do {
11341             c = (U8)*++t;
11342         } while(isWORDCHAR_A(c));
11343         wlen = t - s;
11344         if (word_takes_any_delimeter(s, wlen))
11345             goto no_label;
11346         bufptr_pos = s - SvPVX(PL_linestr);
11347         PL_bufptr = t;
11348         lex_read_space(LEX_KEEP_PREVIOUS);
11349         t = PL_bufptr;
11350         s = SvPVX(PL_linestr) + bufptr_pos;
11351         if (t[0] == ':' && t[1] != ':') {
11352             PL_oldoldbufptr = PL_oldbufptr;
11353             PL_oldbufptr = s;
11354             PL_bufptr = t+1;
11355             return newSVpvn(s, wlen);
11356         } else {
11357             PL_bufptr = s;
11358             no_label:
11359             if (flags & PARSE_OPTIONAL) {
11360                 return NULL;
11361             } else {
11362                 qerror(Perl_mess(aTHX_ "Parse error"));
11363                 return newSVpvs("x");
11364             }
11365         }
11366     }
11367 }
11368
11369 /*
11370 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11371
11372 Parse a single complete Perl statement.  This may be a normal imperative
11373 statement or a declaration that has compile-time effect, and may include
11374 optional labels.  It is up to the caller to ensure that the dynamic
11375 parser state (L</PL_parser> et al) is correctly set to reflect the source
11376 of the code to be parsed and the lexical context for the statement.
11377
11378 The op tree representing the statement is returned.  This may be a
11379 null pointer if the statement is null, for example if it was actually
11380 a subroutine definition (which has compile-time side effects).  If not
11381 null, it will be the result of a L</newSTATEOP> call, normally including
11382 a C<nextstate> or equivalent op.
11383
11384 If an error occurs in parsing or compilation, in most cases a valid op
11385 tree (most likely null) is returned anyway.  The error is reflected in
11386 the parser state, normally resulting in a single exception at the top
11387 level of parsing which covers all the compilation errors that occurred.
11388 Some compilation errors, however, will throw an exception immediately.
11389
11390 The I<flags> parameter is reserved for future use, and must always
11391 be zero.
11392
11393 =cut
11394 */
11395
11396 OP *
11397 Perl_parse_fullstmt(pTHX_ U32 flags)
11398 {
11399     if (flags)
11400         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11401     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11402 }
11403
11404 /*
11405 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11406
11407 Parse a sequence of zero or more Perl statements.  These may be normal
11408 imperative statements, including optional labels, or declarations
11409 that have compile-time effect, or any mixture thereof.  The statement
11410 sequence ends when a closing brace or end-of-file is encountered in a
11411 place where a new statement could have validly started.  It is up to
11412 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11413 is correctly set to reflect the source of the code to be parsed and the
11414 lexical context for the statements.
11415
11416 The op tree representing the statement sequence is returned.  This may
11417 be a null pointer if the statements were all null, for example if there
11418 were no statements or if there were only subroutine definitions (which
11419 have compile-time side effects).  If not null, it will be a C<lineseq>
11420 list, normally including C<nextstate> or equivalent ops.
11421
11422 If an error occurs in parsing or compilation, in most cases a valid op
11423 tree is returned anyway.  The error is reflected in the parser state,
11424 normally resulting in a single exception at the top level of parsing
11425 which covers all the compilation errors that occurred.  Some compilation
11426 errors, however, will throw an exception immediately.
11427
11428 The I<flags> parameter is reserved for future use, and must always
11429 be zero.
11430
11431 =cut
11432 */
11433
11434 OP *
11435 Perl_parse_stmtseq(pTHX_ U32 flags)
11436 {
11437     OP *stmtseqop;
11438     I32 c;
11439     if (flags)
11440         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11441     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11442     c = lex_peek_unichar(0);
11443     if (c != -1 && c != /*{*/'}')
11444         qerror(Perl_mess(aTHX_ "Parse error"));
11445     return stmtseqop;
11446 }
11447
11448 void
11449 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11450 {
11451     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11452     deprecate("qw(...) as parentheses");
11453     force_next((4<<24)|')');
11454     if (qwlist->op_type == OP_STUB) {
11455         op_free(qwlist);
11456     }
11457     else {
11458         start_force(PL_curforce);
11459         NEXTVAL_NEXTTOKE.opval = qwlist;
11460         force_next(THING);
11461     }
11462     force_next((2<<24)|'(');
11463 }
11464
11465 /*
11466  * Local variables:
11467  * c-indentation-style: bsd
11468  * c-basic-offset: 4
11469  * indent-tabs-mode: t
11470  * End:
11471  *
11472  * ex: set ts=8 sts=4 sw=4 noet:
11473  */