toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN1         : not used, except for not, which isn't a UNIOP
 228  * BOop         : bitwise or or xor
 229  * BAop         : bitwise and
 230  * SHop         : shift operator
 231  * PWop         : power operator
 232  * PMop         : pattern-matching operator
 233  * Aop          : addition-level operator
 234  * Mop          : multiplication-level operator
 235  * Eop          : equality-testing operator
 236  * Rop          : relational operator <= != gt
 237  *
 238  * Also see LOP and lop() below.
 239  */
 240
 241 #ifdef DEBUGGING /* Serve -DT. */
 242 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 243 #else
 244 #   define REPORT(retval) (retval)
 245 #endif
 246
 247 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 248 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 249 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 250 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 253 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 254 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 255 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 256 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 257 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 258 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 259 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 260 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 261 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 262 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 263 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 264 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 265 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 266 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 267
 268 /* This bit of chicanery makes a unary function followed by
 269  * a parenthesis into a function with one argument, highest precedence.
 270  * The UNIDOR macro is for unary functions that can be followed by the //
 271  * operator (such as C<shift // 0>).
 272  */
 273 #define UNI2(f,x) { \
 274         pl_yylval.ival = f; \
 275         PL_expect = x; \
 276         PL_bufptr = s; \
 277         PL_last_uni = PL_oldbufptr; \
 278         PL_last_lop_op = f; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284 #define UNI(f)    UNI2(f,XTERM)
 285 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 286
 287 #define UNIBRACK(f) { \
 288         pl_yylval.ival = f; \
 289         PL_bufptr = s; \
 290         PL_last_uni = PL_oldbufptr; \
 291         if (*s == '(') \
 292             return REPORT( (int)FUNC1 ); \
 293         s = PEEKSPACE(s); \
 294         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 295         }
 296
 297 /* grandfather return to old style */
 298 #define OLDLOP(f) \
 299         do { \
 300             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 301                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 302             pl_yylval.ival = (f); \
 303             PL_expect = XTERM; \
 304             PL_bufptr = s; \
 305             return (int)LSTOP; \
 306         } while(0)
 307
 308 #ifdef DEBUGGING
 309
 310 /* how to interpret the pl_yylval associated with the token */
 311 enum token_type {
 312     TOKENTYPE_NONE,
 313     TOKENTYPE_IVAL,
 314     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 315     TOKENTYPE_PVAL,
 316     TOKENTYPE_OPVAL,
 317     TOKENTYPE_GVVAL
 318 };
 319
 320 static struct debug_tokens {
 321     const int token;
 322     enum token_type type;
 323     const char *name;
 324 } const debug_tokens[] =
 325 {
 326     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 327     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 328     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 329     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 330     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 331     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 332     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 333     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 334     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 335     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 336     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 337     { DO,               TOKENTYPE_NONE,         "DO" },
 338     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 339     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 340     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 341     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 342     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 343     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 344     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 345     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 346     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 347     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 348     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 349     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 350     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 351     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 352     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 353     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 354     { IF,               TOKENTYPE_IVAL,         "IF" },
 355     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 356     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 357     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 358     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 359     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 360     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 361     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 362     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 363     { MY,               TOKENTYPE_IVAL,         "MY" },
 364     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 365     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 366     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 367     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 368     { OROR,             TOKENTYPE_NONE,         "OROR" },
 369     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 370     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 371     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 372     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 373     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 374     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 375     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 376     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 377     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 378     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 379     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 380     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 381     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 382     { SUB,              TOKENTYPE_NONE,         "SUB" },
 383     { THING,            TOKENTYPE_OPVAL,        "THING" },
 384     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 385     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 386     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 387     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 388     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 389     { USE,              TOKENTYPE_IVAL,         "USE" },
 390     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 391     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 392     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 393     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 394     { 0,                TOKENTYPE_NONE,         NULL }
 395 };
 396
 397 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 398
 399 STATIC int
 400 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 401 {
 402     dVAR;
 403
 404     PERL_ARGS_ASSERT_TOKEREPORT;
 405
 406     if (DEBUG_T_TEST) {
 407         const char *name = NULL;
 408         enum token_type type = TOKENTYPE_NONE;
 409         const struct debug_tokens *p;
 410         SV* const report = newSVpvs("<== ");
 411
 412         for (p = debug_tokens; p->token; p++) {
 413             if (p->token == (int)rv) {
 414                 name = p->name;
 415                 type = p->type;
 416                 break;
 417             }
 418         }
 419         if (name)
 420             Perl_sv_catpv(aTHX_ report, name);
 421         else if ((char)rv > ' ' && (char)rv < '~')
 422             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 423         else if (!rv)
 424             sv_catpvs(report, "EOF");
 425         else
 426             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 427         switch (type) {
 428         case TOKENTYPE_NONE:
 429         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 430             break;
 431         case TOKENTYPE_IVAL:
 432             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 433             break;
 434         case TOKENTYPE_OPNUM:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 436                                     PL_op_name[lvalp->ival]);
 437             break;
 438         case TOKENTYPE_PVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 440             break;
 441         case TOKENTYPE_OPVAL:
 442             if (lvalp->opval) {
 443                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 444                                     PL_op_name[lvalp->opval->op_type]);
 445                 if (lvalp->opval->op_type == OP_CONST) {
 446                     Perl_sv_catpvf(aTHX_ report, " %s",
 447                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 448                 }
 449
 450             }
 451             else
 452                 sv_catpvs(report, "(opval=null)");
 453             break;
 454         }
 455         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 456     };
 457     return (int)rv;
 458 }
 459
 460
 461 /* print the buffer with suitable escapes */
 462
 463 STATIC void
 464 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 465 {
 466     SV* const tmp = newSVpvs("");
 467
 468     PERL_ARGS_ASSERT_PRINTBUF;
 469
 470     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 471     SvREFCNT_dec(tmp);
 472 }
 473
 474 #endif
 475
 476 static int
 477 S_deprecate_commaless_var_list(pTHX) {
 478     PL_expect = XTERM;
 479     deprecate("comma-less variable list");
 480     return REPORT(','); /* grandfather non-comma-format format */
 481 }
 482
 483 /*
 484  * S_ao
 485  *
 486  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 487  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 488  */
 489
 490 STATIC int
 491 S_ao(pTHX_ int toketype)
 492 {
 493     dVAR;
 494     if (*PL_bufptr == '=') {
 495         PL_bufptr++;
 496         if (toketype == ANDAND)
 497             pl_yylval.ival = OP_ANDASSIGN;
 498         else if (toketype == OROR)
 499             pl_yylval.ival = OP_ORASSIGN;
 500         else if (toketype == DORDOR)
 501             pl_yylval.ival = OP_DORASSIGN;
 502         toketype = ASSIGNOP;
 503     }
 504     return toketype;
 505 }
 506
 507 /*
 508  * S_no_op
 509  * When Perl expects an operator and finds something else, no_op
 510  * prints the warning.  It always prints "<something> found where
 511  * operator expected.  It prints "Missing semicolon on previous line?"
 512  * if the surprise occurs at the start of the line.  "do you need to
 513  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 514  * where the compiler doesn't know if foo is a method call or a function.
 515  * It prints "Missing operator before end of line" if there's nothing
 516  * after the missing operator, or "... before <...>" if there is something
 517  * after the missing operator.
 518  */
 519
 520 STATIC void
 521 S_no_op(pTHX_ const char *const what, char *s)
 522 {
 523     dVAR;
 524     char * const oldbp = PL_bufptr;
 525     const bool is_first = (PL_oldbufptr == PL_linestart);
 526
 527     PERL_ARGS_ASSERT_NO_OP;
 528
 529     if (!s)
 530         s = oldbp;
 531     else
 532         PL_bufptr = s;
 533     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 534     if (ckWARN_d(WARN_SYNTAX)) {
 535         if (is_first)
 536             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 537                     "\t(Missing semicolon on previous line?)\n");
 538         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 539             const char *t;
 540             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 541                 NOOP;
 542             if (t < PL_bufptr && isSPACE(*t))
 543                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                         "\t(Do you need to predeclare %.*s?)\n",
 545                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 546         }
 547         else {
 548             assert(s >= oldbp);
 549             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 550                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 551         }
 552     }
 553     PL_bufptr = oldbp;
 554 }
 555
 556 /*
 557  * S_missingterm
 558  * Complain about missing quote/regexp/heredoc terminator.
 559  * If it's called with NULL then it cauterizes the line buffer.
 560  * If we're in a delimited string and the delimiter is a control
 561  * character, it's reformatted into a two-char sequence like ^C.
 562  * This is fatal.
 563  */
 564
 565 STATIC void
 566 S_missingterm(pTHX_ char *s)
 567 {
 568     dVAR;
 569     char tmpbuf[3];
 570     char q;
 571     if (s) {
 572         char * const nl = strrchr(s,'\n');
 573         if (nl)
 574             *nl = '\0';
 575     }
 576     else if (isCNTRL(PL_multi_close)) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581     }
 582     else {
 583         *tmpbuf = (char)PL_multi_close;
 584         tmpbuf[1] = '\0';
 585         s = tmpbuf;
 586     }
 587     q = strchr(s,'"') ? '\'' : '"';
 588     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 589 }
 590
 591 /*
 592  * Check whether the named feature is enabled.
 593  */
 594 bool
 595 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 596 {
 597     dVAR;
 598     HV * const hinthv = GvHV(PL_hintgv);
 599     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 600
 601     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 602
 603     if (namelen > MAX_FEATURE_LEN)
 604         return FALSE;
 605     memcpy(&he_name[8], name, namelen);
 606
 607     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 608 }
 609
 610 /*
 611  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 612  * utf16-to-utf8-reversed.
 613  */
 614
 615 #ifdef PERL_CR_FILTER
 616 static void
 617 strip_return(SV *sv)
 618 {
 619     register const char *s = SvPVX_const(sv);
 620     register const char * const e = s + SvCUR(sv);
 621
 622     PERL_ARGS_ASSERT_STRIP_RETURN;
 623
 624     /* outer loop optimized to do nothing if there are no CR-LFs */
 625     while (s < e) {
 626         if (*s++ == '\r' && *s == '\n') {
 627             /* hit a CR-LF, need to copy the rest */
 628             register char *d = s - 1;
 629             *d++ = *s++;
 630             while (s < e) {
 631                 if (*s == '\r' && s[1] == '\n')
 632                     s++;
 633                 *d++ = *s++;
 634             }
 635             SvCUR(sv) -= s - d;
 636             return;
 637         }
 638     }
 639 }
 640
 641 STATIC I32
 642 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 643 {
 644     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 645     if (count > 0 && !maxlen)
 646         strip_return(sv);
 647     return count;
 648 }
 649 #endif
 650
 651 /*
 652 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 653
 654 Creates and initialises a new lexer/parser state object, supplying
 655 a context in which to lex and parse from a new source of Perl code.
 656 A pointer to the new state object is placed in L</PL_parser>.  An entry
 657 is made on the save stack so that upon unwinding the new state object
 658 will be destroyed and the former value of L</PL_parser> will be restored.
 659 Nothing else need be done to clean up the parsing context.
 660
 661 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 662 non-null, provides a string (in SV form) containing code to be parsed.
 663 A copy of the string is made, so subsequent modification of I<line>
 664 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 665 from which code will be read to be parsed.  If both are non-null, the
 666 code in I<line> comes first and must consist of complete lines of input,
 667 and I<rsfp> supplies the remainder of the source.
 668
 669 The I<flags> parameter is reserved for future use, and must always
 670 be zero.
 671
 672 =cut
 673 */
 674
 675 void
 676 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 677 {
 678     dVAR;
 679     const char *s = NULL;
 680     STRLEN len;
 681     yy_parser *parser, *oparser;
 682     if (flags)
 683         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 684
 685     /* create and initialise a parser */
 686
 687     Newxz(parser, 1, yy_parser);
 688     parser->old_parser = oparser = PL_parser;
 689     PL_parser = parser;
 690
 691     parser->stack = NULL;
 692     parser->ps = NULL;
 693     parser->stack_size = 0;
 694
 695     /* on scope exit, free this parser and restore any outer one */
 696     SAVEPARSER(parser);
 697     parser->saved_curcop = PL_curcop;
 698
 699     /* initialise lexer state */
 700
 701 #ifdef PERL_MAD
 702     parser->curforce = -1;
 703 #else
 704     parser->nexttoke = 0;
 705 #endif
 706     parser->error_count = oparser ? oparser->error_count : 0;
 707     parser->copline = NOLINE;
 708     parser->lex_state = LEX_NORMAL;
 709     parser->expect = XSTATE;
 710     parser->rsfp = rsfp;
 711     parser->rsfp_filters = newAV();
 712
 713     Newx(parser->lex_brackstack, 120, char);
 714     Newx(parser->lex_casestack, 12, char);
 715     *parser->lex_casestack = '\0';
 716
 717     if (line) {
 718         s = SvPV_const(line, len);
 719     } else {
 720         len = 0;
 721     }
 722
 723     if (!len) {
 724         parser->linestr = newSVpvs("\n;");
 725     } else {
 726         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 727         if (s[len-1] != ';')
 728             sv_catpvs(parser->linestr, "\n;");
 729     }
 730     parser->oldoldbufptr =
 731         parser->oldbufptr =
 732         parser->bufptr =
 733         parser->linestart = SvPVX(parser->linestr);
 734     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 735     parser->last_lop = parser->last_uni = NULL;
 736
 737     parser->in_pod = 0;
 738 }
 739
 740
 741 /* delete a parser object */
 742
 743 void
 744 Perl_parser_free(pTHX_  const yy_parser *parser)
 745 {
 746     PERL_ARGS_ASSERT_PARSER_FREE;
 747
 748     PL_curcop = parser->saved_curcop;
 749     SvREFCNT_dec(parser->linestr);
 750
 751     if (parser->rsfp == PerlIO_stdin())
 752         PerlIO_clearerr(parser->rsfp);
 753     else if (parser->rsfp && (!parser->old_parser ||
 754                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 755         PerlIO_close(parser->rsfp);
 756     SvREFCNT_dec(parser->rsfp_filters);
 757
 758     Safefree(parser->lex_brackstack);
 759     Safefree(parser->lex_casestack);
 760     PL_parser = parser->old_parser;
 761     Safefree(parser);
 762 }
 763
 764
 765 /*
 766 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 767
 768 Buffer scalar containing the chunk currently under consideration of the
 769 text currently being lexed.  This is always a plain string scalar (for
 770 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 771 normal scalar means; instead refer to the buffer directly by the pointer
 772 variables described below.
 773
 774 The lexer maintains various C<char*> pointers to things in the
 775 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 776 reallocated, all of these pointers must be updated.  Don't attempt to
 777 do this manually, but rather use L</lex_grow_linestr> if you need to
 778 reallocate the buffer.
 779
 780 The content of the text chunk in the buffer is commonly exactly one
 781 complete line of input, up to and including a newline terminator,
 782 but there are situations where it is otherwise.  The octets of the
 783 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 784 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 785 flag on this scalar, which may disagree with it.
 786
 787 For direct examination of the buffer, the variable
 788 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 789 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 790 of these pointers is usually preferable to examination of the scalar
 791 through normal scalar means.
 792
 793 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 794
 795 Direct pointer to the end of the chunk of text currently being lexed, the
 796 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 797 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 798 always located at the end of the buffer, and does not count as part of
 799 the buffer's contents.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 802
 803 Points to the current position of lexing inside the lexer buffer.
 804 Characters around this point may be freely examined, within
 805 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 806 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 807 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 808
 809 Lexing code (whether in the Perl core or not) moves this pointer past
 810 the characters that it consumes.  It is also expected to perform some
 811 bookkeeping whenever a newline character is consumed.  This movement
 812 can be more conveniently performed by the function L</lex_read_to>,
 813 which handles newlines appropriately.
 814
 815 Interpretation of the buffer's octets can be abstracted out by
 816 using the slightly higher-level functions L</lex_peek_unichar> and
 817 L</lex_read_unichar>.
 818
 819 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 820
 821 Points to the start of the current line inside the lexer buffer.
 822 This is useful for indicating at which column an error occurred, and
 823 not much else.  This must be updated by any lexing code that consumes
 824 a newline; the function L</lex_read_to> handles this detail.
 825
 826 =cut
 827 */
 828
 829 /*
 830 =for apidoc Amx|bool|lex_bufutf8
 831
 832 Indicates whether the octets in the lexer buffer
 833 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 834 of Unicode characters.  If not, they should be interpreted as Latin-1
 835 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 836
 837 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 838 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 839 encoding.
 840
 841 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 842 is significant, but not the whole story regarding the input character
 843 encoding.  Normally, when a file is being read, the scalar contains octets
 844 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 845 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 846 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 847 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 848 is in effect.  This logic may change in the future; use this function
 849 instead of implementing the logic yourself.
 850
 851 =cut
 852 */
 853
 854 bool
 855 Perl_lex_bufutf8(pTHX)
 856 {
 857     return UTF;
 858 }
 859
 860 /*
 861 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 862
 863 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 864 at least I<len> octets (including terminating NUL).  Returns a
 865 pointer to the reallocated buffer.  This is necessary before making
 866 any direct modification of the buffer that would increase its length.
 867 L</lex_stuff_pvn> provides a more convenient way to insert text into
 868 the buffer.
 869
 870 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 871 this function updates all of the lexer's variables that point directly
 872 into the buffer.
 873
 874 =cut
 875 */
 876
 877 char *
 878 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 879 {
 880     SV *linestr;
 881     char *buf;
 882     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 883     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 884     linestr = PL_parser->linestr;
 885     buf = SvPVX(linestr);
 886     if (len <= SvLEN(linestr))
 887         return buf;
 888     bufend_pos = PL_parser->bufend - buf;
 889     bufptr_pos = PL_parser->bufptr - buf;
 890     oldbufptr_pos = PL_parser->oldbufptr - buf;
 891     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 892     linestart_pos = PL_parser->linestart - buf;
 893     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 894     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 895     buf = sv_grow(linestr, len);
 896     PL_parser->bufend = buf + bufend_pos;
 897     PL_parser->bufptr = buf + bufptr_pos;
 898     PL_parser->oldbufptr = buf + oldbufptr_pos;
 899     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 900     PL_parser->linestart = buf + linestart_pos;
 901     if (PL_parser->last_uni)
 902         PL_parser->last_uni = buf + last_uni_pos;
 903     if (PL_parser->last_lop)
 904         PL_parser->last_lop = buf + last_lop_pos;
 905     return buf;
 906 }
 907
 908 /*
 909 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 910
 911 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 912 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 913 reallocating the buffer if necessary.  This means that lexing code that
 914 runs later will see the characters as if they had appeared in the input.
 915 It is not recommended to do this as part of normal parsing, and most
 916 uses of this facility run the risk of the inserted characters being
 917 interpreted in an unintended manner.
 918
 919 The string to be inserted is represented by I<len> octets starting
 920 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 921 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 922 The characters are recoded for the lexer buffer, according to how the
 923 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 924 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 925 function is more convenient.
 926
 927 =cut
 928 */
 929
 930 void
 931 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 932 {
 933     dVAR;
 934     char *bufptr;
 935     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 936     if (flags & ~(LEX_STUFF_UTF8))
 937         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 938     if (UTF) {
 939         if (flags & LEX_STUFF_UTF8) {
 940             goto plain_copy;
 941         } else {
 942             STRLEN highhalf = 0;
 943             const char *p, *e = pv+len;
 944             for (p = pv; p != e; p++)
 945                 highhalf += !!(((U8)*p) & 0x80);
 946             if (!highhalf)
 947                 goto plain_copy;
 948             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 949             bufptr = PL_parser->bufptr;
 950             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 951             SvCUR_set(PL_parser->linestr,
 952                 SvCUR(PL_parser->linestr) + len+highhalf);
 953             PL_parser->bufend += len+highhalf;
 954             for (p = pv; p != e; p++) {
 955                 U8 c = (U8)*p;
 956                 if (c & 0x80) {
 957                     *bufptr++ = (char)(0xc0 | (c >> 6));
 958                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 959                 } else {
 960                     *bufptr++ = (char)c;
 961                 }
 962             }
 963         }
 964     } else {
 965         if (flags & LEX_STUFF_UTF8) {
 966             STRLEN highhalf = 0;
 967             const char *p, *e = pv+len;
 968             for (p = pv; p != e; p++) {
 969                 U8 c = (U8)*p;
 970                 if (c >= 0xc4) {
 971                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 972                                 "non-Latin-1 character into Latin-1 input");
 973                 } else if (c >= 0xc2 && p+1 != e &&
 974                             (((U8)p[1]) & 0xc0) == 0x80) {
 975                     p++;
 976                     highhalf++;
 977                 } else if (c >= 0x80) {
 978                     /* malformed UTF-8 */
 979                     ENTER;
 980                     SAVESPTR(PL_warnhook);
 981                     PL_warnhook = PERL_WARNHOOK_FATAL;
 982                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 983                     LEAVE;
 984                 }
 985             }
 986             if (!highhalf)
 987                 goto plain_copy;
 988             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 989             bufptr = PL_parser->bufptr;
 990             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 991             SvCUR_set(PL_parser->linestr,
 992                 SvCUR(PL_parser->linestr) + len-highhalf);
 993             PL_parser->bufend += len-highhalf;
 994             for (p = pv; p != e; p++) {
 995                 U8 c = (U8)*p;
 996                 if (c & 0x80) {
 997                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
 998                     p++;
 999                 } else {
1000                     *bufptr++ = (char)c;
1001                 }
1002             }
1003         } else {
1004             plain_copy:
1005             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1006             bufptr = PL_parser->bufptr;
1007             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1008             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1009             PL_parser->bufend += len;
1010             Copy(pv, bufptr, len, char);
1011         }
1012     }
1013 }
1014
1015 /*
1016 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1017
1018 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1019 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1020 reallocating the buffer if necessary.  This means that lexing code that
1021 runs later will see the characters as if they had appeared in the input.
1022 It is not recommended to do this as part of normal parsing, and most
1023 uses of this facility run the risk of the inserted characters being
1024 interpreted in an unintended manner.
1025
1026 The string to be inserted is represented by octets starting at I<pv>
1027 and continuing to the first nul.  These octets are interpreted as either
1028 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1029 in I<flags>.  The characters are recoded for the lexer buffer, according
1030 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1031 If it is not convenient to nul-terminate a string to be inserted, the
1032 L</lex_stuff_pvn> function is more appropriate.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1039 {
1040     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1041     lex_stuff_pvn(pv, strlen(pv), flags);
1042 }
1043
1044 /*
1045 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1046
1047 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049 reallocating the buffer if necessary.  This means that lexing code that
1050 runs later will see the characters as if they had appeared in the input.
1051 It is not recommended to do this as part of normal parsing, and most
1052 uses of this facility run the risk of the inserted characters being
1053 interpreted in an unintended manner.
1054
1055 The string to be inserted is the string value of I<sv>.  The characters
1056 are recoded for the lexer buffer, according to how the buffer is currently
1057 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1058 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1059 need to construct a scalar.
1060
1061 =cut
1062 */
1063
1064 void
1065 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1066 {
1067     char *pv;
1068     STRLEN len;
1069     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1070     if (flags)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1072     pv = SvPV(sv, len);
1073     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1074 }
1075
1076 /*
1077 =for apidoc Amx|void|lex_unstuff|char *ptr
1078
1079 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1080 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1081 This hides the discarded text from any lexing code that runs later,
1082 as if the text had never appeared.
1083
1084 This is not the normal way to consume lexed text.  For that, use
1085 L</lex_read_to>.
1086
1087 =cut
1088 */
1089
1090 void
1091 Perl_lex_unstuff(pTHX_ char *ptr)
1092 {
1093     char *buf, *bufend;
1094     STRLEN unstuff_len;
1095     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1096     buf = PL_parser->bufptr;
1097     if (ptr < buf)
1098         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1099     if (ptr == buf)
1100         return;
1101     bufend = PL_parser->bufend;
1102     if (ptr > bufend)
1103         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1104     unstuff_len = ptr - buf;
1105     Move(ptr, buf, bufend+1-ptr, char);
1106     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1107     PL_parser->bufend = bufend - unstuff_len;
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_read_to|char *ptr
1112
1113 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1114 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1115 performing the correct bookkeeping whenever a newline character is passed.
1116 This is the normal way to consume lexed text.
1117
1118 Interpretation of the buffer's octets can be abstracted out by
1119 using the slightly higher-level functions L</lex_peek_unichar> and
1120 L</lex_read_unichar>.
1121
1122 =cut
1123 */
1124
1125 void
1126 Perl_lex_read_to(pTHX_ char *ptr)
1127 {
1128     char *s;
1129     PERL_ARGS_ASSERT_LEX_READ_TO;
1130     s = PL_parser->bufptr;
1131     if (ptr < s || ptr > PL_parser->bufend)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1133     for (; s != ptr; s++)
1134         if (*s == '\n') {
1135             CopLINE_inc(PL_curcop);
1136             PL_parser->linestart = s+1;
1137         }
1138     PL_parser->bufptr = ptr;
1139 }
1140
1141 /*
1142 =for apidoc Amx|void|lex_discard_to|char *ptr
1143
1144 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1145 up to I<ptr>.  The remaining content of the buffer will be moved, and
1146 all pointers into the buffer updated appropriately.  I<ptr> must not
1147 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1148 it is not permitted to discard text that has yet to be lexed.
1149
1150 Normally it is not necessarily to do this directly, because it suffices to
1151 use the implicit discarding behaviour of L</lex_next_chunk> and things
1152 based on it.  However, if a token stretches across multiple lines,
1153 and the lexing code has kept multiple lines of text in the buffer for
1154 that purpose, then after completion of the token it would be wise to
1155 explicitly discard the now-unneeded earlier lines, to avoid future
1156 multi-line tokens growing the buffer without bound.
1157
1158 =cut
1159 */
1160
1161 void
1162 Perl_lex_discard_to(pTHX_ char *ptr)
1163 {
1164     char *buf;
1165     STRLEN discard_len;
1166     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1167     buf = SvPVX(PL_parser->linestr);
1168     if (ptr < buf)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170     if (ptr == buf)
1171         return;
1172     if (ptr > PL_parser->bufptr)
1173         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174     discard_len = ptr - buf;
1175     if (PL_parser->oldbufptr < ptr)
1176         PL_parser->oldbufptr = ptr;
1177     if (PL_parser->oldoldbufptr < ptr)
1178         PL_parser->oldoldbufptr = ptr;
1179     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1180         PL_parser->last_uni = NULL;
1181     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1182         PL_parser->last_lop = NULL;
1183     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1184     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1185     PL_parser->bufend -= discard_len;
1186     PL_parser->bufptr -= discard_len;
1187     PL_parser->oldbufptr -= discard_len;
1188     PL_parser->oldoldbufptr -= discard_len;
1189     if (PL_parser->last_uni)
1190         PL_parser->last_uni -= discard_len;
1191     if (PL_parser->last_lop)
1192         PL_parser->last_lop -= discard_len;
1193 }
1194
1195 /*
1196 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1197
1198 Reads in the next chunk of text to be lexed, appending it to
1199 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1200 looked to the end of the current chunk and wants to know more.  It is
1201 usual, but not necessary, for lexing to have consumed the entirety of
1202 the current chunk at this time.
1203
1204 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1205 chunk (i.e., the current chunk has been entirely consumed), normally the
1206 current chunk will be discarded at the same time that the new chunk is
1207 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1208 will not be discarded.  If the current chunk has not been entirely
1209 consumed, then it will not be discarded regardless of the flag.
1210
1211 Returns true if some new text was added to the buffer, or false if the
1212 buffer has reached the end of the input text.
1213
1214 =cut
1215 */
1216
1217 #define LEX_FAKE_EOF 0x80000000
1218
1219 bool
1220 Perl_lex_next_chunk(pTHX_ U32 flags)
1221 {
1222     SV *linestr;
1223     char *buf;
1224     STRLEN old_bufend_pos, new_bufend_pos;
1225     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1226     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1227     bool got_some_for_debugger = 0;
1228     bool got_some;
1229     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1230         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1231     linestr = PL_parser->linestr;
1232     buf = SvPVX(linestr);
1233     if (!(flags & LEX_KEEP_PREVIOUS) &&
1234             PL_parser->bufptr == PL_parser->bufend) {
1235         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1236         linestart_pos = 0;
1237         if (PL_parser->last_uni != PL_parser->bufend)
1238             PL_parser->last_uni = NULL;
1239         if (PL_parser->last_lop != PL_parser->bufend)
1240             PL_parser->last_lop = NULL;
1241         last_uni_pos = last_lop_pos = 0;
1242         *buf = 0;
1243         SvCUR(linestr) = 0;
1244     } else {
1245         old_bufend_pos = PL_parser->bufend - buf;
1246         bufptr_pos = PL_parser->bufptr - buf;
1247         oldbufptr_pos = PL_parser->oldbufptr - buf;
1248         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1249         linestart_pos = PL_parser->linestart - buf;
1250         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1251         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1252     }
1253     if (flags & LEX_FAKE_EOF) {
1254         goto eof;
1255     } else if (!PL_parser->rsfp) {
1256         got_some = 0;
1257     } else if (filter_gets(linestr, old_bufend_pos)) {
1258         got_some = 1;
1259         got_some_for_debugger = 1;
1260     } else {
1261         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1262             sv_setpvs(linestr, "");
1263         eof:
1264         /* End of real input.  Close filehandle (unless it was STDIN),
1265          * then add implicit termination.
1266          */
1267         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1268             PerlIO_clearerr(PL_parser->rsfp);
1269         else if (PL_parser->rsfp)
1270             (void)PerlIO_close(PL_parser->rsfp);
1271         PL_parser->rsfp = NULL;
1272         PL_parser->in_pod = 0;
1273 #ifdef PERL_MAD
1274         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1275             PL_faketokens = 1;
1276 #endif
1277         if (!PL_in_eval && PL_minus_p) {
1278             sv_catpvs(linestr,
1279                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1280             PL_minus_n = PL_minus_p = 0;
1281         } else if (!PL_in_eval && PL_minus_n) {
1282             sv_catpvs(linestr, /*{*/";}");
1283             PL_minus_n = 0;
1284         } else
1285             sv_catpvs(linestr, ";");
1286         got_some = 1;
1287     }
1288     buf = SvPVX(linestr);
1289     new_bufend_pos = SvCUR(linestr);
1290     PL_parser->bufend = buf + new_bufend_pos;
1291     PL_parser->bufptr = buf + bufptr_pos;
1292     PL_parser->oldbufptr = buf + oldbufptr_pos;
1293     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1294     PL_parser->linestart = buf + linestart_pos;
1295     if (PL_parser->last_uni)
1296         PL_parser->last_uni = buf + last_uni_pos;
1297     if (PL_parser->last_lop)
1298         PL_parser->last_lop = buf + last_lop_pos;
1299     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1300             PL_curstash != PL_debstash) {
1301         /* debugger active and we're not compiling the debugger code,
1302          * so store the line into the debugger's array of lines
1303          */
1304         update_debugger_info(NULL, buf+old_bufend_pos,
1305             new_bufend_pos-old_bufend_pos);
1306     }
1307     return got_some;
1308 }
1309
1310 /*
1311 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1312
1313 Looks ahead one (Unicode) character in the text currently being lexed.
1314 Returns the codepoint (unsigned integer value) of the next character,
1315 or -1 if lexing has reached the end of the input text.  To consume the
1316 peeked character, use L</lex_read_unichar>.
1317
1318 If the next character is in (or extends into) the next chunk of input
1319 text, the next chunk will be read in.  Normally the current chunk will be
1320 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1321 then the current chunk will not be discarded.
1322
1323 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1324 is encountered, an exception is generated.
1325
1326 =cut
1327 */
1328
1329 I32
1330 Perl_lex_peek_unichar(pTHX_ U32 flags)
1331 {
1332     dVAR;
1333     char *s, *bufend;
1334     if (flags & ~(LEX_KEEP_PREVIOUS))
1335         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1336     s = PL_parser->bufptr;
1337     bufend = PL_parser->bufend;
1338     if (UTF) {
1339         U8 head;
1340         I32 unichar;
1341         STRLEN len, retlen;
1342         if (s == bufend) {
1343             if (!lex_next_chunk(flags))
1344                 return -1;
1345             s = PL_parser->bufptr;
1346             bufend = PL_parser->bufend;
1347         }
1348         head = (U8)*s;
1349         if (!(head & 0x80))
1350             return head;
1351         if (head & 0x40) {
1352             len = PL_utf8skip[head];
1353             while ((STRLEN)(bufend-s) < len) {
1354                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1355                     break;
1356                 s = PL_parser->bufptr;
1357                 bufend = PL_parser->bufend;
1358             }
1359         }
1360         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1361         if (retlen == (STRLEN)-1) {
1362             /* malformed UTF-8 */
1363             ENTER;
1364             SAVESPTR(PL_warnhook);
1365             PL_warnhook = PERL_WARNHOOK_FATAL;
1366             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1367             LEAVE;
1368         }
1369         return unichar;
1370     } else {
1371         if (s == bufend) {
1372             if (!lex_next_chunk(flags))
1373                 return -1;
1374             s = PL_parser->bufptr;
1375         }
1376         return (U8)*s;
1377     }
1378 }
1379
1380 /*
1381 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1382
1383 Reads the next (Unicode) character in the text currently being lexed.
1384 Returns the codepoint (unsigned integer value) of the character read,
1385 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1386 if lexing has reached the end of the input text.  To non-destructively
1387 examine the next character, use L</lex_peek_unichar> instead.
1388
1389 If the next character is in (or extends into) the next chunk of input
1390 text, the next chunk will be read in.  Normally the current chunk will be
1391 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1392 then the current chunk will not be discarded.
1393
1394 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1395 is encountered, an exception is generated.
1396
1397 =cut
1398 */
1399
1400 I32
1401 Perl_lex_read_unichar(pTHX_ U32 flags)
1402 {
1403     I32 c;
1404     if (flags & ~(LEX_KEEP_PREVIOUS))
1405         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1406     c = lex_peek_unichar(flags);
1407     if (c != -1) {
1408         if (c == '\n')
1409             CopLINE_inc(PL_curcop);
1410         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1411     }
1412     return c;
1413 }
1414
1415 /*
1416 =for apidoc Amx|void|lex_read_space|U32 flags
1417
1418 Reads optional spaces, in Perl style, in the text currently being
1419 lexed.  The spaces may include ordinary whitespace characters and
1420 Perl-style comments.  C<#line> directives are processed if encountered.
1421 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1422 at a non-space character (or the end of the input text).
1423
1424 If spaces extend into the next chunk of input text, the next chunk will
1425 be read in.  Normally the current chunk will be discarded at the same
1426 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1427 chunk will not be discarded.
1428
1429 =cut
1430 */
1431
1432 #define LEX_NO_NEXT_CHUNK 0x80000000
1433
1434 void
1435 Perl_lex_read_space(pTHX_ U32 flags)
1436 {
1437     char *s, *bufend;
1438     bool need_incline = 0;
1439     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1440         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1441 #ifdef PERL_MAD
1442     if (PL_skipwhite) {
1443         sv_free(PL_skipwhite);
1444         PL_skipwhite = NULL;
1445     }
1446     if (PL_madskills)
1447         PL_skipwhite = newSVpvs("");
1448 #endif /* PERL_MAD */
1449     s = PL_parser->bufptr;
1450     bufend = PL_parser->bufend;
1451     while (1) {
1452         char c = *s;
1453         if (c == '#') {
1454             do {
1455                 c = *++s;
1456             } while (!(c == '\n' || (c == 0 && s == bufend)));
1457         } else if (c == '\n') {
1458             s++;
1459             PL_parser->linestart = s;
1460             if (s == bufend)
1461                 need_incline = 1;
1462             else
1463                 incline(s);
1464         } else if (isSPACE(c)) {
1465             s++;
1466         } else if (c == 0 && s == bufend) {
1467             bool got_more;
1468 #ifdef PERL_MAD
1469             if (PL_madskills)
1470                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1471 #endif /* PERL_MAD */
1472             if (flags & LEX_NO_NEXT_CHUNK)
1473                 break;
1474             PL_parser->bufptr = s;
1475             CopLINE_inc(PL_curcop);
1476             got_more = lex_next_chunk(flags);
1477             CopLINE_dec(PL_curcop);
1478             s = PL_parser->bufptr;
1479             bufend = PL_parser->bufend;
1480             if (!got_more)
1481                 break;
1482             if (need_incline && PL_parser->rsfp) {
1483                 incline(s);
1484                 need_incline = 0;
1485             }
1486         } else {
1487             break;
1488         }
1489     }
1490 #ifdef PERL_MAD
1491     if (PL_madskills)
1492         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493 #endif /* PERL_MAD */
1494     PL_parser->bufptr = s;
1495 }
1496
1497 /*
1498  * S_incline
1499  * This subroutine has nothing to do with tilting, whether at windmills
1500  * or pinball tables.  Its name is short for "increment line".  It
1501  * increments the current line number in CopLINE(PL_curcop) and checks
1502  * to see whether the line starts with a comment of the form
1503  *    # line 500 "foo.pm"
1504  * If so, it sets the current line number and file to the values in the comment.
1505  */
1506
1507 STATIC void
1508 S_incline(pTHX_ const char *s)
1509 {
1510     dVAR;
1511     const char *t;
1512     const char *n;
1513     const char *e;
1514
1515     PERL_ARGS_ASSERT_INCLINE;
1516
1517     CopLINE_inc(PL_curcop);
1518     if (*s++ != '#')
1519         return;
1520     while (SPACE_OR_TAB(*s))
1521         s++;
1522     if (strnEQ(s, "line", 4))
1523         s += 4;
1524     else
1525         return;
1526     if (SPACE_OR_TAB(*s))
1527         s++;
1528     else
1529         return;
1530     while (SPACE_OR_TAB(*s))
1531         s++;
1532     if (!isDIGIT(*s))
1533         return;
1534
1535     n = s;
1536     while (isDIGIT(*s))
1537         s++;
1538     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1539         return;
1540     while (SPACE_OR_TAB(*s))
1541         s++;
1542     if (*s == '"' && (t = strchr(s+1, '"'))) {
1543         s++;
1544         e = t + 1;
1545     }
1546     else {
1547         t = s;
1548         while (!isSPACE(*t))
1549             t++;
1550         e = t;
1551     }
1552     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1553         e++;
1554     if (*e != '\n' && *e != '\0')
1555         return;         /* false alarm */
1556
1557     if (t - s > 0) {
1558         const STRLEN len = t - s;
1559 #ifndef USE_ITHREADS
1560         SV *const temp_sv = CopFILESV(PL_curcop);
1561         const char *cf;
1562         STRLEN tmplen;
1563
1564         if (temp_sv) {
1565             cf = SvPVX(temp_sv);
1566             tmplen = SvCUR(temp_sv);
1567         } else {
1568             cf = NULL;
1569             tmplen = 0;
1570         }
1571
1572         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1573             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1574              * to *{"::_<newfilename"} */
1575             /* However, the long form of evals is only turned on by the
1576                debugger - usually they're "(eval %lu)" */
1577             char smallbuf[128];
1578             char *tmpbuf;
1579             GV **gvp;
1580             STRLEN tmplen2 = len;
1581             if (tmplen + 2 <= sizeof smallbuf)
1582                 tmpbuf = smallbuf;
1583             else
1584                 Newx(tmpbuf, tmplen + 2, char);
1585             tmpbuf[0] = '_';
1586             tmpbuf[1] = '<';
1587             memcpy(tmpbuf + 2, cf, tmplen);
1588             tmplen += 2;
1589             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1590             if (gvp) {
1591                 char *tmpbuf2;
1592                 GV *gv2;
1593
1594                 if (tmplen2 + 2 <= sizeof smallbuf)
1595                     tmpbuf2 = smallbuf;
1596                 else
1597                     Newx(tmpbuf2, tmplen2 + 2, char);
1598
1599                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1600                     /* Either they malloc'd it, or we malloc'd it,
1601                        so no prefix is present in ours.  */
1602                     tmpbuf2[0] = '_';
1603                     tmpbuf2[1] = '<';
1604                 }
1605
1606                 memcpy(tmpbuf2 + 2, s, tmplen2);
1607                 tmplen2 += 2;
1608
1609                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1610                 if (!isGV(gv2)) {
1611                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1612                     /* adjust ${"::_<newfilename"} to store the new file name */
1613                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1614                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1615                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1616                 }
1617
1618                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1619             }
1620             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1621         }
1622 #endif
1623         CopFILE_free(PL_curcop);
1624         CopFILE_setn(PL_curcop, s, len);
1625     }
1626     CopLINE_set(PL_curcop, atoi(n)-1);
1627 }
1628
1629 #ifdef PERL_MAD
1630 /* skip space before PL_thistoken */
1631
1632 STATIC char *
1633 S_skipspace0(pTHX_ register char *s)
1634 {
1635     PERL_ARGS_ASSERT_SKIPSPACE0;
1636
1637     s = skipspace(s);
1638     if (!PL_madskills)
1639         return s;
1640     if (PL_skipwhite) {
1641         if (!PL_thiswhite)
1642             PL_thiswhite = newSVpvs("");
1643         sv_catsv(PL_thiswhite, PL_skipwhite);
1644         sv_free(PL_skipwhite);
1645         PL_skipwhite = 0;
1646     }
1647     PL_realtokenstart = s - SvPVX(PL_linestr);
1648     return s;
1649 }
1650
1651 /* skip space after PL_thistoken */
1652
1653 STATIC char *
1654 S_skipspace1(pTHX_ register char *s)
1655 {
1656     const char *start = s;
1657     I32 startoff = start - SvPVX(PL_linestr);
1658
1659     PERL_ARGS_ASSERT_SKIPSPACE1;
1660
1661     s = skipspace(s);
1662     if (!PL_madskills)
1663         return s;
1664     start = SvPVX(PL_linestr) + startoff;
1665     if (!PL_thistoken && PL_realtokenstart >= 0) {
1666         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1667         PL_thistoken = newSVpvn(tstart, start - tstart);
1668     }
1669     PL_realtokenstart = -1;
1670     if (PL_skipwhite) {
1671         if (!PL_nextwhite)
1672             PL_nextwhite = newSVpvs("");
1673         sv_catsv(PL_nextwhite, PL_skipwhite);
1674         sv_free(PL_skipwhite);
1675         PL_skipwhite = 0;
1676     }
1677     return s;
1678 }
1679
1680 STATIC char *
1681 S_skipspace2(pTHX_ register char *s, SV **svp)
1682 {
1683     char *start;
1684     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1685     const I32 startoff = s - SvPVX(PL_linestr);
1686
1687     PERL_ARGS_ASSERT_SKIPSPACE2;
1688
1689     s = skipspace(s);
1690     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1691     if (!PL_madskills || !svp)
1692         return s;
1693     start = SvPVX(PL_linestr) + startoff;
1694     if (!PL_thistoken && PL_realtokenstart >= 0) {
1695         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1696         PL_thistoken = newSVpvn(tstart, start - tstart);
1697         PL_realtokenstart = -1;
1698     }
1699     if (PL_skipwhite) {
1700         if (!*svp)
1701             *svp = newSVpvs("");
1702         sv_setsv(*svp, PL_skipwhite);
1703         sv_free(PL_skipwhite);
1704         PL_skipwhite = 0;
1705     }
1706
1707     return s;
1708 }
1709 #endif
1710
1711 STATIC void
1712 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1713 {
1714     AV *av = CopFILEAVx(PL_curcop);
1715     if (av) {
1716         SV * const sv = newSV_type(SVt_PVMG);
1717         if (orig_sv)
1718             sv_setsv(sv, orig_sv);
1719         else
1720             sv_setpvn(sv, buf, len);
1721         (void)SvIOK_on(sv);
1722         SvIV_set(sv, 0);
1723         av_store(av, (I32)CopLINE(PL_curcop), sv);
1724     }
1725 }
1726
1727 /*
1728  * S_skipspace
1729  * Called to gobble the appropriate amount and type of whitespace.
1730  * Skips comments as well.
1731  */
1732
1733 STATIC char *
1734 S_skipspace(pTHX_ register char *s)
1735 {
1736 #ifdef PERL_MAD
1737     char *start = s;
1738 #endif /* PERL_MAD */
1739     PERL_ARGS_ASSERT_SKIPSPACE;
1740 #ifdef PERL_MAD
1741     if (PL_skipwhite) {
1742         sv_free(PL_skipwhite);
1743         PL_skipwhite = NULL;
1744     }
1745 #endif /* PERL_MAD */
1746     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1747         while (s < PL_bufend && SPACE_OR_TAB(*s))
1748             s++;
1749     } else {
1750         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1751         PL_bufptr = s;
1752         lex_read_space(LEX_KEEP_PREVIOUS |
1753                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1754                     LEX_NO_NEXT_CHUNK : 0));
1755         s = PL_bufptr;
1756         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1757         if (PL_linestart > PL_bufptr)
1758             PL_bufptr = PL_linestart;
1759         return s;
1760     }
1761 #ifdef PERL_MAD
1762     if (PL_madskills)
1763         PL_skipwhite = newSVpvn(start, s-start);
1764 #endif /* PERL_MAD */
1765     return s;
1766 }
1767
1768 /*
1769  * S_check_uni
1770  * Check the unary operators to ensure there's no ambiguity in how they're
1771  * used.  An ambiguous piece of code would be:
1772  *     rand + 5
1773  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1774  * the +5 is its argument.
1775  */
1776
1777 STATIC void
1778 S_check_uni(pTHX)
1779 {
1780     dVAR;
1781     const char *s;
1782     const char *t;
1783
1784     if (PL_oldoldbufptr != PL_last_uni)
1785         return;
1786     while (isSPACE(*PL_last_uni))
1787         PL_last_uni++;
1788     s = PL_last_uni;
1789     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1790         s++;
1791     if ((t = strchr(s, '(')) && t < PL_bufptr)
1792         return;
1793
1794     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1795                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1796                      (int)(s - PL_last_uni), PL_last_uni);
1797 }
1798
1799 /*
1800  * LOP : macro to build a list operator.  Its behaviour has been replaced
1801  * with a subroutine, S_lop() for which LOP is just another name.
1802  */
1803
1804 #define LOP(f,x) return lop(f,x,s)
1805
1806 /*
1807  * S_lop
1808  * Build a list operator (or something that might be one).  The rules:
1809  *  - if we have a next token, then it's a list operator [why?]
1810  *  - if the next thing is an opening paren, then it's a function
1811  *  - else it's a list operator
1812  */
1813
1814 STATIC I32
1815 S_lop(pTHX_ I32 f, int x, char *s)
1816 {
1817     dVAR;
1818
1819     PERL_ARGS_ASSERT_LOP;
1820
1821     pl_yylval.ival = f;
1822     CLINE;
1823     PL_expect = x;
1824     PL_bufptr = s;
1825     PL_last_lop = PL_oldbufptr;
1826     PL_last_lop_op = (OPCODE)f;
1827 #ifdef PERL_MAD
1828     if (PL_lasttoke)
1829         goto lstop;
1830 #else
1831     if (PL_nexttoke)
1832         goto lstop;
1833 #endif
1834     if (*s == '(')
1835         return REPORT(FUNC);
1836     s = PEEKSPACE(s);
1837     if (*s == '(')
1838         return REPORT(FUNC);
1839     else {
1840         lstop:
1841         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1842             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1843         return REPORT(LSTOP);
1844     }
1845 }
1846
1847 #ifdef PERL_MAD
1848  /*
1849  * S_start_force
1850  * Sets up for an eventual force_next().  start_force(0) basically does
1851  * an unshift, while start_force(-1) does a push.  yylex removes items
1852  * on the "pop" end.
1853  */
1854
1855 STATIC void
1856 S_start_force(pTHX_ int where)
1857 {
1858     int i;
1859
1860     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1861         where = PL_lasttoke;
1862     assert(PL_curforce < 0 || PL_curforce == where);
1863     if (PL_curforce != where) {
1864         for (i = PL_lasttoke; i > where; --i) {
1865             PL_nexttoke[i] = PL_nexttoke[i-1];
1866         }
1867         PL_lasttoke++;
1868     }
1869     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1870         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1871     PL_curforce = where;
1872     if (PL_nextwhite) {
1873         if (PL_madskills)
1874             curmad('^', newSVpvs(""));
1875         CURMAD('_', PL_nextwhite);
1876     }
1877 }
1878
1879 STATIC void
1880 S_curmad(pTHX_ char slot, SV *sv)
1881 {
1882     MADPROP **where;
1883
1884     if (!sv)
1885         return;
1886     if (PL_curforce < 0)
1887         where = &PL_thismad;
1888     else
1889         where = &PL_nexttoke[PL_curforce].next_mad;
1890
1891     if (PL_faketokens)
1892         sv_setpvs(sv, "");
1893     else {
1894         if (!IN_BYTES) {
1895             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1896                 SvUTF8_on(sv);
1897             else if (PL_encoding) {
1898                 sv_recode_to_utf8(sv, PL_encoding);
1899             }
1900         }
1901     }
1902
1903     /* keep a slot open for the head of the list? */
1904     if (slot != '_' && *where && (*where)->mad_key == '^') {
1905         (*where)->mad_key = slot;
1906         sv_free(MUTABLE_SV(((*where)->mad_val)));
1907         (*where)->mad_val = (void*)sv;
1908     }
1909     else
1910         addmad(newMADsv(slot, sv), where, 0);
1911 }
1912 #else
1913 #  define start_force(where)    NOOP
1914 #  define curmad(slot, sv)      NOOP
1915 #endif
1916
1917 /*
1918  * S_force_next
1919  * When the lexer realizes it knows the next token (for instance,
1920  * it is reordering tokens for the parser) then it can call S_force_next
1921  * to know what token to return the next time the lexer is called.  Caller
1922  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1923  * and possibly PL_expect to ensure the lexer handles the token correctly.
1924  */
1925
1926 STATIC void
1927 S_force_next(pTHX_ I32 type)
1928 {
1929     dVAR;
1930 #ifdef DEBUGGING
1931     if (DEBUG_T_TEST) {
1932         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1933         tokereport(type, &NEXTVAL_NEXTTOKE);
1934     }
1935 #endif
1936 #ifdef PERL_MAD
1937     if (PL_curforce < 0)
1938         start_force(PL_lasttoke);
1939     PL_nexttoke[PL_curforce].next_type = type;
1940     if (PL_lex_state != LEX_KNOWNEXT)
1941         PL_lex_defer = PL_lex_state;
1942     PL_lex_state = LEX_KNOWNEXT;
1943     PL_lex_expect = PL_expect;
1944     PL_curforce = -1;
1945 #else
1946     PL_nexttype[PL_nexttoke] = type;
1947     PL_nexttoke++;
1948     if (PL_lex_state != LEX_KNOWNEXT) {
1949         PL_lex_defer = PL_lex_state;
1950         PL_lex_expect = PL_expect;
1951         PL_lex_state = LEX_KNOWNEXT;
1952     }
1953 #endif
1954 }
1955
1956 void
1957 Perl_yyunlex(pTHX)
1958 {
1959     int yyc = PL_parser->yychar;
1960     if (yyc != YYEMPTY) {
1961         if (yyc) {
1962             start_force(-1);
1963             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1964             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1965                 PL_lex_allbrackets--;
1966                 PL_lex_brackets--;
1967                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1968             } else if (yyc == '('/*)*/) {
1969                 PL_lex_allbrackets--;
1970                 yyc |= (2<<24);
1971             }
1972             force_next(yyc);
1973         }
1974         PL_parser->yychar = YYEMPTY;
1975     }
1976 }
1977
1978 STATIC SV *
1979 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1980 {
1981     dVAR;
1982     SV * const sv = newSVpvn_utf8(start, len,
1983                                   !IN_BYTES
1984                                   && UTF
1985                                   && !is_ascii_string((const U8*)start, len)
1986                                   && is_utf8_string((const U8*)start, len));
1987     return sv;
1988 }
1989
1990 /*
1991  * S_force_word
1992  * When the lexer knows the next thing is a word (for instance, it has
1993  * just seen -> and it knows that the next char is a word char, then
1994  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1995  * lookahead.
1996  *
1997  * Arguments:
1998  *   char *start : buffer position (must be within PL_linestr)
1999  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2000  *   int check_keyword : if true, Perl checks to make sure the word isn't
2001  *       a keyword (do this if the word is a label, e.g. goto FOO)
2002  *   int allow_pack : if true, : characters will also be allowed (require,
2003  *       use, etc. do this)
2004  *   int allow_initial_tick : used by the "sub" lexer only.
2005  */
2006
2007 STATIC char *
2008 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2009 {
2010     dVAR;
2011     register char *s;
2012     STRLEN len;
2013
2014     PERL_ARGS_ASSERT_FORCE_WORD;
2015
2016     start = SKIPSPACE1(start);
2017     s = start;
2018     if (isIDFIRST_lazy_if(s,UTF) ||
2019         (allow_pack && *s == ':') ||
2020         (allow_initial_tick && *s == '\'') )
2021     {
2022         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2023         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2024             return start;
2025         start_force(PL_curforce);
2026         if (PL_madskills)
2027             curmad('X', newSVpvn(start,s-start));
2028         if (token == METHOD) {
2029             s = SKIPSPACE1(s);
2030             if (*s == '(')
2031                 PL_expect = XTERM;
2032             else {
2033                 PL_expect = XOPERATOR;
2034             }
2035         }
2036         if (PL_madskills)
2037             curmad('g', newSVpvs( "forced" ));
2038         NEXTVAL_NEXTTOKE.opval
2039             = (OP*)newSVOP(OP_CONST,0,
2040                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2041         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2042         force_next(token);
2043     }
2044     return s;
2045 }
2046
2047 /*
2048  * S_force_ident
2049  * Called when the lexer wants $foo *foo &foo etc, but the program
2050  * text only contains the "foo" portion.  The first argument is a pointer
2051  * to the "foo", and the second argument is the type symbol to prefix.
2052  * Forces the next token to be a "WORD".
2053  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2054  */
2055
2056 STATIC void
2057 S_force_ident(pTHX_ register const char *s, int kind)
2058 {
2059     dVAR;
2060
2061     PERL_ARGS_ASSERT_FORCE_IDENT;
2062
2063     if (*s) {
2064         const STRLEN len = strlen(s);
2065         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2066         start_force(PL_curforce);
2067         NEXTVAL_NEXTTOKE.opval = o;
2068         force_next(WORD);
2069         if (kind) {
2070             o->op_private = OPpCONST_ENTERED;
2071             /* XXX see note in pp_entereval() for why we forgo typo
2072                warnings if the symbol must be introduced in an eval.
2073                GSAR 96-10-12 */
2074             gv_fetchpvn_flags(s, len,
2075                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2076                               : GV_ADD,
2077                               kind == '$' ? SVt_PV :
2078                               kind == '@' ? SVt_PVAV :
2079                               kind == '%' ? SVt_PVHV :
2080                               SVt_PVGV
2081                               );
2082         }
2083     }
2084 }
2085
2086 NV
2087 Perl_str_to_version(pTHX_ SV *sv)
2088 {
2089     NV retval = 0.0;
2090     NV nshift = 1.0;
2091     STRLEN len;
2092     const char *start = SvPV_const(sv,len);
2093     const char * const end = start + len;
2094     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2095
2096     PERL_ARGS_ASSERT_STR_TO_VERSION;
2097
2098     while (start < end) {
2099         STRLEN skip;
2100         UV n;
2101         if (utf)
2102             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2103         else {
2104             n = *(U8*)start;
2105             skip = 1;
2106         }
2107         retval += ((NV)n)/nshift;
2108         start += skip;
2109         nshift *= 1000;
2110     }
2111     return retval;
2112 }
2113
2114 /*
2115  * S_force_version
2116  * Forces the next token to be a version number.
2117  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2118  * and if "guessing" is TRUE, then no new token is created (and the caller
2119  * must use an alternative parsing method).
2120  */
2121
2122 STATIC char *
2123 S_force_version(pTHX_ char *s, int guessing)
2124 {
2125     dVAR;
2126     OP *version = NULL;
2127     char *d;
2128 #ifdef PERL_MAD
2129     I32 startoff = s - SvPVX(PL_linestr);
2130 #endif
2131
2132     PERL_ARGS_ASSERT_FORCE_VERSION;
2133
2134     s = SKIPSPACE1(s);
2135
2136     d = s;
2137     if (*d == 'v')
2138         d++;
2139     if (isDIGIT(*d)) {
2140         while (isDIGIT(*d) || *d == '_' || *d == '.')
2141             d++;
2142 #ifdef PERL_MAD
2143         if (PL_madskills) {
2144             start_force(PL_curforce);
2145             curmad('X', newSVpvn(s,d-s));
2146         }
2147 #endif
2148         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2149             SV *ver;
2150 #ifdef USE_LOCALE_NUMERIC
2151             char *loc = setlocale(LC_NUMERIC, "C");
2152 #endif
2153             s = scan_num(s, &pl_yylval);
2154 #ifdef USE_LOCALE_NUMERIC
2155             setlocale(LC_NUMERIC, loc);
2156 #endif
2157             version = pl_yylval.opval;
2158             ver = cSVOPx(version)->op_sv;
2159             if (SvPOK(ver) && !SvNIOK(ver)) {
2160                 SvUPGRADE(ver, SVt_PVNV);
2161                 SvNV_set(ver, str_to_version(ver));
2162                 SvNOK_on(ver);          /* hint that it is a version */
2163             }
2164         }
2165         else if (guessing) {
2166 #ifdef PERL_MAD
2167             if (PL_madskills) {
2168                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2169                 PL_nextwhite = 0;
2170                 s = SvPVX(PL_linestr) + startoff;
2171             }
2172 #endif
2173             return s;
2174         }
2175     }
2176
2177 #ifdef PERL_MAD
2178     if (PL_madskills && !version) {
2179         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2180         PL_nextwhite = 0;
2181         s = SvPVX(PL_linestr) + startoff;
2182     }
2183 #endif
2184     /* NOTE: The parser sees the package name and the VERSION swapped */
2185     start_force(PL_curforce);
2186     NEXTVAL_NEXTTOKE.opval = version;
2187     force_next(WORD);
2188
2189     return s;
2190 }
2191
2192 /*
2193  * S_force_strict_version
2194  * Forces the next token to be a version number using strict syntax rules.
2195  */
2196
2197 STATIC char *
2198 S_force_strict_version(pTHX_ char *s)
2199 {
2200     dVAR;
2201     OP *version = NULL;
2202 #ifdef PERL_MAD
2203     I32 startoff = s - SvPVX(PL_linestr);
2204 #endif
2205     const char *errstr = NULL;
2206
2207     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2208
2209     while (isSPACE(*s)) /* leading whitespace */
2210         s++;
2211
2212     if (is_STRICT_VERSION(s,&errstr)) {
2213         SV *ver = newSV(0);
2214         s = (char *)scan_version(s, ver, 0);
2215         version = newSVOP(OP_CONST, 0, ver);
2216     }
2217     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2218             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2219     {
2220         PL_bufptr = s;
2221         if (errstr)
2222             yyerror(errstr); /* version required */
2223         return s;
2224     }
2225
2226 #ifdef PERL_MAD
2227     if (PL_madskills && !version) {
2228         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2229         PL_nextwhite = 0;
2230         s = SvPVX(PL_linestr) + startoff;
2231     }
2232 #endif
2233     /* NOTE: The parser sees the package name and the VERSION swapped */
2234     start_force(PL_curforce);
2235     NEXTVAL_NEXTTOKE.opval = version;
2236     force_next(WORD);
2237
2238     return s;
2239 }
2240
2241 /*
2242  * S_tokeq
2243  * Tokenize a quoted string passed in as an SV.  It finds the next
2244  * chunk, up to end of string or a backslash.  It may make a new
2245  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2246  * turns \\ into \.
2247  */
2248
2249 STATIC SV *
2250 S_tokeq(pTHX_ SV *sv)
2251 {
2252     dVAR;
2253     register char *s;
2254     register char *send;
2255     register char *d;
2256     STRLEN len = 0;
2257     SV *pv = sv;
2258
2259     PERL_ARGS_ASSERT_TOKEQ;
2260
2261     if (!SvLEN(sv))
2262         goto finish;
2263
2264     s = SvPV_force(sv, len);
2265     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2266         goto finish;
2267     send = s + len;
2268     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2269     while (s < send && !(*s == '\\' && s[1] == '\\'))
2270         s++;
2271     if (s == send)
2272         goto finish;
2273     d = s;
2274     if ( PL_hints & HINT_NEW_STRING ) {
2275         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2276     }
2277     while (s < send) {
2278         if (*s == '\\') {
2279             if (s + 1 < send && (s[1] == '\\'))
2280                 s++;            /* all that, just for this */
2281         }
2282         *d++ = *s++;
2283     }
2284     *d = '\0';
2285     SvCUR_set(sv, d - SvPVX_const(sv));
2286   finish:
2287     if ( PL_hints & HINT_NEW_STRING )
2288        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2289     return sv;
2290 }
2291
2292 /*
2293  * Now come three functions related to double-quote context,
2294  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2295  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2296  * interact with PL_lex_state, and create fake ( ... ) argument lists
2297  * to handle functions and concatenation.
2298  * They assume that whoever calls them will be setting up a fake
2299  * join call, because each subthing puts a ',' after it.  This lets
2300  *   "lower \luPpEr"
2301  * become
2302  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2303  *
2304  * (I'm not sure whether the spurious commas at the end of lcfirst's
2305  * arguments and join's arguments are created or not).
2306  */
2307
2308 /*
2309  * S_sublex_start
2310  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2311  *
2312  * Pattern matching will set PL_lex_op to the pattern-matching op to
2313  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2314  *
2315  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2316  *
2317  * Everything else becomes a FUNC.
2318  *
2319  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2320  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2321  * call to S_sublex_push().
2322  */
2323
2324 STATIC I32
2325 S_sublex_start(pTHX)
2326 {
2327     dVAR;
2328     register const I32 op_type = pl_yylval.ival;
2329
2330     if (op_type == OP_NULL) {
2331         pl_yylval.opval = PL_lex_op;
2332         PL_lex_op = NULL;
2333         return THING;
2334     }
2335     if (op_type == OP_CONST || op_type == OP_READLINE) {
2336         SV *sv = tokeq(PL_lex_stuff);
2337
2338         if (SvTYPE(sv) == SVt_PVIV) {
2339             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2340             STRLEN len;
2341             const char * const p = SvPV_const(sv, len);
2342             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2343             SvREFCNT_dec(sv);
2344             sv = nsv;
2345         }
2346         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2347         PL_lex_stuff = NULL;
2348         /* Allow <FH> // "foo" */
2349         if (op_type == OP_READLINE)
2350             PL_expect = XTERMORDORDOR;
2351         return THING;
2352     }
2353     else if (op_type == OP_BACKTICK && PL_lex_op) {
2354         /* readpipe() vas overriden */
2355         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2356         pl_yylval.opval = PL_lex_op;
2357         PL_lex_op = NULL;
2358         PL_lex_stuff = NULL;
2359         return THING;
2360     }
2361
2362     PL_sublex_info.super_state = PL_lex_state;
2363     PL_sublex_info.sub_inwhat = (U16)op_type;
2364     PL_sublex_info.sub_op = PL_lex_op;
2365     PL_lex_state = LEX_INTERPPUSH;
2366
2367     PL_expect = XTERM;
2368     if (PL_lex_op) {
2369         pl_yylval.opval = PL_lex_op;
2370         PL_lex_op = NULL;
2371         return PMFUNC;
2372     }
2373     else
2374         return FUNC;
2375 }
2376
2377 /*
2378  * S_sublex_push
2379  * Create a new scope to save the lexing state.  The scope will be
2380  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2381  * to the uc, lc, etc. found before.
2382  * Sets PL_lex_state to LEX_INTERPCONCAT.
2383  */
2384
2385 STATIC I32
2386 S_sublex_push(pTHX)
2387 {
2388     dVAR;
2389     ENTER;
2390
2391     PL_lex_state = PL_sublex_info.super_state;
2392     SAVEBOOL(PL_lex_dojoin);
2393     SAVEI32(PL_lex_brackets);
2394     SAVEI32(PL_lex_allbrackets);
2395     SAVEI8(PL_lex_fakeeof);
2396     SAVEI32(PL_lex_casemods);
2397     SAVEI32(PL_lex_starts);
2398     SAVEI8(PL_lex_state);
2399     SAVEVPTR(PL_lex_inpat);
2400     SAVEI16(PL_lex_inwhat);
2401     SAVECOPLINE(PL_curcop);
2402     SAVEPPTR(PL_bufptr);
2403     SAVEPPTR(PL_bufend);
2404     SAVEPPTR(PL_oldbufptr);
2405     SAVEPPTR(PL_oldoldbufptr);
2406     SAVEPPTR(PL_last_lop);
2407     SAVEPPTR(PL_last_uni);
2408     SAVEPPTR(PL_linestart);
2409     SAVESPTR(PL_linestr);
2410     SAVEGENERICPV(PL_lex_brackstack);
2411     SAVEGENERICPV(PL_lex_casestack);
2412
2413     PL_linestr = PL_lex_stuff;
2414     PL_lex_stuff = NULL;
2415
2416     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2417         = SvPVX(PL_linestr);
2418     PL_bufend += SvCUR(PL_linestr);
2419     PL_last_lop = PL_last_uni = NULL;
2420     SAVEFREESV(PL_linestr);
2421
2422     PL_lex_dojoin = FALSE;
2423     PL_lex_brackets = 0;
2424     PL_lex_allbrackets = 0;
2425     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2426     Newx(PL_lex_brackstack, 120, char);
2427     Newx(PL_lex_casestack, 12, char);
2428     PL_lex_casemods = 0;
2429     *PL_lex_casestack = '\0';
2430     PL_lex_starts = 0;
2431     PL_lex_state = LEX_INTERPCONCAT;
2432     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2433
2434     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2435     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2436     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2437         PL_lex_inpat = PL_sublex_info.sub_op;
2438     else
2439         PL_lex_inpat = NULL;
2440
2441     return '(';
2442 }
2443
2444 /*
2445  * S_sublex_done
2446  * Restores lexer state after a S_sublex_push.
2447  */
2448
2449 STATIC I32
2450 S_sublex_done(pTHX)
2451 {
2452     dVAR;
2453     if (!PL_lex_starts++) {
2454         SV * const sv = newSVpvs("");
2455         if (SvUTF8(PL_linestr))
2456             SvUTF8_on(sv);
2457         PL_expect = XOPERATOR;
2458         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2459         return THING;
2460     }
2461
2462     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2463         PL_lex_state = LEX_INTERPCASEMOD;
2464         return yylex();
2465     }
2466
2467     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2468     assert(PL_lex_inwhat != OP_TRANSR);
2469     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2470         PL_linestr = PL_lex_repl;
2471         PL_lex_inpat = 0;
2472         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2473         PL_bufend += SvCUR(PL_linestr);
2474         PL_last_lop = PL_last_uni = NULL;
2475         SAVEFREESV(PL_linestr);
2476         PL_lex_dojoin = FALSE;
2477         PL_lex_brackets = 0;
2478         PL_lex_allbrackets = 0;
2479         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2480         PL_lex_casemods = 0;
2481         *PL_lex_casestack = '\0';
2482         PL_lex_starts = 0;
2483         if (SvEVALED(PL_lex_repl)) {
2484             PL_lex_state = LEX_INTERPNORMAL;
2485             PL_lex_starts++;
2486             /*  we don't clear PL_lex_repl here, so that we can check later
2487                 whether this is an evalled subst; that means we rely on the
2488                 logic to ensure sublex_done() is called again only via the
2489                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2490         }
2491         else {
2492             PL_lex_state = LEX_INTERPCONCAT;
2493             PL_lex_repl = NULL;
2494         }
2495         return ',';
2496     }
2497     else {
2498 #ifdef PERL_MAD
2499         if (PL_madskills) {
2500             if (PL_thiswhite) {
2501                 if (!PL_endwhite)
2502                     PL_endwhite = newSVpvs("");
2503                 sv_catsv(PL_endwhite, PL_thiswhite);
2504                 PL_thiswhite = 0;
2505             }
2506             if (PL_thistoken)
2507                 sv_setpvs(PL_thistoken,"");
2508             else
2509                 PL_realtokenstart = -1;
2510         }
2511 #endif
2512         LEAVE;
2513         PL_bufend = SvPVX(PL_linestr);
2514         PL_bufend += SvCUR(PL_linestr);
2515         PL_expect = XOPERATOR;
2516         PL_sublex_info.sub_inwhat = 0;
2517         return ')';
2518     }
2519 }
2520
2521 /*
2522   scan_const
2523
2524   Extracts a pattern, double-quoted string, or transliteration.  This
2525   is terrifying code.
2526
2527   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2528   processing a pattern (PL_lex_inpat is true), a transliteration
2529   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2530
2531   Returns a pointer to the character scanned up to. If this is
2532   advanced from the start pointer supplied (i.e. if anything was
2533   successfully parsed), will leave an OP for the substring scanned
2534   in pl_yylval. Caller must intuit reason for not parsing further
2535   by looking at the next characters herself.
2536
2537   In patterns:
2538     backslashes:
2539       constants: \N{NAME} only
2540       case and quoting: \U \Q \E
2541     stops on @ and $, but not for $ as tail anchor
2542
2543   In transliterations:
2544     characters are VERY literal, except for - not at the start or end
2545     of the string, which indicates a range. If the range is in bytes,
2546     scan_const expands the range to the full set of intermediate
2547     characters. If the range is in utf8, the hyphen is replaced with
2548     a certain range mark which will be handled by pmtrans() in op.c.
2549
2550   In double-quoted strings:
2551     backslashes:
2552       double-quoted style: \r and \n
2553       constants: \x31, etc.
2554       deprecated backrefs: \1 (in substitution replacements)
2555       case and quoting: \U \Q \E
2556     stops on @ and $
2557
2558   scan_const does *not* construct ops to handle interpolated strings.
2559   It stops processing as soon as it finds an embedded $ or @ variable
2560   and leaves it to the caller to work out what's going on.
2561
2562   embedded arrays (whether in pattern or not) could be:
2563       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2564
2565   $ in double-quoted strings must be the symbol of an embedded scalar.
2566
2567   $ in pattern could be $foo or could be tail anchor.  Assumption:
2568   it's a tail anchor if $ is the last thing in the string, or if it's
2569   followed by one of "()| \r\n\t"
2570
2571   \1 (backreferences) are turned into $1
2572
2573   The structure of the code is
2574       while (there's a character to process) {
2575           handle transliteration ranges
2576           skip regexp comments /(?#comment)/ and codes /(?{code})/
2577           skip #-initiated comments in //x patterns
2578           check for embedded arrays
2579           check for embedded scalars
2580           if (backslash) {
2581               deprecate \1 in substitution replacements
2582               handle string-changing backslashes \l \U \Q \E, etc.
2583               switch (what was escaped) {
2584                   handle \- in a transliteration (becomes a literal -)
2585                   if a pattern and not \N{, go treat as regular character
2586                   handle \132 (octal characters)
2587                   handle \x15 and \x{1234} (hex characters)
2588                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2589                   handle \cV (control characters)
2590                   handle printf-style backslashes (\f, \r, \n, etc)
2591               } (end switch)
2592               continue
2593           } (end if backslash)
2594           handle regular character
2595     } (end while character to read)
2596
2597 */
2598
2599 STATIC char *
2600 S_scan_const(pTHX_ char *start)
2601 {
2602     dVAR;
2603     register char *send = PL_bufend;            /* end of the constant */
2604     SV *sv = newSV(send - start);               /* sv for the constant.  See
2605                                                    note below on sizing. */
2606     register char *s = start;                   /* start of the constant */
2607     register char *d = SvPVX(sv);               /* destination for copies */
2608     bool dorange = FALSE;                       /* are we in a translit range? */
2609     bool didrange = FALSE;                      /* did we just finish a range? */
2610     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2611     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2612                                                    to be UTF8?  But, this can
2613                                                    show as true when the source
2614                                                    isn't utf8, as for example
2615                                                    when it is entirely composed
2616                                                    of hex constants */
2617
2618     /* Note on sizing:  The scanned constant is placed into sv, which is
2619      * initialized by newSV() assuming one byte of output for every byte of
2620      * input.  This routine expects newSV() to allocate an extra byte for a
2621      * trailing NUL, which this routine will append if it gets to the end of
2622      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2623      * CAPITAL LETTER A}), or more output than input if the constant ends up
2624      * recoded to utf8, but each time a construct is found that might increase
2625      * the needed size, SvGROW() is called.  Its size parameter each time is
2626      * based on the best guess estimate at the time, namely the length used so
2627      * far, plus the length the current construct will occupy, plus room for
2628      * the trailing NUL, plus one byte for every input byte still unscanned */
2629
2630     UV uv;
2631 #ifdef EBCDIC
2632     UV literal_endpoint = 0;
2633     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2634 #endif
2635
2636     PERL_ARGS_ASSERT_SCAN_CONST;
2637
2638     assert(PL_lex_inwhat != OP_TRANSR);
2639     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2640         /* If we are doing a trans and we know we want UTF8 set expectation */
2641         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2642         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2643     }
2644
2645
2646     while (s < send || dorange) {
2647
2648         /* get transliterations out of the way (they're most literal) */
2649         if (PL_lex_inwhat == OP_TRANS) {
2650             /* expand a range A-Z to the full set of characters.  AIE! */
2651             if (dorange) {
2652                 I32 i;                          /* current expanded character */
2653                 I32 min;                        /* first character in range */
2654                 I32 max;                        /* last character in range */
2655
2656 #ifdef EBCDIC
2657                 UV uvmax = 0;
2658 #endif
2659
2660                 if (has_utf8
2661 #ifdef EBCDIC
2662                     && !native_range
2663 #endif
2664                     ) {
2665                     char * const c = (char*)utf8_hop((U8*)d, -1);
2666                     char *e = d++;
2667                     while (e-- > c)
2668                         *(e + 1) = *e;
2669                     *c = (char)UTF_TO_NATIVE(0xff);
2670                     /* mark the range as done, and continue */
2671                     dorange = FALSE;
2672                     didrange = TRUE;
2673                     continue;
2674                 }
2675
2676                 i = d - SvPVX_const(sv);                /* remember current offset */
2677 #ifdef EBCDIC
2678                 SvGROW(sv,
2679                        SvLEN(sv) + (has_utf8 ?
2680                                     (512 - UTF_CONTINUATION_MARK +
2681                                      UNISKIP(0x100))
2682                                     : 256));
2683                 /* How many two-byte within 0..255: 128 in UTF-8,
2684                  * 96 in UTF-8-mod. */
2685 #else
2686                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2687 #endif
2688                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2689 #ifdef EBCDIC
2690                 if (has_utf8) {
2691                     int j;
2692                     for (j = 0; j <= 1; j++) {
2693                         char * const c = (char*)utf8_hop((U8*)d, -1);
2694                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2695                         if (j)
2696                             min = (U8)uv;
2697                         else if (uv < 256)
2698                             max = (U8)uv;
2699                         else {
2700                             max = (U8)0xff; /* only to \xff */
2701                             uvmax = uv; /* \x{100} to uvmax */
2702                         }
2703                         d = c; /* eat endpoint chars */
2704                      }
2705                 }
2706                else {
2707 #endif
2708                    d -= 2;              /* eat the first char and the - */
2709                    min = (U8)*d;        /* first char in range */
2710                    max = (U8)d[1];      /* last char in range  */
2711 #ifdef EBCDIC
2712                }
2713 #endif
2714
2715                 if (min > max) {
2716                     Perl_croak(aTHX_
2717                                "Invalid range \"%c-%c\" in transliteration operator",
2718                                (char)min, (char)max);
2719                 }
2720
2721 #ifdef EBCDIC
2722                 if (literal_endpoint == 2 &&
2723                     ((isLOWER(min) && isLOWER(max)) ||
2724                      (isUPPER(min) && isUPPER(max)))) {
2725                     if (isLOWER(min)) {
2726                         for (i = min; i <= max; i++)
2727                             if (isLOWER(i))
2728                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2729                     } else {
2730                         for (i = min; i <= max; i++)
2731                             if (isUPPER(i))
2732                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2733                     }
2734                 }
2735                 else
2736 #endif
2737                     for (i = min; i <= max; i++)
2738 #ifdef EBCDIC
2739                         if (has_utf8) {
2740                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2741                             if (UNI_IS_INVARIANT(ch))
2742                                 *d++ = (U8)i;
2743                             else {
2744                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2745                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2746                             }
2747                         }
2748                         else
2749 #endif
2750                             *d++ = (char)i;
2751
2752 #ifdef EBCDIC
2753                 if (uvmax) {
2754                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2755                     if (uvmax > 0x101)
2756                         *d++ = (char)UTF_TO_NATIVE(0xff);
2757                     if (uvmax > 0x100)
2758                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2759                 }
2760 #endif
2761
2762                 /* mark the range as done, and continue */
2763                 dorange = FALSE;
2764                 didrange = TRUE;
2765 #ifdef EBCDIC
2766                 literal_endpoint = 0;
2767 #endif
2768                 continue;
2769             }
2770
2771             /* range begins (ignore - as first or last char) */
2772             else if (*s == '-' && s+1 < send  && s != start) {
2773                 if (didrange) {
2774                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2775                 }
2776                 if (has_utf8
2777 #ifdef EBCDIC
2778                     && !native_range
2779 #endif
2780                     ) {
2781                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2782                     s++;
2783                     continue;
2784                 }
2785                 dorange = TRUE;
2786                 s++;
2787             }
2788             else {
2789                 didrange = FALSE;
2790 #ifdef EBCDIC
2791                 literal_endpoint = 0;
2792                 native_range = TRUE;
2793 #endif
2794             }
2795         }
2796
2797         /* if we get here, we're not doing a transliteration */
2798
2799         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2800            except for the last char, which will be done separately. */
2801         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2802             if (s[2] == '#') {
2803                 while (s+1 < send && *s != ')')
2804                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2805             }
2806             else if (s[2] == '{' /* This should match regcomp.c */
2807                     || (s[2] == '?' && s[3] == '{'))
2808             {
2809                 I32 count = 1;
2810                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2811                 char c;
2812
2813                 while (count && (c = *regparse)) {
2814                     if (c == '\\' && regparse[1])
2815                         regparse++;
2816                     else if (c == '{')
2817                         count++;
2818                     else if (c == '}')
2819                         count--;
2820                     regparse++;
2821                 }
2822                 if (*regparse != ')')
2823                     regparse--;         /* Leave one char for continuation. */
2824                 while (s < regparse)
2825                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2826             }
2827         }
2828
2829         /* likewise skip #-initiated comments in //x patterns */
2830         else if (*s == '#' && PL_lex_inpat &&
2831           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2832             while (s+1 < send && *s != '\n')
2833                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2834         }
2835
2836         /* check for embedded arrays
2837            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2838            */
2839         else if (*s == '@' && s[1]) {
2840             if (isALNUM_lazy_if(s+1,UTF))
2841                 break;
2842             if (strchr(":'{$", s[1]))
2843                 break;
2844             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2845                 break; /* in regexp, neither @+ nor @- are interpolated */
2846         }
2847
2848         /* check for embedded scalars.  only stop if we're sure it's a
2849            variable.
2850         */
2851         else if (*s == '$') {
2852             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2853                 break;
2854             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2855                 if (s[1] == '\\') {
2856                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2857                                    "Possible unintended interpolation of $\\ in regex");
2858                 }
2859                 break;          /* in regexp, $ might be tail anchor */
2860             }
2861         }
2862
2863         /* End of else if chain - OP_TRANS rejoin rest */
2864
2865         /* backslashes */
2866         if (*s == '\\' && s+1 < send) {
2867             char* e;    /* Can be used for ending '}', etc. */
2868
2869             s++;
2870
2871             /* warn on \1 - \9 in substitution replacements, but note that \11
2872              * is an octal; and \19 is \1 followed by '9' */
2873             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2874                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2875             {
2876                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2877                 *--s = '$';
2878                 break;
2879             }
2880
2881             /* string-change backslash escapes */
2882             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2883                 --s;
2884                 break;
2885             }
2886             /* In a pattern, process \N, but skip any other backslash escapes.
2887              * This is because we don't want to translate an escape sequence
2888              * into a meta symbol and have the regex compiler use the meta
2889              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2890              * in spite of this, we do have to process \N here while the proper
2891              * charnames handler is in scope.  See bugs #56444 and #62056.
2892              * There is a complication because \N in a pattern may also stand
2893              * for 'match a non-nl', and not mean a charname, in which case its
2894              * processing should be deferred to the regex compiler.  To be a
2895              * charname it must be followed immediately by a '{', and not look
2896              * like \N followed by a curly quantifier, i.e., not something like
2897              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2898              * quantifier */
2899             else if (PL_lex_inpat
2900                     && (*s != 'N'
2901                         || s[1] != '{'
2902                         || regcurly(s + 1)))
2903             {
2904                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2905                 goto default_action;
2906             }
2907
2908             switch (*s) {
2909
2910             /* quoted - in transliterations */
2911             case '-':
2912                 if (PL_lex_inwhat == OP_TRANS) {
2913                     *d++ = *s++;
2914                     continue;
2915                 }
2916                 /* FALL THROUGH */
2917             default:
2918                 {
2919                     if ((isALPHA(*s) || isDIGIT(*s)))
2920                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2921                                        "Unrecognized escape \\%c passed through",
2922                                        *s);
2923                     /* default action is to copy the quoted character */
2924                     goto default_action;
2925                 }
2926
2927             /* eg. \132 indicates the octal constant 0132 */
2928             case '0': case '1': case '2': case '3':
2929             case '4': case '5': case '6': case '7':
2930                 {
2931                     I32 flags = 0;
2932                     STRLEN len = 3;
2933                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2934                     s += len;
2935                 }
2936                 goto NUM_ESCAPE_INSERT;
2937
2938             /* eg. \o{24} indicates the octal constant \024 */
2939             case 'o':
2940                 {
2941                     STRLEN len;
2942                     const char* error;
2943
2944                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2945                     s += len;
2946                     if (! valid) {
2947                         yyerror(error);
2948                         continue;
2949                     }
2950                     goto NUM_ESCAPE_INSERT;
2951                 }
2952
2953             /* eg. \x24 indicates the hex constant 0x24 */
2954             case 'x':
2955                 ++s;
2956                 if (*s == '{') {
2957                     char* const e = strchr(s, '}');
2958                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2959                       PERL_SCAN_DISALLOW_PREFIX;
2960                     STRLEN len;
2961
2962                     ++s;
2963                     if (!e) {
2964                         yyerror("Missing right brace on \\x{}");
2965                         continue;
2966                     }
2967                     len = e - s;
2968                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2969                     s = e + 1;
2970                 }
2971                 else {
2972                     {
2973                         STRLEN len = 2;
2974                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2975                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2976                         s += len;
2977                     }
2978                 }
2979
2980               NUM_ESCAPE_INSERT:
2981                 /* Insert oct or hex escaped character.  There will always be
2982                  * enough room in sv since such escapes will be longer than any
2983                  * UTF-8 sequence they can end up as, except if they force us
2984                  * to recode the rest of the string into utf8 */
2985
2986                 /* Here uv is the ordinal of the next character being added in
2987                  * unicode (converted from native). */
2988                 if (!UNI_IS_INVARIANT(uv)) {
2989                     if (!has_utf8 && uv > 255) {
2990                         /* Might need to recode whatever we have accumulated so
2991                          * far if it contains any chars variant in utf8 or
2992                          * utf-ebcdic. */
2993
2994                         SvCUR_set(sv, d - SvPVX_const(sv));
2995                         SvPOK_on(sv);
2996                         *d = '\0';
2997                         /* See Note on sizing above.  */
2998                         sv_utf8_upgrade_flags_grow(sv,
2999                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3000                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3001                         d = SvPVX(sv) + SvCUR(sv);
3002                         has_utf8 = TRUE;
3003                     }
3004
3005                     if (has_utf8) {
3006                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3007                         if (PL_lex_inwhat == OP_TRANS &&
3008                             PL_sublex_info.sub_op) {
3009                             PL_sublex_info.sub_op->op_private |=
3010                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3011                                              : OPpTRANS_TO_UTF);
3012                         }
3013 #ifdef EBCDIC
3014                         if (uv > 255 && !dorange)
3015                             native_range = FALSE;
3016 #endif
3017                     }
3018                     else {
3019                         *d++ = (char)uv;
3020                     }
3021                 }
3022                 else {
3023                     *d++ = (char) uv;
3024                 }
3025                 continue;
3026
3027             case 'N':
3028                 /* In a non-pattern \N must be a named character, like \N{LATIN
3029                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3030                  * mean to match a non-newline.  For non-patterns, named
3031                  * characters are converted to their string equivalents. In
3032                  * patterns, named characters are not converted to their
3033                  * ultimate forms for the same reasons that other escapes
3034                  * aren't.  Instead, they are converted to the \N{U+...} form
3035                  * to get the value from the charnames that is in effect right
3036                  * now, while preserving the fact that it was a named character
3037                  * so that the regex compiler knows this */
3038
3039                 /* This section of code doesn't generally use the
3040                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3041                  * a close examination of this macro and determined it is a
3042                  * no-op except on utfebcdic variant characters.  Every
3043                  * character generated by this that would normally need to be
3044                  * enclosed by this macro is invariant, so the macro is not
3045                  * needed, and would complicate use of copy().  XXX There are
3046                  * other parts of this file where the macro is used
3047                  * inconsistently, but are saved by it being a no-op */
3048
3049                 /* The structure of this section of code (besides checking for
3050                  * errors and upgrading to utf8) is:
3051                  *  Further disambiguate between the two meanings of \N, and if
3052                  *      not a charname, go process it elsewhere
3053                  *  If of form \N{U+...}, pass it through if a pattern;
3054                  *      otherwise convert to utf8
3055                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3056                  *  pattern; otherwise convert to utf8 */
3057
3058                 /* Here, s points to the 'N'; the test below is guaranteed to
3059                  * succeed if we are being called on a pattern as we already
3060                  * know from a test above that the next character is a '{'.
3061                  * On a non-pattern \N must mean 'named sequence, which
3062                  * requires braces */
3063                 s++;
3064                 if (*s != '{') {
3065                     yyerror("Missing braces on \\N{}");
3066                     continue;
3067                 }
3068                 s++;
3069
3070                 /* If there is no matching '}', it is an error. */
3071                 if (! (e = strchr(s, '}'))) {
3072                     if (! PL_lex_inpat) {
3073                         yyerror("Missing right brace on \\N{}");
3074                     } else {
3075                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3076                     }
3077                     continue;
3078                 }
3079
3080                 /* Here it looks like a named character */
3081
3082                 if (PL_lex_inpat) {
3083
3084                     /* XXX This block is temporary code.  \N{} implies that the
3085                      * pattern is to have Unicode semantics, and therefore
3086                      * currently has to be encoded in utf8.  By putting it in
3087                      * utf8 now, we save a whole pass in the regular expression
3088                      * compiler.  Once that code is changed so Unicode
3089                      * semantics doesn't necessarily have to be in utf8, this
3090                      * block should be removed.  However, the code that parses
3091                      * the output of this would have to be changed to not
3092                      * necessarily expect utf8 */
3093                     if (!has_utf8) {
3094                         SvCUR_set(sv, d - SvPVX_const(sv));
3095                         SvPOK_on(sv);
3096                         *d = '\0';
3097                         /* See Note on sizing above.  */
3098                         sv_utf8_upgrade_flags_grow(sv,
3099                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3100                                         /* 5 = '\N{' + cur char + NUL */
3101                                         (STRLEN)(send - s) + 5);
3102                         d = SvPVX(sv) + SvCUR(sv);
3103                         has_utf8 = TRUE;
3104                     }
3105                 }
3106
3107                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3108                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3109                                 | PERL_SCAN_DISALLOW_PREFIX;
3110                     STRLEN len;
3111
3112                     /* For \N{U+...}, the '...' is a unicode value even on
3113                      * EBCDIC machines */
3114                     s += 2;         /* Skip to next char after the 'U+' */
3115                     len = e - s;
3116                     uv = grok_hex(s, &len, &flags, NULL);
3117                     if (len == 0 || len != (STRLEN)(e - s)) {
3118                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3119                         s = e + 1;
3120                         continue;
3121                     }
3122
3123                     if (PL_lex_inpat) {
3124
3125                         /* Pass through to the regex compiler unchanged.  The
3126                          * reason we evaluated the number above is to make sure
3127                          * there wasn't a syntax error. */
3128                         s -= 5;     /* Include the '\N{U+' */
3129                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3130                         d += e - s + 1;
3131                     }
3132                     else {  /* Not a pattern: convert the hex to string */
3133
3134                          /* If destination is not in utf8, unconditionally
3135                           * recode it to be so.  This is because \N{} implies
3136                           * Unicode semantics, and scalars have to be in utf8
3137                           * to guarantee those semantics */
3138                         if (! has_utf8) {
3139                             SvCUR_set(sv, d - SvPVX_const(sv));
3140                             SvPOK_on(sv);
3141                             *d = '\0';
3142                             /* See Note on sizing above.  */
3143                             sv_utf8_upgrade_flags_grow(
3144                                         sv,
3145                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3146                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3147                             d = SvPVX(sv) + SvCUR(sv);
3148                             has_utf8 = TRUE;
3149                         }
3150
3151                         /* Add the string to the output */
3152                         if (UNI_IS_INVARIANT(uv)) {
3153                             *d++ = (char) uv;
3154                         }
3155                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3156                     }
3157                 }
3158                 else { /* Here is \N{NAME} but not \N{U+...}. */
3159
3160                     SV *res;            /* result from charnames */
3161                     const char *str;    /* the string in 'res' */
3162                     STRLEN len;         /* its length */
3163
3164                     /* Get the value for NAME */
3165                     res = newSVpvn(s, e - s);
3166                     res = new_constant( NULL, 0, "charnames",
3167                                         /* includes all of: \N{...} */
3168                                         res, NULL, s - 3, e - s + 4 );
3169
3170                     /* Most likely res will be in utf8 already since the
3171                      * standard charnames uses pack U, but a custom translator
3172                      * can leave it otherwise, so make sure.  XXX This can be
3173                      * revisited to not have charnames use utf8 for characters
3174                      * that don't need it when regexes don't have to be in utf8
3175                      * for Unicode semantics.  If doing so, remember EBCDIC */
3176                     sv_utf8_upgrade(res);
3177                     str = SvPV_const(res, len);
3178
3179                     /* Don't accept malformed input */
3180                     if (! is_utf8_string((U8 *) str, len)) {
3181                         yyerror("Malformed UTF-8 returned by \\N");
3182                     }
3183                     else if (PL_lex_inpat) {
3184
3185                         if (! len) { /* The name resolved to an empty string */
3186                             Copy("\\N{}", d, 4, char);
3187                             d += 4;
3188                         }
3189                         else {
3190                             /* In order to not lose information for the regex
3191                             * compiler, pass the result in the specially made
3192                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3193                             * the code points in hex of each character
3194                             * returned by charnames */
3195
3196                             const char *str_end = str + len;
3197                             STRLEN char_length;     /* cur char's byte length */
3198                             STRLEN output_length;   /* and the number of bytes
3199                                                        after this is translated
3200                                                        into hex digits */
3201                             const STRLEN off = d - SvPVX_const(sv);
3202
3203                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3204                              * max('U+', '.'); and 1 for NUL */
3205                             char hex_string[2 * UTF8_MAXBYTES + 5];
3206
3207                             /* Get the first character of the result. */
3208                             U32 uv = utf8n_to_uvuni((U8 *) str,
3209                                                     len,
3210                                                     &char_length,
3211                                                     UTF8_ALLOW_ANYUV);
3212
3213                             /* The call to is_utf8_string() above hopefully
3214                              * guarantees that there won't be an error.  But
3215                              * it's easy here to make sure.  The function just
3216                              * above warns and returns 0 if invalid utf8, but
3217                              * it can also return 0 if the input is validly a
3218                              * NUL. Disambiguate */
3219                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3220                                 uv = UNICODE_REPLACEMENT;
3221                             }
3222
3223                             /* Convert first code point to hex, including the
3224                              * boiler plate before it */
3225                             output_length =
3226                                 my_snprintf(hex_string, sizeof(hex_string),
3227                                             "\\N{U+%X", (unsigned int) uv);
3228
3229                             /* Make sure there is enough space to hold it */
3230                             d = off + SvGROW(sv, off
3231                                                  + output_length
3232                                                  + (STRLEN)(send - e)
3233                                                  + 2);  /* '}' + NUL */
3234                             /* And output it */
3235                             Copy(hex_string, d, output_length, char);
3236                             d += output_length;
3237
3238                             /* For each subsequent character, append dot and
3239                              * its ordinal in hex */
3240                             while ((str += char_length) < str_end) {
3241                                 const STRLEN off = d - SvPVX_const(sv);
3242                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3243                                                         str_end - str,
3244                                                         &char_length,
3245                                                         UTF8_ALLOW_ANYUV);
3246                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3247                                     uv = UNICODE_REPLACEMENT;
3248                                 }
3249
3250                                 output_length =
3251                                     my_snprintf(hex_string, sizeof(hex_string),
3252                                                 ".%X", (unsigned int) uv);
3253
3254                                 d = off + SvGROW(sv, off
3255                                                      + output_length
3256                                                      + (STRLEN)(send - e)
3257                                                      + 2);      /* '}' +  NUL */
3258                                 Copy(hex_string, d, output_length, char);
3259                                 d += output_length;
3260                             }
3261
3262                             *d++ = '}'; /* Done.  Add the trailing brace */
3263                         }
3264                     }
3265                     else { /* Here, not in a pattern.  Convert the name to a
3266                             * string. */
3267
3268                          /* If destination is not in utf8, unconditionally
3269                           * recode it to be so.  This is because \N{} implies
3270                           * Unicode semantics, and scalars have to be in utf8
3271                           * to guarantee those semantics */
3272                         if (! has_utf8) {
3273                             SvCUR_set(sv, d - SvPVX_const(sv));
3274                             SvPOK_on(sv);
3275                             *d = '\0';
3276                             /* See Note on sizing above.  */
3277                             sv_utf8_upgrade_flags_grow(sv,
3278                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3279                                                 len + (STRLEN)(send - s) + 1);
3280                             d = SvPVX(sv) + SvCUR(sv);
3281                             has_utf8 = TRUE;
3282                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3283
3284                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3285                              * set correctly here). */
3286                             const STRLEN off = d - SvPVX_const(sv);
3287                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3288                         }
3289                         Copy(str, d, len, char);
3290                         d += len;
3291                     }
3292                     SvREFCNT_dec(res);
3293
3294                     /* Deprecate non-approved name syntax */
3295                     if (ckWARN_d(WARN_DEPRECATED)) {
3296                         bool problematic = FALSE;
3297                         char* i = s;
3298
3299                         /* For non-ut8 input, look to see that the first
3300                          * character is an alpha, then loop through the rest
3301                          * checking that each is a continuation */
3302                         if (! this_utf8) {
3303                             if (! isALPHAU(*i)) problematic = TRUE;
3304                             else for (i = s + 1; i < e; i++) {
3305                                 if (isCHARNAME_CONT(*i)) continue;
3306                                 problematic = TRUE;
3307                                 break;
3308                             }
3309                         }
3310                         else {
3311                             /* Similarly for utf8.  For invariants can check
3312                              * directly.  We accept anything above the latin1
3313                              * range because it is immaterial to Perl if it is
3314                              * correct or not, and is expensive to check.  But
3315                              * it is fairly easy in the latin1 range to convert
3316                              * the variants into a single character and check
3317                              * those */
3318                             if (UTF8_IS_INVARIANT(*i)) {
3319                                 if (! isALPHAU(*i)) problematic = TRUE;
3320                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3321                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3322                                                                             *(i+1)))))
3323                                 {
3324                                     problematic = TRUE;
3325                                 }
3326                             }
3327                             if (! problematic) for (i = s + UTF8SKIP(s);
3328                                                     i < e;
3329                                                     i+= UTF8SKIP(i))
3330                             {
3331                                 if (UTF8_IS_INVARIANT(*i)) {
3332                                     if (isCHARNAME_CONT(*i)) continue;
3333                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3334                                     continue;
3335                                 } else if (isCHARNAME_CONT(
3336                                             UNI_TO_NATIVE(
3337                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3338                                 {
3339                                     continue;
3340                                 }
3341                                 problematic = TRUE;
3342                                 break;
3343                             }
3344                         }
3345                         if (problematic) {
3346                             /* The e-i passed to the final %.*s makes sure that
3347                              * should the trailing NUL be missing that this
3348                              * print won't run off the end of the string */
3349                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3350                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3351                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3352                         }
3353                     }
3354                 } /* End \N{NAME} */
3355 #ifdef EBCDIC
3356                 if (!dorange)
3357                     native_range = FALSE; /* \N{} is defined to be Unicode */
3358 #endif
3359                 s = e + 1;  /* Point to just after the '}' */
3360                 continue;
3361
3362             /* \c is a control character */
3363             case 'c':
3364                 s++;
3365                 if (s < send) {
3366                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3367                 }
3368                 else {
3369                     yyerror("Missing control char name in \\c");
3370                 }
3371                 continue;
3372
3373             /* printf-style backslashes, formfeeds, newlines, etc */
3374             case 'b':
3375                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3376                 break;
3377             case 'n':
3378                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3379                 break;
3380             case 'r':
3381                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3382                 break;
3383             case 'f':
3384                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3385                 break;
3386             case 't':
3387                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3388                 break;
3389             case 'e':
3390                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3391                 break;
3392             case 'a':
3393                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3394                 break;
3395             } /* end switch */
3396
3397             s++;
3398             continue;
3399         } /* end if (backslash) */
3400 #ifdef EBCDIC
3401         else
3402             literal_endpoint++;
3403 #endif
3404
3405     default_action:
3406         /* If we started with encoded form, or already know we want it,
3407            then encode the next character */
3408         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3409             STRLEN len  = 1;
3410
3411
3412             /* One might think that it is wasted effort in the case of the
3413              * source being utf8 (this_utf8 == TRUE) to take the next character
3414              * in the source, convert it to an unsigned value, and then convert
3415              * it back again.  But the source has not been validated here.  The
3416              * routine that does the conversion checks for errors like
3417              * malformed utf8 */
3418
3419             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3420             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3421             if (!has_utf8) {
3422                 SvCUR_set(sv, d - SvPVX_const(sv));
3423                 SvPOK_on(sv);
3424                 *d = '\0';
3425                 /* See Note on sizing above.  */
3426                 sv_utf8_upgrade_flags_grow(sv,
3427                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3428                                         need + (STRLEN)(send - s) + 1);
3429                 d = SvPVX(sv) + SvCUR(sv);
3430                 has_utf8 = TRUE;
3431             } else if (need > len) {
3432                 /* encoded value larger than old, may need extra space (NOTE:
3433                  * SvCUR() is not set correctly here).   See Note on sizing
3434                  * above.  */
3435                 const STRLEN off = d - SvPVX_const(sv);
3436                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3437             }
3438             s += len;
3439
3440             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3441 #ifdef EBCDIC
3442             if (uv > 255 && !dorange)
3443                 native_range = FALSE;
3444 #endif
3445         }
3446         else {
3447             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3448         }
3449     } /* while loop to process each character */
3450
3451     /* terminate the string and set up the sv */
3452     *d = '\0';
3453     SvCUR_set(sv, d - SvPVX_const(sv));
3454     if (SvCUR(sv) >= SvLEN(sv))
3455         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3456
3457     SvPOK_on(sv);
3458     if (PL_encoding && !has_utf8) {
3459         sv_recode_to_utf8(sv, PL_encoding);
3460         if (SvUTF8(sv))
3461             has_utf8 = TRUE;
3462     }
3463     if (has_utf8) {
3464         SvUTF8_on(sv);
3465         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3466             PL_sublex_info.sub_op->op_private |=
3467                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3468         }
3469     }
3470
3471     /* shrink the sv if we allocated more than we used */
3472     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3473         SvPV_shrink_to_cur(sv);
3474     }
3475
3476     /* return the substring (via pl_yylval) only if we parsed anything */
3477     if (s > PL_bufptr) {
3478         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3479             const char *const key = PL_lex_inpat ? "qr" : "q";
3480             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3481             const char *type;
3482             STRLEN typelen;
3483
3484             if (PL_lex_inwhat == OP_TRANS) {
3485                 type = "tr";
3486                 typelen = 2;
3487             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3488                 type = "s";
3489                 typelen = 1;
3490             } else  {
3491                 type = "qq";
3492                 typelen = 2;
3493             }
3494
3495             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3496                                 type, typelen);
3497         }
3498         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3499     } else
3500         SvREFCNT_dec(sv);
3501     return s;
3502 }
3503
3504 /* S_intuit_more
3505  * Returns TRUE if there's more to the expression (e.g., a subscript),
3506  * FALSE otherwise.
3507  *
3508  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3509  *
3510  * ->[ and ->{ return TRUE
3511  * { and [ outside a pattern are always subscripts, so return TRUE
3512  * if we're outside a pattern and it's not { or [, then return FALSE
3513  * if we're in a pattern and the first char is a {
3514  *   {4,5} (any digits around the comma) returns FALSE
3515  * if we're in a pattern and the first char is a [
3516  *   [] returns FALSE
3517  *   [SOMETHING] has a funky algorithm to decide whether it's a
3518  *      character class or not.  It has to deal with things like
3519  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3520  * anything else returns TRUE
3521  */
3522
3523 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3524
3525 STATIC int
3526 S_intuit_more(pTHX_ register char *s)
3527 {
3528     dVAR;
3529
3530     PERL_ARGS_ASSERT_INTUIT_MORE;
3531
3532     if (PL_lex_brackets)
3533         return TRUE;
3534     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3535         return TRUE;
3536     if (*s != '{' && *s != '[')
3537         return FALSE;
3538     if (!PL_lex_inpat)
3539         return TRUE;
3540
3541     /* In a pattern, so maybe we have {n,m}. */
3542     if (*s == '{') {
3543         if (regcurly(s)) {
3544             return FALSE;
3545         }
3546         return TRUE;
3547     }
3548
3549     /* On the other hand, maybe we have a character class */
3550
3551     s++;
3552     if (*s == ']' || *s == '^')
3553         return FALSE;
3554     else {
3555         /* this is terrifying, and it works */
3556         int weight = 2;         /* let's weigh the evidence */
3557         char seen[256];
3558         unsigned char un_char = 255, last_un_char;
3559         const char * const send = strchr(s,']');
3560         char tmpbuf[sizeof PL_tokenbuf * 4];
3561
3562         if (!send)              /* has to be an expression */
3563             return TRUE;
3564
3565         Zero(seen,256,char);
3566         if (*s == '$')
3567             weight -= 3;
3568         else if (isDIGIT(*s)) {
3569             if (s[1] != ']') {
3570                 if (isDIGIT(s[1]) && s[2] == ']')
3571                     weight -= 10;
3572             }
3573             else
3574                 weight -= 100;
3575         }
3576         for (; s < send; s++) {
3577             last_un_char = un_char;
3578             un_char = (unsigned char)*s;
3579             switch (*s) {
3580             case '@':
3581             case '&':
3582             case '$':
3583                 weight -= seen[un_char] * 10;
3584                 if (isALNUM_lazy_if(s+1,UTF)) {
3585                     int len;
3586                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3587                     len = (int)strlen(tmpbuf);
3588                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3589                         weight -= 100;
3590                     else
3591                         weight -= 10;
3592                 }
3593                 else if (*s == '$' && s[1] &&
3594                   strchr("[#!%*<>()-=",s[1])) {
3595                     if (/*{*/ strchr("])} =",s[2]))
3596                         weight -= 10;
3597                     else
3598                         weight -= 1;
3599                 }
3600                 break;
3601             case '\\':
3602                 un_char = 254;
3603                 if (s[1]) {
3604                     if (strchr("wds]",s[1]))
3605                         weight += 100;
3606                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3607                         weight += 1;
3608                     else if (strchr("rnftbxcav",s[1]))
3609                         weight += 40;
3610                     else if (isDIGIT(s[1])) {
3611                         weight += 40;
3612                         while (s[1] && isDIGIT(s[1]))
3613                             s++;
3614                     }
3615                 }
3616                 else
3617                     weight += 100;
3618                 break;
3619             case '-':
3620                 if (s[1] == '\\')
3621                     weight += 50;
3622                 if (strchr("aA01! ",last_un_char))
3623                     weight += 30;
3624                 if (strchr("zZ79~",s[1]))
3625                     weight += 30;
3626                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3627                     weight -= 5;        /* cope with negative subscript */
3628                 break;
3629             default:
3630                 if (!isALNUM(last_un_char)
3631                     && !(last_un_char == '$' || last_un_char == '@'
3632                          || last_un_char == '&')
3633                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3634                     char *d = tmpbuf;
3635                     while (isALPHA(*s))
3636                         *d++ = *s++;
3637                     *d = '\0';
3638                     if (keyword(tmpbuf, d - tmpbuf, 0))
3639                         weight -= 150;
3640                 }
3641                 if (un_char == last_un_char + 1)
3642                     weight += 5;
3643                 weight -= seen[un_char];
3644                 break;
3645             }
3646             seen[un_char]++;
3647         }
3648         if (weight >= 0)        /* probably a character class */
3649             return FALSE;
3650     }
3651
3652     return TRUE;
3653 }
3654
3655 /*
3656  * S_intuit_method
3657  *
3658  * Does all the checking to disambiguate
3659  *   foo bar
3660  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3661  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3662  *
3663  * First argument is the stuff after the first token, e.g. "bar".
3664  *
3665  * Not a method if bar is a filehandle.
3666  * Not a method if foo is a subroutine prototyped to take a filehandle.
3667  * Not a method if it's really "Foo $bar"
3668  * Method if it's "foo $bar"
3669  * Not a method if it's really "print foo $bar"
3670  * Method if it's really "foo package::" (interpreted as package->foo)
3671  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3672  * Not a method if bar is a filehandle or package, but is quoted with
3673  *   =>
3674  */
3675
3676 STATIC int
3677 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3678 {
3679     dVAR;
3680     char *s = start + (*start == '$');
3681     char tmpbuf[sizeof PL_tokenbuf];
3682     STRLEN len;
3683     GV* indirgv;
3684 #ifdef PERL_MAD
3685     int soff;
3686 #endif
3687
3688     PERL_ARGS_ASSERT_INTUIT_METHOD;
3689
3690     if (gv) {
3691         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3692             return 0;
3693         if (cv) {
3694             if (SvPOK(cv)) {
3695                 const char *proto = SvPVX_const(cv);
3696                 if (proto) {
3697                     if (*proto == ';')
3698                         proto++;
3699                     if (*proto == '*')
3700                         return 0;
3701                 }
3702             }
3703         } else
3704             gv = NULL;
3705     }
3706     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3707     /* start is the beginning of the possible filehandle/object,
3708      * and s is the end of it
3709      * tmpbuf is a copy of it
3710      */
3711
3712     if (*start == '$') {
3713         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3714                 isUPPER(*PL_tokenbuf))
3715             return 0;
3716 #ifdef PERL_MAD
3717         len = start - SvPVX(PL_linestr);
3718 #endif
3719         s = PEEKSPACE(s);
3720 #ifdef PERL_MAD
3721         start = SvPVX(PL_linestr) + len;
3722 #endif
3723         PL_bufptr = start;
3724         PL_expect = XREF;
3725         return *s == '(' ? FUNCMETH : METHOD;
3726     }
3727     if (!keyword(tmpbuf, len, 0)) {
3728         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3729             len -= 2;
3730             tmpbuf[len] = '\0';
3731 #ifdef PERL_MAD
3732             soff = s - SvPVX(PL_linestr);
3733 #endif
3734             goto bare_package;
3735         }
3736         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3737         if (indirgv && GvCVu(indirgv))
3738             return 0;
3739         /* filehandle or package name makes it a method */
3740         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3741 #ifdef PERL_MAD
3742             soff = s - SvPVX(PL_linestr);
3743 #endif
3744             s = PEEKSPACE(s);
3745             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3746                 return 0;       /* no assumptions -- "=>" quotes bareword */
3747       bare_package:
3748             start_force(PL_curforce);
3749             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3750                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3751             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3752             if (PL_madskills)
3753                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3754             PL_expect = XTERM;
3755             force_next(WORD);
3756             PL_bufptr = s;
3757 #ifdef PERL_MAD
3758             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3759 #endif
3760             return *s == '(' ? FUNCMETH : METHOD;
3761         }
3762     }
3763     return 0;
3764 }
3765
3766 /* Encoded script support. filter_add() effectively inserts a
3767  * 'pre-processing' function into the current source input stream.
3768  * Note that the filter function only applies to the current source file
3769  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3770  *
3771  * The datasv parameter (which may be NULL) can be used to pass
3772  * private data to this instance of the filter. The filter function
3773  * can recover the SV using the FILTER_DATA macro and use it to
3774  * store private buffers and state information.
3775  *
3776  * The supplied datasv parameter is upgraded to a PVIO type
3777  * and the IoDIRP/IoANY field is used to store the function pointer,
3778  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3779  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3780  * private use must be set using malloc'd pointers.
3781  */
3782
3783 SV *
3784 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3785 {
3786     dVAR;
3787     if (!funcp)
3788         return NULL;
3789
3790     if (!PL_parser)
3791         return NULL;
3792
3793     if (!PL_rsfp_filters)
3794         PL_rsfp_filters = newAV();
3795     if (!datasv)
3796         datasv = newSV(0);
3797     SvUPGRADE(datasv, SVt_PVIO);
3798     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3799     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3800     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3801                           FPTR2DPTR(void *, IoANY(datasv)),
3802                           SvPV_nolen(datasv)));
3803     av_unshift(PL_rsfp_filters, 1);
3804     av_store(PL_rsfp_filters, 0, datasv) ;
3805     return(datasv);
3806 }
3807
3808
3809 /* Delete most recently added instance of this filter function. */
3810 void
3811 Perl_filter_del(pTHX_ filter_t funcp)
3812 {
3813     dVAR;
3814     SV *datasv;
3815
3816     PERL_ARGS_ASSERT_FILTER_DEL;
3817
3818 #ifdef DEBUGGING
3819     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3820                           FPTR2DPTR(void*, funcp)));
3821 #endif
3822     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3823         return;
3824     /* if filter is on top of stack (usual case) just pop it off */
3825     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3826     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3827         sv_free(av_pop(PL_rsfp_filters));
3828
3829         return;
3830     }
3831     /* we need to search for the correct entry and clear it     */
3832     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3833 }
3834
3835
3836 /* Invoke the idxth filter function for the current rsfp.        */
3837 /* maxlen 0 = read one text line */
3838 I32
3839 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3840 {
3841     dVAR;
3842     filter_t funcp;
3843     SV *datasv = NULL;
3844     /* This API is bad. It should have been using unsigned int for maxlen.
3845        Not sure if we want to change the API, but if not we should sanity
3846        check the value here.  */
3847     const unsigned int correct_length
3848         = maxlen < 0 ?
3849 #ifdef PERL_MICRO
3850         0x7FFFFFFF
3851 #else
3852         INT_MAX
3853 #endif
3854         : maxlen;
3855
3856     PERL_ARGS_ASSERT_FILTER_READ;
3857
3858     if (!PL_parser || !PL_rsfp_filters)
3859         return -1;
3860     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3861         /* Provide a default input filter to make life easy.    */
3862         /* Note that we append to the line. This is handy.      */
3863         DEBUG_P(PerlIO_printf(Perl_debug_log,
3864                               "filter_read %d: from rsfp\n", idx));
3865         if (correct_length) {
3866             /* Want a block */
3867             int len ;
3868             const int old_len = SvCUR(buf_sv);
3869
3870             /* ensure buf_sv is large enough */
3871             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3872             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3873                                    correct_length)) <= 0) {
3874                 if (PerlIO_error(PL_rsfp))
3875                     return -1;          /* error */
3876                 else
3877                     return 0 ;          /* end of file */
3878             }
3879             SvCUR_set(buf_sv, old_len + len) ;
3880             SvPVX(buf_sv)[old_len + len] = '\0';
3881         } else {
3882             /* Want a line */
3883             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3884                 if (PerlIO_error(PL_rsfp))
3885                     return -1;          /* error */
3886                 else
3887                     return 0 ;          /* end of file */
3888             }
3889         }
3890         return SvCUR(buf_sv);
3891     }
3892     /* Skip this filter slot if filter has been deleted */
3893     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3894         DEBUG_P(PerlIO_printf(Perl_debug_log,
3895                               "filter_read %d: skipped (filter deleted)\n",
3896                               idx));
3897         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3898     }
3899     /* Get function pointer hidden within datasv        */
3900     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3901     DEBUG_P(PerlIO_printf(Perl_debug_log,
3902                           "filter_read %d: via function %p (%s)\n",
3903                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3904     /* Call function. The function is expected to       */
3905     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3906     /* Return: <0:error, =0:eof, >0:not eof             */
3907     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3908 }
3909
3910 STATIC char *
3911 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3912 {
3913     dVAR;
3914
3915     PERL_ARGS_ASSERT_FILTER_GETS;
3916
3917 #ifdef PERL_CR_FILTER
3918     if (!PL_rsfp_filters) {
3919         filter_add(S_cr_textfilter,NULL);
3920     }
3921 #endif
3922     if (PL_rsfp_filters) {
3923         if (!append)
3924             SvCUR_set(sv, 0);   /* start with empty line        */
3925         if (FILTER_READ(0, sv, 0) > 0)
3926             return ( SvPVX(sv) ) ;
3927         else
3928             return NULL ;
3929     }
3930     else
3931         return (sv_gets(sv, PL_rsfp, append));
3932 }
3933
3934 STATIC HV *
3935 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3936 {
3937     dVAR;
3938     GV *gv;
3939
3940     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3941
3942     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3943         return PL_curstash;
3944
3945     if (len > 2 &&
3946         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3947         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3948     {
3949         return GvHV(gv);                        /* Foo:: */
3950     }
3951
3952     /* use constant CLASS => 'MyClass' */
3953     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3954     if (gv && GvCV(gv)) {
3955         SV * const sv = cv_const_sv(GvCV(gv));
3956         if (sv)
3957             pkgname = SvPV_const(sv, len);
3958     }
3959
3960     return gv_stashpvn(pkgname, len, 0);
3961 }
3962
3963 /*
3964  * S_readpipe_override
3965  * Check whether readpipe() is overridden, and generates the appropriate
3966  * optree, provided sublex_start() is called afterwards.
3967  */
3968 STATIC void
3969 S_readpipe_override(pTHX)
3970 {
3971     GV **gvp;
3972     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3973     pl_yylval.ival = OP_BACKTICK;
3974     if ((gv_readpipe
3975                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3976             ||
3977             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3978              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3979              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3980     {
3981         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3982             op_append_elem(OP_LIST,
3983                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3984                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3985     }
3986 }
3987
3988 #ifdef PERL_MAD
3989  /*
3990  * Perl_madlex
3991  * The intent of this yylex wrapper is to minimize the changes to the
3992  * tokener when we aren't interested in collecting madprops.  It remains
3993  * to be seen how successful this strategy will be...
3994  */
3995
3996 int
3997 Perl_madlex(pTHX)
3998 {
3999     int optype;
4000     char *s = PL_bufptr;
4001
4002     /* make sure PL_thiswhite is initialized */
4003     PL_thiswhite = 0;
4004     PL_thismad = 0;
4005
4006     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4007     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4008         return S_pending_ident(aTHX);
4009
4010     /* previous token ate up our whitespace? */
4011     if (!PL_lasttoke && PL_nextwhite) {
4012         PL_thiswhite = PL_nextwhite;
4013         PL_nextwhite = 0;
4014     }
4015
4016     /* isolate the token, and figure out where it is without whitespace */
4017     PL_realtokenstart = -1;
4018     PL_thistoken = 0;
4019     optype = yylex();
4020     s = PL_bufptr;
4021     assert(PL_curforce < 0);
4022
4023     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4024         if (!PL_thistoken) {
4025             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4026                 PL_thistoken = newSVpvs("");
4027             else {
4028                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4029                 PL_thistoken = newSVpvn(tstart, s - tstart);
4030             }
4031         }
4032         if (PL_thismad) /* install head */
4033             CURMAD('X', PL_thistoken);
4034     }
4035
4036     /* last whitespace of a sublex? */
4037     if (optype == ')' && PL_endwhite) {
4038         CURMAD('X', PL_endwhite);
4039     }
4040
4041     if (!PL_thismad) {
4042
4043         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4044         if (!PL_thiswhite && !PL_endwhite && !optype) {
4045             sv_free(PL_thistoken);
4046             PL_thistoken = 0;
4047             return 0;
4048         }
4049
4050         /* put off final whitespace till peg */
4051         if (optype == ';' && !PL_rsfp) {
4052             PL_nextwhite = PL_thiswhite;
4053             PL_thiswhite = 0;
4054         }
4055         else if (PL_thisopen) {
4056             CURMAD('q', PL_thisopen);
4057             if (PL_thistoken)
4058                 sv_free(PL_thistoken);
4059             PL_thistoken = 0;
4060         }
4061         else {
4062             /* Store actual token text as madprop X */
4063             CURMAD('X', PL_thistoken);
4064         }
4065
4066         if (PL_thiswhite) {
4067             /* add preceding whitespace as madprop _ */
4068             CURMAD('_', PL_thiswhite);
4069         }
4070
4071         if (PL_thisstuff) {
4072             /* add quoted material as madprop = */
4073             CURMAD('=', PL_thisstuff);
4074         }
4075
4076         if (PL_thisclose) {
4077             /* add terminating quote as madprop Q */
4078             CURMAD('Q', PL_thisclose);
4079         }
4080     }
4081
4082     /* special processing based on optype */
4083
4084     switch (optype) {
4085
4086     /* opval doesn't need a TOKEN since it can already store mp */
4087     case WORD:
4088     case METHOD:
4089     case FUNCMETH:
4090     case THING:
4091     case PMFUNC:
4092     case PRIVATEREF:
4093     case FUNC0SUB:
4094     case UNIOPSUB:
4095     case LSTOPSUB:
4096         if (pl_yylval.opval)
4097             append_madprops(PL_thismad, pl_yylval.opval, 0);
4098         PL_thismad = 0;
4099         return optype;
4100
4101     /* fake EOF */
4102     case 0:
4103         optype = PEG;
4104         if (PL_endwhite) {
4105             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4106             PL_endwhite = 0;
4107         }
4108         break;
4109
4110     case ']':
4111     case '}':
4112         if (PL_faketokens)
4113             break;
4114         /* remember any fake bracket that lexer is about to discard */
4115         if (PL_lex_brackets == 1 &&
4116             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4117         {
4118             s = PL_bufptr;
4119             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4120                 s++;
4121             if (*s == '}') {
4122                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4123                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4124                 PL_thiswhite = 0;
4125                 PL_bufptr = s - 1;
4126                 break;  /* don't bother looking for trailing comment */
4127             }
4128             else
4129                 s = PL_bufptr;
4130         }
4131         if (optype == ']')
4132             break;
4133         /* FALLTHROUGH */
4134
4135     /* attach a trailing comment to its statement instead of next token */
4136     case ';':
4137         if (PL_faketokens)
4138             break;
4139         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4140             s = PL_bufptr;
4141             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4142                 s++;
4143             if (*s == '\n' || *s == '#') {
4144                 while (s < PL_bufend && *s != '\n')
4145                     s++;
4146                 if (s < PL_bufend)
4147                     s++;
4148                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4149                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4150                 PL_thiswhite = 0;
4151                 PL_bufptr = s;
4152             }
4153         }
4154         break;
4155
4156     /* pval */
4157     case LABEL:
4158         break;
4159
4160     /* ival */
4161     default:
4162         break;
4163
4164     }
4165
4166     /* Create new token struct.  Note: opvals return early above. */
4167     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4168     PL_thismad = 0;
4169     return optype;
4170 }
4171 #endif
4172
4173 STATIC char *
4174 S_tokenize_use(pTHX_ int is_use, char *s) {
4175     dVAR;
4176
4177     PERL_ARGS_ASSERT_TOKENIZE_USE;
4178
4179     if (PL_expect != XSTATE)
4180         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4181                     is_use ? "use" : "no"));
4182     s = SKIPSPACE1(s);
4183     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4184         s = force_version(s, TRUE);
4185         if (*s == ';' || *s == '}'
4186                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4187             start_force(PL_curforce);
4188             NEXTVAL_NEXTTOKE.opval = NULL;
4189             force_next(WORD);
4190         }
4191         else if (*s == 'v') {
4192             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4193             s = force_version(s, FALSE);
4194         }
4195     }
4196     else {
4197         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4198         s = force_version(s, FALSE);
4199     }
4200     pl_yylval.ival = is_use;
4201     return s;
4202 }
4203 #ifdef DEBUGGING
4204     static const char* const exp_name[] =
4205         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4206           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4207         };
4208 #endif
4209
4210 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4211 STATIC bool
4212 S_word_takes_any_delimeter(char *p, STRLEN len)
4213 {
4214     return (len == 1 && strchr("msyq", p[0])) ||
4215            (len == 2 && (
4216             (p[0] == 't' && p[1] == 'r') ||
4217             (p[0] == 'q' && strchr("qwxr", p[1]))));
4218 }
4219
4220 /*
4221   yylex
4222
4223   Works out what to call the token just pulled out of the input
4224   stream.  The yacc parser takes care of taking the ops we return and
4225   stitching them into a tree.
4226
4227   Returns:
4228     PRIVATEREF
4229
4230   Structure:
4231       if read an identifier
4232           if we're in a my declaration
4233               croak if they tried to say my($foo::bar)
4234               build the ops for a my() declaration
4235           if it's an access to a my() variable
4236               are we in a sort block?
4237                   croak if my($a); $a <=> $b
4238               build ops for access to a my() variable
4239           if in a dq string, and they've said @foo and we can't find @foo
4240               croak
4241           build ops for a bareword
4242       if we already built the token before, use it.
4243 */
4244
4245
4246 #ifdef __SC__
4247 #pragma segment Perl_yylex
4248 #endif
4249 int
4250 Perl_yylex(pTHX)
4251 {
4252     dVAR;
4253     register char *s = PL_bufptr;
4254     register char *d;
4255     STRLEN len;
4256     bool bof = FALSE;
4257     U32 fake_eof = 0;
4258
4259     /* orig_keyword, gvp, and gv are initialized here because
4260      * jump to the label just_a_word_zero can bypass their
4261      * initialization later. */
4262     I32 orig_keyword = 0;
4263     GV *gv = NULL;
4264     GV **gvp = NULL;
4265
4266     DEBUG_T( {
4267         SV* tmp = newSVpvs("");
4268         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4269             (IV)CopLINE(PL_curcop),
4270             lex_state_names[PL_lex_state],
4271             exp_name[PL_expect],
4272             pv_display(tmp, s, strlen(s), 0, 60));
4273         SvREFCNT_dec(tmp);
4274     } );
4275     /* check if there's an identifier for us to look at */
4276     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4277         return REPORT(S_pending_ident(aTHX));
4278
4279     /* no identifier pending identification */
4280
4281     switch (PL_lex_state) {
4282 #ifdef COMMENTARY
4283     case LEX_NORMAL:            /* Some compilers will produce faster */
4284     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4285         break;
4286 #endif
4287
4288     /* when we've already built the next token, just pull it out of the queue */
4289     case LEX_KNOWNEXT:
4290 #ifdef PERL_MAD
4291         PL_lasttoke--;
4292         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4293         if (PL_madskills) {
4294             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4295             PL_nexttoke[PL_lasttoke].next_mad = 0;
4296             if (PL_thismad && PL_thismad->mad_key == '_') {
4297                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4298                 PL_thismad->mad_val = 0;
4299                 mad_free(PL_thismad);
4300                 PL_thismad = 0;
4301             }
4302         }
4303         if (!PL_lasttoke) {
4304             PL_lex_state = PL_lex_defer;
4305             PL_expect = PL_lex_expect;
4306             PL_lex_defer = LEX_NORMAL;
4307             if (!PL_nexttoke[PL_lasttoke].next_type)
4308                 return yylex();
4309         }
4310 #else
4311         PL_nexttoke--;
4312         pl_yylval = PL_nextval[PL_nexttoke];
4313         if (!PL_nexttoke) {
4314             PL_lex_state = PL_lex_defer;
4315             PL_expect = PL_lex_expect;
4316             PL_lex_defer = LEX_NORMAL;
4317         }
4318 #endif
4319         {
4320             I32 next_type;
4321 #ifdef PERL_MAD
4322             next_type = PL_nexttoke[PL_lasttoke].next_type;
4323 #else
4324             next_type = PL_nexttype[PL_nexttoke];
4325 #endif
4326             if (next_type & (7<<24)) {
4327                 if (next_type & (1<<24)) {
4328                     if (PL_lex_brackets > 100)
4329                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4330                     PL_lex_brackstack[PL_lex_brackets++] =
4331                         (char) ((next_type >> 16) & 0xff);
4332                 }
4333                 if (next_type & (2<<24))
4334                     PL_lex_allbrackets++;
4335                 if (next_type & (4<<24))
4336                     PL_lex_allbrackets--;
4337                 next_type &= 0xffff;
4338             }
4339 #ifdef PERL_MAD
4340             /* FIXME - can these be merged?  */
4341             return next_type;
4342 #else
4343             return REPORT(next_type);
4344 #endif
4345         }
4346
4347     /* interpolated case modifiers like \L \U, including \Q and \E.
4348        when we get here, PL_bufptr is at the \
4349     */
4350     case LEX_INTERPCASEMOD:
4351 #ifdef DEBUGGING
4352         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4353             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4354 #endif
4355         /* handle \E or end of string */
4356         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4357             /* if at a \E */
4358             if (PL_lex_casemods) {
4359                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4360                 PL_lex_casestack[PL_lex_casemods] = '\0';
4361
4362                 if (PL_bufptr != PL_bufend
4363                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4364                     PL_bufptr += 2;
4365                     PL_lex_state = LEX_INTERPCONCAT;
4366 #ifdef PERL_MAD
4367                     if (PL_madskills)
4368                         PL_thistoken = newSVpvs("\\E");
4369 #endif
4370                 }
4371                 PL_lex_allbrackets--;
4372                 return REPORT(')');
4373             }
4374 #ifdef PERL_MAD
4375             while (PL_bufptr != PL_bufend &&
4376               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4377                 if (!PL_thiswhite)
4378                     PL_thiswhite = newSVpvs("");
4379                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4380                 PL_bufptr += 2;
4381             }
4382 #else
4383             if (PL_bufptr != PL_bufend)
4384                 PL_bufptr += 2;
4385 #endif
4386             PL_lex_state = LEX_INTERPCONCAT;
4387             return yylex();
4388         }
4389         else {
4390             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4391               "### Saw case modifier\n"); });
4392             s = PL_bufptr + 1;
4393             if (s[1] == '\\' && s[2] == 'E') {
4394 #ifdef PERL_MAD
4395                 if (!PL_thiswhite)
4396                     PL_thiswhite = newSVpvs("");
4397                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4398 #endif
4399                 PL_bufptr = s + 3;
4400                 PL_lex_state = LEX_INTERPCONCAT;
4401                 return yylex();
4402             }
4403             else {
4404                 I32 tmp;
4405                 if (!PL_madskills) /* when just compiling don't need correct */
4406                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4407                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4408                 if ((*s == 'L' || *s == 'U') &&
4409                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4410                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4411                     PL_lex_allbrackets--;
4412                     return REPORT(')');
4413                 }
4414                 if (PL_lex_casemods > 10)
4415                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4416                 PL_lex_casestack[PL_lex_casemods++] = *s;
4417                 PL_lex_casestack[PL_lex_casemods] = '\0';
4418                 PL_lex_state = LEX_INTERPCONCAT;
4419                 start_force(PL_curforce);
4420                 NEXTVAL_NEXTTOKE.ival = 0;
4421                 force_next((2<<24)|'(');
4422                 start_force(PL_curforce);
4423                 if (*s == 'l')
4424                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4425                 else if (*s == 'u')
4426                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4427                 else if (*s == 'L')
4428                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4429                 else if (*s == 'U')
4430                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4431                 else if (*s == 'Q')
4432                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4433                 else
4434                     Perl_croak(aTHX_ "panic: yylex");
4435                 if (PL_madskills) {
4436                     SV* const tmpsv = newSVpvs("\\ ");
4437                     /* replace the space with the character we want to escape
4438                      */
4439                     SvPVX(tmpsv)[1] = *s;
4440                     curmad('_', tmpsv);
4441                 }
4442                 PL_bufptr = s + 1;
4443             }
4444             force_next(FUNC);
4445             if (PL_lex_starts) {
4446                 s = PL_bufptr;
4447                 PL_lex_starts = 0;
4448 #ifdef PERL_MAD
4449                 if (PL_madskills) {
4450                     if (PL_thistoken)
4451                         sv_free(PL_thistoken);
4452                     PL_thistoken = newSVpvs("");
4453                 }
4454 #endif
4455                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4456                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4457                     OPERATOR(',');
4458                 else
4459                     Aop(OP_CONCAT);
4460             }
4461             else
4462                 return yylex();
4463         }
4464
4465     case LEX_INTERPPUSH:
4466         return REPORT(sublex_push());
4467
4468     case LEX_INTERPSTART:
4469         if (PL_bufptr == PL_bufend)
4470             return REPORT(sublex_done());
4471         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4472               "### Interpolated variable\n"); });
4473         PL_expect = XTERM;
4474         PL_lex_dojoin = (*PL_bufptr == '@');
4475         PL_lex_state = LEX_INTERPNORMAL;
4476         if (PL_lex_dojoin) {
4477             start_force(PL_curforce);
4478             NEXTVAL_NEXTTOKE.ival = 0;
4479             force_next(',');
4480             start_force(PL_curforce);
4481             force_ident("\"", '$');
4482             start_force(PL_curforce);
4483             NEXTVAL_NEXTTOKE.ival = 0;
4484             force_next('$');
4485             start_force(PL_curforce);
4486             NEXTVAL_NEXTTOKE.ival = 0;
4487             force_next((2<<24)|'(');
4488             start_force(PL_curforce);
4489             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4490             force_next(FUNC);
4491         }
4492         if (PL_lex_starts++) {
4493             s = PL_bufptr;
4494 #ifdef PERL_MAD
4495             if (PL_madskills) {
4496                 if (PL_thistoken)
4497                     sv_free(PL_thistoken);
4498                 PL_thistoken = newSVpvs("");
4499             }
4500 #endif
4501             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4502             if (!PL_lex_casemods && PL_lex_inpat)
4503                 OPERATOR(',');
4504             else
4505                 Aop(OP_CONCAT);
4506         }
4507         return yylex();
4508
4509     case LEX_INTERPENDMAYBE:
4510         if (intuit_more(PL_bufptr)) {
4511             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4512             break;
4513         }
4514         /* FALL THROUGH */
4515
4516     case LEX_INTERPEND:
4517         if (PL_lex_dojoin) {
4518             PL_lex_dojoin = FALSE;
4519             PL_lex_state = LEX_INTERPCONCAT;
4520 #ifdef PERL_MAD
4521             if (PL_madskills) {
4522                 if (PL_thistoken)
4523                     sv_free(PL_thistoken);
4524                 PL_thistoken = newSVpvs("");
4525             }
4526 #endif
4527             PL_lex_allbrackets--;
4528             return REPORT(')');
4529         }
4530         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4531             && SvEVALED(PL_lex_repl))
4532         {
4533             if (PL_bufptr != PL_bufend)
4534                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4535             PL_lex_repl = NULL;
4536         }
4537         /* FALLTHROUGH */
4538     case LEX_INTERPCONCAT:
4539 #ifdef DEBUGGING
4540         if (PL_lex_brackets)
4541             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4542 #endif
4543         if (PL_bufptr == PL_bufend)
4544             return REPORT(sublex_done());
4545
4546         if (SvIVX(PL_linestr) == '\'') {
4547             SV *sv = newSVsv(PL_linestr);
4548             if (!PL_lex_inpat)
4549                 sv = tokeq(sv);
4550             else if ( PL_hints & HINT_NEW_RE )
4551                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4552             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4553             s = PL_bufend;
4554         }
4555         else {
4556             s = scan_const(PL_bufptr);
4557             if (*s == '\\')
4558                 PL_lex_state = LEX_INTERPCASEMOD;
4559             else
4560                 PL_lex_state = LEX_INTERPSTART;
4561         }
4562
4563         if (s != PL_bufptr) {
4564             start_force(PL_curforce);
4565             if (PL_madskills) {
4566                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4567             }
4568             NEXTVAL_NEXTTOKE = pl_yylval;
4569             PL_expect = XTERM;
4570             force_next(THING);
4571             if (PL_lex_starts++) {
4572 #ifdef PERL_MAD
4573                 if (PL_madskills) {
4574                     if (PL_thistoken)
4575                         sv_free(PL_thistoken);
4576                     PL_thistoken = newSVpvs("");
4577                 }
4578 #endif
4579                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4580                 if (!PL_lex_casemods && PL_lex_inpat)
4581                     OPERATOR(',');
4582                 else
4583                     Aop(OP_CONCAT);
4584             }
4585             else {
4586                 PL_bufptr = s;
4587                 return yylex();
4588             }
4589         }
4590
4591         return yylex();
4592     case LEX_FORMLINE:
4593         PL_lex_state = LEX_NORMAL;
4594         s = scan_formline(PL_bufptr);
4595         if (!PL_lex_formbrack)
4596             goto rightbracket;
4597         OPERATOR(';');
4598     }
4599
4600     s = PL_bufptr;
4601     PL_oldoldbufptr = PL_oldbufptr;
4602     PL_oldbufptr = s;
4603
4604   retry:
4605 #ifdef PERL_MAD
4606     if (PL_thistoken) {
4607         sv_free(PL_thistoken);
4608         PL_thistoken = 0;
4609     }
4610     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4611 #endif
4612     switch (*s) {
4613     default:
4614         if (isIDFIRST_lazy_if(s,UTF))
4615             goto keylookup;
4616         {
4617         unsigned char c = *s;
4618         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4619         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4620             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4621         } else {
4622             d = PL_linestart;
4623         }
4624         *s = '\0';
4625         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4626     }
4627     case 4:
4628     case 26:
4629         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4630     case 0:
4631 #ifdef PERL_MAD
4632         if (PL_madskills)
4633             PL_faketokens = 0;
4634 #endif
4635         if (!PL_rsfp) {
4636             PL_last_uni = 0;
4637             PL_last_lop = 0;
4638             if (PL_lex_brackets &&
4639                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4640                 yyerror((const char *)
4641                         (PL_lex_formbrack
4642                          ? "Format not terminated"
4643                          : "Missing right curly or square bracket"));
4644             }
4645             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4646                         "### Tokener got EOF\n");
4647             } );
4648             TOKEN(0);
4649         }
4650         if (s++ < PL_bufend)
4651             goto retry;                 /* ignore stray nulls */
4652         PL_last_uni = 0;
4653         PL_last_lop = 0;
4654         if (!PL_in_eval && !PL_preambled) {
4655             PL_preambled = TRUE;
4656 #ifdef PERL_MAD
4657             if (PL_madskills)
4658                 PL_faketokens = 1;
4659 #endif
4660             if (PL_perldb) {
4661                 /* Generate a string of Perl code to load the debugger.
4662                  * If PERL5DB is set, it will return the contents of that,
4663                  * otherwise a compile-time require of perl5db.pl.  */
4664
4665                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4666
4667                 if (pdb) {
4668                     sv_setpv(PL_linestr, pdb);
4669                     sv_catpvs(PL_linestr,";");
4670                 } else {
4671                     SETERRNO(0,SS_NORMAL);
4672                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4673                 }
4674             } else
4675                 sv_setpvs(PL_linestr,"");
4676             if (PL_preambleav) {
4677                 SV **svp = AvARRAY(PL_preambleav);
4678                 SV **const end = svp + AvFILLp(PL_preambleav);
4679                 while(svp <= end) {
4680                     sv_catsv(PL_linestr, *svp);
4681                     ++svp;
4682                     sv_catpvs(PL_linestr, ";");
4683                 }
4684                 sv_free(MUTABLE_SV(PL_preambleav));
4685                 PL_preambleav = NULL;
4686             }
4687             if (PL_minus_E)
4688                 sv_catpvs(PL_linestr,
4689                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4690             if (PL_minus_n || PL_minus_p) {
4691                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4692                 if (PL_minus_l)
4693                     sv_catpvs(PL_linestr,"chomp;");
4694                 if (PL_minus_a) {
4695                     if (PL_minus_F) {
4696                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4697                              || *PL_splitstr == '"')
4698                               && strchr(PL_splitstr + 1, *PL_splitstr))
4699                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4700                         else {
4701                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4702                                bytes can be used as quoting characters.  :-) */
4703                             const char *splits = PL_splitstr;
4704                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4705                             do {
4706                                 /* Need to \ \s  */
4707                                 if (*splits == '\\')
4708                                     sv_catpvn(PL_linestr, splits, 1);
4709                                 sv_catpvn(PL_linestr, splits, 1);
4710                             } while (*splits++);
4711                             /* This loop will embed the trailing NUL of
4712                                PL_linestr as the last thing it does before
4713                                terminating.  */
4714                             sv_catpvs(PL_linestr, ");");
4715                         }
4716                     }
4717                     else
4718                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4719                 }
4720             }
4721             sv_catpvs(PL_linestr, "\n");
4722             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4723             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4724             PL_last_lop = PL_last_uni = NULL;
4725             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4726                 update_debugger_info(PL_linestr, NULL, 0);
4727             goto retry;
4728         }
4729         do {
4730             fake_eof = 0;
4731             bof = PL_rsfp ? TRUE : FALSE;
4732             if (0) {
4733               fake_eof:
4734                 fake_eof = LEX_FAKE_EOF;
4735             }
4736             PL_bufptr = PL_bufend;
4737             CopLINE_inc(PL_curcop);
4738             if (!lex_next_chunk(fake_eof)) {
4739                 CopLINE_dec(PL_curcop);
4740                 s = PL_bufptr;
4741                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4742             }
4743             CopLINE_dec(PL_curcop);
4744 #ifdef PERL_MAD
4745             if (!PL_rsfp)
4746                 PL_realtokenstart = -1;
4747 #endif
4748             s = PL_bufptr;
4749             /* If it looks like the start of a BOM or raw UTF-16,
4750              * check if it in fact is. */
4751             if (bof && PL_rsfp &&
4752                      (*s == 0 ||
4753                       *(U8*)s == 0xEF ||
4754                       *(U8*)s >= 0xFE ||
4755                       s[1] == 0)) {
4756                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4757                 if (bof) {
4758                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4759                     s = swallow_bom((U8*)s);
4760                 }
4761             }
4762             if (PL_parser->in_pod) {
4763                 /* Incest with pod. */
4764 #ifdef PERL_MAD
4765                 if (PL_madskills)
4766                     sv_catsv(PL_thiswhite, PL_linestr);
4767 #endif
4768                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4769                     sv_setpvs(PL_linestr, "");
4770                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4771                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4772                     PL_last_lop = PL_last_uni = NULL;
4773                     PL_parser->in_pod = 0;
4774                 }
4775             }
4776             if (PL_rsfp)
4777                 incline(s);
4778         } while (PL_parser->in_pod);
4779         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4780         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4781         PL_last_lop = PL_last_uni = NULL;
4782         if (CopLINE(PL_curcop) == 1) {
4783             while (s < PL_bufend && isSPACE(*s))
4784                 s++;
4785             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4786                 s++;
4787 #ifdef PERL_MAD
4788             if (PL_madskills)
4789                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4790 #endif
4791             d = NULL;
4792             if (!PL_in_eval) {
4793                 if (*s == '#' && *(s+1) == '!')
4794                     d = s + 2;
4795 #ifdef ALTERNATE_SHEBANG
4796                 else {
4797                     static char const as[] = ALTERNATE_SHEBANG;
4798                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4799                         d = s + (sizeof(as) - 1);
4800                 }
4801 #endif /* ALTERNATE_SHEBANG */
4802             }
4803             if (d) {
4804                 char *ipath;
4805                 char *ipathend;
4806
4807                 while (isSPACE(*d))
4808                     d++;
4809                 ipath = d;
4810                 while (*d && !isSPACE(*d))
4811                     d++;
4812                 ipathend = d;
4813
4814 #ifdef ARG_ZERO_IS_SCRIPT
4815                 if (ipathend > ipath) {
4816                     /*
4817                      * HP-UX (at least) sets argv[0] to the script name,
4818                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4819                      * at least, set argv[0] to the basename of the Perl
4820                      * interpreter. So, having found "#!", we'll set it right.
4821                      */
4822                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4823                                                     SVt_PV)); /* $^X */
4824                     assert(SvPOK(x) || SvGMAGICAL(x));
4825                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4826                         sv_setpvn(x, ipath, ipathend - ipath);
4827                         SvSETMAGIC(x);
4828                     }
4829                     else {
4830                         STRLEN blen;
4831                         STRLEN llen;
4832                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4833                         const char * const lstart = SvPV_const(x,llen);
4834                         if (llen < blen) {
4835                             bstart += blen - llen;
4836                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4837                                 sv_setpvn(x, ipath, ipathend - ipath);
4838                                 SvSETMAGIC(x);
4839                             }
4840                         }
4841                     }
4842                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4843                 }
4844 #endif /* ARG_ZERO_IS_SCRIPT */
4845
4846                 /*
4847                  * Look for options.
4848                  */
4849                 d = instr(s,"perl -");
4850                 if (!d) {
4851                     d = instr(s,"perl");
4852 #if defined(DOSISH)
4853                     /* avoid getting into infinite loops when shebang
4854                      * line contains "Perl" rather than "perl" */
4855                     if (!d) {
4856                         for (d = ipathend-4; d >= ipath; --d) {
4857                             if ((*d == 'p' || *d == 'P')
4858                                 && !ibcmp(d, "perl", 4))
4859                             {
4860                                 break;
4861                             }
4862                         }
4863                         if (d < ipath)
4864                             d = NULL;
4865                     }
4866 #endif
4867                 }
4868 #ifdef ALTERNATE_SHEBANG
4869                 /*
4870                  * If the ALTERNATE_SHEBANG on this system starts with a
4871                  * character that can be part of a Perl expression, then if
4872                  * we see it but not "perl", we're probably looking at the
4873                  * start of Perl code, not a request to hand off to some
4874                  * other interpreter.  Similarly, if "perl" is there, but
4875                  * not in the first 'word' of the line, we assume the line
4876                  * contains the start of the Perl program.
4877                  */
4878                 if (d && *s != '#') {
4879                     const char *c = ipath;
4880                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4881                         c++;
4882                     if (c < d)
4883                         d = NULL;       /* "perl" not in first word; ignore */
4884                     else
4885                         *s = '#';       /* Don't try to parse shebang line */
4886                 }
4887 #endif /* ALTERNATE_SHEBANG */
4888                 if (!d &&
4889                     *s == '#' &&
4890                     ipathend > ipath &&
4891                     !PL_minus_c &&
4892                     !instr(s,"indir") &&
4893                     instr(PL_origargv[0],"perl"))
4894                 {
4895                     dVAR;
4896                     char **newargv;
4897
4898                     *ipathend = '\0';
4899                     s = ipathend + 1;
4900                     while (s < PL_bufend && isSPACE(*s))
4901                         s++;
4902                     if (s < PL_bufend) {
4903                         Newx(newargv,PL_origargc+3,char*);
4904                         newargv[1] = s;
4905                         while (s < PL_bufend && !isSPACE(*s))
4906                             s++;
4907                         *s = '\0';
4908                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4909                     }
4910                     else
4911                         newargv = PL_origargv;
4912                     newargv[0] = ipath;
4913                     PERL_FPU_PRE_EXEC
4914                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4915                     PERL_FPU_POST_EXEC
4916                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4917                 }
4918                 if (d) {
4919                     while (*d && !isSPACE(*d))
4920                         d++;
4921                     while (SPACE_OR_TAB(*d))
4922                         d++;
4923
4924                     if (*d++ == '-') {
4925                         const bool switches_done = PL_doswitches;
4926                         const U32 oldpdb = PL_perldb;
4927                         const bool oldn = PL_minus_n;
4928                         const bool oldp = PL_minus_p;
4929                         const char *d1 = d;
4930
4931                         do {
4932                             bool baduni = FALSE;
4933                             if (*d1 == 'C') {
4934                                 const char *d2 = d1 + 1;
4935                                 if (parse_unicode_opts((const char **)&d2)
4936                                     != PL_unicode)
4937                                     baduni = TRUE;
4938                             }
4939                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4940                                 const char * const m = d1;
4941                                 while (*d1 && !isSPACE(*d1))
4942                                     d1++;
4943                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4944                                       (int)(d1 - m), m);
4945                             }
4946                             d1 = moreswitches(d1);
4947                         } while (d1);
4948                         if (PL_doswitches && !switches_done) {
4949                             int argc = PL_origargc;
4950                             char **argv = PL_origargv;
4951                             do {
4952                                 argc--,argv++;
4953                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4954                             init_argv_symbols(argc,argv);
4955                         }
4956                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4957                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4958                               /* if we have already added "LINE: while (<>) {",
4959                                  we must not do it again */
4960                         {
4961                             sv_setpvs(PL_linestr, "");
4962                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4963                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4964                             PL_last_lop = PL_last_uni = NULL;
4965                             PL_preambled = FALSE;
4966                             if (PERLDB_LINE || PERLDB_SAVESRC)
4967                                 (void)gv_fetchfile(PL_origfilename);
4968                             goto retry;
4969                         }
4970                     }
4971                 }
4972             }
4973         }
4974         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4975             PL_bufptr = s;
4976             PL_lex_state = LEX_FORMLINE;
4977             return yylex();
4978         }
4979         goto retry;
4980     case '\r':
4981 #ifdef PERL_STRICT_CR
4982         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4983         Perl_croak(aTHX_
4984       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4985 #endif
4986     case ' ': case '\t': case '\f': case 013:
4987 #ifdef PERL_MAD
4988         PL_realtokenstart = -1;
4989         if (!PL_thiswhite)
4990             PL_thiswhite = newSVpvs("");
4991         sv_catpvn(PL_thiswhite, s, 1);
4992 #endif
4993         s++;
4994         goto retry;
4995     case '#':
4996     case '\n':
4997 #ifdef PERL_MAD
4998         PL_realtokenstart = -1;
4999         if (PL_madskills)
5000             PL_faketokens = 0;
5001 #endif
5002         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5003             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5004                 /* handle eval qq[#line 1 "foo"\n ...] */
5005                 CopLINE_dec(PL_curcop);
5006                 incline(s);
5007             }
5008             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5009                 s = SKIPSPACE0(s);
5010                 if (!PL_in_eval || PL_rsfp)
5011                     incline(s);
5012             }
5013             else {
5014                 d = s;
5015                 while (d < PL_bufend && *d != '\n')
5016                     d++;
5017                 if (d < PL_bufend)
5018                     d++;
5019                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5020                   Perl_croak(aTHX_ "panic: input overflow");
5021 #ifdef PERL_MAD
5022                 if (PL_madskills)
5023                     PL_thiswhite = newSVpvn(s, d - s);
5024 #endif
5025                 s = d;
5026                 incline(s);
5027             }
5028             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5029                 PL_bufptr = s;
5030                 PL_lex_state = LEX_FORMLINE;
5031                 return yylex();
5032             }
5033         }
5034         else {
5035 #ifdef PERL_MAD
5036             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5037                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5038                     PL_faketokens = 0;
5039                     s = SKIPSPACE0(s);
5040                     TOKEN(PEG); /* make sure any #! line is accessible */
5041                 }
5042                 s = SKIPSPACE0(s);
5043             }
5044             else {
5045 /*              if (PL_madskills && PL_lex_formbrack) { */
5046                     d = s;
5047                     while (d < PL_bufend && *d != '\n')
5048                         d++;
5049                     if (d < PL_bufend)
5050                         d++;
5051                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5052                       Perl_croak(aTHX_ "panic: input overflow");
5053                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5054                         if (!PL_thiswhite)
5055                             PL_thiswhite = newSVpvs("");
5056                         if (CopLINE(PL_curcop) == 1) {
5057                             sv_setpvs(PL_thiswhite, "");
5058                             PL_faketokens = 0;
5059                         }
5060                         sv_catpvn(PL_thiswhite, s, d - s);
5061                     }
5062                     s = d;
5063 /*              }
5064                 *s = '\0';
5065                 PL_bufend = s; */
5066             }
5067 #else
5068             *s = '\0';
5069             PL_bufend = s;
5070 #endif
5071         }
5072         goto retry;
5073     case '-':
5074         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5075             I32 ftst = 0;
5076             char tmp;
5077
5078             s++;
5079             PL_bufptr = s;
5080             tmp = *s++;
5081
5082             while (s < PL_bufend && SPACE_OR_TAB(*s))
5083                 s++;
5084
5085             if (strnEQ(s,"=>",2)) {
5086                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5087                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5088                 OPERATOR('-');          /* unary minus */
5089             }
5090             PL_last_uni = PL_oldbufptr;
5091             switch (tmp) {
5092             case 'r': ftst = OP_FTEREAD;        break;
5093             case 'w': ftst = OP_FTEWRITE;       break;
5094             case 'x': ftst = OP_FTEEXEC;        break;
5095             case 'o': ftst = OP_FTEOWNED;       break;
5096             case 'R': ftst = OP_FTRREAD;        break;
5097             case 'W': ftst = OP_FTRWRITE;       break;
5098             case 'X': ftst = OP_FTREXEC;        break;
5099             case 'O': ftst = OP_FTROWNED;       break;
5100             case 'e': ftst = OP_FTIS;           break;
5101             case 'z': ftst = OP_FTZERO;         break;
5102             case 's': ftst = OP_FTSIZE;         break;
5103             case 'f': ftst = OP_FTFILE;         break;
5104             case 'd': ftst = OP_FTDIR;          break;
5105             case 'l': ftst = OP_FTLINK;         break;
5106             case 'p': ftst = OP_FTPIPE;         break;
5107             case 'S': ftst = OP_FTSOCK;         break;
5108             case 'u': ftst = OP_FTSUID;         break;
5109             case 'g': ftst = OP_FTSGID;         break;
5110             case 'k': ftst = OP_FTSVTX;         break;
5111             case 'b': ftst = OP_FTBLK;          break;
5112             case 'c': ftst = OP_FTCHR;          break;
5113             case 't': ftst = OP_FTTTY;          break;
5114             case 'T': ftst = OP_FTTEXT;         break;
5115             case 'B': ftst = OP_FTBINARY;       break;
5116             case 'M': case 'A': case 'C':
5117                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5118                 switch (tmp) {
5119                 case 'M': ftst = OP_FTMTIME;    break;
5120                 case 'A': ftst = OP_FTATIME;    break;
5121                 case 'C': ftst = OP_FTCTIME;    break;
5122                 default:                        break;
5123                 }
5124                 break;
5125             default:
5126                 break;
5127             }
5128             if (ftst) {
5129                 PL_last_lop_op = (OPCODE)ftst;
5130                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5131                         "### Saw file test %c\n", (int)tmp);
5132                 } );
5133                 FTST(ftst);
5134             }
5135             else {
5136                 /* Assume it was a minus followed by a one-letter named
5137                  * subroutine call (or a -bareword), then. */
5138                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5139                         "### '-%c' looked like a file test but was not\n",
5140                         (int) tmp);
5141                 } );
5142                 s = --PL_bufptr;
5143             }
5144         }
5145         {
5146             const char tmp = *s++;
5147             if (*s == tmp) {
5148                 s++;
5149                 if (PL_expect == XOPERATOR)
5150                     TERM(POSTDEC);
5151                 else
5152                     OPERATOR(PREDEC);
5153             }
5154             else if (*s == '>') {
5155                 s++;
5156                 s = SKIPSPACE1(s);
5157                 if (isIDFIRST_lazy_if(s,UTF)) {
5158                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5159                     TOKEN(ARROW);
5160                 }
5161                 else if (*s == '$')
5162                     OPERATOR(ARROW);
5163                 else
5164                     TERM(ARROW);
5165             }
5166             if (PL_expect == XOPERATOR) {
5167                 if (*s == '=' && !PL_lex_allbrackets &&
5168                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5169                     s--;
5170                     TOKEN(0);
5171                 }
5172                 Aop(OP_SUBTRACT);
5173             }
5174             else {
5175                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5176                     check_uni();
5177                 OPERATOR('-');          /* unary minus */
5178             }
5179         }
5180
5181     case '+':
5182         {
5183             const char tmp = *s++;
5184             if (*s == tmp) {
5185                 s++;
5186                 if (PL_expect == XOPERATOR)
5187                     TERM(POSTINC);
5188                 else
5189                     OPERATOR(PREINC);
5190             }
5191             if (PL_expect == XOPERATOR) {
5192                 if (*s == '=' && !PL_lex_allbrackets &&
5193                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5194                     s--;
5195                     TOKEN(0);
5196                 }
5197                 Aop(OP_ADD);
5198             }
5199             else {
5200                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5201                     check_uni();
5202                 OPERATOR('+');
5203             }
5204         }
5205
5206     case '*':
5207         if (PL_expect != XOPERATOR) {
5208             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5209             PL_expect = XOPERATOR;
5210             force_ident(PL_tokenbuf, '*');
5211             if (!*PL_tokenbuf)
5212                 PREREF('*');
5213             TERM('*');
5214         }
5215         s++;
5216         if (*s == '*') {
5217             s++;
5218             if (*s == '=' && !PL_lex_allbrackets &&
5219                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5220                 s -= 2;
5221                 TOKEN(0);
5222             }
5223             PWop(OP_POW);
5224         }
5225         if (*s == '=' && !PL_lex_allbrackets &&
5226                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5227             s--;
5228             TOKEN(0);
5229         }
5230         Mop(OP_MULTIPLY);
5231
5232     case '%':
5233         if (PL_expect == XOPERATOR) {
5234             if (s[1] == '=' && !PL_lex_allbrackets &&
5235                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5236                 TOKEN(0);
5237             ++s;
5238             Mop(OP_MODULO);
5239         }
5240         PL_tokenbuf[0] = '%';
5241         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5242                 sizeof PL_tokenbuf - 1, FALSE);
5243         if (!PL_tokenbuf[1]) {
5244             PREREF('%');
5245         }
5246         PL_pending_ident = '%';
5247         TERM('%');
5248
5249     case '^':
5250         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5251                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5252             TOKEN(0);
5253         s++;
5254         BOop(OP_BIT_XOR);
5255     case '[':
5256         if (PL_lex_brackets > 100)
5257             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5258         PL_lex_brackstack[PL_lex_brackets++] = 0;
5259         PL_lex_allbrackets++;
5260         {
5261             const char tmp = *s++;
5262             OPERATOR(tmp);
5263         }
5264     case '~':
5265         if (s[1] == '~'
5266             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5267         {
5268             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5269                 TOKEN(0);
5270             s += 2;
5271             Eop(OP_SMARTMATCH);
5272         }
5273         s++;
5274         OPERATOR('~');
5275     case ',':
5276         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5277             TOKEN(0);
5278         s++;
5279         OPERATOR(',');
5280     case ':':
5281         if (s[1] == ':') {
5282             len = 0;
5283             goto just_a_word_zero_gv;
5284         }
5285         s++;
5286         switch (PL_expect) {
5287             OP *attrs;
5288 #ifdef PERL_MAD
5289             I32 stuffstart;
5290 #endif
5291         case XOPERATOR:
5292             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5293                 break;
5294             PL_bufptr = s;      /* update in case we back off */
5295             if (*s == '=') {
5296                 Perl_croak(aTHX_
5297                            "Use of := for an empty attribute list is not allowed");
5298             }
5299             goto grabattrs;
5300         case XATTRBLOCK:
5301             PL_expect = XBLOCK;
5302             goto grabattrs;
5303         case XATTRTERM:
5304             PL_expect = XTERMBLOCK;
5305          grabattrs:
5306 #ifdef PERL_MAD
5307             stuffstart = s - SvPVX(PL_linestr) - 1;
5308 #endif
5309             s = PEEKSPACE(s);
5310             attrs = NULL;
5311             while (isIDFIRST_lazy_if(s,UTF)) {
5312                 I32 tmp;
5313                 SV *sv;
5314                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5315                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5316                     if (tmp < 0) tmp = -tmp;
5317                     switch (tmp) {
5318                     case KEY_or:
5319                     case KEY_and:
5320                     case KEY_for:
5321                     case KEY_foreach:
5322                     case KEY_unless:
5323                     case KEY_if:
5324                     case KEY_while:
5325                     case KEY_until:
5326                         goto got_attrs;
5327                     default:
5328                         break;
5329                     }
5330                 }
5331                 sv = newSVpvn(s, len);
5332                 if (*d == '(') {
5333                     d = scan_str(d,TRUE,TRUE);
5334                     if (!d) {
5335                         /* MUST advance bufptr here to avoid bogus
5336                            "at end of line" context messages from yyerror().
5337                          */
5338                         PL_bufptr = s + len;
5339                         yyerror("Unterminated attribute parameter in attribute list");
5340                         if (attrs)
5341                             op_free(attrs);
5342                         sv_free(sv);
5343                         return REPORT(0);       /* EOF indicator */
5344                     }
5345                 }
5346                 if (PL_lex_stuff) {
5347                     sv_catsv(sv, PL_lex_stuff);
5348                     attrs = op_append_elem(OP_LIST, attrs,
5349                                         newSVOP(OP_CONST, 0, sv));
5350                     SvREFCNT_dec(PL_lex_stuff);
5351                     PL_lex_stuff = NULL;
5352                 }
5353                 else {
5354                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5355                         sv_free(sv);
5356                         if (PL_in_my == KEY_our) {
5357                             deprecate(":unique");
5358                         }
5359                         else
5360                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5361                     }
5362
5363                     /* NOTE: any CV attrs applied here need to be part of
5364                        the CVf_BUILTIN_ATTRS define in cv.h! */
5365                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5366                         sv_free(sv);
5367                         CvLVALUE_on(PL_compcv);
5368                     }
5369                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5370                         sv_free(sv);
5371                         deprecate(":locked");
5372                     }
5373                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5374                         sv_free(sv);
5375                         CvMETHOD_on(PL_compcv);
5376                     }
5377                     /* After we've set the flags, it could be argued that
5378                        we don't need to do the attributes.pm-based setting
5379                        process, and shouldn't bother appending recognized
5380                        flags.  To experiment with that, uncomment the
5381                        following "else".  (Note that's already been
5382                        uncommented.  That keeps the above-applied built-in
5383                        attributes from being intercepted (and possibly
5384                        rejected) by a package's attribute routines, but is
5385                        justified by the performance win for the common case
5386                        of applying only built-in attributes.) */
5387                     else
5388                         attrs = op_append_elem(OP_LIST, attrs,
5389                                             newSVOP(OP_CONST, 0,
5390                                                     sv));
5391                 }
5392                 s = PEEKSPACE(d);
5393                 if (*s == ':' && s[1] != ':')
5394                     s = PEEKSPACE(s+1);
5395                 else if (s == d)
5396                     break;      /* require real whitespace or :'s */
5397                 /* XXX losing whitespace on sequential attributes here */
5398             }
5399             {
5400                 const char tmp
5401                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5402                 if (*s != ';' && *s != '}' && *s != tmp
5403                     && (tmp != '=' || *s != ')')) {
5404                     const char q = ((*s == '\'') ? '"' : '\'');
5405                     /* If here for an expression, and parsed no attrs, back
5406                        off. */
5407                     if (tmp == '=' && !attrs) {
5408                         s = PL_bufptr;
5409                         break;
5410                     }
5411                     /* MUST advance bufptr here to avoid bogus "at end of line"
5412                        context messages from yyerror().
5413                     */
5414                     PL_bufptr = s;
5415                     yyerror( (const char *)
5416                              (*s
5417                               ? Perl_form(aTHX_ "Invalid separator character "
5418                                           "%c%c%c in attribute list", q, *s, q)
5419                               : "Unterminated attribute list" ) );
5420                     if (attrs)
5421                         op_free(attrs);
5422                     OPERATOR(':');
5423                 }
5424             }
5425         got_attrs:
5426             if (attrs) {
5427                 start_force(PL_curforce);
5428                 NEXTVAL_NEXTTOKE.opval = attrs;
5429                 CURMAD('_', PL_nextwhite);
5430                 force_next(THING);
5431             }
5432 #ifdef PERL_MAD
5433             if (PL_madskills) {
5434                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5435                                      (s - SvPVX(PL_linestr)) - stuffstart);
5436             }
5437 #endif
5438             TOKEN(COLONATTR);
5439         }
5440         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5441             s--;
5442             TOKEN(0);
5443         }
5444         PL_lex_allbrackets--;
5445         OPERATOR(':');
5446     case '(':
5447         s++;
5448         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5449             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5450         else
5451             PL_expect = XTERM;
5452         s = SKIPSPACE1(s);
5453         PL_lex_allbrackets++;
5454         TOKEN('(');
5455     case ';':
5456         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5457             TOKEN(0);
5458         CLINE;
5459         s++;
5460         OPERATOR(';');
5461     case ')':
5462         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5463             TOKEN(0);
5464         s++;
5465         PL_lex_allbrackets--;
5466         s = SKIPSPACE1(s);
5467         if (*s == '{')
5468             PREBLOCK(')');
5469         TERM(')');
5470     case ']':
5471         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5472             TOKEN(0);
5473         s++;
5474         if (PL_lex_brackets <= 0)
5475             yyerror("Unmatched right square bracket");
5476         else
5477             --PL_lex_brackets;
5478         PL_lex_allbrackets--;
5479         if (PL_lex_state == LEX_INTERPNORMAL) {
5480             if (PL_lex_brackets == 0) {
5481                 if (*s == '-' && s[1] == '>')
5482                     PL_lex_state = LEX_INTERPENDMAYBE;
5483                 else if (*s != '[' && *s != '{')
5484                     PL_lex_state = LEX_INTERPEND;
5485             }
5486         }
5487         TERM(']');
5488     case '{':
5489       leftbracket:
5490         s++;
5491         if (PL_lex_brackets > 100) {
5492             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5493         }
5494         switch (PL_expect) {
5495         case XTERM:
5496             if (PL_lex_formbrack) {
5497                 s--;
5498                 PRETERMBLOCK(DO);
5499             }
5500             if (PL_oldoldbufptr == PL_last_lop)
5501                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5502             else
5503                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5504             PL_lex_allbrackets++;
5505             OPERATOR(HASHBRACK);
5506         case XOPERATOR:
5507             while (s < PL_bufend && SPACE_OR_TAB(*s))
5508                 s++;
5509             d = s;
5510             PL_tokenbuf[0] = '\0';
5511             if (d < PL_bufend && *d == '-') {
5512                 PL_tokenbuf[0] = '-';
5513                 d++;
5514                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5515                     d++;
5516             }
5517             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5518                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5519                               FALSE, &len);
5520                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5521                     d++;
5522                 if (*d == '}') {
5523                     const char minus = (PL_tokenbuf[0] == '-');
5524                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5525                     if (minus)
5526                         force_next('-');
5527                 }
5528             }
5529             /* FALL THROUGH */
5530         case XATTRBLOCK:
5531         case XBLOCK:
5532             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5533             PL_lex_allbrackets++;
5534             PL_expect = XSTATE;
5535             break;
5536         case XATTRTERM:
5537         case XTERMBLOCK:
5538             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5539             PL_lex_allbrackets++;
5540             PL_expect = XSTATE;
5541             break;
5542         default: {
5543                 const char *t;
5544                 if (PL_oldoldbufptr == PL_last_lop)
5545                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5546                 else
5547                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5548                 PL_lex_allbrackets++;
5549                 s = SKIPSPACE1(s);
5550                 if (*s == '}') {
5551                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5552                         PL_expect = XTERM;
5553                         /* This hack is to get the ${} in the message. */
5554                         PL_bufptr = s+1;
5555                         yyerror("syntax error");
5556                         break;
5557                     }
5558                     OPERATOR(HASHBRACK);
5559                 }
5560                 /* This hack serves to disambiguate a pair of curlies
5561                  * as being a block or an anon hash.  Normally, expectation
5562                  * determines that, but in cases where we're not in a
5563                  * position to expect anything in particular (like inside
5564                  * eval"") we have to resolve the ambiguity.  This code
5565                  * covers the case where the first term in the curlies is a
5566                  * quoted string.  Most other cases need to be explicitly
5567                  * disambiguated by prepending a "+" before the opening
5568                  * curly in order to force resolution as an anon hash.
5569                  *
5570                  * XXX should probably propagate the outer expectation
5571                  * into eval"" to rely less on this hack, but that could
5572                  * potentially break current behavior of eval"".
5573                  * GSAR 97-07-21
5574                  */
5575                 t = s;
5576                 if (*s == '\'' || *s == '"' || *s == '`') {
5577                     /* common case: get past first string, handling escapes */
5578                     for (t++; t < PL_bufend && *t != *s;)
5579                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5580                             t++;
5581                     t++;
5582                 }
5583                 else if (*s == 'q') {
5584                     if (++t < PL_bufend
5585                         && (!isALNUM(*t)
5586                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5587                                 && !isALNUM(*t))))
5588                     {
5589                         /* skip q//-like construct */
5590                         const char *tmps;
5591                         char open, close, term;
5592                         I32 brackets = 1;
5593
5594                         while (t < PL_bufend && isSPACE(*t))
5595                             t++;
5596                         /* check for q => */
5597                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5598                             OPERATOR(HASHBRACK);
5599                         }
5600                         term = *t;
5601                         open = term;
5602                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5603                             term = tmps[5];
5604                         close = term;
5605                         if (open == close)
5606                             for (t++; t < PL_bufend; t++) {
5607                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5608                                     t++;
5609                                 else if (*t == open)
5610                                     break;
5611                             }
5612                         else {
5613                             for (t++; t < PL_bufend; t++) {
5614                                 if (*t == '\\' && t+1 < PL_bufend)
5615                                     t++;
5616                                 else if (*t == close && --brackets <= 0)
5617                                     break;
5618                                 else if (*t == open)
5619                                     brackets++;
5620                             }
5621                         }
5622                         t++;
5623                     }
5624                     else
5625                         /* skip plain q word */
5626                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5627                              t += UTF8SKIP(t);
5628                 }
5629                 else if (isALNUM_lazy_if(t,UTF)) {
5630                     t += UTF8SKIP(t);
5631                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5632                          t += UTF8SKIP(t);
5633                 }
5634                 while (t < PL_bufend && isSPACE(*t))
5635                     t++;
5636                 /* if comma follows first term, call it an anon hash */
5637                 /* XXX it could be a comma expression with loop modifiers */
5638                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5639                                    || (*t == '=' && t[1] == '>')))
5640                     OPERATOR(HASHBRACK);
5641                 if (PL_expect == XREF)
5642                     PL_expect = XTERM;
5643                 else {
5644                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5645                     PL_expect = XSTATE;
5646                 }
5647             }
5648             break;
5649         }
5650         pl_yylval.ival = CopLINE(PL_curcop);
5651         if (isSPACE(*s) || *s == '#')
5652             PL_copline = NOLINE;   /* invalidate current command line number */
5653         TOKEN('{');
5654     case '}':
5655         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5656             TOKEN(0);
5657       rightbracket:
5658         s++;
5659         if (PL_lex_brackets <= 0)
5660             yyerror("Unmatched right curly bracket");
5661         else
5662             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5663         PL_lex_allbrackets--;
5664         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5665             PL_lex_formbrack = 0;
5666         if (PL_lex_state == LEX_INTERPNORMAL) {
5667             if (PL_lex_brackets == 0) {
5668                 if (PL_expect & XFAKEBRACK) {
5669                     PL_expect &= XENUMMASK;
5670                     PL_lex_state = LEX_INTERPEND;
5671                     PL_bufptr = s;
5672 #if 0
5673                     if (PL_madskills) {
5674                         if (!PL_thiswhite)
5675                             PL_thiswhite = newSVpvs("");
5676                         sv_catpvs(PL_thiswhite,"}");
5677                     }
5678 #endif
5679                     return yylex();     /* ignore fake brackets */
5680                 }
5681                 if (*s == '-' && s[1] == '>')
5682                     PL_lex_state = LEX_INTERPENDMAYBE;
5683                 else if (*s != '[' && *s != '{')
5684                     PL_lex_state = LEX_INTERPEND;
5685             }
5686         }
5687         if (PL_expect & XFAKEBRACK) {
5688             PL_expect &= XENUMMASK;
5689             PL_bufptr = s;
5690             return yylex();             /* ignore fake brackets */
5691         }
5692         start_force(PL_curforce);
5693         if (PL_madskills) {
5694             curmad('X', newSVpvn(s-1,1));
5695             CURMAD('_', PL_thiswhite);
5696         }
5697         force_next('}');
5698 #ifdef PERL_MAD
5699         if (!PL_thistoken)
5700             PL_thistoken = newSVpvs("");
5701 #endif
5702         TOKEN(';');
5703     case '&':
5704         s++;
5705         if (*s++ == '&') {
5706             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5707                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5708                 s -= 2;
5709                 TOKEN(0);
5710             }
5711             AOPERATOR(ANDAND);
5712         }
5713         s--;
5714         if (PL_expect == XOPERATOR) {
5715             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5716                 && isIDFIRST_lazy_if(s,UTF))
5717             {
5718                 CopLINE_dec(PL_curcop);
5719                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5720                 CopLINE_inc(PL_curcop);
5721             }
5722             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5723                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5724                 s--;
5725                 TOKEN(0);
5726             }
5727             BAop(OP_BIT_AND);
5728         }
5729
5730         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5731         if (*PL_tokenbuf) {
5732             PL_expect = XOPERATOR;
5733             force_ident(PL_tokenbuf, '&');
5734         }
5735         else
5736             PREREF('&');
5737         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5738         TERM('&');
5739
5740     case '|':
5741         s++;
5742         if (*s++ == '|') {
5743             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5744                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5745                 s -= 2;
5746                 TOKEN(0);
5747             }
5748             AOPERATOR(OROR);
5749         }
5750         s--;
5751         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5752                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5753             s--;
5754             TOKEN(0);
5755         }
5756         BOop(OP_BIT_OR);
5757     case '=':
5758         s++;
5759         {
5760             const char tmp = *s++;
5761             if (tmp == '=') {
5762                 if (!PL_lex_allbrackets &&
5763                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5764                     s -= 2;
5765                     TOKEN(0);
5766                 }
5767                 Eop(OP_EQ);
5768             }
5769             if (tmp == '>') {
5770                 if (!PL_lex_allbrackets &&
5771                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5772                     s -= 2;
5773                     TOKEN(0);
5774                 }
5775                 OPERATOR(',');
5776             }
5777             if (tmp == '~')
5778                 PMop(OP_MATCH);
5779             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5780                 && strchr("+-*/%.^&|<",tmp))
5781                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5782                             "Reversed %c= operator",(int)tmp);
5783             s--;
5784             if (PL_expect == XSTATE && isALPHA(tmp) &&
5785                 (s == PL_linestart+1 || s[-2] == '\n') )
5786                 {
5787                     if (PL_in_eval && !PL_rsfp) {
5788                         d = PL_bufend;
5789                         while (s < d) {
5790                             if (*s++ == '\n') {
5791                                 incline(s);
5792                                 if (strnEQ(s,"=cut",4)) {
5793                                     s = strchr(s,'\n');
5794                                     if (s)
5795                                         s++;
5796                                     else
5797                                         s = d;
5798                                     incline(s);
5799                                     goto retry;
5800                                 }
5801                             }
5802                         }
5803                         goto retry;
5804                     }
5805 #ifdef PERL_MAD
5806                     if (PL_madskills) {
5807                         if (!PL_thiswhite)
5808                             PL_thiswhite = newSVpvs("");
5809                         sv_catpvn(PL_thiswhite, PL_linestart,
5810                                   PL_bufend - PL_linestart);
5811                     }
5812 #endif
5813                     s = PL_bufend;
5814                     PL_parser->in_pod = 1;
5815                     goto retry;
5816                 }
5817         }
5818         if (PL_lex_brackets < PL_lex_formbrack) {
5819             const char *t = s;
5820 #ifdef PERL_STRICT_CR
5821             while (SPACE_OR_TAB(*t))
5822 #else
5823             while (SPACE_OR_TAB(*t) || *t == '\r')
5824 #endif
5825                 t++;
5826             if (*t == '\n' || *t == '#') {
5827                 s--;
5828                 PL_expect = XBLOCK;
5829                 goto leftbracket;
5830             }
5831         }
5832         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5833             s--;
5834             TOKEN(0);
5835         }
5836         pl_yylval.ival = 0;
5837         OPERATOR(ASSIGNOP);
5838     case '!':
5839         s++;
5840         {
5841             const char tmp = *s++;
5842             if (tmp == '=') {
5843                 /* was this !=~ where !~ was meant?
5844                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5845
5846                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5847                     const char *t = s+1;
5848
5849                     while (t < PL_bufend && isSPACE(*t))
5850                         ++t;
5851
5852                     if (*t == '/' || *t == '?' ||
5853                         ((*t == 'm' || *t == 's' || *t == 'y')
5854                          && !isALNUM(t[1])) ||
5855                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5856                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5857                                     "!=~ should be !~");
5858                 }
5859                 if (!PL_lex_allbrackets &&
5860                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5861                     s -= 2;
5862                     TOKEN(0);
5863                 }
5864                 Eop(OP_NE);
5865             }
5866             if (tmp == '~')
5867                 PMop(OP_NOT);
5868         }
5869         s--;
5870         OPERATOR('!');
5871     case '<':
5872         if (PL_expect != XOPERATOR) {
5873             if (s[1] != '<' && !strchr(s,'>'))
5874                 check_uni();
5875             if (s[1] == '<')
5876                 s = scan_heredoc(s);
5877             else
5878                 s = scan_inputsymbol(s);
5879             TERM(sublex_start());
5880         }
5881         s++;
5882         {
5883             char tmp = *s++;
5884             if (tmp == '<') {
5885                 if (*s == '=' && !PL_lex_allbrackets &&
5886                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5887                     s -= 2;
5888                     TOKEN(0);
5889                 }
5890                 SHop(OP_LEFT_SHIFT);
5891             }
5892             if (tmp == '=') {
5893                 tmp = *s++;
5894                 if (tmp == '>') {
5895                     if (!PL_lex_allbrackets &&
5896                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5897                         s -= 3;
5898                         TOKEN(0);
5899                     }
5900                     Eop(OP_NCMP);
5901                 }
5902                 s--;
5903                 if (!PL_lex_allbrackets &&
5904                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5905                     s -= 2;
5906                     TOKEN(0);
5907                 }
5908                 Rop(OP_LE);
5909             }
5910         }
5911         s--;
5912         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5913             s--;
5914             TOKEN(0);
5915         }
5916         Rop(OP_LT);
5917     case '>':
5918         s++;
5919         {
5920             const char tmp = *s++;
5921             if (tmp == '>') {
5922                 if (*s == '=' && !PL_lex_allbrackets &&
5923                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5924                     s -= 2;
5925                     TOKEN(0);
5926                 }
5927                 SHop(OP_RIGHT_SHIFT);
5928             }
5929             else if (tmp == '=') {
5930                 if (!PL_lex_allbrackets &&
5931                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5932                     s -= 2;
5933                     TOKEN(0);
5934                 }
5935                 Rop(OP_GE);
5936             }
5937         }
5938         s--;
5939         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5940             s--;
5941             TOKEN(0);
5942         }
5943         Rop(OP_GT);
5944
5945     case '$':
5946         CLINE;
5947
5948         if (PL_expect == XOPERATOR) {
5949             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5950                 return deprecate_commaless_var_list();
5951             }
5952         }
5953
5954         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5955             PL_tokenbuf[0] = '@';
5956             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5957                            sizeof PL_tokenbuf - 1, FALSE);
5958             if (PL_expect == XOPERATOR)
5959                 no_op("Array length", s);
5960             if (!PL_tokenbuf[1])
5961                 PREREF(DOLSHARP);
5962             PL_expect = XOPERATOR;
5963             PL_pending_ident = '#';
5964             TOKEN(DOLSHARP);
5965         }
5966
5967         PL_tokenbuf[0] = '$';
5968         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5969                        sizeof PL_tokenbuf - 1, FALSE);
5970         if (PL_expect == XOPERATOR)
5971             no_op("Scalar", s);
5972         if (!PL_tokenbuf[1]) {
5973             if (s == PL_bufend)
5974                 yyerror("Final $ should be \\$ or $name");
5975             PREREF('$');
5976         }
5977
5978         /* This kludge not intended to be bulletproof. */
5979         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5980             pl_yylval.opval = newSVOP(OP_CONST, 0,
5981                                    newSViv(CopARYBASE_get(&PL_compiling)));
5982             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5983             TERM(THING);
5984         }
5985
5986         d = s;
5987         {
5988             const char tmp = *s;
5989             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5990                 s = SKIPSPACE1(s);
5991
5992             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5993                 && intuit_more(s)) {
5994                 if (*s == '[') {
5995                     PL_tokenbuf[0] = '@';
5996                     if (ckWARN(WARN_SYNTAX)) {
5997                         char *t = s+1;
5998
5999                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6000                             t++;
6001                         if (*t++ == ',') {
6002                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6003                             while (t < PL_bufend && *t != ']')
6004                                 t++;
6005                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6006                                         "Multidimensional syntax %.*s not supported",
6007                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6008                         }
6009                     }
6010                 }
6011                 else if (*s == '{') {
6012                     char *t;
6013                     PL_tokenbuf[0] = '%';
6014                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6015                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6016                         {
6017                             char tmpbuf[sizeof PL_tokenbuf];
6018                             do {
6019                                 t++;
6020                             } while (isSPACE(*t));
6021                             if (isIDFIRST_lazy_if(t,UTF)) {
6022                                 STRLEN len;
6023                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6024                                               &len);
6025                                 while (isSPACE(*t))
6026                                     t++;
6027                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6028                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6029                                                 "You need to quote \"%s\"",
6030                                                 tmpbuf);
6031                             }
6032                         }
6033                 }
6034             }
6035
6036             PL_expect = XOPERATOR;
6037             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6038                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6039                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6040                     PL_expect = XOPERATOR;
6041                 else if (strchr("$@\"'`q", *s))
6042                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6043                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6044                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6045                 else if (isIDFIRST_lazy_if(s,UTF)) {
6046                     char tmpbuf[sizeof PL_tokenbuf];
6047                     int t2;
6048                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6049                     if ((t2 = keyword(tmpbuf, len, 0))) {
6050                         /* binary operators exclude handle interpretations */
6051                         switch (t2) {
6052                         case -KEY_x:
6053                         case -KEY_eq:
6054                         case -KEY_ne:
6055                         case -KEY_gt:
6056                         case -KEY_lt:
6057                         case -KEY_ge:
6058                         case -KEY_le:
6059                         case -KEY_cmp:
6060                             break;
6061                         default:
6062                             PL_expect = XTERM;  /* e.g. print $fh length() */
6063                             break;
6064                         }
6065                     }
6066                     else {
6067                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6068                     }
6069                 }
6070                 else if (isDIGIT(*s))
6071                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6072                 else if (*s == '.' && isDIGIT(s[1]))
6073                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6074                 else if ((*s == '?' || *s == '-' || *s == '+')
6075                          && !isSPACE(s[1]) && s[1] != '=')
6076                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6077                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6078                          && s[1] != '/')
6079                     PL_expect = XTERM;          /* e.g. print $fh /.../
6080                                                    XXX except DORDOR operator
6081                                                 */
6082                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6083                          && s[2] != '=')
6084                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6085             }
6086         }
6087         PL_pending_ident = '$';
6088         TOKEN('$');
6089
6090     case '@':
6091         if (PL_expect == XOPERATOR)
6092             no_op("Array", s);
6093         PL_tokenbuf[0] = '@';
6094         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6095         if (!PL_tokenbuf[1]) {
6096             PREREF('@');
6097         }
6098         if (PL_lex_state == LEX_NORMAL)
6099             s = SKIPSPACE1(s);
6100         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6101             if (*s == '{')
6102                 PL_tokenbuf[0] = '%';
6103
6104             /* Warn about @ where they meant $. */
6105             if (*s == '[' || *s == '{') {
6106                 if (ckWARN(WARN_SYNTAX)) {
6107                     const char *t = s + 1;
6108                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6109                         t++;
6110                     if (*t == '}' || *t == ']') {
6111                         t++;
6112                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6113                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6114                             "Scalar value %.*s better written as $%.*s",
6115                             (int)(t-PL_bufptr), PL_bufptr,
6116                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6117                     }
6118                 }
6119             }
6120         }
6121         PL_pending_ident = '@';
6122         TERM('@');
6123
6124      case '/':                  /* may be division, defined-or, or pattern */
6125         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6126             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6127                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6128                 TOKEN(0);
6129             s += 2;
6130             AOPERATOR(DORDOR);
6131         }
6132      case '?':                  /* may either be conditional or pattern */
6133         if (PL_expect == XOPERATOR) {
6134              char tmp = *s++;
6135              if(tmp == '?') {
6136                 if (!PL_lex_allbrackets &&
6137                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6138                     s--;
6139                     TOKEN(0);
6140                 }
6141                 PL_lex_allbrackets++;
6142                 OPERATOR('?');
6143              }
6144              else {
6145                  tmp = *s++;
6146                  if(tmp == '/') {
6147                      /* A // operator. */
6148                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6149                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6150                                             LEX_FAKEEOF_LOGIC)) {
6151                         s -= 2;
6152                         TOKEN(0);
6153                     }
6154                     AOPERATOR(DORDOR);
6155                  }
6156                  else {
6157                      s--;
6158                      if (*s == '=' && !PL_lex_allbrackets &&
6159                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6160                          s--;
6161                          TOKEN(0);
6162                      }
6163                      Mop(OP_DIVIDE);
6164                  }
6165              }
6166          }
6167          else {
6168              /* Disable warning on "study /blah/" */
6169              if (PL_oldoldbufptr == PL_last_uni
6170               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6171                   || memNE(PL_last_uni, "study", 5)
6172                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6173               ))
6174                  check_uni();
6175              if (*s == '?')
6176                  deprecate("?PATTERN? without explicit operator");
6177              s = scan_pat(s,OP_MATCH);
6178              TERM(sublex_start());
6179          }
6180
6181     case '.':
6182         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6183 #ifdef PERL_STRICT_CR
6184             && s[1] == '\n'
6185 #else
6186             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6187 #endif
6188             && (s == PL_linestart || s[-1] == '\n') )
6189         {
6190             PL_lex_formbrack = 0;
6191             PL_expect = XSTATE;
6192             goto rightbracket;
6193         }
6194         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6195             s += 3;
6196             OPERATOR(YADAYADA);
6197         }
6198         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6199             char tmp = *s++;
6200             if (*s == tmp) {
6201                 if (!PL_lex_allbrackets &&
6202                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6203                     s--;
6204                     TOKEN(0);
6205                 }
6206                 s++;
6207                 if (*s == tmp) {
6208                     s++;
6209                     pl_yylval.ival = OPf_SPECIAL;
6210                 }
6211                 else
6212                     pl_yylval.ival = 0;
6213                 OPERATOR(DOTDOT);
6214             }
6215             if (*s == '=' && !PL_lex_allbrackets &&
6216                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6217                 s--;
6218                 TOKEN(0);
6219             }
6220             Aop(OP_CONCAT);
6221         }
6222         /* FALL THROUGH */
6223     case '0': case '1': case '2': case '3': case '4':
6224     case '5': case '6': case '7': case '8': case '9':
6225         s = scan_num(s, &pl_yylval);
6226         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6227         if (PL_expect == XOPERATOR)
6228             no_op("Number",s);
6229         TERM(THING);
6230
6231     case '\'':
6232         s = scan_str(s,!!PL_madskills,FALSE);
6233         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6234         if (PL_expect == XOPERATOR) {
6235             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6236                 return deprecate_commaless_var_list();
6237             }
6238             else
6239                 no_op("String",s);
6240         }
6241         if (!s)
6242             missingterm(NULL);
6243         pl_yylval.ival = OP_CONST;
6244         TERM(sublex_start());
6245
6246     case '"':
6247         s = scan_str(s,!!PL_madskills,FALSE);
6248         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6249         if (PL_expect == XOPERATOR) {
6250             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6251                 return deprecate_commaless_var_list();
6252             }
6253             else
6254                 no_op("String",s);
6255         }
6256         if (!s)
6257             missingterm(NULL);
6258         pl_yylval.ival = OP_CONST;
6259         /* FIXME. I think that this can be const if char *d is replaced by
6260            more localised variables.  */
6261         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6262             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6263                 pl_yylval.ival = OP_STRINGIFY;
6264                 break;
6265             }
6266         }
6267         TERM(sublex_start());
6268
6269     case '`':
6270         s = scan_str(s,!!PL_madskills,FALSE);
6271         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6272         if (PL_expect == XOPERATOR)
6273             no_op("Backticks",s);
6274         if (!s)
6275             missingterm(NULL);
6276         readpipe_override();
6277         TERM(sublex_start());
6278
6279     case '\\':
6280         s++;
6281         if (PL_lex_inwhat && isDIGIT(*s))
6282             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6283                            *s, *s);
6284         if (PL_expect == XOPERATOR)
6285             no_op("Backslash",s);
6286         OPERATOR(REFGEN);
6287
6288     case 'v':
6289         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6290             char *start = s + 2;
6291             while (isDIGIT(*start) || *start == '_')
6292                 start++;
6293             if (*start == '.' && isDIGIT(start[1])) {
6294                 s = scan_num(s, &pl_yylval);
6295                 TERM(THING);
6296             }
6297             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6298             else if (!isALPHA(*start) && (PL_expect == XTERM
6299                         || PL_expect == XREF || PL_expect == XSTATE
6300                         || PL_expect == XTERMORDORDOR)) {
6301                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6302                 if (!gv) {
6303                     s = scan_num(s, &pl_yylval);
6304                     TERM(THING);
6305                 }
6306             }
6307         }
6308         goto keylookup;
6309     case 'x':
6310         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6311             s++;
6312             Mop(OP_REPEAT);
6313         }
6314         goto keylookup;
6315
6316     case '_':
6317     case 'a': case 'A':
6318     case 'b': case 'B':
6319     case 'c': case 'C':
6320     case 'd': case 'D':
6321     case 'e': case 'E':
6322     case 'f': case 'F':
6323     case 'g': case 'G':
6324     case 'h': case 'H':
6325     case 'i': case 'I':
6326     case 'j': case 'J':
6327     case 'k': case 'K':
6328     case 'l': case 'L':
6329     case 'm': case 'M':
6330     case 'n': case 'N':
6331     case 'o': case 'O':
6332     case 'p': case 'P':
6333     case 'q': case 'Q':
6334     case 'r': case 'R':
6335     case 's': case 'S':
6336     case 't': case 'T':
6337     case 'u': case 'U':
6338               case 'V':
6339     case 'w': case 'W':
6340               case 'X':
6341     case 'y': case 'Y':
6342     case 'z': case 'Z':
6343
6344       keylookup: {
6345         bool anydelim;
6346         I32 tmp;
6347
6348         orig_keyword = 0;
6349         gv = NULL;
6350         gvp = NULL;
6351
6352         PL_bufptr = s;
6353         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6354
6355         /* Some keywords can be followed by any delimiter, including ':' */
6356         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6357
6358         /* x::* is just a word, unless x is "CORE" */
6359         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6360             goto just_a_word;
6361
6362         d = s;
6363         while (d < PL_bufend && isSPACE(*d))
6364                 d++;    /* no comments skipped here, or s### is misparsed */
6365
6366         /* Is this a word before a => operator? */
6367         if (*d == '=' && d[1] == '>') {
6368             CLINE;
6369             pl_yylval.opval
6370                 = (OP*)newSVOP(OP_CONST, 0,
6371                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6372             pl_yylval.opval->op_private = OPpCONST_BARE;
6373             TERM(WORD);
6374         }
6375
6376         /* Check for plugged-in keyword */
6377         {
6378             OP *o;
6379             int result;
6380             char *saved_bufptr = PL_bufptr;
6381             PL_bufptr = s;
6382             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6383             s = PL_bufptr;
6384             if (result == KEYWORD_PLUGIN_DECLINE) {
6385                 /* not a plugged-in keyword */
6386                 PL_bufptr = saved_bufptr;
6387             } else if (result == KEYWORD_PLUGIN_STMT) {
6388                 pl_yylval.opval = o;
6389                 CLINE;
6390                 PL_expect = XSTATE;
6391                 return REPORT(PLUGSTMT);
6392             } else if (result == KEYWORD_PLUGIN_EXPR) {
6393                 pl_yylval.opval = o;
6394                 CLINE;
6395                 PL_expect = XOPERATOR;
6396                 return REPORT(PLUGEXPR);
6397             } else {
6398                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6399                                         PL_tokenbuf);
6400             }
6401         }
6402
6403         /* Check for built-in keyword */
6404         tmp = keyword(PL_tokenbuf, len, 0);
6405
6406         /* Is this a label? */
6407         if (!anydelim && PL_expect == XSTATE
6408               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6409             s = d + 1;
6410             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6411             CLINE;
6412             TOKEN(LABEL);
6413         }
6414
6415         if (tmp < 0) {                  /* second-class keyword? */
6416             GV *ogv = NULL;     /* override (winner) */
6417             GV *hgv = NULL;     /* hidden (loser) */
6418             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6419                 CV *cv;
6420                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6421                     (cv = GvCVu(gv)))
6422                 {
6423                     if (GvIMPORTED_CV(gv))
6424                         ogv = gv;
6425                     else if (! CvMETHOD(cv))
6426                         hgv = gv;
6427                 }
6428                 if (!ogv &&
6429                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6430                     (gv = *gvp) && isGV_with_GP(gv) &&
6431                     GvCVu(gv) && GvIMPORTED_CV(gv))
6432                 {
6433                     ogv = gv;
6434                 }
6435             }
6436             if (ogv) {
6437                 orig_keyword = tmp;
6438                 tmp = 0;                /* overridden by import or by GLOBAL */
6439             }
6440             else if (gv && !gvp
6441                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6442                      && GvCVu(gv))
6443             {
6444                 tmp = 0;                /* any sub overrides "weak" keyword */
6445             }
6446             else {                      /* no override */
6447                 tmp = -tmp;
6448                 if (tmp == KEY_dump) {
6449                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6450                                    "dump() better written as CORE::dump()");
6451                 }
6452                 gv = NULL;
6453                 gvp = 0;
6454                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6455                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6456                                    "Ambiguous call resolved as CORE::%s(), "
6457                                    "qualify as such or use &",
6458                                    GvENAME(hgv));
6459             }
6460         }
6461
6462       reserved_word:
6463         switch (tmp) {
6464
6465         default:                        /* not a keyword */
6466             /* Trade off - by using this evil construction we can pull the
6467                variable gv into the block labelled keylookup. If not, then
6468                we have to give it function scope so that the goto from the
6469                earlier ':' case doesn't bypass the initialisation.  */
6470             if (0) {
6471             just_a_word_zero_gv:
6472                 gv = NULL;
6473                 gvp = NULL;
6474                 orig_keyword = 0;
6475             }
6476           just_a_word: {
6477                 SV *sv;
6478                 int pkgname = 0;
6479                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6480                 OP *rv2cv_op;
6481                 CV *cv;
6482 #ifdef PERL_MAD
6483                 SV *nextPL_nextwhite = 0;
6484 #endif
6485
6486
6487                 /* Get the rest if it looks like a package qualifier */
6488
6489                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6490                     STRLEN morelen;
6491                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6492                                   TRUE, &morelen);
6493                     if (!morelen)
6494                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6495                                 *s == '\'' ? "'" : "::");
6496                     len += morelen;
6497                     pkgname = 1;
6498                 }
6499
6500                 if (PL_expect == XOPERATOR) {
6501                     if (PL_bufptr == PL_linestart) {
6502                         CopLINE_dec(PL_curcop);
6503                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6504                         CopLINE_inc(PL_curcop);
6505                     }
6506                     else
6507                         no_op("Bareword",s);
6508                 }
6509
6510                 /* Look for a subroutine with this name in current package,
6511                    unless name is "Foo::", in which case Foo is a bareword
6512                    (and a package name). */
6513
6514                 if (len > 2 && !PL_madskills &&
6515                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6516                 {
6517                     if (ckWARN(WARN_BAREWORD)
6518                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6519                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6520                             "Bareword \"%s\" refers to nonexistent package",
6521                              PL_tokenbuf);
6522                     len -= 2;
6523                     PL_tokenbuf[len] = '\0';
6524                     gv = NULL;
6525                     gvp = 0;
6526                 }
6527                 else {
6528                     if (!gv) {
6529                         /* Mustn't actually add anything to a symbol table.
6530                            But also don't want to "initialise" any placeholder
6531                            constants that might already be there into full
6532                            blown PVGVs with attached PVCV.  */
6533                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6534                                                GV_NOADD_NOINIT, SVt_PVCV);
6535                     }
6536                     len = 0;
6537                 }
6538
6539                 /* if we saw a global override before, get the right name */
6540
6541                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6542                     len ? len : strlen(PL_tokenbuf));
6543                 if (gvp) {
6544                     SV * const tmp_sv = sv;
6545                     sv = newSVpvs("CORE::GLOBAL::");
6546                     sv_catsv(sv, tmp_sv);
6547                     SvREFCNT_dec(tmp_sv);
6548                 }
6549
6550 #ifdef PERL_MAD
6551                 if (PL_madskills && !PL_thistoken) {
6552                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6553                     PL_thistoken = newSVpvn(start,s - start);
6554                     PL_realtokenstart = s - SvPVX(PL_linestr);
6555                 }
6556 #endif
6557
6558                 /* Presume this is going to be a bareword of some sort. */
6559                 CLINE;
6560                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6561                 pl_yylval.opval->op_private = OPpCONST_BARE;
6562
6563                 /* And if "Foo::", then that's what it certainly is. */
6564                 if (len)
6565                     goto safe_bareword;
6566
6567                 {
6568                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6569                     const_op->op_private = OPpCONST_BARE;
6570                     rv2cv_op = newCVREF(0, const_op);
6571                 }
6572                 cv = rv2cv_op_cv(rv2cv_op, 0);
6573
6574                 /* See if it's the indirect object for a list operator. */
6575
6576                 if (PL_oldoldbufptr &&
6577                     PL_oldoldbufptr < PL_bufptr &&
6578                     (PL_oldoldbufptr == PL_last_lop
6579                      || PL_oldoldbufptr == PL_last_uni) &&
6580                     /* NO SKIPSPACE BEFORE HERE! */
6581                     (PL_expect == XREF ||
6582                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6583                 {
6584                     bool immediate_paren = *s == '(';
6585
6586                     /* (Now we can afford to cross potential line boundary.) */
6587                     s = SKIPSPACE2(s,nextPL_nextwhite);
6588 #ifdef PERL_MAD
6589                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6590 #endif
6591
6592                     /* Two barewords in a row may indicate method call. */
6593
6594                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6595                         (tmp = intuit_method(s, gv, cv))) {
6596                         op_free(rv2cv_op);
6597                         if (tmp == METHOD && !PL_lex_allbrackets &&
6598                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6599                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6600                         return REPORT(tmp);
6601                     }
6602
6603                     /* If not a declared subroutine, it's an indirect object. */
6604                     /* (But it's an indir obj regardless for sort.) */
6605                     /* Also, if "_" follows a filetest operator, it's a bareword */
6606
6607                     if (
6608                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6609                          (!cv &&
6610                         (PL_last_lop_op != OP_MAPSTART &&
6611                          PL_last_lop_op != OP_GREPSTART))))
6612                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6613                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6614                        )
6615                     {
6616                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6617                         goto bareword;
6618                     }
6619                 }
6620
6621                 PL_expect = XOPERATOR;
6622 #ifdef PERL_MAD
6623                 if (isSPACE(*s))
6624                     s = SKIPSPACE2(s,nextPL_nextwhite);
6625                 PL_nextwhite = nextPL_nextwhite;
6626 #else
6627                 s = skipspace(s);
6628 #endif
6629
6630                 /* Is this a word before a => operator? */
6631                 if (*s == '=' && s[1] == '>' && !pkgname) {
6632                     op_free(rv2cv_op);
6633                     CLINE;
6634                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6635                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6636                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6637                     TERM(WORD);
6638                 }
6639
6640                 /* If followed by a paren, it's certainly a subroutine. */
6641                 if (*s == '(') {
6642                     CLINE;
6643                     if (cv) {
6644                         d = s + 1;
6645                         while (SPACE_OR_TAB(*d))
6646                             d++;
6647                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6648                             s = d + 1;
6649                             goto its_constant;
6650                         }
6651                     }
6652 #ifdef PERL_MAD
6653                     if (PL_madskills) {
6654                         PL_nextwhite = PL_thiswhite;
6655                         PL_thiswhite = 0;
6656                     }
6657                     start_force(PL_curforce);
6658 #endif
6659                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6660                     PL_expect = XOPERATOR;
6661 #ifdef PERL_MAD
6662                     if (PL_madskills) {
6663                         PL_nextwhite = nextPL_nextwhite;
6664                         curmad('X', PL_thistoken);
6665                         PL_thistoken = newSVpvs("");
6666                     }
6667 #endif
6668                     op_free(rv2cv_op);
6669                     force_next(WORD);
6670                     pl_yylval.ival = 0;
6671                     TOKEN('&');
6672                 }
6673
6674                 /* If followed by var or block, call it a method (unless sub) */
6675
6676                 if ((*s == '$' || *s == '{') && !cv) {
6677                     op_free(rv2cv_op);
6678                     PL_last_lop = PL_oldbufptr;
6679                     PL_last_lop_op = OP_METHOD;
6680                     if (!PL_lex_allbrackets &&
6681                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6682                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6683                     PREBLOCK(METHOD);
6684                 }
6685
6686                 /* If followed by a bareword, see if it looks like indir obj. */
6687
6688                 if (!orig_keyword
6689                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6690                         && (tmp = intuit_method(s, gv, cv))) {
6691                     op_free(rv2cv_op);
6692                     if (tmp == METHOD && !PL_lex_allbrackets &&
6693                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6694                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6695                     return REPORT(tmp);
6696                 }
6697
6698                 /* Not a method, so call it a subroutine (if defined) */
6699
6700                 if (cv) {
6701                     if (lastchar == '-')
6702                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6703                                          "Ambiguous use of -%s resolved as -&%s()",
6704                                          PL_tokenbuf, PL_tokenbuf);
6705                     /* Check for a constant sub */
6706                     if ((sv = cv_const_sv(cv))) {
6707                   its_constant:
6708                         op_free(rv2cv_op);
6709                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6710                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6711                         pl_yylval.opval->op_private = 0;
6712                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6713                         TOKEN(WORD);
6714                     }
6715
6716                     op_free(pl_yylval.opval);
6717                     pl_yylval.opval = rv2cv_op;
6718                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6719                     PL_last_lop = PL_oldbufptr;
6720                     PL_last_lop_op = OP_ENTERSUB;
6721                     /* Is there a prototype? */
6722                     if (
6723 #ifdef PERL_MAD
6724                         cv &&
6725 #endif
6726                         SvPOK(cv))
6727                     {
6728                         STRLEN protolen;
6729                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6730                         if (!protolen)
6731                             TERM(FUNC0SUB);
6732                         while (*proto == ';')
6733                             proto++;
6734                         if (
6735                             (
6736                                 (
6737                                     *proto == '$' || *proto == '_'
6738                                  || *proto == '*' || *proto == '+'
6739                                 )
6740                              && proto[1] == '\0'
6741                             )
6742                          || (
6743                              *proto == '\\' && proto[1] && proto[2] == '\0'
6744                             )
6745                         )
6746                             OPERATOR(UNIOPSUB);
6747                         if (*proto == '\\' && proto[1] == '[') {
6748                             const char *p = proto + 2;
6749                             while(*p && *p != ']')
6750                                 ++p;
6751                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6752                         }
6753                         if (*proto == '&' && *s == '{') {
6754                             if (PL_curstash)
6755                                 sv_setpvs(PL_subname, "__ANON__");
6756                             else
6757                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6758                             if (!PL_lex_allbrackets &&
6759                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6760                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6761                             PREBLOCK(LSTOPSUB);
6762                         }
6763                     }
6764 #ifdef PERL_MAD
6765                     {
6766                         if (PL_madskills) {
6767                             PL_nextwhite = PL_thiswhite;
6768                             PL_thiswhite = 0;
6769                         }
6770                         start_force(PL_curforce);
6771                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6772                         PL_expect = XTERM;
6773                         if (PL_madskills) {
6774                             PL_nextwhite = nextPL_nextwhite;
6775                             curmad('X', PL_thistoken);
6776                             PL_thistoken = newSVpvs("");
6777                         }
6778                         force_next(WORD);
6779                         if (!PL_lex_allbrackets &&
6780                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6781                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6782                         TOKEN(NOAMP);
6783                     }
6784                 }
6785
6786                 /* Guess harder when madskills require "best effort". */
6787                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6788                     int probable_sub = 0;
6789                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6790                         probable_sub = 1;
6791                     else if (isALPHA(*s)) {
6792                         char tmpbuf[1024];
6793                         STRLEN tmplen;
6794                         d = s;
6795                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6796                         if (!keyword(tmpbuf, tmplen, 0))
6797                             probable_sub = 1;
6798                         else {
6799                             while (d < PL_bufend && isSPACE(*d))
6800                                 d++;
6801                             if (*d == '=' && d[1] == '>')
6802                                 probable_sub = 1;
6803                         }
6804                     }
6805                     if (probable_sub) {
6806                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6807                         op_free(pl_yylval.opval);
6808                         pl_yylval.opval = rv2cv_op;
6809                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6810                         PL_last_lop = PL_oldbufptr;
6811                         PL_last_lop_op = OP_ENTERSUB;
6812                         PL_nextwhite = PL_thiswhite;
6813                         PL_thiswhite = 0;
6814                         start_force(PL_curforce);
6815                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6816                         PL_expect = XTERM;
6817                         PL_nextwhite = nextPL_nextwhite;
6818                         curmad('X', PL_thistoken);
6819                         PL_thistoken = newSVpvs("");
6820                         force_next(WORD);
6821                         if (!PL_lex_allbrackets &&
6822                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6823                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6824                         TOKEN(NOAMP);
6825                     }
6826 #else
6827                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6828                     PL_expect = XTERM;
6829                     force_next(WORD);
6830                     if (!PL_lex_allbrackets &&
6831                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6832                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6833                     TOKEN(NOAMP);
6834 #endif
6835                 }
6836
6837                 /* Call it a bare word */
6838
6839                 if (PL_hints & HINT_STRICT_SUBS)
6840                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6841                 else {
6842                 bareword:
6843                     /* after "print" and similar functions (corresponding to
6844                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6845                      * a filehandle should be subject to "strict subs".
6846                      * Likewise for the optional indirect-object argument to system
6847                      * or exec, which can't be a bareword */
6848                     if ((PL_last_lop_op == OP_PRINT
6849                             || PL_last_lop_op == OP_PRTF
6850                             || PL_last_lop_op == OP_SAY
6851                             || PL_last_lop_op == OP_SYSTEM
6852                             || PL_last_lop_op == OP_EXEC)
6853                             && (PL_hints & HINT_STRICT_SUBS))
6854                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6855                     if (lastchar != '-') {
6856                         if (ckWARN(WARN_RESERVED)) {
6857                             d = PL_tokenbuf;
6858                             while (isLOWER(*d))
6859                                 d++;
6860                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6861                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6862                                        PL_tokenbuf);
6863                         }
6864                     }
6865                 }
6866                 op_free(rv2cv_op);
6867
6868             safe_bareword:
6869                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6870                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6871                                      "Operator or semicolon missing before %c%s",
6872                                      lastchar, PL_tokenbuf);
6873                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6874                                      "Ambiguous use of %c resolved as operator %c",
6875                                      lastchar, lastchar);
6876                 }
6877                 TOKEN(WORD);
6878             }
6879
6880         case KEY___FILE__:
6881             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6882                                         newSVpv(CopFILE(PL_curcop),0));
6883             TERM(THING);
6884
6885         case KEY___LINE__:
6886             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6887                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6888             TERM(THING);
6889
6890         case KEY___PACKAGE__:
6891             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6892                                         (PL_curstash
6893                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6894                                          : &PL_sv_undef));
6895             TERM(THING);
6896
6897         case KEY___DATA__:
6898         case KEY___END__: {
6899             GV *gv;
6900             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6901                 const char *pname = "main";
6902                 if (PL_tokenbuf[2] == 'D')
6903                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6904                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6905                                 SVt_PVIO);
6906                 GvMULTI_on(gv);
6907                 if (!GvIO(gv))
6908                     GvIOp(gv) = newIO();
6909                 IoIFP(GvIOp(gv)) = PL_rsfp;
6910 #if defined(HAS_FCNTL) && defined(F_SETFD)
6911                 {
6912                     const int fd = PerlIO_fileno(PL_rsfp);
6913                     fcntl(fd,F_SETFD,fd >= 3);
6914                 }
6915 #endif
6916                 /* Mark this internal pseudo-handle as clean */
6917                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6918                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6919                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6920                 else
6921                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6922 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6923                 /* if the script was opened in binmode, we need to revert
6924                  * it to text mode for compatibility; but only iff it has CRs
6925                  * XXX this is a questionable hack at best. */
6926                 if (PL_bufend-PL_bufptr > 2
6927                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6928                 {
6929                     Off_t loc = 0;
6930                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6931                         loc = PerlIO_tell(PL_rsfp);
6932                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6933                     }
6934 #ifdef NETWARE
6935                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6936 #else
6937                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6938 #endif  /* NETWARE */
6939 #ifdef PERLIO_IS_STDIO /* really? */
6940 #  if defined(__BORLANDC__)
6941                         /* XXX see note in do_binmode() */
6942                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6943 #  endif
6944 #endif
6945                         if (loc > 0)
6946                             PerlIO_seek(PL_rsfp, loc, 0);
6947                     }
6948                 }
6949 #endif
6950 #ifdef PERLIO_LAYERS
6951                 if (!IN_BYTES) {
6952                     if (UTF)
6953                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6954                     else if (PL_encoding) {
6955                         SV *name;
6956                         dSP;
6957                         ENTER;
6958                         SAVETMPS;
6959                         PUSHMARK(sp);
6960                         EXTEND(SP, 1);
6961                         XPUSHs(PL_encoding);
6962                         PUTBACK;
6963                         call_method("name", G_SCALAR);
6964                         SPAGAIN;
6965                         name = POPs;
6966                         PUTBACK;
6967                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6968                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6969                                                       SVfARG(name)));
6970                         FREETMPS;
6971                         LEAVE;
6972                     }
6973                 }
6974 #endif
6975 #ifdef PERL_MAD
6976                 if (PL_madskills) {
6977                     if (PL_realtokenstart >= 0) {
6978                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6979                         if (!PL_endwhite)
6980                             PL_endwhite = newSVpvs("");
6981                         sv_catsv(PL_endwhite, PL_thiswhite);
6982                         PL_thiswhite = 0;
6983                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6984                         PL_realtokenstart = -1;
6985                     }
6986                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6987                            != NULL) ;
6988                 }
6989 #endif
6990                 PL_rsfp = NULL;
6991             }
6992             goto fake_eof;
6993         }
6994
6995         case KEY_AUTOLOAD:
6996         case KEY_DESTROY:
6997         case KEY_BEGIN:
6998         case KEY_UNITCHECK:
6999         case KEY_CHECK:
7000         case KEY_INIT:
7001         case KEY_END:
7002             if (PL_expect == XSTATE) {
7003                 s = PL_bufptr;
7004                 goto really_sub;
7005             }
7006             goto just_a_word;
7007
7008         case KEY_CORE:
7009             if (*s == ':' && s[1] == ':') {
7010                 s += 2;
7011                 d = s;
7012                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7013                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
7014                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7015                 if (tmp < 0)
7016                     tmp = -tmp;
7017                 else if (tmp == KEY_require || tmp == KEY_do)
7018                     /* that's a way to remember we saw "CORE::" */
7019                     orig_keyword = tmp;
7020                 goto reserved_word;
7021             }
7022             goto just_a_word;
7023
7024         case KEY_abs:
7025             UNI(OP_ABS);
7026
7027         case KEY_alarm:
7028             UNI(OP_ALARM);
7029
7030         case KEY_accept:
7031             LOP(OP_ACCEPT,XTERM);
7032
7033         case KEY_and:
7034             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7035                 return REPORT(0);
7036             OPERATOR(ANDOP);
7037
7038         case KEY_atan2:
7039             LOP(OP_ATAN2,XTERM);
7040
7041         case KEY_bind:
7042             LOP(OP_BIND,XTERM);
7043
7044         case KEY_binmode:
7045             LOP(OP_BINMODE,XTERM);
7046
7047         case KEY_bless:
7048             LOP(OP_BLESS,XTERM);
7049
7050         case KEY_break:
7051             FUN0(OP_BREAK);
7052
7053         case KEY_chop:
7054             UNI(OP_CHOP);
7055
7056         case KEY_continue:
7057             /* When 'use switch' is in effect, continue has a dual
7058                life as a control operator. */
7059             {
7060                 if (!FEATURE_IS_ENABLED("switch"))
7061                     PREBLOCK(CONTINUE);
7062                 else {
7063                     /* We have to disambiguate the two senses of
7064                       "continue". If the next token is a '{' then
7065                       treat it as the start of a continue block;
7066                       otherwise treat it as a control operator.
7067                      */
7068                     s = skipspace(s);
7069                     if (*s == '{')
7070             PREBLOCK(CONTINUE);
7071                     else
7072                         FUN0(OP_CONTINUE);
7073                 }
7074             }
7075
7076         case KEY_chdir:
7077             /* may use HOME */
7078             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7079             UNI(OP_CHDIR);
7080
7081         case KEY_close:
7082             UNI(OP_CLOSE);
7083
7084         case KEY_closedir:
7085             UNI(OP_CLOSEDIR);
7086
7087         case KEY_cmp:
7088             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7089                 return REPORT(0);
7090             Eop(OP_SCMP);
7091
7092         case KEY_caller:
7093             UNI(OP_CALLER);
7094
7095         case KEY_crypt:
7096 #ifdef FCRYPT
7097             if (!PL_cryptseen) {
7098                 PL_cryptseen = TRUE;
7099                 init_des();
7100             }
7101 #endif
7102             LOP(OP_CRYPT,XTERM);
7103
7104         case KEY_chmod:
7105             LOP(OP_CHMOD,XTERM);
7106
7107         case KEY_chown:
7108             LOP(OP_CHOWN,XTERM);
7109
7110         case KEY_connect:
7111             LOP(OP_CONNECT,XTERM);
7112
7113         case KEY_chr:
7114             UNI(OP_CHR);
7115
7116         case KEY_cos:
7117             UNI(OP_COS);
7118
7119         case KEY_chroot:
7120             UNI(OP_CHROOT);
7121
7122         case KEY_default:
7123             PREBLOCK(DEFAULT);
7124
7125         case KEY_do:
7126             s = SKIPSPACE1(s);
7127             if (*s == '{')
7128                 PRETERMBLOCK(DO);
7129             if (*s != '\'')
7130                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7131             if (orig_keyword == KEY_do) {
7132                 orig_keyword = 0;
7133                 pl_yylval.ival = 1;
7134             }
7135             else
7136                 pl_yylval.ival = 0;
7137             OPERATOR(DO);
7138
7139         case KEY_die:
7140             PL_hints |= HINT_BLOCK_SCOPE;
7141             LOP(OP_DIE,XTERM);
7142
7143         case KEY_defined:
7144             UNI(OP_DEFINED);
7145
7146         case KEY_delete:
7147             UNI(OP_DELETE);
7148
7149         case KEY_dbmopen:
7150             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7151                               STR_WITH_LEN("NDBM_File::"),
7152                               STR_WITH_LEN("DB_File::"),
7153                               STR_WITH_LEN("GDBM_File::"),
7154                               STR_WITH_LEN("SDBM_File::"),
7155                               STR_WITH_LEN("ODBM_File::"),
7156                               NULL);
7157             LOP(OP_DBMOPEN,XTERM);
7158
7159         case KEY_dbmclose:
7160             UNI(OP_DBMCLOSE);
7161
7162         case KEY_dump:
7163             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7164             LOOPX(OP_DUMP);
7165
7166         case KEY_else:
7167             PREBLOCK(ELSE);
7168
7169         case KEY_elsif:
7170             pl_yylval.ival = CopLINE(PL_curcop);
7171             OPERATOR(ELSIF);
7172
7173         case KEY_eq:
7174             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7175                 return REPORT(0);
7176             Eop(OP_SEQ);
7177
7178         case KEY_exists:
7179             UNI(OP_EXISTS);
7180
7181         case KEY_exit:
7182             if (PL_madskills)
7183                 UNI(OP_INT);
7184             UNI(OP_EXIT);
7185
7186         case KEY_eval:
7187             s = SKIPSPACE1(s);
7188             if (*s == '{') { /* block eval */
7189                 PL_expect = XTERMBLOCK;
7190                 UNIBRACK(OP_ENTERTRY);
7191             }
7192             else { /* string eval */
7193                 PL_expect = XTERM;
7194                 UNIBRACK(OP_ENTEREVAL);
7195             }
7196
7197         case KEY_eof:
7198             UNI(OP_EOF);
7199
7200         case KEY_exp:
7201             UNI(OP_EXP);
7202
7203         case KEY_each:
7204             UNI(OP_EACH);
7205
7206         case KEY_exec:
7207             LOP(OP_EXEC,XREF);
7208
7209         case KEY_endhostent:
7210             FUN0(OP_EHOSTENT);
7211
7212         case KEY_endnetent:
7213             FUN0(OP_ENETENT);
7214
7215         case KEY_endservent:
7216             FUN0(OP_ESERVENT);
7217
7218         case KEY_endprotoent:
7219             FUN0(OP_EPROTOENT);
7220
7221         case KEY_endpwent:
7222             FUN0(OP_EPWENT);
7223
7224         case KEY_endgrent:
7225             FUN0(OP_EGRENT);
7226
7227         case KEY_for:
7228         case KEY_foreach:
7229             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7230                 return REPORT(0);
7231             pl_yylval.ival = CopLINE(PL_curcop);
7232             s = SKIPSPACE1(s);
7233             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7234                 char *p = s;
7235 #ifdef PERL_MAD
7236                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7237 #endif
7238
7239                 if ((PL_bufend - p) >= 3 &&
7240                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7241                     p += 2;
7242                 else if ((PL_bufend - p) >= 4 &&
7243                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7244                     p += 3;
7245                 p = PEEKSPACE(p);
7246                 if (isIDFIRST_lazy_if(p,UTF)) {
7247                     p = scan_ident(p, PL_bufend,
7248                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7249                     p = PEEKSPACE(p);
7250                 }
7251                 if (*p != '$')
7252                     Perl_croak(aTHX_ "Missing $ on loop variable");
7253 #ifdef PERL_MAD
7254                 s = SvPVX(PL_linestr) + soff;
7255 #endif
7256             }
7257             OPERATOR(FOR);
7258
7259         case KEY_formline:
7260             LOP(OP_FORMLINE,XTERM);
7261
7262         case KEY_fork:
7263             FUN0(OP_FORK);
7264
7265         case KEY_fcntl:
7266             LOP(OP_FCNTL,XTERM);
7267
7268         case KEY_fileno:
7269             UNI(OP_FILENO);
7270
7271         case KEY_flock:
7272             LOP(OP_FLOCK,XTERM);
7273
7274         case KEY_gt:
7275             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7276                 return REPORT(0);
7277             Rop(OP_SGT);
7278
7279         case KEY_ge:
7280             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7281                 return REPORT(0);
7282             Rop(OP_SGE);
7283
7284         case KEY_grep:
7285             LOP(OP_GREPSTART, XREF);
7286
7287         case KEY_goto:
7288             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7289             LOOPX(OP_GOTO);
7290
7291         case KEY_gmtime:
7292             UNI(OP_GMTIME);
7293
7294         case KEY_getc:
7295             UNIDOR(OP_GETC);
7296
7297         case KEY_getppid:
7298             FUN0(OP_GETPPID);
7299
7300         case KEY_getpgrp:
7301             UNI(OP_GETPGRP);
7302
7303         case KEY_getpriority:
7304             LOP(OP_GETPRIORITY,XTERM);
7305
7306         case KEY_getprotobyname:
7307             UNI(OP_GPBYNAME);
7308
7309         case KEY_getprotobynumber:
7310             LOP(OP_GPBYNUMBER,XTERM);
7311
7312         case KEY_getprotoent:
7313             FUN0(OP_GPROTOENT);
7314
7315         case KEY_getpwent:
7316             FUN0(OP_GPWENT);
7317
7318         case KEY_getpwnam:
7319             UNI(OP_GPWNAM);
7320
7321         case KEY_getpwuid:
7322             UNI(OP_GPWUID);
7323
7324         case KEY_getpeername:
7325             UNI(OP_GETPEERNAME);
7326
7327         case KEY_gethostbyname:
7328             UNI(OP_GHBYNAME);
7329
7330         case KEY_gethostbyaddr:
7331             LOP(OP_GHBYADDR,XTERM);
7332
7333         case KEY_gethostent:
7334             FUN0(OP_GHOSTENT);
7335
7336         case KEY_getnetbyname:
7337             UNI(OP_GNBYNAME);
7338
7339         case KEY_getnetbyaddr:
7340             LOP(OP_GNBYADDR,XTERM);
7341
7342         case KEY_getnetent:
7343             FUN0(OP_GNETENT);
7344
7345         case KEY_getservbyname:
7346             LOP(OP_GSBYNAME,XTERM);
7347
7348         case KEY_getservbyport:
7349             LOP(OP_GSBYPORT,XTERM);
7350
7351         case KEY_getservent:
7352             FUN0(OP_GSERVENT);
7353
7354         case KEY_getsockname:
7355             UNI(OP_GETSOCKNAME);
7356
7357         case KEY_getsockopt:
7358             LOP(OP_GSOCKOPT,XTERM);
7359
7360         case KEY_getgrent:
7361             FUN0(OP_GGRENT);
7362
7363         case KEY_getgrnam:
7364             UNI(OP_GGRNAM);
7365
7366         case KEY_getgrgid:
7367             UNI(OP_GGRGID);
7368
7369         case KEY_getlogin:
7370             FUN0(OP_GETLOGIN);
7371
7372         case KEY_given:
7373             pl_yylval.ival = CopLINE(PL_curcop);
7374             OPERATOR(GIVEN);
7375
7376         case KEY_glob:
7377             LOP(OP_GLOB,XTERM);
7378
7379         case KEY_hex:
7380             UNI(OP_HEX);
7381
7382         case KEY_if:
7383             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7384                 return REPORT(0);
7385             pl_yylval.ival = CopLINE(PL_curcop);
7386             OPERATOR(IF);
7387
7388         case KEY_index:
7389             LOP(OP_INDEX,XTERM);
7390
7391         case KEY_int:
7392             UNI(OP_INT);
7393
7394         case KEY_ioctl:
7395             LOP(OP_IOCTL,XTERM);
7396
7397         case KEY_join:
7398             LOP(OP_JOIN,XTERM);
7399
7400         case KEY_keys:
7401             UNI(OP_KEYS);
7402
7403         case KEY_kill:
7404             LOP(OP_KILL,XTERM);
7405
7406         case KEY_last:
7407             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7408             LOOPX(OP_LAST);
7409
7410         case KEY_lc:
7411             UNI(OP_LC);
7412
7413         case KEY_lcfirst:
7414             UNI(OP_LCFIRST);
7415
7416         case KEY_local:
7417             pl_yylval.ival = 0;
7418             OPERATOR(LOCAL);
7419
7420         case KEY_length:
7421             UNI(OP_LENGTH);
7422
7423         case KEY_lt:
7424             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7425                 return REPORT(0);
7426             Rop(OP_SLT);
7427
7428         case KEY_le:
7429             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7430                 return REPORT(0);
7431             Rop(OP_SLE);
7432
7433         case KEY_localtime:
7434             UNI(OP_LOCALTIME);
7435
7436         case KEY_log:
7437             UNI(OP_LOG);
7438
7439         case KEY_link:
7440             LOP(OP_LINK,XTERM);
7441
7442         case KEY_listen:
7443             LOP(OP_LISTEN,XTERM);
7444
7445         case KEY_lock:
7446             UNI(OP_LOCK);
7447
7448         case KEY_lstat:
7449             UNI(OP_LSTAT);
7450
7451         case KEY_m:
7452             s = scan_pat(s,OP_MATCH);
7453             TERM(sublex_start());
7454
7455         case KEY_map:
7456             LOP(OP_MAPSTART, XREF);
7457
7458         case KEY_mkdir:
7459             LOP(OP_MKDIR,XTERM);
7460
7461         case KEY_msgctl:
7462             LOP(OP_MSGCTL,XTERM);
7463
7464         case KEY_msgget:
7465             LOP(OP_MSGGET,XTERM);
7466
7467         case KEY_msgrcv:
7468             LOP(OP_MSGRCV,XTERM);
7469
7470         case KEY_msgsnd:
7471             LOP(OP_MSGSND,XTERM);
7472
7473         case KEY_our:
7474         case KEY_my:
7475         case KEY_state:
7476             PL_in_my = (U16)tmp;
7477             s = SKIPSPACE1(s);
7478             if (isIDFIRST_lazy_if(s,UTF)) {
7479 #ifdef PERL_MAD
7480                 char* start = s;
7481 #endif
7482                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7483                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7484                     goto really_sub;
7485                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7486                 if (!PL_in_my_stash) {
7487                     char tmpbuf[1024];
7488                     PL_bufptr = s;
7489                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7490                     yyerror(tmpbuf);
7491                 }
7492 #ifdef PERL_MAD
7493                 if (PL_madskills) {     /* just add type to declarator token */
7494                     sv_catsv(PL_thistoken, PL_nextwhite);
7495                     PL_nextwhite = 0;
7496                     sv_catpvn(PL_thistoken, start, s - start);
7497                 }
7498 #endif
7499             }
7500             pl_yylval.ival = 1;
7501             OPERATOR(MY);
7502
7503         case KEY_next:
7504             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7505             LOOPX(OP_NEXT);
7506
7507         case KEY_ne:
7508             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7509                 return REPORT(0);
7510             Eop(OP_SNE);
7511
7512         case KEY_no:
7513             s = tokenize_use(0, s);
7514             OPERATOR(USE);
7515
7516         case KEY_not:
7517             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7518                 FUN1(OP_NOT);
7519             else {
7520                 if (!PL_lex_allbrackets &&
7521                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7522                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7523                 OPERATOR(NOTOP);
7524             }
7525
7526         case KEY_open:
7527             s = SKIPSPACE1(s);
7528             if (isIDFIRST_lazy_if(s,UTF)) {
7529                 const char *t;
7530                 for (d = s; isALNUM_lazy_if(d,UTF);)
7531                     d++;
7532                 for (t=d; isSPACE(*t);)
7533                     t++;
7534                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7535                     /* [perl #16184] */
7536                     && !(t[0] == '=' && t[1] == '>')
7537                 ) {
7538                     int parms_len = (int)(d-s);
7539                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7540                            "Precedence problem: open %.*s should be open(%.*s)",
7541                             parms_len, s, parms_len, s);
7542                 }
7543             }
7544             LOP(OP_OPEN,XTERM);
7545
7546         case KEY_or:
7547             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7548                 return REPORT(0);
7549             pl_yylval.ival = OP_OR;
7550             OPERATOR(OROP);
7551
7552         case KEY_ord:
7553             UNI(OP_ORD);
7554
7555         case KEY_oct:
7556             UNI(OP_OCT);
7557
7558         case KEY_opendir:
7559             LOP(OP_OPEN_DIR,XTERM);
7560
7561         case KEY_print:
7562             checkcomma(s,PL_tokenbuf,"filehandle");
7563             LOP(OP_PRINT,XREF);
7564
7565         case KEY_printf:
7566             checkcomma(s,PL_tokenbuf,"filehandle");
7567             LOP(OP_PRTF,XREF);
7568
7569         case KEY_prototype:
7570             UNI(OP_PROTOTYPE);
7571
7572         case KEY_push:
7573             LOP(OP_PUSH,XTERM);
7574
7575         case KEY_pop:
7576             UNIDOR(OP_POP);
7577
7578         case KEY_pos:
7579             UNIDOR(OP_POS);
7580
7581         case KEY_pack:
7582             LOP(OP_PACK,XTERM);
7583
7584         case KEY_package:
7585             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7586             s = SKIPSPACE1(s);
7587             s = force_strict_version(s);
7588             PL_lex_expect = XBLOCK;
7589             OPERATOR(PACKAGE);
7590
7591         case KEY_pipe:
7592             LOP(OP_PIPE_OP,XTERM);
7593
7594         case KEY_q:
7595             s = scan_str(s,!!PL_madskills,FALSE);
7596             if (!s)
7597                 missingterm(NULL);
7598             pl_yylval.ival = OP_CONST;
7599             TERM(sublex_start());
7600
7601         case KEY_quotemeta:
7602             UNI(OP_QUOTEMETA);
7603
7604         case KEY_qw: {
7605             OP *words = NULL;
7606             s = scan_str(s,!!PL_madskills,FALSE);
7607             if (!s)
7608                 missingterm(NULL);
7609             PL_expect = XOPERATOR;
7610             if (SvCUR(PL_lex_stuff)) {
7611                 int warned = 0;
7612                 d = SvPV_force(PL_lex_stuff, len);
7613                 while (len) {
7614                     for (; isSPACE(*d) && len; --len, ++d)
7615                         /**/;
7616                     if (len) {
7617                         SV *sv;
7618                         const char *b = d;
7619                         if (!warned && ckWARN(WARN_QW)) {
7620                             for (; !isSPACE(*d) && len; --len, ++d) {
7621                                 if (*d == ',') {
7622                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7623                                         "Possible attempt to separate words with commas");
7624                                     ++warned;
7625                                 }
7626                                 else if (*d == '#') {
7627                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7628                                         "Possible attempt to put comments in qw() list");
7629                                     ++warned;
7630                                 }
7631                             }
7632                         }
7633                         else {
7634                             for (; !isSPACE(*d) && len; --len, ++d)
7635                                 /**/;
7636                         }
7637                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7638                         words = op_append_elem(OP_LIST, words,
7639                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7640                     }
7641                 }
7642             }
7643             if (!words)
7644                 words = newNULLLIST();
7645             if (PL_lex_stuff) {
7646                 SvREFCNT_dec(PL_lex_stuff);
7647                 PL_lex_stuff = NULL;
7648             }
7649             PL_expect = XOPERATOR;
7650             pl_yylval.opval = sawparens(words);
7651             TOKEN(QWLIST);
7652         }
7653
7654         case KEY_qq:
7655             s = scan_str(s,!!PL_madskills,FALSE);
7656             if (!s)
7657                 missingterm(NULL);
7658             pl_yylval.ival = OP_STRINGIFY;
7659             if (SvIVX(PL_lex_stuff) == '\'')
7660                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7661             TERM(sublex_start());
7662
7663         case KEY_qr:
7664             s = scan_pat(s,OP_QR);
7665             TERM(sublex_start());
7666
7667         case KEY_qx:
7668             s = scan_str(s,!!PL_madskills,FALSE);
7669             if (!s)
7670                 missingterm(NULL);
7671             readpipe_override();
7672             TERM(sublex_start());
7673
7674         case KEY_return:
7675             OLDLOP(OP_RETURN);
7676
7677         case KEY_require:
7678             s = SKIPSPACE1(s);
7679             if (isDIGIT(*s)) {
7680                 s = force_version(s, FALSE);
7681             }
7682             else if (*s != 'v' || !isDIGIT(s[1])
7683                     || (s = force_version(s, TRUE), *s == 'v'))
7684             {
7685                 *PL_tokenbuf = '\0';
7686                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7687                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7688                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7689                 else if (*s == '<')
7690                     yyerror("<> should be quotes");
7691             }
7692             if (orig_keyword == KEY_require) {
7693                 orig_keyword = 0;
7694                 pl_yylval.ival = 1;
7695             }
7696             else
7697                 pl_yylval.ival = 0;
7698             PL_expect = XTERM;
7699             PL_bufptr = s;
7700             PL_last_uni = PL_oldbufptr;
7701             PL_last_lop_op = OP_REQUIRE;
7702             s = skipspace(s);
7703             return REPORT( (int)REQUIRE );
7704
7705         case KEY_reset:
7706             UNI(OP_RESET);
7707
7708         case KEY_redo:
7709             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7710             LOOPX(OP_REDO);
7711
7712         case KEY_rename:
7713             LOP(OP_RENAME,XTERM);
7714
7715         case KEY_rand:
7716             UNI(OP_RAND);
7717
7718         case KEY_rmdir:
7719             UNI(OP_RMDIR);
7720
7721         case KEY_rindex:
7722             LOP(OP_RINDEX,XTERM);
7723
7724         case KEY_read:
7725             LOP(OP_READ,XTERM);
7726
7727         case KEY_readdir:
7728             UNI(OP_READDIR);
7729
7730         case KEY_readline:
7731             UNIDOR(OP_READLINE);
7732
7733         case KEY_readpipe:
7734             UNIDOR(OP_BACKTICK);
7735
7736         case KEY_rewinddir:
7737             UNI(OP_REWINDDIR);
7738
7739         case KEY_recv:
7740             LOP(OP_RECV,XTERM);
7741
7742         case KEY_reverse:
7743             LOP(OP_REVERSE,XTERM);
7744
7745         case KEY_readlink:
7746             UNIDOR(OP_READLINK);
7747
7748         case KEY_ref:
7749             UNI(OP_REF);
7750
7751         case KEY_s:
7752             s = scan_subst(s);
7753             if (pl_yylval.opval)
7754                 TERM(sublex_start());
7755             else
7756                 TOKEN(1);       /* force error */
7757
7758         case KEY_say:
7759             checkcomma(s,PL_tokenbuf,"filehandle");
7760             LOP(OP_SAY,XREF);
7761
7762         case KEY_chomp:
7763             UNI(OP_CHOMP);
7764
7765         case KEY_scalar:
7766             UNI(OP_SCALAR);
7767
7768         case KEY_select:
7769             LOP(OP_SELECT,XTERM);
7770
7771         case KEY_seek:
7772             LOP(OP_SEEK,XTERM);
7773
7774         case KEY_semctl:
7775             LOP(OP_SEMCTL,XTERM);
7776
7777         case KEY_semget:
7778             LOP(OP_SEMGET,XTERM);
7779
7780         case KEY_semop:
7781             LOP(OP_SEMOP,XTERM);
7782
7783         case KEY_send:
7784             LOP(OP_SEND,XTERM);
7785
7786         case KEY_setpgrp:
7787             LOP(OP_SETPGRP,XTERM);
7788
7789         case KEY_setpriority:
7790             LOP(OP_SETPRIORITY,XTERM);
7791
7792         case KEY_sethostent:
7793             UNI(OP_SHOSTENT);
7794
7795         case KEY_setnetent:
7796             UNI(OP_SNETENT);
7797
7798         case KEY_setservent:
7799             UNI(OP_SSERVENT);
7800
7801         case KEY_setprotoent:
7802             UNI(OP_SPROTOENT);
7803
7804         case KEY_setpwent:
7805             FUN0(OP_SPWENT);
7806
7807         case KEY_setgrent:
7808             FUN0(OP_SGRENT);
7809
7810         case KEY_seekdir:
7811             LOP(OP_SEEKDIR,XTERM);
7812
7813         case KEY_setsockopt:
7814             LOP(OP_SSOCKOPT,XTERM);
7815
7816         case KEY_shift:
7817             UNIDOR(OP_SHIFT);
7818
7819         case KEY_shmctl:
7820             LOP(OP_SHMCTL,XTERM);
7821
7822         case KEY_shmget:
7823             LOP(OP_SHMGET,XTERM);
7824
7825         case KEY_shmread:
7826             LOP(OP_SHMREAD,XTERM);
7827
7828         case KEY_shmwrite:
7829             LOP(OP_SHMWRITE,XTERM);
7830
7831         case KEY_shutdown:
7832             LOP(OP_SHUTDOWN,XTERM);
7833
7834         case KEY_sin:
7835             UNI(OP_SIN);
7836
7837         case KEY_sleep:
7838             UNI(OP_SLEEP);
7839
7840         case KEY_socket:
7841             LOP(OP_SOCKET,XTERM);
7842
7843         case KEY_socketpair:
7844             LOP(OP_SOCKPAIR,XTERM);
7845
7846         case KEY_sort:
7847             checkcomma(s,PL_tokenbuf,"subroutine name");
7848             s = SKIPSPACE1(s);
7849             if (*s == ';' || *s == ')')         /* probably a close */
7850                 Perl_croak(aTHX_ "sort is now a reserved word");
7851             PL_expect = XTERM;
7852             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7853             LOP(OP_SORT,XREF);
7854
7855         case KEY_split:
7856             LOP(OP_SPLIT,XTERM);
7857
7858         case KEY_sprintf:
7859             LOP(OP_SPRINTF,XTERM);
7860
7861         case KEY_splice:
7862             LOP(OP_SPLICE,XTERM);
7863
7864         case KEY_sqrt:
7865             UNI(OP_SQRT);
7866
7867         case KEY_srand:
7868             UNI(OP_SRAND);
7869
7870         case KEY_stat:
7871             UNI(OP_STAT);
7872
7873         case KEY_study:
7874             UNI(OP_STUDY);
7875
7876         case KEY_substr:
7877             LOP(OP_SUBSTR,XTERM);
7878
7879         case KEY_format:
7880         case KEY_sub:
7881           really_sub:
7882             {
7883                 char tmpbuf[sizeof PL_tokenbuf];
7884                 SSize_t tboffset = 0;
7885                 expectation attrful;
7886                 bool have_name, have_proto;
7887                 const int key = tmp;
7888
7889 #ifdef PERL_MAD
7890                 SV *tmpwhite = 0;
7891
7892                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7893                 SV *subtoken = newSVpvn(tstart, s - tstart);
7894                 PL_thistoken = 0;
7895
7896                 d = s;
7897                 s = SKIPSPACE2(s,tmpwhite);
7898 #else
7899                 s = skipspace(s);
7900 #endif
7901
7902                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7903                     (*s == ':' && s[1] == ':'))
7904                 {
7905 #ifdef PERL_MAD
7906                     SV *nametoke = NULL;
7907 #endif
7908
7909                     PL_expect = XBLOCK;
7910                     attrful = XATTRBLOCK;
7911                     /* remember buffer pos'n for later force_word */
7912                     tboffset = s - PL_oldbufptr;
7913                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7914 #ifdef PERL_MAD
7915                     if (PL_madskills)
7916                         nametoke = newSVpvn(s, d - s);
7917 #endif
7918                     if (memchr(tmpbuf, ':', len))
7919                         sv_setpvn(PL_subname, tmpbuf, len);
7920                     else {
7921                         sv_setsv(PL_subname,PL_curstname);
7922                         sv_catpvs(PL_subname,"::");
7923                         sv_catpvn(PL_subname,tmpbuf,len);
7924                     }
7925                     have_name = TRUE;
7926
7927 #ifdef PERL_MAD
7928
7929                     start_force(0);
7930                     CURMAD('X', nametoke);
7931                     CURMAD('_', tmpwhite);
7932                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7933                                       FALSE, TRUE, TRUE);
7934
7935                     s = SKIPSPACE2(d,tmpwhite);
7936 #else
7937                     s = skipspace(d);
7938 #endif
7939                 }
7940                 else {
7941                     if (key == KEY_my)
7942                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7943                     PL_expect = XTERMBLOCK;
7944                     attrful = XATTRTERM;
7945                     sv_setpvs(PL_subname,"?");
7946                     have_name = FALSE;
7947                 }
7948
7949                 if (key == KEY_format) {
7950                     if (*s == '=')
7951                         PL_lex_formbrack = PL_lex_brackets + 1;
7952 #ifdef PERL_MAD
7953                     PL_thistoken = subtoken;
7954                     s = d;
7955 #else
7956                     if (have_name)
7957                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7958                                           FALSE, TRUE, TRUE);
7959 #endif
7960                     OPERATOR(FORMAT);
7961                 }
7962
7963                 /* Look for a prototype */
7964                 if (*s == '(') {
7965                     char *p;
7966                     bool bad_proto = FALSE;
7967                     bool in_brackets = FALSE;
7968                     char greedy_proto = ' ';
7969                     bool proto_after_greedy_proto = FALSE;
7970                     bool must_be_last = FALSE;
7971                     bool underscore = FALSE;
7972                     bool seen_underscore = FALSE;
7973                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7974
7975                     s = scan_str(s,!!PL_madskills,FALSE);
7976                     if (!s)
7977                         Perl_croak(aTHX_ "Prototype not terminated");
7978                     /* strip spaces and check for bad characters */
7979                     d = SvPVX(PL_lex_stuff);
7980                     tmp = 0;
7981                     for (p = d; *p; ++p) {
7982                         if (!isSPACE(*p)) {
7983                             d[tmp++] = *p;
7984
7985                             if (warnillegalproto) {
7986                                 if (must_be_last)
7987                                     proto_after_greedy_proto = TRUE;
7988                                 if (!strchr("$@%*;[]&\\_+", *p)) {
7989                                     bad_proto = TRUE;
7990                                 }
7991                                 else {
7992                                     if ( underscore ) {
7993                                         if ( *p != ';' )
7994                                             bad_proto = TRUE;
7995                                         underscore = FALSE;
7996                                     }
7997                                     if ( *p == '[' ) {
7998                                         in_brackets = TRUE;
7999                                     }
8000                                     else if ( *p == ']' ) {
8001                                         in_brackets = FALSE;
8002                                     }
8003                                     else if ( (*p == '@' || *p == '%') &&
8004                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8005                                          !in_brackets ) {
8006                                         must_be_last = TRUE;
8007                                         greedy_proto = *p;
8008                                     }
8009                                     else if ( *p == '_' ) {
8010                                         underscore = seen_underscore = TRUE;
8011                                     }
8012                                 }
8013                             }
8014                         }
8015                     }
8016                     d[tmp] = '\0';
8017                     if (proto_after_greedy_proto)
8018                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8019                                     "Prototype after '%c' for %"SVf" : %s",
8020                                     greedy_proto, SVfARG(PL_subname), d);
8021                     if (bad_proto)
8022                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8023                                     "Illegal character %sin prototype for %"SVf" : %s",
8024                                     seen_underscore ? "after '_' " : "",
8025                                     SVfARG(PL_subname), d);
8026                     SvCUR_set(PL_lex_stuff, tmp);
8027                     have_proto = TRUE;
8028
8029 #ifdef PERL_MAD
8030                     start_force(0);
8031                     CURMAD('q', PL_thisopen);
8032                     CURMAD('_', tmpwhite);
8033                     CURMAD('=', PL_thisstuff);
8034                     CURMAD('Q', PL_thisclose);
8035                     NEXTVAL_NEXTTOKE.opval =
8036                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8037                     PL_lex_stuff = NULL;
8038                     force_next(THING);
8039
8040                     s = SKIPSPACE2(s,tmpwhite);
8041 #else
8042                     s = skipspace(s);
8043 #endif
8044                 }
8045                 else
8046                     have_proto = FALSE;
8047
8048                 if (*s == ':' && s[1] != ':')
8049                     PL_expect = attrful;
8050                 else if (*s != '{' && key == KEY_sub) {
8051                     if (!have_name)
8052                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8053                     else if (*s != ';' && *s != '}')
8054                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8055                 }
8056
8057 #ifdef PERL_MAD
8058                 start_force(0);
8059                 if (tmpwhite) {
8060                     if (PL_madskills)
8061                         curmad('^', newSVpvs(""));
8062                     CURMAD('_', tmpwhite);
8063                 }
8064                 force_next(0);
8065
8066                 PL_thistoken = subtoken;
8067 #else
8068                 if (have_proto) {
8069                     NEXTVAL_NEXTTOKE.opval =
8070                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8071                     PL_lex_stuff = NULL;
8072                     force_next(THING);
8073                 }
8074 #endif
8075                 if (!have_name) {
8076                     if (PL_curstash)
8077                         sv_setpvs(PL_subname, "__ANON__");
8078                     else
8079                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8080                     TOKEN(ANONSUB);
8081                 }
8082 #ifndef PERL_MAD
8083                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8084                                   FALSE, TRUE, TRUE);
8085 #endif
8086                 if (key == KEY_my)
8087                     TOKEN(MYSUB);
8088                 TOKEN(SUB);
8089             }
8090
8091         case KEY_system:
8092             LOP(OP_SYSTEM,XREF);
8093
8094         case KEY_symlink:
8095             LOP(OP_SYMLINK,XTERM);
8096
8097         case KEY_syscall:
8098             LOP(OP_SYSCALL,XTERM);
8099
8100         case KEY_sysopen:
8101             LOP(OP_SYSOPEN,XTERM);
8102
8103         case KEY_sysseek:
8104             LOP(OP_SYSSEEK,XTERM);
8105
8106         case KEY_sysread:
8107             LOP(OP_SYSREAD,XTERM);
8108
8109         case KEY_syswrite:
8110             LOP(OP_SYSWRITE,XTERM);
8111
8112         case KEY_tr:
8113             s = scan_trans(s);
8114             TERM(sublex_start());
8115
8116         case KEY_tell:
8117             UNI(OP_TELL);
8118
8119         case KEY_telldir:
8120             UNI(OP_TELLDIR);
8121
8122         case KEY_tie:
8123             LOP(OP_TIE,XTERM);
8124
8125         case KEY_tied:
8126             UNI(OP_TIED);
8127
8128         case KEY_time:
8129             FUN0(OP_TIME);
8130
8131         case KEY_times:
8132             FUN0(OP_TMS);
8133
8134         case KEY_truncate:
8135             LOP(OP_TRUNCATE,XTERM);
8136
8137         case KEY_uc:
8138             UNI(OP_UC);
8139
8140         case KEY_ucfirst:
8141             UNI(OP_UCFIRST);
8142
8143         case KEY_untie:
8144             UNI(OP_UNTIE);
8145
8146         case KEY_until:
8147             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8148                 return REPORT(0);
8149             pl_yylval.ival = CopLINE(PL_curcop);
8150             OPERATOR(UNTIL);
8151
8152         case KEY_unless:
8153             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8154                 return REPORT(0);
8155             pl_yylval.ival = CopLINE(PL_curcop);
8156             OPERATOR(UNLESS);
8157
8158         case KEY_unlink:
8159             LOP(OP_UNLINK,XTERM);
8160
8161         case KEY_undef:
8162             UNIDOR(OP_UNDEF);
8163
8164         case KEY_unpack:
8165             LOP(OP_UNPACK,XTERM);
8166
8167         case KEY_utime:
8168             LOP(OP_UTIME,XTERM);
8169
8170         case KEY_umask:
8171             UNIDOR(OP_UMASK);
8172
8173         case KEY_unshift:
8174             LOP(OP_UNSHIFT,XTERM);
8175
8176         case KEY_use:
8177             s = tokenize_use(1, s);
8178             OPERATOR(USE);
8179
8180         case KEY_values:
8181             UNI(OP_VALUES);
8182
8183         case KEY_vec:
8184             LOP(OP_VEC,XTERM);
8185
8186         case KEY_when:
8187             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8188                 return REPORT(0);
8189             pl_yylval.ival = CopLINE(PL_curcop);
8190             OPERATOR(WHEN);
8191
8192         case KEY_while:
8193             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8194                 return REPORT(0);
8195             pl_yylval.ival = CopLINE(PL_curcop);
8196             OPERATOR(WHILE);
8197
8198         case KEY_warn:
8199             PL_hints |= HINT_BLOCK_SCOPE;
8200             LOP(OP_WARN,XTERM);
8201
8202         case KEY_wait:
8203             FUN0(OP_WAIT);
8204
8205         case KEY_waitpid:
8206             LOP(OP_WAITPID,XTERM);
8207
8208         case KEY_wantarray:
8209             FUN0(OP_WANTARRAY);
8210
8211         case KEY_write:
8212 #ifdef EBCDIC
8213         {
8214             char ctl_l[2];
8215             ctl_l[0] = toCTRL('L');
8216             ctl_l[1] = '\0';
8217             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8218         }
8219 #else
8220             /* Make sure $^L is defined */
8221             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8222 #endif
8223             UNI(OP_ENTERWRITE);
8224
8225         case KEY_x:
8226             if (PL_expect == XOPERATOR) {
8227                 if (*s == '=' && !PL_lex_allbrackets &&
8228                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8229                     return REPORT(0);
8230                 Mop(OP_REPEAT);
8231             }
8232             check_uni();
8233             goto just_a_word;
8234
8235         case KEY_xor:
8236             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8237                 return REPORT(0);
8238             pl_yylval.ival = OP_XOR;
8239             OPERATOR(OROP);
8240
8241         case KEY_y:
8242             s = scan_trans(s);
8243             TERM(sublex_start());
8244         }
8245     }}
8246 }
8247 #ifdef __SC__
8248 #pragma segment Main
8249 #endif
8250
8251 static int
8252 S_pending_ident(pTHX)
8253 {
8254     dVAR;
8255     register char *d;
8256     PADOFFSET tmp = 0;
8257     /* pit holds the identifier we read and pending_ident is reset */
8258     char pit = PL_pending_ident;
8259     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8260     /* All routes through this function want to know if there is a colon.  */
8261     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8262     PL_pending_ident = 0;
8263
8264     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8265     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8266           "### Pending identifier '%s'\n", PL_tokenbuf); });
8267
8268     /* if we're in a my(), we can't allow dynamics here.
8269        $foo'bar has already been turned into $foo::bar, so
8270        just check for colons.
8271
8272        if it's a legal name, the OP is a PADANY.
8273     */
8274     if (PL_in_my) {
8275         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8276             if (has_colon)
8277                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8278                                   "variable %s in \"our\"",
8279                                   PL_tokenbuf));
8280             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8281         }
8282         else {
8283             if (has_colon)
8284                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8285                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8286
8287             pl_yylval.opval = newOP(OP_PADANY, 0);
8288             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8289             return PRIVATEREF;
8290         }
8291     }
8292
8293     /*
8294        build the ops for accesses to a my() variable.
8295
8296        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8297        then used in a comparison.  This catches most, but not
8298        all cases.  For instance, it catches
8299            sort { my($a); $a <=> $b }
8300        but not
8301            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8302        (although why you'd do that is anyone's guess).
8303     */
8304
8305     if (!has_colon) {
8306         if (!PL_in_my)
8307             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8308         if (tmp != NOT_IN_PAD) {
8309             /* might be an "our" variable" */
8310             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8311                 /* build ops for a bareword */
8312                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8313                 HEK * const stashname = HvNAME_HEK(stash);
8314                 SV *  const sym = newSVhek(stashname);
8315                 sv_catpvs(sym, "::");
8316                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8317                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8318                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8319                 gv_fetchsv(sym,
8320                     (PL_in_eval
8321                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8322                         : GV_ADDMULTI
8323                     ),
8324                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8325                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8326                      : SVt_PVHV));
8327                 return WORD;
8328             }
8329
8330             /* if it's a sort block and they're naming $a or $b */
8331             if (PL_last_lop_op == OP_SORT &&
8332                 PL_tokenbuf[0] == '$' &&
8333                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8334                 && !PL_tokenbuf[2])
8335             {
8336                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8337                      d < PL_bufend && *d != '\n';
8338                      d++)
8339                 {
8340                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8341                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8342                               PL_tokenbuf);
8343                     }
8344                 }
8345             }
8346
8347             pl_yylval.opval = newOP(OP_PADANY, 0);
8348             pl_yylval.opval->op_targ = tmp;
8349             return PRIVATEREF;
8350         }
8351     }
8352
8353     /*
8354        Whine if they've said @foo in a doublequoted string,
8355        and @foo isn't a variable we can find in the symbol
8356        table.
8357     */
8358     if (ckWARN(WARN_AMBIGUOUS) &&
8359         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8360         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8361                                          SVt_PVAV);
8362         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8363                 /* DO NOT warn for @- and @+ */
8364                 && !( PL_tokenbuf[2] == '\0' &&
8365                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8366            )
8367         {
8368             /* Downgraded from fatal to warning 20000522 mjd */
8369             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8370                         "Possible unintended interpolation of %s in string",
8371                         PL_tokenbuf);
8372         }
8373     }
8374
8375     /* build ops for a bareword */
8376     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8377                                                       tokenbuf_len - 1));
8378     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8379     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8380                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8381                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8382                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8383                       : SVt_PVHV));
8384     return WORD;
8385 }
8386
8387 STATIC void
8388 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8389 {
8390     dVAR;
8391
8392     PERL_ARGS_ASSERT_CHECKCOMMA;
8393
8394     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8395         if (ckWARN(WARN_SYNTAX)) {
8396             int level = 1;
8397             const char *w;
8398             for (w = s+2; *w && level; w++) {
8399                 if (*w == '(')
8400                     ++level;
8401                 else if (*w == ')')
8402                     --level;
8403             }
8404             while (isSPACE(*w))
8405                 ++w;
8406             /* the list of chars below is for end of statements or
8407              * block / parens, boolean operators (&&, ||, //) and branch
8408              * constructs (or, and, if, until, unless, while, err, for).
8409              * Not a very solid hack... */
8410             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8411                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8412                             "%s (...) interpreted as function",name);
8413         }
8414     }
8415     while (s < PL_bufend && isSPACE(*s))
8416         s++;
8417     if (*s == '(')
8418         s++;
8419     while (s < PL_bufend && isSPACE(*s))
8420         s++;
8421     if (isIDFIRST_lazy_if(s,UTF)) {
8422         const char * const w = s++;
8423         while (isALNUM_lazy_if(s,UTF))
8424             s++;
8425         while (s < PL_bufend && isSPACE(*s))
8426             s++;
8427         if (*s == ',') {
8428             GV* gv;
8429             if (keyword(w, s - w, 0))
8430                 return;
8431
8432             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8433             if (gv && GvCVu(gv))
8434                 return;
8435             Perl_croak(aTHX_ "No comma allowed after %s", what);
8436         }
8437     }
8438 }
8439
8440 /* Either returns sv, or mortalizes sv and returns a new SV*.
8441    Best used as sv=new_constant(..., sv, ...).
8442    If s, pv are NULL, calls subroutine with one argument,
8443    and type is used with error messages only. */
8444
8445 STATIC SV *
8446 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8447                SV *sv, SV *pv, const char *type, STRLEN typelen)
8448 {
8449     dVAR; dSP;
8450     HV * const table = GvHV(PL_hintgv);          /* ^H */
8451     SV *res;
8452     SV **cvp;
8453     SV *cv, *typesv;
8454     const char *why1 = "", *why2 = "", *why3 = "";
8455
8456     PERL_ARGS_ASSERT_NEW_CONSTANT;
8457
8458     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8459         SV *msg;
8460
8461         why2 = (const char *)
8462             (strEQ(key,"charnames")
8463              ? "(possibly a missing \"use charnames ...\")"
8464              : "");
8465         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8466                             (type ? type: "undef"), why2);
8467
8468         /* This is convoluted and evil ("goto considered harmful")
8469          * but I do not understand the intricacies of all the different
8470          * failure modes of %^H in here.  The goal here is to make
8471          * the most probable error message user-friendly. --jhi */
8472
8473         goto msgdone;
8474
8475     report:
8476         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8477                             (type ? type: "undef"), why1, why2, why3);
8478     msgdone:
8479         yyerror(SvPVX_const(msg));
8480         SvREFCNT_dec(msg);
8481         return sv;
8482     }
8483
8484     /* charnames doesn't work well if there have been errors found */
8485     if (PL_error_count > 0 && strEQ(key,"charnames"))
8486         return &PL_sv_undef;
8487
8488     cvp = hv_fetch(table, key, keylen, FALSE);
8489     if (!cvp || !SvOK(*cvp)) {
8490         why1 = "$^H{";
8491         why2 = key;
8492         why3 = "} is not defined";
8493         goto report;
8494     }
8495     sv_2mortal(sv);                     /* Parent created it permanently */
8496     cv = *cvp;
8497     if (!pv && s)
8498         pv = newSVpvn_flags(s, len, SVs_TEMP);
8499     if (type && pv)
8500         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8501     else
8502         typesv = &PL_sv_undef;
8503
8504     PUSHSTACKi(PERLSI_OVERLOAD);
8505     ENTER ;
8506     SAVETMPS;
8507
8508     PUSHMARK(SP) ;
8509     EXTEND(sp, 3);
8510     if (pv)
8511         PUSHs(pv);
8512     PUSHs(sv);
8513     if (pv)
8514         PUSHs(typesv);
8515     PUTBACK;
8516     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8517
8518     SPAGAIN ;
8519
8520     /* Check the eval first */
8521     if (!PL_in_eval && SvTRUE(ERRSV)) {
8522         sv_catpvs(ERRSV, "Propagated");
8523         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8524         (void)POPs;
8525         res = SvREFCNT_inc_simple(sv);
8526     }
8527     else {
8528         res = POPs;
8529         SvREFCNT_inc_simple_void(res);
8530     }
8531
8532     PUTBACK ;
8533     FREETMPS ;
8534     LEAVE ;
8535     POPSTACK;
8536
8537     if (!SvOK(res)) {
8538         why1 = "Call to &{$^H{";
8539         why2 = key;
8540         why3 = "}} did not return a defined value";
8541         sv = res;
8542         goto report;
8543     }
8544
8545     return res;
8546 }
8547
8548 /* Returns a NUL terminated string, with the length of the string written to
8549    *slp
8550    */
8551 STATIC char *
8552 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8553 {
8554     dVAR;
8555     register char *d = dest;
8556     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8557
8558     PERL_ARGS_ASSERT_SCAN_WORD;
8559
8560     for (;;) {
8561         if (d >= e)
8562             Perl_croak(aTHX_ ident_too_long);
8563         if (isALNUM(*s))        /* UTF handled below */
8564             *d++ = *s++;
8565         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8566             *d++ = ':';
8567             *d++ = ':';
8568             s++;
8569         }
8570         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8571             *d++ = *s++;
8572             *d++ = *s++;
8573         }
8574         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8575             char *t = s + UTF8SKIP(s);
8576             size_t len;
8577             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8578                 t += UTF8SKIP(t);
8579             len = t - s;
8580             if (d + len > e)
8581                 Perl_croak(aTHX_ ident_too_long);
8582             Copy(s, d, len, char);
8583             d += len;
8584             s = t;
8585         }
8586         else {
8587             *d = '\0';
8588             *slp = d - dest;
8589             return s;
8590         }
8591     }
8592 }
8593
8594 STATIC char *
8595 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8596 {
8597     dVAR;
8598     char *bracket = NULL;
8599     char funny = *s++;
8600     register char *d = dest;
8601     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8602
8603     PERL_ARGS_ASSERT_SCAN_IDENT;
8604
8605     if (isSPACE(*s))
8606         s = PEEKSPACE(s);
8607     if (isDIGIT(*s)) {
8608         while (isDIGIT(*s)) {
8609             if (d >= e)
8610                 Perl_croak(aTHX_ ident_too_long);
8611             *d++ = *s++;
8612         }
8613     }
8614     else {
8615         for (;;) {
8616             if (d >= e)
8617                 Perl_croak(aTHX_ ident_too_long);
8618             if (isALNUM(*s))    /* UTF handled below */
8619                 *d++ = *s++;
8620             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8621                 *d++ = ':';
8622                 *d++ = ':';
8623                 s++;
8624             }
8625             else if (*s == ':' && s[1] == ':') {
8626                 *d++ = *s++;
8627                 *d++ = *s++;
8628             }
8629             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8630                 char *t = s + UTF8SKIP(s);
8631                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8632                     t += UTF8SKIP(t);
8633                 if (d + (t - s) > e)
8634                     Perl_croak(aTHX_ ident_too_long);
8635                 Copy(s, d, t - s, char);
8636                 d += t - s;
8637                 s = t;
8638             }
8639             else
8640                 break;
8641         }
8642     }
8643     *d = '\0';
8644     d = dest;
8645     if (*d) {
8646         if (PL_lex_state != LEX_NORMAL)
8647             PL_lex_state = LEX_INTERPENDMAYBE;
8648         return s;
8649     }
8650     if (*s == '$' && s[1] &&
8651         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8652     {
8653         return s;
8654     }
8655     if (*s == '{') {
8656         bracket = s;
8657         s++;
8658     }
8659     else if (ck_uni)
8660         check_uni();
8661     if (s < send)
8662         *d = *s++;
8663     d[1] = '\0';
8664     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8665         *d = toCTRL(*s);
8666         s++;
8667     }
8668     if (bracket) {
8669         if (isSPACE(s[-1])) {
8670             while (s < send) {
8671                 const char ch = *s++;
8672                 if (!SPACE_OR_TAB(ch)) {
8673                     *d = ch;
8674                     break;
8675                 }
8676             }
8677         }
8678         if (isIDFIRST_lazy_if(d,UTF)) {
8679             d++;
8680             if (UTF) {
8681                 char *end = s;
8682                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8683                     end += UTF8SKIP(end);
8684                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8685                         end += UTF8SKIP(end);
8686                 }
8687                 Copy(s, d, end - s, char);
8688                 d += end - s;
8689                 s = end;
8690             }
8691             else {
8692                 while ((isALNUM(*s) || *s == ':') && d < e)
8693                     *d++ = *s++;
8694                 if (d >= e)
8695                     Perl_croak(aTHX_ ident_too_long);
8696             }
8697             *d = '\0';
8698             while (s < send && SPACE_OR_TAB(*s))
8699                 s++;
8700             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8701                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8702                     const char * const brack =
8703                         (const char *)
8704                         ((*s == '[') ? "[...]" : "{...}");
8705    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8706                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8707                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8708                         funny, dest, brack, funny, dest, brack);
8709                 }
8710                 bracket++;
8711                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8712                 PL_lex_allbrackets++;
8713                 return s;
8714             }
8715         }
8716         /* Handle extended ${^Foo} variables
8717          * 1999-02-27 mjd-perl-patch@plover.com */
8718         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8719                  && isALNUM(*s))
8720         {
8721             d++;
8722             while (isALNUM(*s) && d < e) {
8723                 *d++ = *s++;
8724             }
8725             if (d >= e)
8726                 Perl_croak(aTHX_ ident_too_long);
8727             *d = '\0';
8728         }
8729         if (*s == '}') {
8730             s++;
8731             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8732                 PL_lex_state = LEX_INTERPEND;
8733                 PL_expect = XREF;
8734             }
8735             if (PL_lex_state == LEX_NORMAL) {
8736                 if (ckWARN(WARN_AMBIGUOUS) &&
8737                     (keyword(dest, d - dest, 0)
8738                      || get_cvn_flags(dest, d - dest, 0)))
8739                 {
8740                     if (funny == '#')
8741                         funny = '@';
8742                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8743                         "Ambiguous use of %c{%s} resolved to %c%s",
8744                         funny, dest, funny, dest);
8745                 }
8746             }
8747         }
8748         else {
8749             s = bracket;                /* let the parser handle it */
8750             *dest = '\0';
8751         }
8752     }
8753     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8754         PL_lex_state = LEX_INTERPEND;
8755     return s;
8756 }
8757
8758 static bool
8759 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
8760
8761     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8762      * the parse starting at 's', based on the subset that are valid in this
8763      * context input to this routine in 'valid_flags'. Advances s.  Returns
8764      * TRUE if the input was a valid flag, so the next char may be as well;
8765      * otherwise FALSE */
8766
8767     const char c = **s;
8768
8769     if (! strchr(valid_flags, c)) {
8770         if (isALNUM(c)) {
8771             goto deprecate;
8772         }
8773         return FALSE;
8774     }
8775
8776     switch (c) {
8777
8778         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8779         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8780         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8781         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8782         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8783         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8784         case LOCALE_PAT_MOD:
8785
8786             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8787              * can't be regex modifiers.
8788              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8789              * much as easily done.  s///lei, for example, has to mean regex
8790              * modifiers if it's not an error (as does any word character
8791              * following the 'e').  Otherwise, we resolve to the backwards-
8792              * compatible, but less likely 's/// le ...', i.e. as meaning
8793              * less-than-or-equal.  The reason it's not likely is that s//
8794              * returns a number for code in the field (/r returns a string, but
8795              * that wasn't added until the 5.13 series), and so '<=' should be
8796              * used for comparing, not 'le'. */
8797             if (*((*s) + 1) == 't') {
8798                 goto deprecate;
8799             }
8800             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8801
8802                 /* 'e' is valid only for substitutes, s///e.  If it is not
8803                  * valid in the current context, then 'm//le' must mean the
8804                  * comparison operator, so use the regular deprecation message.
8805                  */
8806                 if (! strchr(valid_flags, 'e')) {
8807                     goto deprecate;
8808                 }
8809                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8810                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8811                 return FALSE;
8812             }
8813             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8814             break;
8815         case UNICODE_PAT_MOD:
8816             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8817              * 'n' means they can't be regex modifiers */
8818             if (*((*s) + 1) == 'n') {
8819                 goto deprecate;
8820             }
8821             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8822             break;
8823         case ASCII_RESTRICT_PAT_MOD:
8824             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8825              * can't be regex modifiers */
8826             if (*((*s) + 1) == 'n') {
8827                 goto deprecate;
8828             }
8829             if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
8830                 /* Doubled modifier implies more restricted */
8831                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8832                 (*s)++;
8833             }
8834             else {
8835                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8836             }
8837             break;
8838         case DEPENDS_PAT_MOD:
8839             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8840             break;
8841     }
8842
8843     (*s)++;
8844     return TRUE;
8845
8846     deprecate:
8847         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8848             "Having no space between pattern and following word is deprecated");
8849         return FALSE;
8850 }
8851
8852 STATIC char *
8853 S_scan_pat(pTHX_ char *start, I32 type)
8854 {
8855     dVAR;
8856     PMOP *pm;
8857     char *s = scan_str(start,!!PL_madskills,FALSE);
8858     const char * const valid_flags =
8859         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8860 #ifdef PERL_MAD
8861     char *modstart;
8862 #endif
8863
8864     PERL_ARGS_ASSERT_SCAN_PAT;
8865
8866     if (!s) {
8867         const char * const delimiter = skipspace(start);
8868         Perl_croak(aTHX_
8869                    (const char *)
8870                    (*delimiter == '?'
8871                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8872                     : "Search pattern not terminated" ));
8873     }
8874
8875     pm = (PMOP*)newPMOP(type, 0);
8876     if (PL_multi_open == '?') {
8877         /* This is the only point in the code that sets PMf_ONCE:  */
8878         pm->op_pmflags |= PMf_ONCE;
8879
8880         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8881            allows us to restrict the list needed by reset to just the ??
8882            matches.  */
8883         assert(type != OP_TRANS);
8884         if (PL_curstash) {
8885             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8886             U32 elements;
8887             if (!mg) {
8888                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8889                                  0);
8890             }
8891             elements = mg->mg_len / sizeof(PMOP**);
8892             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8893             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8894             mg->mg_len = elements * sizeof(PMOP**);
8895             PmopSTASH_set(pm,PL_curstash);
8896         }
8897     }
8898 #ifdef PERL_MAD
8899     modstart = s;
8900 #endif
8901     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s)) {};
8902 #ifdef PERL_MAD
8903     if (PL_madskills && modstart != s) {
8904         SV* tmptoken = newSVpvn(modstart, s - modstart);
8905         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8906     }
8907 #endif
8908     /* issue a warning if /c is specified,but /g is not */
8909     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8910     {
8911         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8912                        "Use of /c modifier is meaningless without /g" );
8913     }
8914
8915     PL_lex_op = (OP*)pm;
8916     pl_yylval.ival = OP_MATCH;
8917     return s;
8918 }
8919
8920 STATIC char *
8921 S_scan_subst(pTHX_ char *start)
8922 {
8923     dVAR;
8924     char *s;
8925     register PMOP *pm;
8926     I32 first_start;
8927     I32 es = 0;
8928 #ifdef PERL_MAD
8929     char *modstart;
8930 #endif
8931
8932     PERL_ARGS_ASSERT_SCAN_SUBST;
8933
8934     pl_yylval.ival = OP_NULL;
8935
8936     s = scan_str(start,!!PL_madskills,FALSE);
8937
8938     if (!s)
8939         Perl_croak(aTHX_ "Substitution pattern not terminated");
8940
8941     if (s[-1] == PL_multi_open)
8942         s--;
8943 #ifdef PERL_MAD
8944     if (PL_madskills) {
8945         CURMAD('q', PL_thisopen);
8946         CURMAD('_', PL_thiswhite);
8947         CURMAD('E', PL_thisstuff);
8948         CURMAD('Q', PL_thisclose);
8949         PL_realtokenstart = s - SvPVX(PL_linestr);
8950     }
8951 #endif
8952
8953     first_start = PL_multi_start;
8954     s = scan_str(s,!!PL_madskills,FALSE);
8955     if (!s) {
8956         if (PL_lex_stuff) {
8957             SvREFCNT_dec(PL_lex_stuff);
8958             PL_lex_stuff = NULL;
8959         }
8960         Perl_croak(aTHX_ "Substitution replacement not terminated");
8961     }
8962     PL_multi_start = first_start;       /* so whole substitution is taken together */
8963
8964     pm = (PMOP*)newPMOP(OP_SUBST, 0);
8965
8966 #ifdef PERL_MAD
8967     if (PL_madskills) {
8968         CURMAD('z', PL_thisopen);
8969         CURMAD('R', PL_thisstuff);
8970         CURMAD('Z', PL_thisclose);
8971     }
8972     modstart = s;
8973 #endif
8974
8975     while (*s) {
8976         if (*s == EXEC_PAT_MOD) {
8977             s++;
8978             es++;
8979         }
8980         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s)) {
8981             break;
8982         }
8983     }
8984
8985 #ifdef PERL_MAD
8986     if (PL_madskills) {
8987         if (modstart != s)
8988             curmad('m', newSVpvn(modstart, s - modstart));
8989         append_madprops(PL_thismad, (OP*)pm, 0);
8990         PL_thismad = 0;
8991     }
8992 #endif
8993     if ((pm->op_pmflags & PMf_CONTINUE)) {
8994         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
8995     }
8996
8997     if (es) {
8998         SV * const repl = newSVpvs("");
8999
9000         PL_sublex_info.super_bufptr = s;
9001         PL_sublex_info.super_bufend = PL_bufend;
9002         PL_multi_end = 0;
9003         pm->op_pmflags |= PMf_EVAL;
9004         while (es-- > 0) {
9005             if (es)
9006                 sv_catpvs(repl, "eval ");
9007             else
9008                 sv_catpvs(repl, "do ");
9009         }
9010         sv_catpvs(repl, "{");
9011         sv_catsv(repl, PL_lex_repl);
9012         if (strchr(SvPVX(PL_lex_repl), '#'))
9013             sv_catpvs(repl, "\n");
9014         sv_catpvs(repl, "}");
9015         SvEVALED_on(repl);
9016         SvREFCNT_dec(PL_lex_repl);
9017         PL_lex_repl = repl;
9018     }
9019
9020     PL_lex_op = (OP*)pm;
9021     pl_yylval.ival = OP_SUBST;
9022     return s;
9023 }
9024
9025 STATIC char *
9026 S_scan_trans(pTHX_ char *start)
9027 {
9028     dVAR;
9029     register char* s;
9030     OP *o;
9031     short *tbl;
9032     U8 squash;
9033     U8 del;
9034     U8 complement;
9035     bool nondestruct = 0;
9036 #ifdef PERL_MAD
9037     char *modstart;
9038 #endif
9039
9040     PERL_ARGS_ASSERT_SCAN_TRANS;
9041
9042     pl_yylval.ival = OP_NULL;
9043
9044     s = scan_str(start,!!PL_madskills,FALSE);
9045     if (!s)
9046         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9047
9048     if (s[-1] == PL_multi_open)
9049         s--;
9050 #ifdef PERL_MAD
9051     if (PL_madskills) {
9052         CURMAD('q', PL_thisopen);
9053         CURMAD('_', PL_thiswhite);
9054         CURMAD('E', PL_thisstuff);
9055         CURMAD('Q', PL_thisclose);
9056         PL_realtokenstart = s - SvPVX(PL_linestr);
9057     }
9058 #endif
9059
9060     s = scan_str(s,!!PL_madskills,FALSE);
9061     if (!s) {
9062         if (PL_lex_stuff) {
9063             SvREFCNT_dec(PL_lex_stuff);
9064             PL_lex_stuff = NULL;
9065         }
9066         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9067     }
9068     if (PL_madskills) {
9069         CURMAD('z', PL_thisopen);
9070         CURMAD('R', PL_thisstuff);
9071         CURMAD('Z', PL_thisclose);
9072     }
9073
9074     complement = del = squash = 0;
9075 #ifdef PERL_MAD
9076     modstart = s;
9077 #endif
9078     while (1) {
9079         switch (*s) {
9080         case 'c':
9081             complement = OPpTRANS_COMPLEMENT;
9082             break;
9083         case 'd':
9084             del = OPpTRANS_DELETE;
9085             break;
9086         case 's':
9087             squash = OPpTRANS_SQUASH;
9088             break;
9089         case 'r':
9090             nondestruct = 1;
9091             break;
9092         default:
9093             goto no_more;
9094         }
9095         s++;
9096     }
9097   no_more:
9098
9099     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9100     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9101     o->op_private &= ~OPpTRANS_ALL;
9102     o->op_private |= del|squash|complement|
9103       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9104       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9105
9106     PL_lex_op = o;
9107     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9108
9109 #ifdef PERL_MAD
9110     if (PL_madskills) {
9111         if (modstart != s)
9112             curmad('m', newSVpvn(modstart, s - modstart));
9113         append_madprops(PL_thismad, o, 0);
9114         PL_thismad = 0;
9115     }
9116 #endif
9117
9118     return s;
9119 }
9120
9121 STATIC char *
9122 S_scan_heredoc(pTHX_ register char *s)
9123 {
9124     dVAR;
9125     SV *herewas;
9126     I32 op_type = OP_SCALAR;
9127     I32 len;
9128     SV *tmpstr;
9129     char term;
9130     const char *found_newline;
9131     register char *d;
9132     register char *e;
9133     char *peek;
9134     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9135 #ifdef PERL_MAD
9136     I32 stuffstart = s - SvPVX(PL_linestr);
9137     char *tstart;
9138
9139     PL_realtokenstart = -1;
9140 #endif
9141
9142     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9143
9144     s += 2;
9145     d = PL_tokenbuf;
9146     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9147     if (!outer)
9148         *d++ = '\n';
9149     peek = s;
9150     while (SPACE_OR_TAB(*peek))
9151         peek++;
9152     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9153         s = peek;
9154         term = *s++;
9155         s = delimcpy(d, e, s, PL_bufend, term, &len);
9156         d += len;
9157         if (s < PL_bufend)
9158             s++;
9159     }
9160     else {
9161         if (*s == '\\')
9162             s++, term = '\'';
9163         else
9164             term = '"';
9165         if (!isALNUM_lazy_if(s,UTF))
9166             deprecate("bare << to mean <<\"\"");
9167         for (; isALNUM_lazy_if(s,UTF); s++) {
9168             if (d < e)
9169                 *d++ = *s;
9170         }
9171     }
9172     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9173         Perl_croak(aTHX_ "Delimiter for here document is too long");
9174     *d++ = '\n';
9175     *d = '\0';
9176     len = d - PL_tokenbuf;
9177
9178 #ifdef PERL_MAD
9179     if (PL_madskills) {
9180         tstart = PL_tokenbuf + !outer;
9181         PL_thisclose = newSVpvn(tstart, len - !outer);
9182         tstart = SvPVX(PL_linestr) + stuffstart;
9183         PL_thisopen = newSVpvn(tstart, s - tstart);
9184         stuffstart = s - SvPVX(PL_linestr);
9185     }
9186 #endif
9187 #ifndef PERL_STRICT_CR
9188     d = strchr(s, '\r');
9189     if (d) {
9190         char * const olds = s;
9191         s = d;
9192         while (s < PL_bufend) {
9193             if (*s == '\r') {
9194                 *d++ = '\n';
9195                 if (*++s == '\n')
9196                     s++;
9197             }
9198             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9199                 *d++ = *s++;
9200                 s++;
9201             }
9202             else
9203                 *d++ = *s++;
9204         }
9205         *d = '\0';
9206         PL_bufend = d;
9207         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9208         s = olds;
9209     }
9210 #endif
9211 #ifdef PERL_MAD
9212     found_newline = 0;
9213 #endif
9214     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9215         herewas = newSVpvn(s,PL_bufend-s);
9216     }
9217     else {
9218 #ifdef PERL_MAD
9219         herewas = newSVpvn(s-1,found_newline-s+1);
9220 #else
9221         s--;
9222         herewas = newSVpvn(s,found_newline-s);
9223 #endif
9224     }
9225 #ifdef PERL_MAD
9226     if (PL_madskills) {
9227         tstart = SvPVX(PL_linestr) + stuffstart;
9228         if (PL_thisstuff)
9229             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9230         else
9231             PL_thisstuff = newSVpvn(tstart, s - tstart);
9232     }
9233 #endif
9234     s += SvCUR(herewas);
9235
9236 #ifdef PERL_MAD
9237     stuffstart = s - SvPVX(PL_linestr);
9238
9239     if (found_newline)
9240         s--;
9241 #endif
9242
9243     tmpstr = newSV_type(SVt_PVIV);
9244     SvGROW(tmpstr, 80);
9245     if (term == '\'') {
9246         op_type = OP_CONST;
9247         SvIV_set(tmpstr, -1);
9248     }
9249     else if (term == '`') {
9250         op_type = OP_BACKTICK;
9251         SvIV_set(tmpstr, '\\');
9252     }
9253
9254     CLINE;
9255     PL_multi_start = CopLINE(PL_curcop);
9256     PL_multi_open = PL_multi_close = '<';
9257     term = *PL_tokenbuf;
9258     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9259         char * const bufptr = PL_sublex_info.super_bufptr;
9260         char * const bufend = PL_sublex_info.super_bufend;
9261         char * const olds = s - SvCUR(herewas);
9262         s = strchr(bufptr, '\n');
9263         if (!s)
9264             s = bufend;
9265         d = s;
9266         while (s < bufend &&
9267           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9268             if (*s++ == '\n')
9269                 CopLINE_inc(PL_curcop);
9270         }
9271         if (s >= bufend) {
9272             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9273             missingterm(PL_tokenbuf);
9274         }
9275         sv_setpvn(herewas,bufptr,d-bufptr+1);
9276         sv_setpvn(tmpstr,d+1,s-d);
9277         s += len - 1;
9278         sv_catpvn(herewas,s,bufend-s);
9279         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9280
9281         s = olds;
9282         goto retval;
9283     }
9284     else if (!outer) {
9285         d = s;
9286         while (s < PL_bufend &&
9287           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9288             if (*s++ == '\n')
9289                 CopLINE_inc(PL_curcop);
9290         }
9291         if (s >= PL_bufend) {
9292             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9293             missingterm(PL_tokenbuf);
9294         }
9295         sv_setpvn(tmpstr,d+1,s-d);
9296 #ifdef PERL_MAD
9297         if (PL_madskills) {
9298             if (PL_thisstuff)
9299                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9300             else
9301                 PL_thisstuff = newSVpvn(d + 1, s - d);
9302             stuffstart = s - SvPVX(PL_linestr);
9303         }
9304 #endif
9305         s += len - 1;
9306         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9307
9308         sv_catpvn(herewas,s,PL_bufend-s);
9309         sv_setsv(PL_linestr,herewas);
9310         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9311         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9312         PL_last_lop = PL_last_uni = NULL;
9313     }
9314     else
9315         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9316     while (s >= PL_bufend) {    /* multiple line string? */
9317 #ifdef PERL_MAD
9318         if (PL_madskills) {
9319             tstart = SvPVX(PL_linestr) + stuffstart;
9320             if (PL_thisstuff)
9321                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9322             else
9323                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9324         }
9325 #endif
9326         PL_bufptr = s;
9327         CopLINE_inc(PL_curcop);
9328         if (!outer || !lex_next_chunk(0)) {
9329             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9330             missingterm(PL_tokenbuf);
9331         }
9332         CopLINE_dec(PL_curcop);
9333         s = PL_bufptr;
9334 #ifdef PERL_MAD
9335         stuffstart = s - SvPVX(PL_linestr);
9336 #endif
9337         CopLINE_inc(PL_curcop);
9338         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9339         PL_last_lop = PL_last_uni = NULL;
9340 #ifndef PERL_STRICT_CR
9341         if (PL_bufend - PL_linestart >= 2) {
9342             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9343                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9344             {
9345                 PL_bufend[-2] = '\n';
9346                 PL_bufend--;
9347                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9348             }
9349             else if (PL_bufend[-1] == '\r')
9350                 PL_bufend[-1] = '\n';
9351         }
9352         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9353             PL_bufend[-1] = '\n';
9354 #endif
9355         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9356             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9357             *(SvPVX(PL_linestr) + off ) = ' ';
9358             sv_catsv(PL_linestr,herewas);
9359             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9360             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9361         }
9362         else {
9363             s = PL_bufend;
9364             sv_catsv(tmpstr,PL_linestr);
9365         }
9366     }
9367     s++;
9368 retval:
9369     PL_multi_end = CopLINE(PL_curcop);
9370     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9371         SvPV_shrink_to_cur(tmpstr);
9372     }
9373     SvREFCNT_dec(herewas);
9374     if (!IN_BYTES) {
9375         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9376             SvUTF8_on(tmpstr);
9377         else if (PL_encoding)
9378             sv_recode_to_utf8(tmpstr, PL_encoding);
9379     }
9380     PL_lex_stuff = tmpstr;
9381     pl_yylval.ival = op_type;
9382     return s;
9383 }
9384
9385 /* scan_inputsymbol
9386    takes: current position in input buffer
9387    returns: new position in input buffer
9388    side-effects: pl_yylval and lex_op are set.
9389
9390    This code handles:
9391
9392    <>           read from ARGV
9393    <FH>         read from filehandle
9394    <pkg::FH>    read from package qualified filehandle
9395    <pkg'FH>     read from package qualified filehandle
9396    <$fh>        read from filehandle in $fh
9397    <*.h>        filename glob
9398
9399 */
9400
9401 STATIC char *
9402 S_scan_inputsymbol(pTHX_ char *start)
9403 {
9404     dVAR;
9405     register char *s = start;           /* current position in buffer */
9406     char *end;
9407     I32 len;
9408     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9409     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9410
9411     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9412
9413     end = strchr(s, '\n');
9414     if (!end)
9415         end = PL_bufend;
9416     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9417
9418     /* die if we didn't have space for the contents of the <>,
9419        or if it didn't end, or if we see a newline
9420     */
9421
9422     if (len >= (I32)sizeof PL_tokenbuf)
9423         Perl_croak(aTHX_ "Excessively long <> operator");
9424     if (s >= end)
9425         Perl_croak(aTHX_ "Unterminated <> operator");
9426
9427     s++;
9428
9429     /* check for <$fh>
9430        Remember, only scalar variables are interpreted as filehandles by
9431        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9432        treated as a glob() call.
9433        This code makes use of the fact that except for the $ at the front,
9434        a scalar variable and a filehandle look the same.
9435     */
9436     if (*d == '$' && d[1]) d++;
9437
9438     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9439     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9440         d++;
9441
9442     /* If we've tried to read what we allow filehandles to look like, and
9443        there's still text left, then it must be a glob() and not a getline.
9444        Use scan_str to pull out the stuff between the <> and treat it
9445        as nothing more than a string.
9446     */
9447
9448     if (d - PL_tokenbuf != len) {
9449         pl_yylval.ival = OP_GLOB;
9450         s = scan_str(start,!!PL_madskills,FALSE);
9451         if (!s)
9452            Perl_croak(aTHX_ "Glob not terminated");
9453         return s;
9454     }
9455     else {
9456         bool readline_overriden = FALSE;
9457         GV *gv_readline;
9458         GV **gvp;
9459         /* we're in a filehandle read situation */
9460         d = PL_tokenbuf;
9461
9462         /* turn <> into <ARGV> */
9463         if (!len)
9464             Copy("ARGV",d,5,char);
9465
9466         /* Check whether readline() is overriden */
9467         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9468         if ((gv_readline
9469                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9470                 ||
9471                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9472                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9473                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9474             readline_overriden = TRUE;
9475
9476         /* if <$fh>, create the ops to turn the variable into a
9477            filehandle
9478         */
9479         if (*d == '$') {
9480             /* try to find it in the pad for this block, otherwise find
9481                add symbol table ops
9482             */
9483             const PADOFFSET tmp = pad_findmy(d, len, 0);
9484             if (tmp != NOT_IN_PAD) {
9485                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9486                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9487                     HEK * const stashname = HvNAME_HEK(stash);
9488                     SV * const sym = sv_2mortal(newSVhek(stashname));
9489                     sv_catpvs(sym, "::");
9490                     sv_catpv(sym, d+1);
9491                     d = SvPVX(sym);
9492                     goto intro_sym;
9493                 }
9494                 else {
9495                     OP * const o = newOP(OP_PADSV, 0);
9496                     o->op_targ = tmp;
9497                     PL_lex_op = readline_overriden
9498                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9499                                 op_append_elem(OP_LIST, o,
9500                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9501                         : (OP*)newUNOP(OP_READLINE, 0, o);
9502                 }
9503             }
9504             else {
9505                 GV *gv;
9506                 ++d;
9507 intro_sym:
9508                 gv = gv_fetchpv(d,
9509                                 (PL_in_eval
9510                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9511                                  : GV_ADDMULTI),
9512                                 SVt_PV);
9513                 PL_lex_op = readline_overriden
9514                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9515                             op_append_elem(OP_LIST,
9516                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9517                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9518                     : (OP*)newUNOP(OP_READLINE, 0,
9519                             newUNOP(OP_RV2SV, 0,
9520                                 newGVOP(OP_GV, 0, gv)));
9521             }
9522             if (!readline_overriden)
9523                 PL_lex_op->op_flags |= OPf_SPECIAL;
9524             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9525             pl_yylval.ival = OP_NULL;
9526         }
9527
9528         /* If it's none of the above, it must be a literal filehandle
9529            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9530         else {
9531             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9532             PL_lex_op = readline_overriden
9533                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9534                         op_append_elem(OP_LIST,
9535                             newGVOP(OP_GV, 0, gv),
9536                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9537                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9538             pl_yylval.ival = OP_NULL;
9539         }
9540     }
9541
9542     return s;
9543 }
9544
9545
9546 /* scan_str
9547    takes: start position in buffer
9548           keep_quoted preserve \ on the embedded delimiter(s)
9549           keep_delims preserve the delimiters around the string
9550    returns: position to continue reading from buffer
9551    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9552         updates the read buffer.
9553
9554    This subroutine pulls a string out of the input.  It is called for:
9555         q               single quotes           q(literal text)
9556         '               single quotes           'literal text'
9557         qq              double quotes           qq(interpolate $here please)
9558         "               double quotes           "interpolate $here please"
9559         qx              backticks               qx(/bin/ls -l)
9560         `               backticks               `/bin/ls -l`
9561         qw              quote words             @EXPORT_OK = qw( func() $spam )
9562         m//             regexp match            m/this/
9563         s///            regexp substitute       s/this/that/
9564         tr///           string transliterate    tr/this/that/
9565         y///            string transliterate    y/this/that/
9566         ($*@)           sub prototypes          sub foo ($)
9567         (stuff)         sub attr parameters     sub foo : attr(stuff)
9568         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9569
9570    In most of these cases (all but <>, patterns and transliterate)
9571    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9572    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9573    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9574    calls scan_str().
9575
9576    It skips whitespace before the string starts, and treats the first
9577    character as the delimiter.  If the delimiter is one of ([{< then
9578    the corresponding "close" character )]}> is used as the closing
9579    delimiter.  It allows quoting of delimiters, and if the string has
9580    balanced delimiters ([{<>}]) it allows nesting.
9581
9582    On success, the SV with the resulting string is put into lex_stuff or,
9583    if that is already non-NULL, into lex_repl. The second case occurs only
9584    when parsing the RHS of the special constructs s/// and tr/// (y///).
9585    For convenience, the terminating delimiter character is stuffed into
9586    SvIVX of the SV.
9587 */
9588
9589 STATIC char *
9590 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9591 {
9592     dVAR;
9593     SV *sv;                             /* scalar value: string */
9594     const char *tmps;                   /* temp string, used for delimiter matching */
9595     register char *s = start;           /* current position in the buffer */
9596     register char term;                 /* terminating character */
9597     register char *to;                  /* current position in the sv's data */
9598     I32 brackets = 1;                   /* bracket nesting level */
9599     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9600     I32 termcode;                       /* terminating char. code */
9601     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9602     STRLEN termlen;                     /* length of terminating string */
9603     int last_off = 0;                   /* last position for nesting bracket */
9604 #ifdef PERL_MAD
9605     int stuffstart;
9606     char *tstart;
9607 #endif
9608
9609     PERL_ARGS_ASSERT_SCAN_STR;
9610
9611     /* skip space before the delimiter */
9612     if (isSPACE(*s)) {
9613         s = PEEKSPACE(s);
9614     }
9615
9616 #ifdef PERL_MAD
9617     if (PL_realtokenstart >= 0) {
9618         stuffstart = PL_realtokenstart;
9619         PL_realtokenstart = -1;
9620     }
9621     else
9622         stuffstart = start - SvPVX(PL_linestr);
9623 #endif
9624     /* mark where we are, in case we need to report errors */
9625     CLINE;
9626
9627     /* after skipping whitespace, the next character is the terminator */
9628     term = *s;
9629     if (!UTF) {
9630         termcode = termstr[0] = term;
9631         termlen = 1;
9632     }
9633     else {
9634         termcode = utf8_to_uvchr((U8*)s, &termlen);
9635         Copy(s, termstr, termlen, U8);
9636         if (!UTF8_IS_INVARIANT(term))
9637             has_utf8 = TRUE;
9638     }
9639
9640     /* mark where we are */
9641     PL_multi_start = CopLINE(PL_curcop);
9642     PL_multi_open = term;
9643
9644     /* find corresponding closing delimiter */
9645     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9646         termcode = termstr[0] = term = tmps[5];
9647
9648     PL_multi_close = term;
9649
9650     /* create a new SV to hold the contents.  79 is the SV's initial length.
9651        What a random number. */
9652     sv = newSV_type(SVt_PVIV);
9653     SvGROW(sv, 80);
9654     SvIV_set(sv, termcode);
9655     (void)SvPOK_only(sv);               /* validate pointer */
9656
9657     /* move past delimiter and try to read a complete string */
9658     if (keep_delims)
9659         sv_catpvn(sv, s, termlen);
9660     s += termlen;
9661 #ifdef PERL_MAD
9662     tstart = SvPVX(PL_linestr) + stuffstart;
9663     if (!PL_thisopen && !keep_delims) {
9664         PL_thisopen = newSVpvn(tstart, s - tstart);
9665         stuffstart = s - SvPVX(PL_linestr);
9666     }
9667 #endif
9668     for (;;) {
9669         if (PL_encoding && !UTF) {
9670             bool cont = TRUE;
9671
9672             while (cont) {
9673                 int offset = s - SvPVX_const(PL_linestr);
9674                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9675                                            &offset, (char*)termstr, termlen);
9676                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9677                 char * const svlast = SvEND(sv) - 1;
9678
9679                 for (; s < ns; s++) {
9680                     if (*s == '\n' && !PL_rsfp)
9681                         CopLINE_inc(PL_curcop);
9682                 }
9683                 if (!found)
9684                     goto read_more_line;
9685                 else {
9686                     /* handle quoted delimiters */
9687                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9688                         const char *t;
9689                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9690                             t--;
9691                         if ((svlast-1 - t) % 2) {
9692                             if (!keep_quoted) {
9693                                 *(svlast-1) = term;
9694                                 *svlast = '\0';
9695                                 SvCUR_set(sv, SvCUR(sv) - 1);
9696                             }
9697                             continue;
9698                         }
9699                     }
9700                     if (PL_multi_open == PL_multi_close) {
9701                         cont = FALSE;
9702                     }
9703                     else {
9704                         const char *t;
9705                         char *w;
9706                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9707                             /* At here, all closes are "was quoted" one,
9708                                so we don't check PL_multi_close. */
9709                             if (*t == '\\') {
9710                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9711                                     t++;
9712                                 else
9713                                     *w++ = *t++;
9714                             }
9715                             else if (*t == PL_multi_open)
9716                                 brackets++;
9717
9718                             *w = *t;
9719                         }
9720                         if (w < t) {
9721                             *w++ = term;
9722                             *w = '\0';
9723                             SvCUR_set(sv, w - SvPVX_const(sv));
9724                         }
9725                         last_off = w - SvPVX(sv);
9726                         if (--brackets <= 0)
9727                             cont = FALSE;
9728                     }
9729                 }
9730             }
9731             if (!keep_delims) {
9732                 SvCUR_set(sv, SvCUR(sv) - 1);
9733                 *SvEND(sv) = '\0';
9734             }
9735             break;
9736         }
9737
9738         /* extend sv if need be */
9739         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9740         /* set 'to' to the next character in the sv's string */
9741         to = SvPVX(sv)+SvCUR(sv);
9742
9743         /* if open delimiter is the close delimiter read unbridle */
9744         if (PL_multi_open == PL_multi_close) {
9745             for (; s < PL_bufend; s++,to++) {
9746                 /* embedded newlines increment the current line number */
9747                 if (*s == '\n' && !PL_rsfp)
9748                     CopLINE_inc(PL_curcop);
9749                 /* handle quoted delimiters */
9750                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9751                     if (!keep_quoted && s[1] == term)
9752                         s++;
9753                 /* any other quotes are simply copied straight through */
9754                     else
9755                         *to++ = *s++;
9756                 }
9757                 /* terminate when run out of buffer (the for() condition), or
9758                    have found the terminator */
9759                 else if (*s == term) {
9760                     if (termlen == 1)
9761                         break;
9762                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9763                         break;
9764                 }
9765                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9766                     has_utf8 = TRUE;
9767                 *to = *s;
9768             }
9769         }
9770
9771         /* if the terminator isn't the same as the start character (e.g.,
9772            matched brackets), we have to allow more in the quoting, and
9773            be prepared for nested brackets.
9774         */
9775         else {
9776             /* read until we run out of string, or we find the terminator */
9777             for (; s < PL_bufend; s++,to++) {
9778                 /* embedded newlines increment the line count */
9779                 if (*s == '\n' && !PL_rsfp)
9780                     CopLINE_inc(PL_curcop);
9781                 /* backslashes can escape the open or closing characters */
9782                 if (*s == '\\' && s+1 < PL_bufend) {
9783                     if (!keep_quoted &&
9784                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9785                         s++;
9786                     else
9787                         *to++ = *s++;
9788                 }
9789                 /* allow nested opens and closes */
9790                 else if (*s == PL_multi_close && --brackets <= 0)
9791                     break;
9792                 else if (*s == PL_multi_open)
9793                     brackets++;
9794                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9795                     has_utf8 = TRUE;
9796                 *to = *s;
9797             }
9798         }
9799         /* terminate the copied string and update the sv's end-of-string */
9800         *to = '\0';
9801         SvCUR_set(sv, to - SvPVX_const(sv));
9802
9803         /*
9804          * this next chunk reads more into the buffer if we're not done yet
9805          */
9806
9807         if (s < PL_bufend)
9808             break;              /* handle case where we are done yet :-) */
9809
9810 #ifndef PERL_STRICT_CR
9811         if (to - SvPVX_const(sv) >= 2) {
9812             if ((to[-2] == '\r' && to[-1] == '\n') ||
9813                 (to[-2] == '\n' && to[-1] == '\r'))
9814             {
9815                 to[-2] = '\n';
9816                 to--;
9817                 SvCUR_set(sv, to - SvPVX_const(sv));
9818             }
9819             else if (to[-1] == '\r')
9820                 to[-1] = '\n';
9821         }
9822         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9823             to[-1] = '\n';
9824 #endif
9825
9826      read_more_line:
9827         /* if we're out of file, or a read fails, bail and reset the current
9828            line marker so we can report where the unterminated string began
9829         */
9830 #ifdef PERL_MAD
9831         if (PL_madskills) {
9832             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9833             if (PL_thisstuff)
9834                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9835             else
9836                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9837         }
9838 #endif
9839         CopLINE_inc(PL_curcop);
9840         PL_bufptr = PL_bufend;
9841         if (!lex_next_chunk(0)) {
9842             sv_free(sv);
9843             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9844             return NULL;
9845         }
9846         s = PL_bufptr;
9847 #ifdef PERL_MAD
9848         stuffstart = 0;
9849 #endif
9850     }
9851
9852     /* at this point, we have successfully read the delimited string */
9853
9854     if (!PL_encoding || UTF) {
9855 #ifdef PERL_MAD
9856         if (PL_madskills) {
9857             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9858             const int len = s - tstart;
9859             if (PL_thisstuff)
9860                 sv_catpvn(PL_thisstuff, tstart, len);
9861             else
9862                 PL_thisstuff = newSVpvn(tstart, len);
9863             if (!PL_thisclose && !keep_delims)
9864                 PL_thisclose = newSVpvn(s,termlen);
9865         }
9866 #endif
9867
9868         if (keep_delims)
9869             sv_catpvn(sv, s, termlen);
9870         s += termlen;
9871     }
9872 #ifdef PERL_MAD
9873     else {
9874         if (PL_madskills) {
9875             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9876             const int len = s - tstart - termlen;
9877             if (PL_thisstuff)
9878                 sv_catpvn(PL_thisstuff, tstart, len);
9879             else
9880                 PL_thisstuff = newSVpvn(tstart, len);
9881             if (!PL_thisclose && !keep_delims)
9882                 PL_thisclose = newSVpvn(s - termlen,termlen);
9883         }
9884     }
9885 #endif
9886     if (has_utf8 || PL_encoding)
9887         SvUTF8_on(sv);
9888
9889     PL_multi_end = CopLINE(PL_curcop);
9890
9891     /* if we allocated too much space, give some back */
9892     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9893         SvLEN_set(sv, SvCUR(sv) + 1);
9894         SvPV_renew(sv, SvLEN(sv));
9895     }
9896
9897     /* decide whether this is the first or second quoted string we've read
9898        for this op
9899     */
9900
9901     if (PL_lex_stuff)
9902         PL_lex_repl = sv;
9903     else
9904         PL_lex_stuff = sv;
9905     return s;
9906 }
9907
9908 /*
9909   scan_num
9910   takes: pointer to position in buffer
9911   returns: pointer to new position in buffer
9912   side-effects: builds ops for the constant in pl_yylval.op
9913
9914   Read a number in any of the formats that Perl accepts:
9915
9916   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9917   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9918   0b[01](_?[01])*
9919   0[0-7](_?[0-7])*
9920   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9921
9922   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9923   thing it reads.
9924
9925   If it reads a number without a decimal point or an exponent, it will
9926   try converting the number to an integer and see if it can do so
9927   without loss of precision.
9928 */
9929
9930 char *
9931 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9932 {
9933     dVAR;
9934     register const char *s = start;     /* current position in buffer */
9935     register char *d;                   /* destination in temp buffer */
9936     register char *e;                   /* end of temp buffer */
9937     NV nv;                              /* number read, as a double */
9938     SV *sv = NULL;                      /* place to put the converted number */
9939     bool floatit;                       /* boolean: int or float? */
9940     const char *lastub = NULL;          /* position of last underbar */
9941     static char const number_too_long[] = "Number too long";
9942
9943     PERL_ARGS_ASSERT_SCAN_NUM;
9944
9945     /* We use the first character to decide what type of number this is */
9946
9947     switch (*s) {
9948     default:
9949       Perl_croak(aTHX_ "panic: scan_num");
9950
9951     /* if it starts with a 0, it could be an octal number, a decimal in
9952        0.13 disguise, or a hexadecimal number, or a binary number. */
9953     case '0':
9954         {
9955           /* variables:
9956              u          holds the "number so far"
9957              shift      the power of 2 of the base
9958                         (hex == 4, octal == 3, binary == 1)
9959              overflowed was the number more than we can hold?
9960
9961              Shift is used when we add a digit.  It also serves as an "are
9962              we in octal/hex/binary?" indicator to disallow hex characters
9963              when in octal mode.
9964            */
9965             NV n = 0.0;
9966             UV u = 0;
9967             I32 shift;
9968             bool overflowed = FALSE;
9969             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
9970             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
9971             static const char* const bases[5] =
9972               { "", "binary", "", "octal", "hexadecimal" };
9973             static const char* const Bases[5] =
9974               { "", "Binary", "", "Octal", "Hexadecimal" };
9975             static const char* const maxima[5] =
9976               { "",
9977                 "0b11111111111111111111111111111111",
9978                 "",
9979                 "037777777777",
9980                 "0xffffffff" };
9981             const char *base, *Base, *max;
9982
9983             /* check for hex */
9984             if (s[1] == 'x' || s[1] == 'X') {
9985                 shift = 4;
9986                 s += 2;
9987                 just_zero = FALSE;
9988             } else if (s[1] == 'b' || s[1] == 'B') {
9989                 shift = 1;
9990                 s += 2;
9991                 just_zero = FALSE;
9992             }
9993             /* check for a decimal in disguise */
9994             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
9995                 goto decimal;
9996             /* so it must be octal */
9997             else {
9998                 shift = 3;
9999                 s++;
10000             }
10001
10002             if (*s == '_') {
10003                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10004                                "Misplaced _ in number");
10005                lastub = s++;
10006             }
10007
10008             base = bases[shift];
10009             Base = Bases[shift];
10010             max  = maxima[shift];
10011
10012             /* read the rest of the number */
10013             for (;;) {
10014                 /* x is used in the overflow test,
10015                    b is the digit we're adding on. */
10016                 UV x, b;
10017
10018                 switch (*s) {
10019
10020                 /* if we don't mention it, we're done */
10021                 default:
10022                     goto out;
10023
10024                 /* _ are ignored -- but warned about if consecutive */
10025                 case '_':
10026                     if (lastub && s == lastub + 1)
10027                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10028                                        "Misplaced _ in number");
10029                     lastub = s++;
10030                     break;
10031
10032                 /* 8 and 9 are not octal */
10033                 case '8': case '9':
10034                     if (shift == 3)
10035                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10036                     /* FALL THROUGH */
10037
10038                 /* octal digits */
10039                 case '2': case '3': case '4':
10040                 case '5': case '6': case '7':
10041                     if (shift == 1)
10042                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10043                     /* FALL THROUGH */
10044
10045                 case '0': case '1':
10046                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10047                     goto digit;
10048
10049                 /* hex digits */
10050                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10051                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10052                     /* make sure they said 0x */
10053                     if (shift != 4)
10054                         goto out;
10055                     b = (*s++ & 7) + 9;
10056
10057                     /* Prepare to put the digit we have onto the end
10058                        of the number so far.  We check for overflows.
10059                     */
10060
10061                   digit:
10062                     just_zero = FALSE;
10063                     if (!overflowed) {
10064                         x = u << shift; /* make room for the digit */
10065
10066                         if ((x >> shift) != u
10067                             && !(PL_hints & HINT_NEW_BINARY)) {
10068                             overflowed = TRUE;
10069                             n = (NV) u;
10070                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10071                                              "Integer overflow in %s number",
10072                                              base);
10073                         } else
10074                             u = x | b;          /* add the digit to the end */
10075                     }
10076                     if (overflowed) {
10077                         n *= nvshift[shift];
10078                         /* If an NV has not enough bits in its
10079                          * mantissa to represent an UV this summing of
10080                          * small low-order numbers is a waste of time
10081                          * (because the NV cannot preserve the
10082                          * low-order bits anyway): we could just
10083                          * remember when did we overflow and in the
10084                          * end just multiply n by the right
10085                          * amount. */
10086                         n += (NV) b;
10087                     }
10088                     break;
10089                 }
10090             }
10091
10092           /* if we get here, we had success: make a scalar value from
10093              the number.
10094           */
10095           out:
10096
10097             /* final misplaced underbar check */
10098             if (s[-1] == '_') {
10099                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10100             }
10101
10102             if (overflowed) {
10103                 if (n > 4294967295.0)
10104                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10105                                    "%s number > %s non-portable",
10106                                    Base, max);
10107                 sv = newSVnv(n);
10108             }
10109             else {
10110 #if UVSIZE > 4
10111                 if (u > 0xffffffff)
10112                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10113                                    "%s number > %s non-portable",
10114                                    Base, max);
10115 #endif
10116                 sv = newSVuv(u);
10117             }
10118             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10119                 sv = new_constant(start, s - start, "integer",
10120                                   sv, NULL, NULL, 0);
10121             else if (PL_hints & HINT_NEW_BINARY)
10122                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10123         }
10124         break;
10125
10126     /*
10127       handle decimal numbers.
10128       we're also sent here when we read a 0 as the first digit
10129     */
10130     case '1': case '2': case '3': case '4': case '5':
10131     case '6': case '7': case '8': case '9': case '.':
10132       decimal:
10133         d = PL_tokenbuf;
10134         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10135         floatit = FALSE;
10136
10137         /* read next group of digits and _ and copy into d */
10138         while (isDIGIT(*s) || *s == '_') {
10139             /* skip underscores, checking for misplaced ones
10140                if -w is on
10141             */
10142             if (*s == '_') {
10143                 if (lastub && s == lastub + 1)
10144                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10145                                    "Misplaced _ in number");
10146                 lastub = s++;
10147             }
10148             else {
10149                 /* check for end of fixed-length buffer */
10150                 if (d >= e)
10151                     Perl_croak(aTHX_ number_too_long);
10152                 /* if we're ok, copy the character */
10153                 *d++ = *s++;
10154             }
10155         }
10156
10157         /* final misplaced underbar check */
10158         if (lastub && s == lastub + 1) {
10159             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10160         }
10161
10162         /* read a decimal portion if there is one.  avoid
10163            3..5 being interpreted as the number 3. followed
10164            by .5
10165         */
10166         if (*s == '.' && s[1] != '.') {
10167             floatit = TRUE;
10168             *d++ = *s++;
10169
10170             if (*s == '_') {
10171                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10172                                "Misplaced _ in number");
10173                 lastub = s;
10174             }
10175
10176             /* copy, ignoring underbars, until we run out of digits.
10177             */
10178             for (; isDIGIT(*s) || *s == '_'; s++) {
10179                 /* fixed length buffer check */
10180                 if (d >= e)
10181                     Perl_croak(aTHX_ number_too_long);
10182                 if (*s == '_') {
10183                    if (lastub && s == lastub + 1)
10184                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10185                                       "Misplaced _ in number");
10186                    lastub = s;
10187                 }
10188                 else
10189                     *d++ = *s;
10190             }
10191             /* fractional part ending in underbar? */
10192             if (s[-1] == '_') {
10193                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10194                                "Misplaced _ in number");
10195             }
10196             if (*s == '.' && isDIGIT(s[1])) {
10197                 /* oops, it's really a v-string, but without the "v" */
10198                 s = start;
10199                 goto vstring;
10200             }
10201         }
10202
10203         /* read exponent part, if present */
10204         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10205             floatit = TRUE;
10206             s++;
10207
10208             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10209             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10210
10211             /* stray preinitial _ */
10212             if (*s == '_') {
10213                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10214                                "Misplaced _ in number");
10215                 lastub = s++;
10216             }
10217
10218             /* allow positive or negative exponent */
10219             if (*s == '+' || *s == '-')
10220                 *d++ = *s++;
10221
10222             /* stray initial _ */
10223             if (*s == '_') {
10224                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10225                                "Misplaced _ in number");
10226                 lastub = s++;
10227             }
10228
10229             /* read digits of exponent */
10230             while (isDIGIT(*s) || *s == '_') {
10231                 if (isDIGIT(*s)) {
10232                     if (d >= e)
10233                         Perl_croak(aTHX_ number_too_long);
10234                     *d++ = *s++;
10235                 }
10236                 else {
10237                    if (((lastub && s == lastub + 1) ||
10238                         (!isDIGIT(s[1]) && s[1] != '_')))
10239                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10240                                       "Misplaced _ in number");
10241                    lastub = s++;
10242                 }
10243             }
10244         }
10245
10246
10247         /*
10248            We try to do an integer conversion first if no characters
10249            indicating "float" have been found.
10250          */
10251
10252         if (!floatit) {
10253             UV uv;
10254             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10255
10256             if (flags == IS_NUMBER_IN_UV) {
10257               if (uv <= IV_MAX)
10258                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10259               else
10260                 sv = newSVuv(uv);
10261             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10262               if (uv <= (UV) IV_MIN)
10263                 sv = newSViv(-(IV)uv);
10264               else
10265                 floatit = TRUE;
10266             } else
10267               floatit = TRUE;
10268         }
10269         if (floatit) {
10270             /* terminate the string */
10271             *d = '\0';
10272             nv = Atof(PL_tokenbuf);
10273             sv = newSVnv(nv);
10274         }
10275
10276         if ( floatit
10277              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10278             const char *const key = floatit ? "float" : "integer";
10279             const STRLEN keylen = floatit ? 5 : 7;
10280             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10281                                 key, keylen, sv, NULL, NULL, 0);
10282         }
10283         break;
10284
10285     /* if it starts with a v, it could be a v-string */
10286     case 'v':
10287 vstring:
10288                 sv = newSV(5); /* preallocate storage space */
10289                 s = scan_vstring(s, PL_bufend, sv);
10290         break;
10291     }
10292
10293     /* make the op for the constant and return */
10294
10295     if (sv)
10296         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10297     else
10298         lvalp->opval = NULL;
10299
10300     return (char *)s;
10301 }
10302
10303 STATIC char *
10304 S_scan_formline(pTHX_ register char *s)
10305 {
10306     dVAR;
10307     register char *eol;
10308     register char *t;
10309     SV * const stuff = newSVpvs("");
10310     bool needargs = FALSE;
10311     bool eofmt = FALSE;
10312 #ifdef PERL_MAD
10313     char *tokenstart = s;
10314     SV* savewhite = NULL;
10315
10316     if (PL_madskills) {
10317         savewhite = PL_thiswhite;
10318         PL_thiswhite = 0;
10319     }
10320 #endif
10321
10322     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10323
10324     while (!needargs) {
10325         if (*s == '.') {
10326             t = s+1;
10327 #ifdef PERL_STRICT_CR
10328             while (SPACE_OR_TAB(*t))
10329                 t++;
10330 #else
10331             while (SPACE_OR_TAB(*t) || *t == '\r')
10332                 t++;
10333 #endif
10334             if (*t == '\n' || t == PL_bufend) {
10335                 eofmt = TRUE;
10336                 break;
10337             }
10338         }
10339         if (PL_in_eval && !PL_rsfp) {
10340             eol = (char *) memchr(s,'\n',PL_bufend-s);
10341             if (!eol++)
10342                 eol = PL_bufend;
10343         }
10344         else
10345             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10346         if (*s != '#') {
10347             for (t = s; t < eol; t++) {
10348                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10349                     needargs = FALSE;
10350                     goto enough;        /* ~~ must be first line in formline */
10351                 }
10352                 if (*t == '@' || *t == '^')
10353                     needargs = TRUE;
10354             }
10355             if (eol > s) {
10356                 sv_catpvn(stuff, s, eol-s);
10357 #ifndef PERL_STRICT_CR
10358                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10359                     char *end = SvPVX(stuff) + SvCUR(stuff);
10360                     end[-2] = '\n';
10361                     end[-1] = '\0';
10362                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10363                 }
10364 #endif
10365             }
10366             else
10367               break;
10368         }
10369         s = (char*)eol;
10370         if (PL_rsfp) {
10371             bool got_some;
10372 #ifdef PERL_MAD
10373             if (PL_madskills) {
10374                 if (PL_thistoken)
10375                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10376                 else
10377                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10378             }
10379 #endif
10380             PL_bufptr = PL_bufend;
10381             CopLINE_inc(PL_curcop);
10382             got_some = lex_next_chunk(0);
10383             CopLINE_dec(PL_curcop);
10384             s = PL_bufptr;
10385 #ifdef PERL_MAD
10386             tokenstart = PL_bufptr;
10387 #endif
10388             if (!got_some)
10389                 break;
10390         }
10391         incline(s);
10392     }
10393   enough:
10394     if (SvCUR(stuff)) {
10395         PL_expect = XTERM;
10396         if (needargs) {
10397             PL_lex_state = LEX_NORMAL;
10398             start_force(PL_curforce);
10399             NEXTVAL_NEXTTOKE.ival = 0;
10400             force_next(',');
10401         }
10402         else
10403             PL_lex_state = LEX_FORMLINE;
10404         if (!IN_BYTES) {
10405             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10406                 SvUTF8_on(stuff);
10407             else if (PL_encoding)
10408                 sv_recode_to_utf8(stuff, PL_encoding);
10409         }
10410         start_force(PL_curforce);
10411         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10412         force_next(THING);
10413         start_force(PL_curforce);
10414         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10415         force_next(LSTOP);
10416     }
10417     else {
10418         SvREFCNT_dec(stuff);
10419         if (eofmt)
10420             PL_lex_formbrack = 0;
10421         PL_bufptr = s;
10422     }
10423 #ifdef PERL_MAD
10424     if (PL_madskills) {
10425         if (PL_thistoken)
10426             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10427         else
10428             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10429         PL_thiswhite = savewhite;
10430     }
10431 #endif
10432     return s;
10433 }
10434
10435 I32
10436 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10437 {
10438     dVAR;
10439     const I32 oldsavestack_ix = PL_savestack_ix;
10440     CV* const outsidecv = PL_compcv;
10441
10442     if (PL_compcv) {
10443         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10444     }
10445     SAVEI32(PL_subline);
10446     save_item(PL_subname);
10447     SAVESPTR(PL_compcv);
10448
10449     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10450     CvFLAGS(PL_compcv) |= flags;
10451
10452     PL_subline = CopLINE(PL_curcop);
10453     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10454     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10455     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10456
10457     return oldsavestack_ix;
10458 }
10459
10460 #ifdef __SC__
10461 #pragma segment Perl_yylex
10462 #endif
10463 static int
10464 S_yywarn(pTHX_ const char *const s)
10465 {
10466     dVAR;
10467
10468     PERL_ARGS_ASSERT_YYWARN;
10469
10470     PL_in_eval |= EVAL_WARNONLY;
10471     yyerror(s);
10472     PL_in_eval &= ~EVAL_WARNONLY;
10473     return 0;
10474 }
10475
10476 int
10477 Perl_yyerror(pTHX_ const char *const s)
10478 {
10479     dVAR;
10480     const char *where = NULL;
10481     const char *context = NULL;
10482     int contlen = -1;
10483     SV *msg;
10484     int yychar  = PL_parser->yychar;
10485
10486     PERL_ARGS_ASSERT_YYERROR;
10487
10488     if (!yychar || (yychar == ';' && !PL_rsfp))
10489         where = "at EOF";
10490     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10491       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10492       PL_oldbufptr != PL_bufptr) {
10493         /*
10494                 Only for NetWare:
10495                 The code below is removed for NetWare because it abends/crashes on NetWare
10496                 when the script has error such as not having the closing quotes like:
10497                     if ($var eq "value)
10498                 Checking of white spaces is anyway done in NetWare code.
10499         */
10500 #ifndef NETWARE
10501         while (isSPACE(*PL_oldoldbufptr))
10502             PL_oldoldbufptr++;
10503 #endif
10504         context = PL_oldoldbufptr;
10505         contlen = PL_bufptr - PL_oldoldbufptr;
10506     }
10507     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10508       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10509         /*
10510                 Only for NetWare:
10511                 The code below is removed for NetWare because it abends/crashes on NetWare
10512                 when the script has error such as not having the closing quotes like:
10513                     if ($var eq "value)
10514                 Checking of white spaces is anyway done in NetWare code.
10515         */
10516 #ifndef NETWARE
10517         while (isSPACE(*PL_oldbufptr))
10518             PL_oldbufptr++;
10519 #endif
10520         context = PL_oldbufptr;
10521         contlen = PL_bufptr - PL_oldbufptr;
10522     }
10523     else if (yychar > 255)
10524         where = "next token ???";
10525     else if (yychar == -2) { /* YYEMPTY */
10526         if (PL_lex_state == LEX_NORMAL ||
10527            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10528             where = "at end of line";
10529         else if (PL_lex_inpat)
10530             where = "within pattern";
10531         else
10532             where = "within string";
10533     }
10534     else {
10535         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10536         if (yychar < 32)
10537             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10538         else if (isPRINT_LC(yychar)) {
10539             const char string = yychar;
10540             sv_catpvn(where_sv, &string, 1);
10541         }
10542         else
10543             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10544         where = SvPVX_const(where_sv);
10545     }
10546     msg = sv_2mortal(newSVpv(s, 0));
10547     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10548         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10549     if (context)
10550         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10551     else
10552         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10553     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10554         Perl_sv_catpvf(aTHX_ msg,
10555         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10556                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10557         PL_multi_end = 0;
10558     }
10559     if (PL_in_eval & EVAL_WARNONLY) {
10560         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10561     }
10562     else
10563         qerror(msg);
10564     if (PL_error_count >= 10) {
10565         if (PL_in_eval && SvCUR(ERRSV))
10566             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10567                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10568         else
10569             Perl_croak(aTHX_ "%s has too many errors.\n",
10570             OutCopFILE(PL_curcop));
10571     }
10572     PL_in_my = 0;
10573     PL_in_my_stash = NULL;
10574     return 0;
10575 }
10576 #ifdef __SC__
10577 #pragma segment Main
10578 #endif
10579
10580 STATIC char*
10581 S_swallow_bom(pTHX_ U8 *s)
10582 {
10583     dVAR;
10584     const STRLEN slen = SvCUR(PL_linestr);
10585
10586     PERL_ARGS_ASSERT_SWALLOW_BOM;
10587
10588     switch (s[0]) {
10589     case 0xFF:
10590         if (s[1] == 0xFE) {
10591             /* UTF-16 little-endian? (or UTF-32LE?) */
10592             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10593                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10594 #ifndef PERL_NO_UTF16_FILTER
10595             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10596             s += 2;
10597             if (PL_bufend > (char*)s) {
10598                 s = add_utf16_textfilter(s, TRUE);
10599             }
10600 #else
10601             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10602 #endif
10603         }
10604         break;
10605     case 0xFE:
10606         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10607 #ifndef PERL_NO_UTF16_FILTER
10608             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10609             s += 2;
10610             if (PL_bufend > (char *)s) {
10611                 s = add_utf16_textfilter(s, FALSE);
10612             }
10613 #else
10614             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10615 #endif
10616         }
10617         break;
10618     case 0xEF:
10619         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10620             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10621             s += 3;                      /* UTF-8 */
10622         }
10623         break;
10624     case 0:
10625         if (slen > 3) {
10626              if (s[1] == 0) {
10627                   if (s[2] == 0xFE && s[3] == 0xFF) {
10628                        /* UTF-32 big-endian */
10629                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10630                   }
10631              }
10632              else if (s[2] == 0 && s[3] != 0) {
10633                   /* Leading bytes
10634                    * 00 xx 00 xx
10635                    * are a good indicator of UTF-16BE. */
10636 #ifndef PERL_NO_UTF16_FILTER
10637                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10638                   s = add_utf16_textfilter(s, FALSE);
10639 #else
10640                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10641 #endif
10642              }
10643         }
10644 #ifdef EBCDIC
10645     case 0xDD:
10646         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10647             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10648             s += 4;                      /* UTF-8 */
10649         }
10650         break;
10651 #endif
10652
10653     default:
10654          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10655                   /* Leading bytes
10656                    * xx 00 xx 00
10657                    * are a good indicator of UTF-16LE. */
10658 #ifndef PERL_NO_UTF16_FILTER
10659               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10660               s = add_utf16_textfilter(s, TRUE);
10661 #else
10662               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10663 #endif
10664          }
10665     }
10666     return (char*)s;
10667 }
10668
10669
10670 #ifndef PERL_NO_UTF16_FILTER
10671 static I32
10672 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10673 {
10674     dVAR;
10675     SV *const filter = FILTER_DATA(idx);
10676     /* We re-use this each time round, throwing the contents away before we
10677        return.  */
10678     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10679     SV *const utf8_buffer = filter;
10680     IV status = IoPAGE(filter);
10681     const bool reverse = cBOOL(IoLINES(filter));
10682     I32 retval;
10683
10684     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10685
10686     /* As we're automatically added, at the lowest level, and hence only called
10687        from this file, we can be sure that we're not called in block mode. Hence
10688        don't bother writing code to deal with block mode.  */
10689     if (maxlen) {
10690         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10691     }
10692     if (status < 0) {
10693         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10694     }
10695     DEBUG_P(PerlIO_printf(Perl_debug_log,
10696                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10697                           FPTR2DPTR(void *, S_utf16_textfilter),
10698                           reverse ? 'l' : 'b', idx, maxlen, status,
10699                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10700
10701     while (1) {
10702         STRLEN chars;
10703         STRLEN have;
10704         I32 newlen;
10705         U8 *end;
10706         /* First, look in our buffer of existing UTF-8 data:  */
10707         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10708
10709         if (nl) {
10710             ++nl;
10711         } else if (status == 0) {
10712             /* EOF */
10713             IoPAGE(filter) = 0;
10714             nl = SvEND(utf8_buffer);
10715         }
10716         if (nl) {
10717             STRLEN got = nl - SvPVX(utf8_buffer);
10718             /* Did we have anything to append?  */
10719             retval = got != 0;
10720             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10721             /* Everything else in this code works just fine if SVp_POK isn't
10722                set.  This, however, needs it, and we need it to work, else
10723                we loop infinitely because the buffer is never consumed.  */
10724             sv_chop(utf8_buffer, nl);
10725             break;
10726         }
10727
10728         /* OK, not a complete line there, so need to read some more UTF-16.
10729            Read an extra octect if the buffer currently has an odd number. */
10730         while (1) {
10731             if (status <= 0)
10732                 break;
10733             if (SvCUR(utf16_buffer) >= 2) {
10734                 /* Location of the high octet of the last complete code point.
10735                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10736                    *coupled* with all the benefits of partial reads and
10737                    endianness.  */
10738                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10739                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10740
10741                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10742                     break;
10743                 }
10744
10745                 /* We have the first half of a surrogate. Read more.  */
10746                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10747             }
10748
10749             status = FILTER_READ(idx + 1, utf16_buffer,
10750                                  160 + (SvCUR(utf16_buffer) & 1));
10751             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10752             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10753             if (status < 0) {
10754                 /* Error */
10755                 IoPAGE(filter) = status;
10756                 return status;
10757             }
10758         }
10759
10760         chars = SvCUR(utf16_buffer) >> 1;
10761         have = SvCUR(utf8_buffer);
10762         SvGROW(utf8_buffer, have + chars * 3 + 1);
10763
10764         if (reverse) {
10765             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10766                                          (U8*)SvPVX_const(utf8_buffer) + have,
10767                                          chars * 2, &newlen);
10768         } else {
10769             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10770                                 (U8*)SvPVX_const(utf8_buffer) + have,
10771                                 chars * 2, &newlen);
10772         }
10773         SvCUR_set(utf8_buffer, have + newlen);
10774         *end = '\0';
10775
10776         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10777            it's private to us, and utf16_to_utf8{,reversed} take a
10778            (pointer,length) pair, rather than a NUL-terminated string.  */
10779         if(SvCUR(utf16_buffer) & 1) {
10780             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10781             SvCUR_set(utf16_buffer, 1);
10782         } else {
10783             SvCUR_set(utf16_buffer, 0);
10784         }
10785     }
10786     DEBUG_P(PerlIO_printf(Perl_debug_log,
10787                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10788                           status,
10789                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10790     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10791     return retval;
10792 }
10793
10794 static U8 *
10795 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10796 {
10797     SV *filter = filter_add(S_utf16_textfilter, NULL);
10798
10799     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10800
10801     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10802     sv_setpvs(filter, "");
10803     IoLINES(filter) = reversed;
10804     IoPAGE(filter) = 1; /* Not EOF */
10805
10806     /* Sadly, we have to return a valid pointer, come what may, so we have to
10807        ignore any error return from this.  */
10808     SvCUR_set(PL_linestr, 0);
10809     if (FILTER_READ(0, PL_linestr, 0)) {
10810         SvUTF8_on(PL_linestr);
10811     } else {
10812         SvUTF8_on(PL_linestr);
10813     }
10814     PL_bufend = SvEND(PL_linestr);
10815     return (U8*)SvPVX(PL_linestr);
10816 }
10817 #endif
10818
10819 /*
10820 Returns a pointer to the next character after the parsed
10821 vstring, as well as updating the passed in sv.
10822
10823 Function must be called like
10824
10825         sv = newSV(5);
10826         s = scan_vstring(s,e,sv);
10827
10828 where s and e are the start and end of the string.
10829 The sv should already be large enough to store the vstring
10830 passed in, for performance reasons.
10831
10832 */
10833
10834 char *
10835 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10836 {
10837     dVAR;
10838     const char *pos = s;
10839     const char *start = s;
10840
10841     PERL_ARGS_ASSERT_SCAN_VSTRING;
10842
10843     if (*pos == 'v') pos++;  /* get past 'v' */
10844     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10845         pos++;
10846     if ( *pos != '.') {
10847         /* this may not be a v-string if followed by => */
10848         const char *next = pos;
10849         while (next < e && isSPACE(*next))
10850             ++next;
10851         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10852             /* return string not v-string */
10853             sv_setpvn(sv,(char *)s,pos-s);
10854             return (char *)pos;
10855         }
10856     }
10857
10858     if (!isALPHA(*pos)) {
10859         U8 tmpbuf[UTF8_MAXBYTES+1];
10860
10861         if (*s == 'v')
10862             s++;  /* get past 'v' */
10863
10864         sv_setpvs(sv, "");
10865
10866         for (;;) {
10867             /* this is atoi() that tolerates underscores */
10868             U8 *tmpend;
10869             UV rev = 0;
10870             const char *end = pos;
10871             UV mult = 1;
10872             while (--end >= s) {
10873                 if (*end != '_') {
10874                     const UV orev = rev;
10875                     rev += (*end - '0') * mult;
10876                     mult *= 10;
10877                     if (orev > rev)
10878                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10879                                          "Integer overflow in decimal number");
10880                 }
10881             }
10882 #ifdef EBCDIC
10883             if (rev > 0x7FFFFFFF)
10884                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10885 #endif
10886             /* Append native character for the rev point */
10887             tmpend = uvchr_to_utf8(tmpbuf, rev);
10888             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10889             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10890                  SvUTF8_on(sv);
10891             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10892                  s = ++pos;
10893             else {
10894                  s = pos;
10895                  break;
10896             }
10897             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10898                  pos++;
10899         }
10900         SvPOK_on(sv);
10901         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10902         SvRMAGICAL_on(sv);
10903     }
10904     return (char *)s;
10905 }
10906
10907 int
10908 Perl_keyword_plugin_standard(pTHX_
10909         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10910 {
10911     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10912     PERL_UNUSED_CONTEXT;
10913     PERL_UNUSED_ARG(keyword_ptr);
10914     PERL_UNUSED_ARG(keyword_len);
10915     PERL_UNUSED_ARG(op_ptr);
10916     return KEYWORD_PLUGIN_DECLINE;
10917 }
10918
10919 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10920 static void
10921 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
10922 {
10923     SAVEI32(PL_lex_brackets);
10924     if (PL_lex_brackets > 100)
10925         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10926     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
10927     SAVEI32(PL_lex_allbrackets);
10928     PL_lex_allbrackets = 0;
10929     SAVEI8(PL_lex_fakeeof);
10930     PL_lex_fakeeof = (U8)fakeeof;
10931     if(yyparse(gramtype) && !PL_parser->error_count)
10932         qerror(Perl_mess(aTHX_ "Parse error"));
10933 }
10934
10935 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
10936 static OP *
10937 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
10938 {
10939     OP *o;
10940     ENTER;
10941     SAVEVPTR(PL_eval_root);
10942     PL_eval_root = NULL;
10943     parse_recdescent(gramtype, fakeeof);
10944     o = PL_eval_root;
10945     LEAVE;
10946     return o;
10947 }
10948
10949 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
10950 static OP *
10951 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
10952 {
10953     OP *exprop;
10954     if (flags & ~PARSE_OPTIONAL)
10955         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
10956     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
10957     if (!exprop && !(flags & PARSE_OPTIONAL)) {
10958         if (!PL_parser->error_count)
10959             qerror(Perl_mess(aTHX_ "Parse error"));
10960         exprop = newOP(OP_NULL, 0);
10961     }
10962     return exprop;
10963 }
10964
10965 /*
10966 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
10967
10968 Parse a Perl arithmetic expression.  This may contain operators of precedence
10969 down to the bit shift operators.  The expression must be followed (and thus
10970 terminated) either by a comparison or lower-precedence operator or by
10971 something that would normally terminate an expression such as semicolon.
10972 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10973 otherwise it is mandatory.  It is up to the caller to ensure that the
10974 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10975 the source of the code to be parsed and the lexical context for the
10976 expression.
10977
10978 The op tree representing the expression is returned.  If an optional
10979 expression is absent, a null pointer is returned, otherwise the pointer
10980 will be non-null.
10981
10982 If an error occurs in parsing or compilation, in most cases a valid op
10983 tree is returned anyway.  The error is reflected in the parser state,
10984 normally resulting in a single exception at the top level of parsing
10985 which covers all the compilation errors that occurred.  Some compilation
10986 errors, however, will throw an exception immediately.
10987
10988 =cut
10989 */
10990
10991 OP *
10992 Perl_parse_arithexpr(pTHX_ U32 flags)
10993 {
10994     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
10995 }
10996
10997 /*
10998 =for apidoc Amx|OP *|parse_termexpr|U32 flags
10999
11000 Parse a Perl term expression.  This may contain operators of precedence
11001 down to the assignment operators.  The expression must be followed (and thus
11002 terminated) either by a comma or lower-precedence operator or by
11003 something that would normally terminate an expression such as semicolon.
11004 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11005 otherwise it is mandatory.  It is up to the caller to ensure that the
11006 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11007 the source of the code to be parsed and the lexical context for the
11008 expression.
11009
11010 The op tree representing the expression is returned.  If an optional
11011 expression is absent, a null pointer is returned, otherwise the pointer
11012 will be non-null.
11013
11014 If an error occurs in parsing or compilation, in most cases a valid op
11015 tree is returned anyway.  The error is reflected in the parser state,
11016 normally resulting in a single exception at the top level of parsing
11017 which covers all the compilation errors that occurred.  Some compilation
11018 errors, however, will throw an exception immediately.
11019
11020 =cut
11021 */
11022
11023 OP *
11024 Perl_parse_termexpr(pTHX_ U32 flags)
11025 {
11026     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11027 }
11028
11029 /*
11030 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11031
11032 Parse a Perl list expression.  This may contain operators of precedence
11033 down to the comma operator.  The expression must be followed (and thus
11034 terminated) either by a low-precedence logic operator such as C<or> or by
11035 something that would normally terminate an expression such as semicolon.
11036 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11037 otherwise it is mandatory.  It is up to the caller to ensure that the
11038 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11039 the source of the code to be parsed and the lexical context for the
11040 expression.
11041
11042 The op tree representing the expression is returned.  If an optional
11043 expression is absent, a null pointer is returned, otherwise the pointer
11044 will be non-null.
11045
11046 If an error occurs in parsing or compilation, in most cases a valid op
11047 tree is returned anyway.  The error is reflected in the parser state,
11048 normally resulting in a single exception at the top level of parsing
11049 which covers all the compilation errors that occurred.  Some compilation
11050 errors, however, will throw an exception immediately.
11051
11052 =cut
11053 */
11054
11055 OP *
11056 Perl_parse_listexpr(pTHX_ U32 flags)
11057 {
11058     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11059 }
11060
11061 /*
11062 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11063
11064 Parse a single complete Perl expression.  This allows the full
11065 expression grammar, including the lowest-precedence operators such
11066 as C<or>.  The expression must be followed (and thus terminated) by a
11067 token that an expression would normally be terminated by: end-of-file,
11068 closing bracketing punctuation, semicolon, or one of the keywords that
11069 signals a postfix expression-statement modifier.  If I<flags> includes
11070 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11071 mandatory.  It is up to the caller to ensure that the dynamic parser
11072 state (L</PL_parser> et al) is correctly set to reflect the source of
11073 the code to be parsed and the lexical context for the expression.
11074
11075 The op tree representing the expression is returned.  If an optional
11076 expression is absent, a null pointer is returned, otherwise the pointer
11077 will be non-null.
11078
11079 If an error occurs in parsing or compilation, in most cases a valid op
11080 tree is returned anyway.  The error is reflected in the parser state,
11081 normally resulting in a single exception at the top level of parsing
11082 which covers all the compilation errors that occurred.  Some compilation
11083 errors, however, will throw an exception immediately.
11084
11085 =cut
11086 */
11087
11088 OP *
11089 Perl_parse_fullexpr(pTHX_ U32 flags)
11090 {
11091     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11092 }
11093
11094 /*
11095 =for apidoc Amx|OP *|parse_block|U32 flags
11096
11097 Parse a single complete Perl code block.  This consists of an opening
11098 brace, a sequence of statements, and a closing brace.  The block
11099 constitutes a lexical scope, so C<my> variables and various compile-time
11100 effects can be contained within it.  It is up to the caller to ensure
11101 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11102 reflect the source of the code to be parsed and the lexical context for
11103 the statement.
11104
11105 The op tree representing the code block is returned.  This is always a
11106 real op, never a null pointer.  It will normally be a C<lineseq> list,
11107 including C<nextstate> or equivalent ops.  No ops to construct any kind
11108 of runtime scope are included by virtue of it being a block.
11109
11110 If an error occurs in parsing or compilation, in most cases a valid op
11111 tree (most likely null) is returned anyway.  The error is reflected in
11112 the parser state, normally resulting in a single exception at the top
11113 level of parsing which covers all the compilation errors that occurred.
11114 Some compilation errors, however, will throw an exception immediately.
11115
11116 The I<flags> parameter is reserved for future use, and must always
11117 be zero.
11118
11119 =cut
11120 */
11121
11122 OP *
11123 Perl_parse_block(pTHX_ U32 flags)
11124 {
11125     if (flags)
11126         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11127     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11128 }
11129
11130 /*
11131 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11132
11133 Parse a single unadorned Perl statement.  This may be a normal imperative
11134 statement or a declaration that has compile-time effect.  It does not
11135 include any label or other affixture.  It is up to the caller to ensure
11136 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11137 reflect the source of the code to be parsed and the lexical context for
11138 the statement.
11139
11140 The op tree representing the statement is returned.  This may be a
11141 null pointer if the statement is null, for example if it was actually
11142 a subroutine definition (which has compile-time side effects).  If not
11143 null, it will be ops directly implementing the statement, suitable to
11144 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11145 equivalent op (except for those embedded in a scope contained entirely
11146 within the statement).
11147
11148 If an error occurs in parsing or compilation, in most cases a valid op
11149 tree (most likely null) is returned anyway.  The error is reflected in
11150 the parser state, normally resulting in a single exception at the top
11151 level of parsing which covers all the compilation errors that occurred.
11152 Some compilation errors, however, will throw an exception immediately.
11153
11154 The I<flags> parameter is reserved for future use, and must always
11155 be zero.
11156
11157 =cut
11158 */
11159
11160 OP *
11161 Perl_parse_barestmt(pTHX_ U32 flags)
11162 {
11163     if (flags)
11164         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11165     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11166 }
11167
11168 /*
11169 =for apidoc Amx|SV *|parse_label|U32 flags
11170
11171 Parse a single label, possibly optional, of the type that may prefix a
11172 Perl statement.  It is up to the caller to ensure that the dynamic parser
11173 state (L</PL_parser> et al) is correctly set to reflect the source of
11174 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11175 label is optional, otherwise it is mandatory.
11176
11177 The name of the label is returned in the form of a fresh scalar.  If an
11178 optional label is absent, a null pointer is returned.
11179
11180 If an error occurs in parsing, which can only occur if the label is
11181 mandatory, a valid label is returned anyway.  The error is reflected in
11182 the parser state, normally resulting in a single exception at the top
11183 level of parsing which covers all the compilation errors that occurred.
11184
11185 =cut
11186 */
11187
11188 SV *
11189 Perl_parse_label(pTHX_ U32 flags)
11190 {
11191     if (flags & ~PARSE_OPTIONAL)
11192         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11193     if (PL_lex_state == LEX_KNOWNEXT) {
11194         PL_parser->yychar = yylex();
11195         if (PL_parser->yychar == LABEL) {
11196             char *lpv = pl_yylval.pval;
11197             STRLEN llen = strlen(lpv);
11198             SV *lsv;
11199             PL_parser->yychar = YYEMPTY;
11200             lsv = newSV_type(SVt_PV);
11201             SvPV_set(lsv, lpv);
11202             SvCUR_set(lsv, llen);
11203             SvLEN_set(lsv, llen+1);
11204             SvPOK_on(lsv);
11205             return lsv;
11206         } else {
11207             yyunlex();
11208             goto no_label;
11209         }
11210     } else {
11211         char *s, *t;
11212         U8 c;
11213         STRLEN wlen, bufptr_pos;
11214         lex_read_space(0);
11215         t = s = PL_bufptr;
11216         c = (U8)*s;
11217         if (!isIDFIRST_A(c))
11218             goto no_label;
11219         do {
11220             c = (U8)*++t;
11221         } while(isWORDCHAR_A(c));
11222         wlen = t - s;
11223         if (word_takes_any_delimeter(s, wlen))
11224             goto no_label;
11225         bufptr_pos = s - SvPVX(PL_linestr);
11226         PL_bufptr = t;
11227         lex_read_space(LEX_KEEP_PREVIOUS);
11228         t = PL_bufptr;
11229         s = SvPVX(PL_linestr) + bufptr_pos;
11230         if (t[0] == ':' && t[1] != ':') {
11231             PL_oldoldbufptr = PL_oldbufptr;
11232             PL_oldbufptr = s;
11233             PL_bufptr = t+1;
11234             return newSVpvn(s, wlen);
11235         } else {
11236             PL_bufptr = s;
11237             no_label:
11238             if (flags & PARSE_OPTIONAL) {
11239                 return NULL;
11240             } else {
11241                 qerror(Perl_mess(aTHX_ "Parse error"));
11242                 return newSVpvs("x");
11243             }
11244         }
11245     }
11246 }
11247
11248 /*
11249 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11250
11251 Parse a single complete Perl statement.  This may be a normal imperative
11252 statement or a declaration that has compile-time effect, and may include
11253 optional labels.  It is up to the caller to ensure that the dynamic
11254 parser state (L</PL_parser> et al) is correctly set to reflect the source
11255 of the code to be parsed and the lexical context for the statement.
11256
11257 The op tree representing the statement is returned.  This may be a
11258 null pointer if the statement is null, for example if it was actually
11259 a subroutine definition (which has compile-time side effects).  If not
11260 null, it will be the result of a L</newSTATEOP> call, normally including
11261 a C<nextstate> or equivalent op.
11262
11263 If an error occurs in parsing or compilation, in most cases a valid op
11264 tree (most likely null) is returned anyway.  The error is reflected in
11265 the parser state, normally resulting in a single exception at the top
11266 level of parsing which covers all the compilation errors that occurred.
11267 Some compilation errors, however, will throw an exception immediately.
11268
11269 The I<flags> parameter is reserved for future use, and must always
11270 be zero.
11271
11272 =cut
11273 */
11274
11275 OP *
11276 Perl_parse_fullstmt(pTHX_ U32 flags)
11277 {
11278     if (flags)
11279         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11280     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11281 }
11282
11283 /*
11284 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11285
11286 Parse a sequence of zero or more Perl statements.  These may be normal
11287 imperative statements, including optional labels, or declarations
11288 that have compile-time effect, or any mixture thereof.  The statement
11289 sequence ends when a closing brace or end-of-file is encountered in a
11290 place where a new statement could have validly started.  It is up to
11291 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11292 is correctly set to reflect the source of the code to be parsed and the
11293 lexical context for the statements.
11294
11295 The op tree representing the statement sequence is returned.  This may
11296 be a null pointer if the statements were all null, for example if there
11297 were no statements or if there were only subroutine definitions (which
11298 have compile-time side effects).  If not null, it will be a C<lineseq>
11299 list, normally including C<nextstate> or equivalent ops.
11300
11301 If an error occurs in parsing or compilation, in most cases a valid op
11302 tree is returned anyway.  The error is reflected in the parser state,
11303 normally resulting in a single exception at the top level of parsing
11304 which covers all the compilation errors that occurred.  Some compilation
11305 errors, however, will throw an exception immediately.
11306
11307 The I<flags> parameter is reserved for future use, and must always
11308 be zero.
11309
11310 =cut
11311 */
11312
11313 OP *
11314 Perl_parse_stmtseq(pTHX_ U32 flags)
11315 {
11316     OP *stmtseqop;
11317     I32 c;
11318     if (flags)
11319         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11320     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11321     c = lex_peek_unichar(0);
11322     if (c != -1 && c != /*{*/'}')
11323         qerror(Perl_mess(aTHX_ "Parse error"));
11324     return stmtseqop;
11325 }
11326
11327 void
11328 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11329 {
11330     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11331     deprecate("qw(...) as parentheses");
11332     force_next((4<<24)|')');
11333     if (qwlist->op_type == OP_STUB) {
11334         op_free(qwlist);
11335     }
11336     else {
11337         start_force(PL_curforce);
11338         NEXTVAL_NEXTTOKE.opval = qwlist;
11339         force_next(THING);
11340     }
11341     force_next((2<<24)|'(');
11342 }
11343
11344 /*
11345  * Local variables:
11346  * c-indentation-style: bsd
11347  * c-basic-offset: 4
11348  * indent-tabs-mode: t
11349  * End:
11350  *
11351  * ex: set ts=8 sts=4 sw=4 noet:
11352  */