toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN1         : not used, except for not, which isn't a UNIOP
 228  * BOop         : bitwise or or xor
 229  * BAop         : bitwise and
 230  * SHop         : shift operator
 231  * PWop         : power operator
 232  * PMop         : pattern-matching operator
 233  * Aop          : addition-level operator
 234  * Mop          : multiplication-level operator
 235  * Eop          : equality-testing operator
 236  * Rop          : relational operator <= != gt
 237  *
 238  * Also see LOP and lop() below.
 239  */
 240
 241 #ifdef DEBUGGING /* Serve -DT. */
 242 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 243 #else
 244 #   define REPORT(retval) (retval)
 245 #endif
 246
 247 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 248 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 249 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 250 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 253 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 254 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 255 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 256 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 257 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 258 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 259 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 260 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 261 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 262 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 263 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 264 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 265 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 266 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 267
 268 /* This bit of chicanery makes a unary function followed by
 269  * a parenthesis into a function with one argument, highest precedence.
 270  * The UNIDOR macro is for unary functions that can be followed by the //
 271  * operator (such as C<shift // 0>).
 272  */
 273 #define UNI2(f,x) { \
 274         pl_yylval.ival = f; \
 275         PL_expect = x; \
 276         PL_bufptr = s; \
 277         PL_last_uni = PL_oldbufptr; \
 278         PL_last_lop_op = f; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284 #define UNI(f)    UNI2(f,XTERM)
 285 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 286
 287 #define UNIBRACK(f) { \
 288         pl_yylval.ival = f; \
 289         PL_bufptr = s; \
 290         PL_last_uni = PL_oldbufptr; \
 291         if (*s == '(') \
 292             return REPORT( (int)FUNC1 ); \
 293         s = PEEKSPACE(s); \
 294         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 295         }
 296
 297 /* grandfather return to old style */
 298 #define OLDLOP(f) \
 299         do { \
 300             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 301                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 302             pl_yylval.ival = (f); \
 303             PL_expect = XTERM; \
 304             PL_bufptr = s; \
 305             return (int)LSTOP; \
 306         } while(0)
 307
 308 #ifdef DEBUGGING
 309
 310 /* how to interpret the pl_yylval associated with the token */
 311 enum token_type {
 312     TOKENTYPE_NONE,
 313     TOKENTYPE_IVAL,
 314     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 315     TOKENTYPE_PVAL,
 316     TOKENTYPE_OPVAL,
 317     TOKENTYPE_GVVAL
 318 };
 319
 320 static struct debug_tokens {
 321     const int token;
 322     enum token_type type;
 323     const char *name;
 324 } const debug_tokens[] =
 325 {
 326     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 327     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 328     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 329     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 330     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 331     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 332     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 333     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 334     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 335     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 336     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 337     { DO,               TOKENTYPE_NONE,         "DO" },
 338     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 339     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 340     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 341     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 342     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 343     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 344     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 345     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 346     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 347     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 348     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 349     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 350     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 351     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 352     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 353     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 354     { IF,               TOKENTYPE_IVAL,         "IF" },
 355     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 356     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 357     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 358     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 359     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 360     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 361     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 362     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 363     { MY,               TOKENTYPE_IVAL,         "MY" },
 364     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 365     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 366     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 367     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 368     { OROR,             TOKENTYPE_NONE,         "OROR" },
 369     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 370     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 371     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 372     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 373     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 374     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 375     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 376     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 377     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 378     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 379     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 380     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 381     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 382     { SUB,              TOKENTYPE_NONE,         "SUB" },
 383     { THING,            TOKENTYPE_OPVAL,        "THING" },
 384     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 385     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 386     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 387     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 388     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 389     { USE,              TOKENTYPE_IVAL,         "USE" },
 390     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 391     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 392     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 393     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 394     { 0,                TOKENTYPE_NONE,         NULL }
 395 };
 396
 397 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 398
 399 STATIC int
 400 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 401 {
 402     dVAR;
 403
 404     PERL_ARGS_ASSERT_TOKEREPORT;
 405
 406     if (DEBUG_T_TEST) {
 407         const char *name = NULL;
 408         enum token_type type = TOKENTYPE_NONE;
 409         const struct debug_tokens *p;
 410         SV* const report = newSVpvs("<== ");
 411
 412         for (p = debug_tokens; p->token; p++) {
 413             if (p->token == (int)rv) {
 414                 name = p->name;
 415                 type = p->type;
 416                 break;
 417             }
 418         }
 419         if (name)
 420             Perl_sv_catpv(aTHX_ report, name);
 421         else if ((char)rv > ' ' && (char)rv < '~')
 422             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 423         else if (!rv)
 424             sv_catpvs(report, "EOF");
 425         else
 426             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 427         switch (type) {
 428         case TOKENTYPE_NONE:
 429         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 430             break;
 431         case TOKENTYPE_IVAL:
 432             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 433             break;
 434         case TOKENTYPE_OPNUM:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 436                                     PL_op_name[lvalp->ival]);
 437             break;
 438         case TOKENTYPE_PVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 440             break;
 441         case TOKENTYPE_OPVAL:
 442             if (lvalp->opval) {
 443                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 444                                     PL_op_name[lvalp->opval->op_type]);
 445                 if (lvalp->opval->op_type == OP_CONST) {
 446                     Perl_sv_catpvf(aTHX_ report, " %s",
 447                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 448                 }
 449
 450             }
 451             else
 452                 sv_catpvs(report, "(opval=null)");
 453             break;
 454         }
 455         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 456     };
 457     return (int)rv;
 458 }
 459
 460
 461 /* print the buffer with suitable escapes */
 462
 463 STATIC void
 464 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 465 {
 466     SV* const tmp = newSVpvs("");
 467
 468     PERL_ARGS_ASSERT_PRINTBUF;
 469
 470     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 471     SvREFCNT_dec(tmp);
 472 }
 473
 474 #endif
 475
 476 static int
 477 S_deprecate_commaless_var_list(pTHX) {
 478     PL_expect = XTERM;
 479     deprecate("comma-less variable list");
 480     return REPORT(','); /* grandfather non-comma-format format */
 481 }
 482
 483 /*
 484  * S_ao
 485  *
 486  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 487  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 488  */
 489
 490 STATIC int
 491 S_ao(pTHX_ int toketype)
 492 {
 493     dVAR;
 494     if (*PL_bufptr == '=') {
 495         PL_bufptr++;
 496         if (toketype == ANDAND)
 497             pl_yylval.ival = OP_ANDASSIGN;
 498         else if (toketype == OROR)
 499             pl_yylval.ival = OP_ORASSIGN;
 500         else if (toketype == DORDOR)
 501             pl_yylval.ival = OP_DORASSIGN;
 502         toketype = ASSIGNOP;
 503     }
 504     return toketype;
 505 }
 506
 507 /*
 508  * S_no_op
 509  * When Perl expects an operator and finds something else, no_op
 510  * prints the warning.  It always prints "<something> found where
 511  * operator expected.  It prints "Missing semicolon on previous line?"
 512  * if the surprise occurs at the start of the line.  "do you need to
 513  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 514  * where the compiler doesn't know if foo is a method call or a function.
 515  * It prints "Missing operator before end of line" if there's nothing
 516  * after the missing operator, or "... before <...>" if there is something
 517  * after the missing operator.
 518  */
 519
 520 STATIC void
 521 S_no_op(pTHX_ const char *const what, char *s)
 522 {
 523     dVAR;
 524     char * const oldbp = PL_bufptr;
 525     const bool is_first = (PL_oldbufptr == PL_linestart);
 526
 527     PERL_ARGS_ASSERT_NO_OP;
 528
 529     if (!s)
 530         s = oldbp;
 531     else
 532         PL_bufptr = s;
 533     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 534     if (ckWARN_d(WARN_SYNTAX)) {
 535         if (is_first)
 536             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 537                     "\t(Missing semicolon on previous line?)\n");
 538         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 539             const char *t;
 540             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 541                 NOOP;
 542             if (t < PL_bufptr && isSPACE(*t))
 543                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                         "\t(Do you need to predeclare %.*s?)\n",
 545                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 546         }
 547         else {
 548             assert(s >= oldbp);
 549             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 550                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 551         }
 552     }
 553     PL_bufptr = oldbp;
 554 }
 555
 556 /*
 557  * S_missingterm
 558  * Complain about missing quote/regexp/heredoc terminator.
 559  * If it's called with NULL then it cauterizes the line buffer.
 560  * If we're in a delimited string and the delimiter is a control
 561  * character, it's reformatted into a two-char sequence like ^C.
 562  * This is fatal.
 563  */
 564
 565 STATIC void
 566 S_missingterm(pTHX_ char *s)
 567 {
 568     dVAR;
 569     char tmpbuf[3];
 570     char q;
 571     if (s) {
 572         char * const nl = strrchr(s,'\n');
 573         if (nl)
 574             *nl = '\0';
 575     }
 576     else if (isCNTRL(PL_multi_close)) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581     }
 582     else {
 583         *tmpbuf = (char)PL_multi_close;
 584         tmpbuf[1] = '\0';
 585         s = tmpbuf;
 586     }
 587     q = strchr(s,'"') ? '\'' : '"';
 588     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 589 }
 590
 591 /*
 592  * Check whether the named feature is enabled.
 593  */
 594 bool
 595 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 596 {
 597     dVAR;
 598     HV * const hinthv = GvHV(PL_hintgv);
 599     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 600
 601     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 602
 603     if (namelen > MAX_FEATURE_LEN)
 604         return FALSE;
 605     memcpy(&he_name[8], name, namelen);
 606
 607     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 608 }
 609
 610 /*
 611  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 612  * utf16-to-utf8-reversed.
 613  */
 614
 615 #ifdef PERL_CR_FILTER
 616 static void
 617 strip_return(SV *sv)
 618 {
 619     register const char *s = SvPVX_const(sv);
 620     register const char * const e = s + SvCUR(sv);
 621
 622     PERL_ARGS_ASSERT_STRIP_RETURN;
 623
 624     /* outer loop optimized to do nothing if there are no CR-LFs */
 625     while (s < e) {
 626         if (*s++ == '\r' && *s == '\n') {
 627             /* hit a CR-LF, need to copy the rest */
 628             register char *d = s - 1;
 629             *d++ = *s++;
 630             while (s < e) {
 631                 if (*s == '\r' && s[1] == '\n')
 632                     s++;
 633                 *d++ = *s++;
 634             }
 635             SvCUR(sv) -= s - d;
 636             return;
 637         }
 638     }
 639 }
 640
 641 STATIC I32
 642 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 643 {
 644     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 645     if (count > 0 && !maxlen)
 646         strip_return(sv);
 647     return count;
 648 }
 649 #endif
 650
 651 /*
 652 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 653
 654 Creates and initialises a new lexer/parser state object, supplying
 655 a context in which to lex and parse from a new source of Perl code.
 656 A pointer to the new state object is placed in L</PL_parser>.  An entry
 657 is made on the save stack so that upon unwinding the new state object
 658 will be destroyed and the former value of L</PL_parser> will be restored.
 659 Nothing else need be done to clean up the parsing context.
 660
 661 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 662 non-null, provides a string (in SV form) containing code to be parsed.
 663 A copy of the string is made, so subsequent modification of I<line>
 664 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 665 from which code will be read to be parsed.  If both are non-null, the
 666 code in I<line> comes first and must consist of complete lines of input,
 667 and I<rsfp> supplies the remainder of the source.
 668
 669 The I<flags> parameter is reserved for future use, and must always
 670 be zero.
 671
 672 =cut
 673 */
 674
 675 void
 676 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 677 {
 678     dVAR;
 679     const char *s = NULL;
 680     STRLEN len;
 681     yy_parser *parser, *oparser;
 682     if (flags)
 683         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 684
 685     /* create and initialise a parser */
 686
 687     Newxz(parser, 1, yy_parser);
 688     parser->old_parser = oparser = PL_parser;
 689     PL_parser = parser;
 690
 691     parser->stack = NULL;
 692     parser->ps = NULL;
 693     parser->stack_size = 0;
 694
 695     /* on scope exit, free this parser and restore any outer one */
 696     SAVEPARSER(parser);
 697     parser->saved_curcop = PL_curcop;
 698
 699     /* initialise lexer state */
 700
 701 #ifdef PERL_MAD
 702     parser->curforce = -1;
 703 #else
 704     parser->nexttoke = 0;
 705 #endif
 706     parser->error_count = oparser ? oparser->error_count : 0;
 707     parser->copline = NOLINE;
 708     parser->lex_state = LEX_NORMAL;
 709     parser->expect = XSTATE;
 710     parser->rsfp = rsfp;
 711     parser->rsfp_filters = newAV();
 712
 713     Newx(parser->lex_brackstack, 120, char);
 714     Newx(parser->lex_casestack, 12, char);
 715     *parser->lex_casestack = '\0';
 716
 717     if (line) {
 718         s = SvPV_const(line, len);
 719     } else {
 720         len = 0;
 721     }
 722
 723     if (!len) {
 724         parser->linestr = newSVpvs("\n;");
 725     } else {
 726         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 727         if (s[len-1] != ';')
 728             sv_catpvs(parser->linestr, "\n;");
 729     }
 730     parser->oldoldbufptr =
 731         parser->oldbufptr =
 732         parser->bufptr =
 733         parser->linestart = SvPVX(parser->linestr);
 734     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 735     parser->last_lop = parser->last_uni = NULL;
 736
 737     parser->in_pod = 0;
 738 }
 739
 740
 741 /* delete a parser object */
 742
 743 void
 744 Perl_parser_free(pTHX_  const yy_parser *parser)
 745 {
 746     PERL_ARGS_ASSERT_PARSER_FREE;
 747
 748     PL_curcop = parser->saved_curcop;
 749     SvREFCNT_dec(parser->linestr);
 750
 751     if (parser->rsfp == PerlIO_stdin())
 752         PerlIO_clearerr(parser->rsfp);
 753     else if (parser->rsfp && (!parser->old_parser ||
 754                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 755         PerlIO_close(parser->rsfp);
 756     SvREFCNT_dec(parser->rsfp_filters);
 757
 758     Safefree(parser->lex_brackstack);
 759     Safefree(parser->lex_casestack);
 760     PL_parser = parser->old_parser;
 761     Safefree(parser);
 762 }
 763
 764
 765 /*
 766 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 767
 768 Buffer scalar containing the chunk currently under consideration of the
 769 text currently being lexed.  This is always a plain string scalar (for
 770 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 771 normal scalar means; instead refer to the buffer directly by the pointer
 772 variables described below.
 773
 774 The lexer maintains various C<char*> pointers to things in the
 775 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 776 reallocated, all of these pointers must be updated.  Don't attempt to
 777 do this manually, but rather use L</lex_grow_linestr> if you need to
 778 reallocate the buffer.
 779
 780 The content of the text chunk in the buffer is commonly exactly one
 781 complete line of input, up to and including a newline terminator,
 782 but there are situations where it is otherwise.  The octets of the
 783 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 784 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 785 flag on this scalar, which may disagree with it.
 786
 787 For direct examination of the buffer, the variable
 788 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 789 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 790 of these pointers is usually preferable to examination of the scalar
 791 through normal scalar means.
 792
 793 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 794
 795 Direct pointer to the end of the chunk of text currently being lexed, the
 796 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 797 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 798 always located at the end of the buffer, and does not count as part of
 799 the buffer's contents.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 802
 803 Points to the current position of lexing inside the lexer buffer.
 804 Characters around this point may be freely examined, within
 805 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 806 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 807 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 808
 809 Lexing code (whether in the Perl core or not) moves this pointer past
 810 the characters that it consumes.  It is also expected to perform some
 811 bookkeeping whenever a newline character is consumed.  This movement
 812 can be more conveniently performed by the function L</lex_read_to>,
 813 which handles newlines appropriately.
 814
 815 Interpretation of the buffer's octets can be abstracted out by
 816 using the slightly higher-level functions L</lex_peek_unichar> and
 817 L</lex_read_unichar>.
 818
 819 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 820
 821 Points to the start of the current line inside the lexer buffer.
 822 This is useful for indicating at which column an error occurred, and
 823 not much else.  This must be updated by any lexing code that consumes
 824 a newline; the function L</lex_read_to> handles this detail.
 825
 826 =cut
 827 */
 828
 829 /*
 830 =for apidoc Amx|bool|lex_bufutf8
 831
 832 Indicates whether the octets in the lexer buffer
 833 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 834 of Unicode characters.  If not, they should be interpreted as Latin-1
 835 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 836
 837 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 838 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 839 encoding.
 840
 841 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 842 is significant, but not the whole story regarding the input character
 843 encoding.  Normally, when a file is being read, the scalar contains octets
 844 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 845 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 846 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 847 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 848 is in effect.  This logic may change in the future; use this function
 849 instead of implementing the logic yourself.
 850
 851 =cut
 852 */
 853
 854 bool
 855 Perl_lex_bufutf8(pTHX)
 856 {
 857     return UTF;
 858 }
 859
 860 /*
 861 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 862
 863 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 864 at least I<len> octets (including terminating NUL).  Returns a
 865 pointer to the reallocated buffer.  This is necessary before making
 866 any direct modification of the buffer that would increase its length.
 867 L</lex_stuff_pvn> provides a more convenient way to insert text into
 868 the buffer.
 869
 870 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 871 this function updates all of the lexer's variables that point directly
 872 into the buffer.
 873
 874 =cut
 875 */
 876
 877 char *
 878 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 879 {
 880     SV *linestr;
 881     char *buf;
 882     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 883     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 884     linestr = PL_parser->linestr;
 885     buf = SvPVX(linestr);
 886     if (len <= SvLEN(linestr))
 887         return buf;
 888     bufend_pos = PL_parser->bufend - buf;
 889     bufptr_pos = PL_parser->bufptr - buf;
 890     oldbufptr_pos = PL_parser->oldbufptr - buf;
 891     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 892     linestart_pos = PL_parser->linestart - buf;
 893     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 894     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 895     buf = sv_grow(linestr, len);
 896     PL_parser->bufend = buf + bufend_pos;
 897     PL_parser->bufptr = buf + bufptr_pos;
 898     PL_parser->oldbufptr = buf + oldbufptr_pos;
 899     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 900     PL_parser->linestart = buf + linestart_pos;
 901     if (PL_parser->last_uni)
 902         PL_parser->last_uni = buf + last_uni_pos;
 903     if (PL_parser->last_lop)
 904         PL_parser->last_lop = buf + last_lop_pos;
 905     return buf;
 906 }
 907
 908 /*
 909 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 910
 911 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 912 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 913 reallocating the buffer if necessary.  This means that lexing code that
 914 runs later will see the characters as if they had appeared in the input.
 915 It is not recommended to do this as part of normal parsing, and most
 916 uses of this facility run the risk of the inserted characters being
 917 interpreted in an unintended manner.
 918
 919 The string to be inserted is represented by I<len> octets starting
 920 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 921 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 922 The characters are recoded for the lexer buffer, according to how the
 923 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 924 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 925 function is more convenient.
 926
 927 =cut
 928 */
 929
 930 void
 931 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 932 {
 933     dVAR;
 934     char *bufptr;
 935     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 936     if (flags & ~(LEX_STUFF_UTF8))
 937         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 938     if (UTF) {
 939         if (flags & LEX_STUFF_UTF8) {
 940             goto plain_copy;
 941         } else {
 942             STRLEN highhalf = 0;
 943             const char *p, *e = pv+len;
 944             for (p = pv; p != e; p++)
 945                 highhalf += !!(((U8)*p) & 0x80);
 946             if (!highhalf)
 947                 goto plain_copy;
 948             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 949             bufptr = PL_parser->bufptr;
 950             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 951             SvCUR_set(PL_parser->linestr,
 952                 SvCUR(PL_parser->linestr) + len+highhalf);
 953             PL_parser->bufend += len+highhalf;
 954             for (p = pv; p != e; p++) {
 955                 U8 c = (U8)*p;
 956                 if (c & 0x80) {
 957                     *bufptr++ = (char)(0xc0 | (c >> 6));
 958                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 959                 } else {
 960                     *bufptr++ = (char)c;
 961                 }
 962             }
 963         }
 964     } else {
 965         if (flags & LEX_STUFF_UTF8) {
 966             STRLEN highhalf = 0;
 967             const char *p, *e = pv+len;
 968             for (p = pv; p != e; p++) {
 969                 U8 c = (U8)*p;
 970                 if (c >= 0xc4) {
 971                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 972                                 "non-Latin-1 character into Latin-1 input");
 973                 } else if (c >= 0xc2 && p+1 != e &&
 974                             (((U8)p[1]) & 0xc0) == 0x80) {
 975                     p++;
 976                     highhalf++;
 977                 } else if (c >= 0x80) {
 978                     /* malformed UTF-8 */
 979                     ENTER;
 980                     SAVESPTR(PL_warnhook);
 981                     PL_warnhook = PERL_WARNHOOK_FATAL;
 982                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 983                     LEAVE;
 984                 }
 985             }
 986             if (!highhalf)
 987                 goto plain_copy;
 988             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 989             bufptr = PL_parser->bufptr;
 990             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 991             SvCUR_set(PL_parser->linestr,
 992                 SvCUR(PL_parser->linestr) + len-highhalf);
 993             PL_parser->bufend += len-highhalf;
 994             for (p = pv; p != e; p++) {
 995                 U8 c = (U8)*p;
 996                 if (c & 0x80) {
 997                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
 998                     p++;
 999                 } else {
1000                     *bufptr++ = (char)c;
1001                 }
1002             }
1003         } else {
1004             plain_copy:
1005             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1006             bufptr = PL_parser->bufptr;
1007             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1008             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1009             PL_parser->bufend += len;
1010             Copy(pv, bufptr, len, char);
1011         }
1012     }
1013 }
1014
1015 /*
1016 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1017
1018 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1019 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1020 reallocating the buffer if necessary.  This means that lexing code that
1021 runs later will see the characters as if they had appeared in the input.
1022 It is not recommended to do this as part of normal parsing, and most
1023 uses of this facility run the risk of the inserted characters being
1024 interpreted in an unintended manner.
1025
1026 The string to be inserted is represented by octets starting at I<pv>
1027 and continuing to the first nul.  These octets are interpreted as either
1028 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1029 in I<flags>.  The characters are recoded for the lexer buffer, according
1030 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1031 If it is not convenient to nul-terminate a string to be inserted, the
1032 L</lex_stuff_pvn> function is more appropriate.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1039 {
1040     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1041     lex_stuff_pvn(pv, strlen(pv), flags);
1042 }
1043
1044 /*
1045 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1046
1047 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049 reallocating the buffer if necessary.  This means that lexing code that
1050 runs later will see the characters as if they had appeared in the input.
1051 It is not recommended to do this as part of normal parsing, and most
1052 uses of this facility run the risk of the inserted characters being
1053 interpreted in an unintended manner.
1054
1055 The string to be inserted is the string value of I<sv>.  The characters
1056 are recoded for the lexer buffer, according to how the buffer is currently
1057 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1058 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1059 need to construct a scalar.
1060
1061 =cut
1062 */
1063
1064 void
1065 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1066 {
1067     char *pv;
1068     STRLEN len;
1069     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1070     if (flags)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1072     pv = SvPV(sv, len);
1073     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1074 }
1075
1076 /*
1077 =for apidoc Amx|void|lex_unstuff|char *ptr
1078
1079 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1080 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1081 This hides the discarded text from any lexing code that runs later,
1082 as if the text had never appeared.
1083
1084 This is not the normal way to consume lexed text.  For that, use
1085 L</lex_read_to>.
1086
1087 =cut
1088 */
1089
1090 void
1091 Perl_lex_unstuff(pTHX_ char *ptr)
1092 {
1093     char *buf, *bufend;
1094     STRLEN unstuff_len;
1095     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1096     buf = PL_parser->bufptr;
1097     if (ptr < buf)
1098         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1099     if (ptr == buf)
1100         return;
1101     bufend = PL_parser->bufend;
1102     if (ptr > bufend)
1103         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1104     unstuff_len = ptr - buf;
1105     Move(ptr, buf, bufend+1-ptr, char);
1106     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1107     PL_parser->bufend = bufend - unstuff_len;
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_read_to|char *ptr
1112
1113 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1114 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1115 performing the correct bookkeeping whenever a newline character is passed.
1116 This is the normal way to consume lexed text.
1117
1118 Interpretation of the buffer's octets can be abstracted out by
1119 using the slightly higher-level functions L</lex_peek_unichar> and
1120 L</lex_read_unichar>.
1121
1122 =cut
1123 */
1124
1125 void
1126 Perl_lex_read_to(pTHX_ char *ptr)
1127 {
1128     char *s;
1129     PERL_ARGS_ASSERT_LEX_READ_TO;
1130     s = PL_parser->bufptr;
1131     if (ptr < s || ptr > PL_parser->bufend)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1133     for (; s != ptr; s++)
1134         if (*s == '\n') {
1135             CopLINE_inc(PL_curcop);
1136             PL_parser->linestart = s+1;
1137         }
1138     PL_parser->bufptr = ptr;
1139 }
1140
1141 /*
1142 =for apidoc Amx|void|lex_discard_to|char *ptr
1143
1144 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1145 up to I<ptr>.  The remaining content of the buffer will be moved, and
1146 all pointers into the buffer updated appropriately.  I<ptr> must not
1147 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1148 it is not permitted to discard text that has yet to be lexed.
1149
1150 Normally it is not necessarily to do this directly, because it suffices to
1151 use the implicit discarding behaviour of L</lex_next_chunk> and things
1152 based on it.  However, if a token stretches across multiple lines,
1153 and the lexing code has kept multiple lines of text in the buffer for
1154 that purpose, then after completion of the token it would be wise to
1155 explicitly discard the now-unneeded earlier lines, to avoid future
1156 multi-line tokens growing the buffer without bound.
1157
1158 =cut
1159 */
1160
1161 void
1162 Perl_lex_discard_to(pTHX_ char *ptr)
1163 {
1164     char *buf;
1165     STRLEN discard_len;
1166     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1167     buf = SvPVX(PL_parser->linestr);
1168     if (ptr < buf)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170     if (ptr == buf)
1171         return;
1172     if (ptr > PL_parser->bufptr)
1173         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174     discard_len = ptr - buf;
1175     if (PL_parser->oldbufptr < ptr)
1176         PL_parser->oldbufptr = ptr;
1177     if (PL_parser->oldoldbufptr < ptr)
1178         PL_parser->oldoldbufptr = ptr;
1179     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1180         PL_parser->last_uni = NULL;
1181     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1182         PL_parser->last_lop = NULL;
1183     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1184     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1185     PL_parser->bufend -= discard_len;
1186     PL_parser->bufptr -= discard_len;
1187     PL_parser->oldbufptr -= discard_len;
1188     PL_parser->oldoldbufptr -= discard_len;
1189     if (PL_parser->last_uni)
1190         PL_parser->last_uni -= discard_len;
1191     if (PL_parser->last_lop)
1192         PL_parser->last_lop -= discard_len;
1193 }
1194
1195 /*
1196 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1197
1198 Reads in the next chunk of text to be lexed, appending it to
1199 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1200 looked to the end of the current chunk and wants to know more.  It is
1201 usual, but not necessary, for lexing to have consumed the entirety of
1202 the current chunk at this time.
1203
1204 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1205 chunk (i.e., the current chunk has been entirely consumed), normally the
1206 current chunk will be discarded at the same time that the new chunk is
1207 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1208 will not be discarded.  If the current chunk has not been entirely
1209 consumed, then it will not be discarded regardless of the flag.
1210
1211 Returns true if some new text was added to the buffer, or false if the
1212 buffer has reached the end of the input text.
1213
1214 =cut
1215 */
1216
1217 #define LEX_FAKE_EOF 0x80000000
1218
1219 bool
1220 Perl_lex_next_chunk(pTHX_ U32 flags)
1221 {
1222     SV *linestr;
1223     char *buf;
1224     STRLEN old_bufend_pos, new_bufend_pos;
1225     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1226     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1227     bool got_some_for_debugger = 0;
1228     bool got_some;
1229     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1230         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1231     linestr = PL_parser->linestr;
1232     buf = SvPVX(linestr);
1233     if (!(flags & LEX_KEEP_PREVIOUS) &&
1234             PL_parser->bufptr == PL_parser->bufend) {
1235         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1236         linestart_pos = 0;
1237         if (PL_parser->last_uni != PL_parser->bufend)
1238             PL_parser->last_uni = NULL;
1239         if (PL_parser->last_lop != PL_parser->bufend)
1240             PL_parser->last_lop = NULL;
1241         last_uni_pos = last_lop_pos = 0;
1242         *buf = 0;
1243         SvCUR(linestr) = 0;
1244     } else {
1245         old_bufend_pos = PL_parser->bufend - buf;
1246         bufptr_pos = PL_parser->bufptr - buf;
1247         oldbufptr_pos = PL_parser->oldbufptr - buf;
1248         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1249         linestart_pos = PL_parser->linestart - buf;
1250         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1251         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1252     }
1253     if (flags & LEX_FAKE_EOF) {
1254         goto eof;
1255     } else if (!PL_parser->rsfp) {
1256         got_some = 0;
1257     } else if (filter_gets(linestr, old_bufend_pos)) {
1258         got_some = 1;
1259         got_some_for_debugger = 1;
1260     } else {
1261         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1262             sv_setpvs(linestr, "");
1263         eof:
1264         /* End of real input.  Close filehandle (unless it was STDIN),
1265          * then add implicit termination.
1266          */
1267         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1268             PerlIO_clearerr(PL_parser->rsfp);
1269         else if (PL_parser->rsfp)
1270             (void)PerlIO_close(PL_parser->rsfp);
1271         PL_parser->rsfp = NULL;
1272         PL_parser->in_pod = 0;
1273 #ifdef PERL_MAD
1274         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1275             PL_faketokens = 1;
1276 #endif
1277         if (!PL_in_eval && PL_minus_p) {
1278             sv_catpvs(linestr,
1279                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1280             PL_minus_n = PL_minus_p = 0;
1281         } else if (!PL_in_eval && PL_minus_n) {
1282             sv_catpvs(linestr, /*{*/";}");
1283             PL_minus_n = 0;
1284         } else
1285             sv_catpvs(linestr, ";");
1286         got_some = 1;
1287     }
1288     buf = SvPVX(linestr);
1289     new_bufend_pos = SvCUR(linestr);
1290     PL_parser->bufend = buf + new_bufend_pos;
1291     PL_parser->bufptr = buf + bufptr_pos;
1292     PL_parser->oldbufptr = buf + oldbufptr_pos;
1293     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1294     PL_parser->linestart = buf + linestart_pos;
1295     if (PL_parser->last_uni)
1296         PL_parser->last_uni = buf + last_uni_pos;
1297     if (PL_parser->last_lop)
1298         PL_parser->last_lop = buf + last_lop_pos;
1299     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1300             PL_curstash != PL_debstash) {
1301         /* debugger active and we're not compiling the debugger code,
1302          * so store the line into the debugger's array of lines
1303          */
1304         update_debugger_info(NULL, buf+old_bufend_pos,
1305             new_bufend_pos-old_bufend_pos);
1306     }
1307     return got_some;
1308 }
1309
1310 /*
1311 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1312
1313 Looks ahead one (Unicode) character in the text currently being lexed.
1314 Returns the codepoint (unsigned integer value) of the next character,
1315 or -1 if lexing has reached the end of the input text.  To consume the
1316 peeked character, use L</lex_read_unichar>.
1317
1318 If the next character is in (or extends into) the next chunk of input
1319 text, the next chunk will be read in.  Normally the current chunk will be
1320 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1321 then the current chunk will not be discarded.
1322
1323 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1324 is encountered, an exception is generated.
1325
1326 =cut
1327 */
1328
1329 I32
1330 Perl_lex_peek_unichar(pTHX_ U32 flags)
1331 {
1332     dVAR;
1333     char *s, *bufend;
1334     if (flags & ~(LEX_KEEP_PREVIOUS))
1335         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1336     s = PL_parser->bufptr;
1337     bufend = PL_parser->bufend;
1338     if (UTF) {
1339         U8 head;
1340         I32 unichar;
1341         STRLEN len, retlen;
1342         if (s == bufend) {
1343             if (!lex_next_chunk(flags))
1344                 return -1;
1345             s = PL_parser->bufptr;
1346             bufend = PL_parser->bufend;
1347         }
1348         head = (U8)*s;
1349         if (!(head & 0x80))
1350             return head;
1351         if (head & 0x40) {
1352             len = PL_utf8skip[head];
1353             while ((STRLEN)(bufend-s) < len) {
1354                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1355                     break;
1356                 s = PL_parser->bufptr;
1357                 bufend = PL_parser->bufend;
1358             }
1359         }
1360         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1361         if (retlen == (STRLEN)-1) {
1362             /* malformed UTF-8 */
1363             ENTER;
1364             SAVESPTR(PL_warnhook);
1365             PL_warnhook = PERL_WARNHOOK_FATAL;
1366             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1367             LEAVE;
1368         }
1369         return unichar;
1370     } else {
1371         if (s == bufend) {
1372             if (!lex_next_chunk(flags))
1373                 return -1;
1374             s = PL_parser->bufptr;
1375         }
1376         return (U8)*s;
1377     }
1378 }
1379
1380 /*
1381 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1382
1383 Reads the next (Unicode) character in the text currently being lexed.
1384 Returns the codepoint (unsigned integer value) of the character read,
1385 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1386 if lexing has reached the end of the input text.  To non-destructively
1387 examine the next character, use L</lex_peek_unichar> instead.
1388
1389 If the next character is in (or extends into) the next chunk of input
1390 text, the next chunk will be read in.  Normally the current chunk will be
1391 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1392 then the current chunk will not be discarded.
1393
1394 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1395 is encountered, an exception is generated.
1396
1397 =cut
1398 */
1399
1400 I32
1401 Perl_lex_read_unichar(pTHX_ U32 flags)
1402 {
1403     I32 c;
1404     if (flags & ~(LEX_KEEP_PREVIOUS))
1405         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1406     c = lex_peek_unichar(flags);
1407     if (c != -1) {
1408         if (c == '\n')
1409             CopLINE_inc(PL_curcop);
1410         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1411     }
1412     return c;
1413 }
1414
1415 /*
1416 =for apidoc Amx|void|lex_read_space|U32 flags
1417
1418 Reads optional spaces, in Perl style, in the text currently being
1419 lexed.  The spaces may include ordinary whitespace characters and
1420 Perl-style comments.  C<#line> directives are processed if encountered.
1421 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1422 at a non-space character (or the end of the input text).
1423
1424 If spaces extend into the next chunk of input text, the next chunk will
1425 be read in.  Normally the current chunk will be discarded at the same
1426 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1427 chunk will not be discarded.
1428
1429 =cut
1430 */
1431
1432 #define LEX_NO_NEXT_CHUNK 0x80000000
1433
1434 void
1435 Perl_lex_read_space(pTHX_ U32 flags)
1436 {
1437     char *s, *bufend;
1438     bool need_incline = 0;
1439     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1440         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1441 #ifdef PERL_MAD
1442     if (PL_skipwhite) {
1443         sv_free(PL_skipwhite);
1444         PL_skipwhite = NULL;
1445     }
1446     if (PL_madskills)
1447         PL_skipwhite = newSVpvs("");
1448 #endif /* PERL_MAD */
1449     s = PL_parser->bufptr;
1450     bufend = PL_parser->bufend;
1451     while (1) {
1452         char c = *s;
1453         if (c == '#') {
1454             do {
1455                 c = *++s;
1456             } while (!(c == '\n' || (c == 0 && s == bufend)));
1457         } else if (c == '\n') {
1458             s++;
1459             PL_parser->linestart = s;
1460             if (s == bufend)
1461                 need_incline = 1;
1462             else
1463                 incline(s);
1464         } else if (isSPACE(c)) {
1465             s++;
1466         } else if (c == 0 && s == bufend) {
1467             bool got_more;
1468 #ifdef PERL_MAD
1469             if (PL_madskills)
1470                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1471 #endif /* PERL_MAD */
1472             if (flags & LEX_NO_NEXT_CHUNK)
1473                 break;
1474             PL_parser->bufptr = s;
1475             CopLINE_inc(PL_curcop);
1476             got_more = lex_next_chunk(flags);
1477             CopLINE_dec(PL_curcop);
1478             s = PL_parser->bufptr;
1479             bufend = PL_parser->bufend;
1480             if (!got_more)
1481                 break;
1482             if (need_incline && PL_parser->rsfp) {
1483                 incline(s);
1484                 need_incline = 0;
1485             }
1486         } else {
1487             break;
1488         }
1489     }
1490 #ifdef PERL_MAD
1491     if (PL_madskills)
1492         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493 #endif /* PERL_MAD */
1494     PL_parser->bufptr = s;
1495 }
1496
1497 /*
1498  * S_incline
1499  * This subroutine has nothing to do with tilting, whether at windmills
1500  * or pinball tables.  Its name is short for "increment line".  It
1501  * increments the current line number in CopLINE(PL_curcop) and checks
1502  * to see whether the line starts with a comment of the form
1503  *    # line 500 "foo.pm"
1504  * If so, it sets the current line number and file to the values in the comment.
1505  */
1506
1507 STATIC void
1508 S_incline(pTHX_ const char *s)
1509 {
1510     dVAR;
1511     const char *t;
1512     const char *n;
1513     const char *e;
1514     line_t line_num;
1515
1516     PERL_ARGS_ASSERT_INCLINE;
1517
1518     CopLINE_inc(PL_curcop);
1519     if (*s++ != '#')
1520         return;
1521     while (SPACE_OR_TAB(*s))
1522         s++;
1523     if (strnEQ(s, "line", 4))
1524         s += 4;
1525     else
1526         return;
1527     if (SPACE_OR_TAB(*s))
1528         s++;
1529     else
1530         return;
1531     while (SPACE_OR_TAB(*s))
1532         s++;
1533     if (!isDIGIT(*s))
1534         return;
1535
1536     n = s;
1537     while (isDIGIT(*s))
1538         s++;
1539     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1540         return;
1541     while (SPACE_OR_TAB(*s))
1542         s++;
1543     if (*s == '"' && (t = strchr(s+1, '"'))) {
1544         s++;
1545         e = t + 1;
1546     }
1547     else {
1548         t = s;
1549         while (!isSPACE(*t))
1550             t++;
1551         e = t;
1552     }
1553     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1554         e++;
1555     if (*e != '\n' && *e != '\0')
1556         return;         /* false alarm */
1557
1558     line_num = atoi(n)-1;
1559
1560     if (t - s > 0) {
1561         const STRLEN len = t - s;
1562         SV *const temp_sv = CopFILESV(PL_curcop);
1563         const char *cf;
1564         STRLEN tmplen;
1565
1566         if (temp_sv) {
1567             cf = SvPVX(temp_sv);
1568             tmplen = SvCUR(temp_sv);
1569         } else {
1570             cf = NULL;
1571             tmplen = 0;
1572         }
1573
1574         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1575             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576              * to *{"::_<newfilename"} */
1577             /* However, the long form of evals is only turned on by the
1578                debugger - usually they're "(eval %lu)" */
1579             char smallbuf[128];
1580             char *tmpbuf;
1581             GV **gvp;
1582             STRLEN tmplen2 = len;
1583             if (tmplen + 2 <= sizeof smallbuf)
1584                 tmpbuf = smallbuf;
1585             else
1586                 Newx(tmpbuf, tmplen + 2, char);
1587             tmpbuf[0] = '_';
1588             tmpbuf[1] = '<';
1589             memcpy(tmpbuf + 2, cf, tmplen);
1590             tmplen += 2;
1591             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1592             if (gvp) {
1593                 char *tmpbuf2;
1594                 GV *gv2;
1595
1596                 if (tmplen2 + 2 <= sizeof smallbuf)
1597                     tmpbuf2 = smallbuf;
1598                 else
1599                     Newx(tmpbuf2, tmplen2 + 2, char);
1600
1601                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602                     /* Either they malloc'd it, or we malloc'd it,
1603                        so no prefix is present in ours.  */
1604                     tmpbuf2[0] = '_';
1605                     tmpbuf2[1] = '<';
1606                 }
1607
1608                 memcpy(tmpbuf2 + 2, s, tmplen2);
1609                 tmplen2 += 2;
1610
1611                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1612                 if (!isGV(gv2)) {
1613                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1614                     /* adjust ${"::_<newfilename"} to store the new file name */
1615                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1616                     /* The line number may differ. If that is the case,
1617                        alias the saved lines that are in the array.
1618                        Otherwise alias the whole array. */
1619                     if (CopLINE(PL_curcop) == line_num) {
1620                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1622                     }
1623                     else if (GvAV(*gvp)) {
1624                         AV * const av = GvAV(*gvp);
1625                         const I32 start = CopLINE(PL_curcop)+1;
1626                         I32 items = AvFILLp(av) - start;
1627                         if (items > 0) {
1628                             AV * const av2 = GvAVn(gv2);
1629                             SV **svp = AvARRAY(av) + start;
1630                             I32 l = (I32)line_num+1;
1631                             while (items--)
1632                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1633                         }
1634                     }
1635                 }
1636
1637                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1638             }
1639             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1640         }
1641         CopFILE_free(PL_curcop);
1642         CopFILE_setn(PL_curcop, s, len);
1643     }
1644     CopLINE_set(PL_curcop, line_num);
1645 }
1646
1647 #ifdef PERL_MAD
1648 /* skip space before PL_thistoken */
1649
1650 STATIC char *
1651 S_skipspace0(pTHX_ register char *s)
1652 {
1653     PERL_ARGS_ASSERT_SKIPSPACE0;
1654
1655     s = skipspace(s);
1656     if (!PL_madskills)
1657         return s;
1658     if (PL_skipwhite) {
1659         if (!PL_thiswhite)
1660             PL_thiswhite = newSVpvs("");
1661         sv_catsv(PL_thiswhite, PL_skipwhite);
1662         sv_free(PL_skipwhite);
1663         PL_skipwhite = 0;
1664     }
1665     PL_realtokenstart = s - SvPVX(PL_linestr);
1666     return s;
1667 }
1668
1669 /* skip space after PL_thistoken */
1670
1671 STATIC char *
1672 S_skipspace1(pTHX_ register char *s)
1673 {
1674     const char *start = s;
1675     I32 startoff = start - SvPVX(PL_linestr);
1676
1677     PERL_ARGS_ASSERT_SKIPSPACE1;
1678
1679     s = skipspace(s);
1680     if (!PL_madskills)
1681         return s;
1682     start = SvPVX(PL_linestr) + startoff;
1683     if (!PL_thistoken && PL_realtokenstart >= 0) {
1684         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1685         PL_thistoken = newSVpvn(tstart, start - tstart);
1686     }
1687     PL_realtokenstart = -1;
1688     if (PL_skipwhite) {
1689         if (!PL_nextwhite)
1690             PL_nextwhite = newSVpvs("");
1691         sv_catsv(PL_nextwhite, PL_skipwhite);
1692         sv_free(PL_skipwhite);
1693         PL_skipwhite = 0;
1694     }
1695     return s;
1696 }
1697
1698 STATIC char *
1699 S_skipspace2(pTHX_ register char *s, SV **svp)
1700 {
1701     char *start;
1702     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1703     const I32 startoff = s - SvPVX(PL_linestr);
1704
1705     PERL_ARGS_ASSERT_SKIPSPACE2;
1706
1707     s = skipspace(s);
1708     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1709     if (!PL_madskills || !svp)
1710         return s;
1711     start = SvPVX(PL_linestr) + startoff;
1712     if (!PL_thistoken && PL_realtokenstart >= 0) {
1713         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1714         PL_thistoken = newSVpvn(tstart, start - tstart);
1715         PL_realtokenstart = -1;
1716     }
1717     if (PL_skipwhite) {
1718         if (!*svp)
1719             *svp = newSVpvs("");
1720         sv_setsv(*svp, PL_skipwhite);
1721         sv_free(PL_skipwhite);
1722         PL_skipwhite = 0;
1723     }
1724
1725     return s;
1726 }
1727 #endif
1728
1729 STATIC void
1730 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1731 {
1732     AV *av = CopFILEAVx(PL_curcop);
1733     if (av) {
1734         SV * const sv = newSV_type(SVt_PVMG);
1735         if (orig_sv)
1736             sv_setsv(sv, orig_sv);
1737         else
1738             sv_setpvn(sv, buf, len);
1739         (void)SvIOK_on(sv);
1740         SvIV_set(sv, 0);
1741         av_store(av, (I32)CopLINE(PL_curcop), sv);
1742     }
1743 }
1744
1745 /*
1746  * S_skipspace
1747  * Called to gobble the appropriate amount and type of whitespace.
1748  * Skips comments as well.
1749  */
1750
1751 STATIC char *
1752 S_skipspace(pTHX_ register char *s)
1753 {
1754 #ifdef PERL_MAD
1755     char *start = s;
1756 #endif /* PERL_MAD */
1757     PERL_ARGS_ASSERT_SKIPSPACE;
1758 #ifdef PERL_MAD
1759     if (PL_skipwhite) {
1760         sv_free(PL_skipwhite);
1761         PL_skipwhite = NULL;
1762     }
1763 #endif /* PERL_MAD */
1764     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1765         while (s < PL_bufend && SPACE_OR_TAB(*s))
1766             s++;
1767     } else {
1768         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1769         PL_bufptr = s;
1770         lex_read_space(LEX_KEEP_PREVIOUS |
1771                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1772                     LEX_NO_NEXT_CHUNK : 0));
1773         s = PL_bufptr;
1774         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1775         if (PL_linestart > PL_bufptr)
1776             PL_bufptr = PL_linestart;
1777         return s;
1778     }
1779 #ifdef PERL_MAD
1780     if (PL_madskills)
1781         PL_skipwhite = newSVpvn(start, s-start);
1782 #endif /* PERL_MAD */
1783     return s;
1784 }
1785
1786 /*
1787  * S_check_uni
1788  * Check the unary operators to ensure there's no ambiguity in how they're
1789  * used.  An ambiguous piece of code would be:
1790  *     rand + 5
1791  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1792  * the +5 is its argument.
1793  */
1794
1795 STATIC void
1796 S_check_uni(pTHX)
1797 {
1798     dVAR;
1799     const char *s;
1800     const char *t;
1801
1802     if (PL_oldoldbufptr != PL_last_uni)
1803         return;
1804     while (isSPACE(*PL_last_uni))
1805         PL_last_uni++;
1806     s = PL_last_uni;
1807     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1808         s++;
1809     if ((t = strchr(s, '(')) && t < PL_bufptr)
1810         return;
1811
1812     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1813                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1814                      (int)(s - PL_last_uni), PL_last_uni);
1815 }
1816
1817 /*
1818  * LOP : macro to build a list operator.  Its behaviour has been replaced
1819  * with a subroutine, S_lop() for which LOP is just another name.
1820  */
1821
1822 #define LOP(f,x) return lop(f,x,s)
1823
1824 /*
1825  * S_lop
1826  * Build a list operator (or something that might be one).  The rules:
1827  *  - if we have a next token, then it's a list operator [why?]
1828  *  - if the next thing is an opening paren, then it's a function
1829  *  - else it's a list operator
1830  */
1831
1832 STATIC I32
1833 S_lop(pTHX_ I32 f, int x, char *s)
1834 {
1835     dVAR;
1836
1837     PERL_ARGS_ASSERT_LOP;
1838
1839     pl_yylval.ival = f;
1840     CLINE;
1841     PL_expect = x;
1842     PL_bufptr = s;
1843     PL_last_lop = PL_oldbufptr;
1844     PL_last_lop_op = (OPCODE)f;
1845 #ifdef PERL_MAD
1846     if (PL_lasttoke)
1847         goto lstop;
1848 #else
1849     if (PL_nexttoke)
1850         goto lstop;
1851 #endif
1852     if (*s == '(')
1853         return REPORT(FUNC);
1854     s = PEEKSPACE(s);
1855     if (*s == '(')
1856         return REPORT(FUNC);
1857     else {
1858         lstop:
1859         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1860             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1861         return REPORT(LSTOP);
1862     }
1863 }
1864
1865 #ifdef PERL_MAD
1866  /*
1867  * S_start_force
1868  * Sets up for an eventual force_next().  start_force(0) basically does
1869  * an unshift, while start_force(-1) does a push.  yylex removes items
1870  * on the "pop" end.
1871  */
1872
1873 STATIC void
1874 S_start_force(pTHX_ int where)
1875 {
1876     int i;
1877
1878     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1879         where = PL_lasttoke;
1880     assert(PL_curforce < 0 || PL_curforce == where);
1881     if (PL_curforce != where) {
1882         for (i = PL_lasttoke; i > where; --i) {
1883             PL_nexttoke[i] = PL_nexttoke[i-1];
1884         }
1885         PL_lasttoke++;
1886     }
1887     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1888         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1889     PL_curforce = where;
1890     if (PL_nextwhite) {
1891         if (PL_madskills)
1892             curmad('^', newSVpvs(""));
1893         CURMAD('_', PL_nextwhite);
1894     }
1895 }
1896
1897 STATIC void
1898 S_curmad(pTHX_ char slot, SV *sv)
1899 {
1900     MADPROP **where;
1901
1902     if (!sv)
1903         return;
1904     if (PL_curforce < 0)
1905         where = &PL_thismad;
1906     else
1907         where = &PL_nexttoke[PL_curforce].next_mad;
1908
1909     if (PL_faketokens)
1910         sv_setpvs(sv, "");
1911     else {
1912         if (!IN_BYTES) {
1913             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1914                 SvUTF8_on(sv);
1915             else if (PL_encoding) {
1916                 sv_recode_to_utf8(sv, PL_encoding);
1917             }
1918         }
1919     }
1920
1921     /* keep a slot open for the head of the list? */
1922     if (slot != '_' && *where && (*where)->mad_key == '^') {
1923         (*where)->mad_key = slot;
1924         sv_free(MUTABLE_SV(((*where)->mad_val)));
1925         (*where)->mad_val = (void*)sv;
1926     }
1927     else
1928         addmad(newMADsv(slot, sv), where, 0);
1929 }
1930 #else
1931 #  define start_force(where)    NOOP
1932 #  define curmad(slot, sv)      NOOP
1933 #endif
1934
1935 /*
1936  * S_force_next
1937  * When the lexer realizes it knows the next token (for instance,
1938  * it is reordering tokens for the parser) then it can call S_force_next
1939  * to know what token to return the next time the lexer is called.  Caller
1940  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1941  * and possibly PL_expect to ensure the lexer handles the token correctly.
1942  */
1943
1944 STATIC void
1945 S_force_next(pTHX_ I32 type)
1946 {
1947     dVAR;
1948 #ifdef DEBUGGING
1949     if (DEBUG_T_TEST) {
1950         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1951         tokereport(type, &NEXTVAL_NEXTTOKE);
1952     }
1953 #endif
1954 #ifdef PERL_MAD
1955     if (PL_curforce < 0)
1956         start_force(PL_lasttoke);
1957     PL_nexttoke[PL_curforce].next_type = type;
1958     if (PL_lex_state != LEX_KNOWNEXT)
1959         PL_lex_defer = PL_lex_state;
1960     PL_lex_state = LEX_KNOWNEXT;
1961     PL_lex_expect = PL_expect;
1962     PL_curforce = -1;
1963 #else
1964     PL_nexttype[PL_nexttoke] = type;
1965     PL_nexttoke++;
1966     if (PL_lex_state != LEX_KNOWNEXT) {
1967         PL_lex_defer = PL_lex_state;
1968         PL_lex_expect = PL_expect;
1969         PL_lex_state = LEX_KNOWNEXT;
1970     }
1971 #endif
1972 }
1973
1974 void
1975 Perl_yyunlex(pTHX)
1976 {
1977     int yyc = PL_parser->yychar;
1978     if (yyc != YYEMPTY) {
1979         if (yyc) {
1980             start_force(-1);
1981             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1982             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1983                 PL_lex_allbrackets--;
1984                 PL_lex_brackets--;
1985                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1986             } else if (yyc == '('/*)*/) {
1987                 PL_lex_allbrackets--;
1988                 yyc |= (2<<24);
1989             }
1990             force_next(yyc);
1991         }
1992         PL_parser->yychar = YYEMPTY;
1993     }
1994 }
1995
1996 STATIC SV *
1997 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1998 {
1999     dVAR;
2000     SV * const sv = newSVpvn_utf8(start, len,
2001                                   !IN_BYTES
2002                                   && UTF
2003                                   && !is_ascii_string((const U8*)start, len)
2004                                   && is_utf8_string((const U8*)start, len));
2005     return sv;
2006 }
2007
2008 /*
2009  * S_force_word
2010  * When the lexer knows the next thing is a word (for instance, it has
2011  * just seen -> and it knows that the next char is a word char, then
2012  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2013  * lookahead.
2014  *
2015  * Arguments:
2016  *   char *start : buffer position (must be within PL_linestr)
2017  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2018  *   int check_keyword : if true, Perl checks to make sure the word isn't
2019  *       a keyword (do this if the word is a label, e.g. goto FOO)
2020  *   int allow_pack : if true, : characters will also be allowed (require,
2021  *       use, etc. do this)
2022  *   int allow_initial_tick : used by the "sub" lexer only.
2023  */
2024
2025 STATIC char *
2026 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2027 {
2028     dVAR;
2029     register char *s;
2030     STRLEN len;
2031
2032     PERL_ARGS_ASSERT_FORCE_WORD;
2033
2034     start = SKIPSPACE1(start);
2035     s = start;
2036     if (isIDFIRST_lazy_if(s,UTF) ||
2037         (allow_pack && *s == ':') ||
2038         (allow_initial_tick && *s == '\'') )
2039     {
2040         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2041         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2042             return start;
2043         start_force(PL_curforce);
2044         if (PL_madskills)
2045             curmad('X', newSVpvn(start,s-start));
2046         if (token == METHOD) {
2047             s = SKIPSPACE1(s);
2048             if (*s == '(')
2049                 PL_expect = XTERM;
2050             else {
2051                 PL_expect = XOPERATOR;
2052             }
2053         }
2054         if (PL_madskills)
2055             curmad('g', newSVpvs( "forced" ));
2056         NEXTVAL_NEXTTOKE.opval
2057             = (OP*)newSVOP(OP_CONST,0,
2058                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2059         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2060         force_next(token);
2061     }
2062     return s;
2063 }
2064
2065 /*
2066  * S_force_ident
2067  * Called when the lexer wants $foo *foo &foo etc, but the program
2068  * text only contains the "foo" portion.  The first argument is a pointer
2069  * to the "foo", and the second argument is the type symbol to prefix.
2070  * Forces the next token to be a "WORD".
2071  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2072  */
2073
2074 STATIC void
2075 S_force_ident(pTHX_ register const char *s, int kind)
2076 {
2077     dVAR;
2078
2079     PERL_ARGS_ASSERT_FORCE_IDENT;
2080
2081     if (*s) {
2082         const STRLEN len = strlen(s);
2083         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2084         start_force(PL_curforce);
2085         NEXTVAL_NEXTTOKE.opval = o;
2086         force_next(WORD);
2087         if (kind) {
2088             o->op_private = OPpCONST_ENTERED;
2089             /* XXX see note in pp_entereval() for why we forgo typo
2090                warnings if the symbol must be introduced in an eval.
2091                GSAR 96-10-12 */
2092             gv_fetchpvn_flags(s, len,
2093                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2094                               : GV_ADD,
2095                               kind == '$' ? SVt_PV :
2096                               kind == '@' ? SVt_PVAV :
2097                               kind == '%' ? SVt_PVHV :
2098                               SVt_PVGV
2099                               );
2100         }
2101     }
2102 }
2103
2104 NV
2105 Perl_str_to_version(pTHX_ SV *sv)
2106 {
2107     NV retval = 0.0;
2108     NV nshift = 1.0;
2109     STRLEN len;
2110     const char *start = SvPV_const(sv,len);
2111     const char * const end = start + len;
2112     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2113
2114     PERL_ARGS_ASSERT_STR_TO_VERSION;
2115
2116     while (start < end) {
2117         STRLEN skip;
2118         UV n;
2119         if (utf)
2120             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2121         else {
2122             n = *(U8*)start;
2123             skip = 1;
2124         }
2125         retval += ((NV)n)/nshift;
2126         start += skip;
2127         nshift *= 1000;
2128     }
2129     return retval;
2130 }
2131
2132 /*
2133  * S_force_version
2134  * Forces the next token to be a version number.
2135  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2136  * and if "guessing" is TRUE, then no new token is created (and the caller
2137  * must use an alternative parsing method).
2138  */
2139
2140 STATIC char *
2141 S_force_version(pTHX_ char *s, int guessing)
2142 {
2143     dVAR;
2144     OP *version = NULL;
2145     char *d;
2146 #ifdef PERL_MAD
2147     I32 startoff = s - SvPVX(PL_linestr);
2148 #endif
2149
2150     PERL_ARGS_ASSERT_FORCE_VERSION;
2151
2152     s = SKIPSPACE1(s);
2153
2154     d = s;
2155     if (*d == 'v')
2156         d++;
2157     if (isDIGIT(*d)) {
2158         while (isDIGIT(*d) || *d == '_' || *d == '.')
2159             d++;
2160 #ifdef PERL_MAD
2161         if (PL_madskills) {
2162             start_force(PL_curforce);
2163             curmad('X', newSVpvn(s,d-s));
2164         }
2165 #endif
2166         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2167             SV *ver;
2168 #ifdef USE_LOCALE_NUMERIC
2169             char *loc = setlocale(LC_NUMERIC, "C");
2170 #endif
2171             s = scan_num(s, &pl_yylval);
2172 #ifdef USE_LOCALE_NUMERIC
2173             setlocale(LC_NUMERIC, loc);
2174 #endif
2175             version = pl_yylval.opval;
2176             ver = cSVOPx(version)->op_sv;
2177             if (SvPOK(ver) && !SvNIOK(ver)) {
2178                 SvUPGRADE(ver, SVt_PVNV);
2179                 SvNV_set(ver, str_to_version(ver));
2180                 SvNOK_on(ver);          /* hint that it is a version */
2181             }
2182         }
2183         else if (guessing) {
2184 #ifdef PERL_MAD
2185             if (PL_madskills) {
2186                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2187                 PL_nextwhite = 0;
2188                 s = SvPVX(PL_linestr) + startoff;
2189             }
2190 #endif
2191             return s;
2192         }
2193     }
2194
2195 #ifdef PERL_MAD
2196     if (PL_madskills && !version) {
2197         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2198         PL_nextwhite = 0;
2199         s = SvPVX(PL_linestr) + startoff;
2200     }
2201 #endif
2202     /* NOTE: The parser sees the package name and the VERSION swapped */
2203     start_force(PL_curforce);
2204     NEXTVAL_NEXTTOKE.opval = version;
2205     force_next(WORD);
2206
2207     return s;
2208 }
2209
2210 /*
2211  * S_force_strict_version
2212  * Forces the next token to be a version number using strict syntax rules.
2213  */
2214
2215 STATIC char *
2216 S_force_strict_version(pTHX_ char *s)
2217 {
2218     dVAR;
2219     OP *version = NULL;
2220 #ifdef PERL_MAD
2221     I32 startoff = s - SvPVX(PL_linestr);
2222 #endif
2223     const char *errstr = NULL;
2224
2225     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2226
2227     while (isSPACE(*s)) /* leading whitespace */
2228         s++;
2229
2230     if (is_STRICT_VERSION(s,&errstr)) {
2231         SV *ver = newSV(0);
2232         s = (char *)scan_version(s, ver, 0);
2233         version = newSVOP(OP_CONST, 0, ver);
2234     }
2235     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2236             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2237     {
2238         PL_bufptr = s;
2239         if (errstr)
2240             yyerror(errstr); /* version required */
2241         return s;
2242     }
2243
2244 #ifdef PERL_MAD
2245     if (PL_madskills && !version) {
2246         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2247         PL_nextwhite = 0;
2248         s = SvPVX(PL_linestr) + startoff;
2249     }
2250 #endif
2251     /* NOTE: The parser sees the package name and the VERSION swapped */
2252     start_force(PL_curforce);
2253     NEXTVAL_NEXTTOKE.opval = version;
2254     force_next(WORD);
2255
2256     return s;
2257 }
2258
2259 /*
2260  * S_tokeq
2261  * Tokenize a quoted string passed in as an SV.  It finds the next
2262  * chunk, up to end of string or a backslash.  It may make a new
2263  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2264  * turns \\ into \.
2265  */
2266
2267 STATIC SV *
2268 S_tokeq(pTHX_ SV *sv)
2269 {
2270     dVAR;
2271     register char *s;
2272     register char *send;
2273     register char *d;
2274     STRLEN len = 0;
2275     SV *pv = sv;
2276
2277     PERL_ARGS_ASSERT_TOKEQ;
2278
2279     if (!SvLEN(sv))
2280         goto finish;
2281
2282     s = SvPV_force(sv, len);
2283     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2284         goto finish;
2285     send = s + len;
2286     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2287     while (s < send && !(*s == '\\' && s[1] == '\\'))
2288         s++;
2289     if (s == send)
2290         goto finish;
2291     d = s;
2292     if ( PL_hints & HINT_NEW_STRING ) {
2293         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2294     }
2295     while (s < send) {
2296         if (*s == '\\') {
2297             if (s + 1 < send && (s[1] == '\\'))
2298                 s++;            /* all that, just for this */
2299         }
2300         *d++ = *s++;
2301     }
2302     *d = '\0';
2303     SvCUR_set(sv, d - SvPVX_const(sv));
2304   finish:
2305     if ( PL_hints & HINT_NEW_STRING )
2306        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2307     return sv;
2308 }
2309
2310 /*
2311  * Now come three functions related to double-quote context,
2312  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2313  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2314  * interact with PL_lex_state, and create fake ( ... ) argument lists
2315  * to handle functions and concatenation.
2316  * They assume that whoever calls them will be setting up a fake
2317  * join call, because each subthing puts a ',' after it.  This lets
2318  *   "lower \luPpEr"
2319  * become
2320  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2321  *
2322  * (I'm not sure whether the spurious commas at the end of lcfirst's
2323  * arguments and join's arguments are created or not).
2324  */
2325
2326 /*
2327  * S_sublex_start
2328  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2329  *
2330  * Pattern matching will set PL_lex_op to the pattern-matching op to
2331  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2332  *
2333  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2334  *
2335  * Everything else becomes a FUNC.
2336  *
2337  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2338  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2339  * call to S_sublex_push().
2340  */
2341
2342 STATIC I32
2343 S_sublex_start(pTHX)
2344 {
2345     dVAR;
2346     register const I32 op_type = pl_yylval.ival;
2347
2348     if (op_type == OP_NULL) {
2349         pl_yylval.opval = PL_lex_op;
2350         PL_lex_op = NULL;
2351         return THING;
2352     }
2353     if (op_type == OP_CONST || op_type == OP_READLINE) {
2354         SV *sv = tokeq(PL_lex_stuff);
2355
2356         if (SvTYPE(sv) == SVt_PVIV) {
2357             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2358             STRLEN len;
2359             const char * const p = SvPV_const(sv, len);
2360             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2361             SvREFCNT_dec(sv);
2362             sv = nsv;
2363         }
2364         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2365         PL_lex_stuff = NULL;
2366         /* Allow <FH> // "foo" */
2367         if (op_type == OP_READLINE)
2368             PL_expect = XTERMORDORDOR;
2369         return THING;
2370     }
2371     else if (op_type == OP_BACKTICK && PL_lex_op) {
2372         /* readpipe() vas overriden */
2373         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2374         pl_yylval.opval = PL_lex_op;
2375         PL_lex_op = NULL;
2376         PL_lex_stuff = NULL;
2377         return THING;
2378     }
2379
2380     PL_sublex_info.super_state = PL_lex_state;
2381     PL_sublex_info.sub_inwhat = (U16)op_type;
2382     PL_sublex_info.sub_op = PL_lex_op;
2383     PL_lex_state = LEX_INTERPPUSH;
2384
2385     PL_expect = XTERM;
2386     if (PL_lex_op) {
2387         pl_yylval.opval = PL_lex_op;
2388         PL_lex_op = NULL;
2389         return PMFUNC;
2390     }
2391     else
2392         return FUNC;
2393 }
2394
2395 /*
2396  * S_sublex_push
2397  * Create a new scope to save the lexing state.  The scope will be
2398  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2399  * to the uc, lc, etc. found before.
2400  * Sets PL_lex_state to LEX_INTERPCONCAT.
2401  */
2402
2403 STATIC I32
2404 S_sublex_push(pTHX)
2405 {
2406     dVAR;
2407     ENTER;
2408
2409     PL_lex_state = PL_sublex_info.super_state;
2410     SAVEBOOL(PL_lex_dojoin);
2411     SAVEI32(PL_lex_brackets);
2412     SAVEI32(PL_lex_allbrackets);
2413     SAVEI8(PL_lex_fakeeof);
2414     SAVEI32(PL_lex_casemods);
2415     SAVEI32(PL_lex_starts);
2416     SAVEI8(PL_lex_state);
2417     SAVEVPTR(PL_lex_inpat);
2418     SAVEI16(PL_lex_inwhat);
2419     SAVECOPLINE(PL_curcop);
2420     SAVEPPTR(PL_bufptr);
2421     SAVEPPTR(PL_bufend);
2422     SAVEPPTR(PL_oldbufptr);
2423     SAVEPPTR(PL_oldoldbufptr);
2424     SAVEPPTR(PL_last_lop);
2425     SAVEPPTR(PL_last_uni);
2426     SAVEPPTR(PL_linestart);
2427     SAVESPTR(PL_linestr);
2428     SAVEGENERICPV(PL_lex_brackstack);
2429     SAVEGENERICPV(PL_lex_casestack);
2430
2431     PL_linestr = PL_lex_stuff;
2432     PL_lex_stuff = NULL;
2433
2434     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2435         = SvPVX(PL_linestr);
2436     PL_bufend += SvCUR(PL_linestr);
2437     PL_last_lop = PL_last_uni = NULL;
2438     SAVEFREESV(PL_linestr);
2439
2440     PL_lex_dojoin = FALSE;
2441     PL_lex_brackets = 0;
2442     PL_lex_allbrackets = 0;
2443     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2444     Newx(PL_lex_brackstack, 120, char);
2445     Newx(PL_lex_casestack, 12, char);
2446     PL_lex_casemods = 0;
2447     *PL_lex_casestack = '\0';
2448     PL_lex_starts = 0;
2449     PL_lex_state = LEX_INTERPCONCAT;
2450     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2451
2452     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2453     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2454     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2455         PL_lex_inpat = PL_sublex_info.sub_op;
2456     else
2457         PL_lex_inpat = NULL;
2458
2459     return '(';
2460 }
2461
2462 /*
2463  * S_sublex_done
2464  * Restores lexer state after a S_sublex_push.
2465  */
2466
2467 STATIC I32
2468 S_sublex_done(pTHX)
2469 {
2470     dVAR;
2471     if (!PL_lex_starts++) {
2472         SV * const sv = newSVpvs("");
2473         if (SvUTF8(PL_linestr))
2474             SvUTF8_on(sv);
2475         PL_expect = XOPERATOR;
2476         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2477         return THING;
2478     }
2479
2480     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2481         PL_lex_state = LEX_INTERPCASEMOD;
2482         return yylex();
2483     }
2484
2485     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2486     assert(PL_lex_inwhat != OP_TRANSR);
2487     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2488         PL_linestr = PL_lex_repl;
2489         PL_lex_inpat = 0;
2490         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2491         PL_bufend += SvCUR(PL_linestr);
2492         PL_last_lop = PL_last_uni = NULL;
2493         SAVEFREESV(PL_linestr);
2494         PL_lex_dojoin = FALSE;
2495         PL_lex_brackets = 0;
2496         PL_lex_allbrackets = 0;
2497         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2498         PL_lex_casemods = 0;
2499         *PL_lex_casestack = '\0';
2500         PL_lex_starts = 0;
2501         if (SvEVALED(PL_lex_repl)) {
2502             PL_lex_state = LEX_INTERPNORMAL;
2503             PL_lex_starts++;
2504             /*  we don't clear PL_lex_repl here, so that we can check later
2505                 whether this is an evalled subst; that means we rely on the
2506                 logic to ensure sublex_done() is called again only via the
2507                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2508         }
2509         else {
2510             PL_lex_state = LEX_INTERPCONCAT;
2511             PL_lex_repl = NULL;
2512         }
2513         return ',';
2514     }
2515     else {
2516 #ifdef PERL_MAD
2517         if (PL_madskills) {
2518             if (PL_thiswhite) {
2519                 if (!PL_endwhite)
2520                     PL_endwhite = newSVpvs("");
2521                 sv_catsv(PL_endwhite, PL_thiswhite);
2522                 PL_thiswhite = 0;
2523             }
2524             if (PL_thistoken)
2525                 sv_setpvs(PL_thistoken,"");
2526             else
2527                 PL_realtokenstart = -1;
2528         }
2529 #endif
2530         LEAVE;
2531         PL_bufend = SvPVX(PL_linestr);
2532         PL_bufend += SvCUR(PL_linestr);
2533         PL_expect = XOPERATOR;
2534         PL_sublex_info.sub_inwhat = 0;
2535         return ')';
2536     }
2537 }
2538
2539 /*
2540   scan_const
2541
2542   Extracts a pattern, double-quoted string, or transliteration.  This
2543   is terrifying code.
2544
2545   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2546   processing a pattern (PL_lex_inpat is true), a transliteration
2547   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2548
2549   Returns a pointer to the character scanned up to. If this is
2550   advanced from the start pointer supplied (i.e. if anything was
2551   successfully parsed), will leave an OP for the substring scanned
2552   in pl_yylval. Caller must intuit reason for not parsing further
2553   by looking at the next characters herself.
2554
2555   In patterns:
2556     backslashes:
2557       constants: \N{NAME} only
2558       case and quoting: \U \Q \E
2559     stops on @ and $, but not for $ as tail anchor
2560
2561   In transliterations:
2562     characters are VERY literal, except for - not at the start or end
2563     of the string, which indicates a range. If the range is in bytes,
2564     scan_const expands the range to the full set of intermediate
2565     characters. If the range is in utf8, the hyphen is replaced with
2566     a certain range mark which will be handled by pmtrans() in op.c.
2567
2568   In double-quoted strings:
2569     backslashes:
2570       double-quoted style: \r and \n
2571       constants: \x31, etc.
2572       deprecated backrefs: \1 (in substitution replacements)
2573       case and quoting: \U \Q \E
2574     stops on @ and $
2575
2576   scan_const does *not* construct ops to handle interpolated strings.
2577   It stops processing as soon as it finds an embedded $ or @ variable
2578   and leaves it to the caller to work out what's going on.
2579
2580   embedded arrays (whether in pattern or not) could be:
2581       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2582
2583   $ in double-quoted strings must be the symbol of an embedded scalar.
2584
2585   $ in pattern could be $foo or could be tail anchor.  Assumption:
2586   it's a tail anchor if $ is the last thing in the string, or if it's
2587   followed by one of "()| \r\n\t"
2588
2589   \1 (backreferences) are turned into $1
2590
2591   The structure of the code is
2592       while (there's a character to process) {
2593           handle transliteration ranges
2594           skip regexp comments /(?#comment)/ and codes /(?{code})/
2595           skip #-initiated comments in //x patterns
2596           check for embedded arrays
2597           check for embedded scalars
2598           if (backslash) {
2599               deprecate \1 in substitution replacements
2600               handle string-changing backslashes \l \U \Q \E, etc.
2601               switch (what was escaped) {
2602                   handle \- in a transliteration (becomes a literal -)
2603                   if a pattern and not \N{, go treat as regular character
2604                   handle \132 (octal characters)
2605                   handle \x15 and \x{1234} (hex characters)
2606                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2607                   handle \cV (control characters)
2608                   handle printf-style backslashes (\f, \r, \n, etc)
2609               } (end switch)
2610               continue
2611           } (end if backslash)
2612           handle regular character
2613     } (end while character to read)
2614
2615 */
2616
2617 STATIC char *
2618 S_scan_const(pTHX_ char *start)
2619 {
2620     dVAR;
2621     register char *send = PL_bufend;            /* end of the constant */
2622     SV *sv = newSV(send - start);               /* sv for the constant.  See
2623                                                    note below on sizing. */
2624     register char *s = start;                   /* start of the constant */
2625     register char *d = SvPVX(sv);               /* destination for copies */
2626     bool dorange = FALSE;                       /* are we in a translit range? */
2627     bool didrange = FALSE;                      /* did we just finish a range? */
2628     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2629     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2630                                                    to be UTF8?  But, this can
2631                                                    show as true when the source
2632                                                    isn't utf8, as for example
2633                                                    when it is entirely composed
2634                                                    of hex constants */
2635
2636     /* Note on sizing:  The scanned constant is placed into sv, which is
2637      * initialized by newSV() assuming one byte of output for every byte of
2638      * input.  This routine expects newSV() to allocate an extra byte for a
2639      * trailing NUL, which this routine will append if it gets to the end of
2640      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2641      * CAPITAL LETTER A}), or more output than input if the constant ends up
2642      * recoded to utf8, but each time a construct is found that might increase
2643      * the needed size, SvGROW() is called.  Its size parameter each time is
2644      * based on the best guess estimate at the time, namely the length used so
2645      * far, plus the length the current construct will occupy, plus room for
2646      * the trailing NUL, plus one byte for every input byte still unscanned */
2647
2648     UV uv;
2649 #ifdef EBCDIC
2650     UV literal_endpoint = 0;
2651     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2652 #endif
2653
2654     PERL_ARGS_ASSERT_SCAN_CONST;
2655
2656     assert(PL_lex_inwhat != OP_TRANSR);
2657     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2658         /* If we are doing a trans and we know we want UTF8 set expectation */
2659         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2660         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2661     }
2662
2663
2664     while (s < send || dorange) {
2665
2666         /* get transliterations out of the way (they're most literal) */
2667         if (PL_lex_inwhat == OP_TRANS) {
2668             /* expand a range A-Z to the full set of characters.  AIE! */
2669             if (dorange) {
2670                 I32 i;                          /* current expanded character */
2671                 I32 min;                        /* first character in range */
2672                 I32 max;                        /* last character in range */
2673
2674 #ifdef EBCDIC
2675                 UV uvmax = 0;
2676 #endif
2677
2678                 if (has_utf8
2679 #ifdef EBCDIC
2680                     && !native_range
2681 #endif
2682                     ) {
2683                     char * const c = (char*)utf8_hop((U8*)d, -1);
2684                     char *e = d++;
2685                     while (e-- > c)
2686                         *(e + 1) = *e;
2687                     *c = (char)UTF_TO_NATIVE(0xff);
2688                     /* mark the range as done, and continue */
2689                     dorange = FALSE;
2690                     didrange = TRUE;
2691                     continue;
2692                 }
2693
2694                 i = d - SvPVX_const(sv);                /* remember current offset */
2695 #ifdef EBCDIC
2696                 SvGROW(sv,
2697                        SvLEN(sv) + (has_utf8 ?
2698                                     (512 - UTF_CONTINUATION_MARK +
2699                                      UNISKIP(0x100))
2700                                     : 256));
2701                 /* How many two-byte within 0..255: 128 in UTF-8,
2702                  * 96 in UTF-8-mod. */
2703 #else
2704                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2705 #endif
2706                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2707 #ifdef EBCDIC
2708                 if (has_utf8) {
2709                     int j;
2710                     for (j = 0; j <= 1; j++) {
2711                         char * const c = (char*)utf8_hop((U8*)d, -1);
2712                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2713                         if (j)
2714                             min = (U8)uv;
2715                         else if (uv < 256)
2716                             max = (U8)uv;
2717                         else {
2718                             max = (U8)0xff; /* only to \xff */
2719                             uvmax = uv; /* \x{100} to uvmax */
2720                         }
2721                         d = c; /* eat endpoint chars */
2722                      }
2723                 }
2724                else {
2725 #endif
2726                    d -= 2;              /* eat the first char and the - */
2727                    min = (U8)*d;        /* first char in range */
2728                    max = (U8)d[1];      /* last char in range  */
2729 #ifdef EBCDIC
2730                }
2731 #endif
2732
2733                 if (min > max) {
2734                     Perl_croak(aTHX_
2735                                "Invalid range \"%c-%c\" in transliteration operator",
2736                                (char)min, (char)max);
2737                 }
2738
2739 #ifdef EBCDIC
2740                 if (literal_endpoint == 2 &&
2741                     ((isLOWER(min) && isLOWER(max)) ||
2742                      (isUPPER(min) && isUPPER(max)))) {
2743                     if (isLOWER(min)) {
2744                         for (i = min; i <= max; i++)
2745                             if (isLOWER(i))
2746                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2747                     } else {
2748                         for (i = min; i <= max; i++)
2749                             if (isUPPER(i))
2750                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2751                     }
2752                 }
2753                 else
2754 #endif
2755                     for (i = min; i <= max; i++)
2756 #ifdef EBCDIC
2757                         if (has_utf8) {
2758                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2759                             if (UNI_IS_INVARIANT(ch))
2760                                 *d++ = (U8)i;
2761                             else {
2762                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2763                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2764                             }
2765                         }
2766                         else
2767 #endif
2768                             *d++ = (char)i;
2769
2770 #ifdef EBCDIC
2771                 if (uvmax) {
2772                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2773                     if (uvmax > 0x101)
2774                         *d++ = (char)UTF_TO_NATIVE(0xff);
2775                     if (uvmax > 0x100)
2776                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2777                 }
2778 #endif
2779
2780                 /* mark the range as done, and continue */
2781                 dorange = FALSE;
2782                 didrange = TRUE;
2783 #ifdef EBCDIC
2784                 literal_endpoint = 0;
2785 #endif
2786                 continue;
2787             }
2788
2789             /* range begins (ignore - as first or last char) */
2790             else if (*s == '-' && s+1 < send  && s != start) {
2791                 if (didrange) {
2792                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2793                 }
2794                 if (has_utf8
2795 #ifdef EBCDIC
2796                     && !native_range
2797 #endif
2798                     ) {
2799                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2800                     s++;
2801                     continue;
2802                 }
2803                 dorange = TRUE;
2804                 s++;
2805             }
2806             else {
2807                 didrange = FALSE;
2808 #ifdef EBCDIC
2809                 literal_endpoint = 0;
2810                 native_range = TRUE;
2811 #endif
2812             }
2813         }
2814
2815         /* if we get here, we're not doing a transliteration */
2816
2817         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2818            except for the last char, which will be done separately. */
2819         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2820             if (s[2] == '#') {
2821                 while (s+1 < send && *s != ')')
2822                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2823             }
2824             else if (s[2] == '{' /* This should match regcomp.c */
2825                     || (s[2] == '?' && s[3] == '{'))
2826             {
2827                 I32 count = 1;
2828                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2829                 char c;
2830
2831                 while (count && (c = *regparse)) {
2832                     if (c == '\\' && regparse[1])
2833                         regparse++;
2834                     else if (c == '{')
2835                         count++;
2836                     else if (c == '}')
2837                         count--;
2838                     regparse++;
2839                 }
2840                 if (*regparse != ')')
2841                     regparse--;         /* Leave one char for continuation. */
2842                 while (s < regparse)
2843                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2844             }
2845         }
2846
2847         /* likewise skip #-initiated comments in //x patterns */
2848         else if (*s == '#' && PL_lex_inpat &&
2849           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2850             while (s+1 < send && *s != '\n')
2851                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2852         }
2853
2854         /* check for embedded arrays
2855            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2856            */
2857         else if (*s == '@' && s[1]) {
2858             if (isALNUM_lazy_if(s+1,UTF))
2859                 break;
2860             if (strchr(":'{$", s[1]))
2861                 break;
2862             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2863                 break; /* in regexp, neither @+ nor @- are interpolated */
2864         }
2865
2866         /* check for embedded scalars.  only stop if we're sure it's a
2867            variable.
2868         */
2869         else if (*s == '$') {
2870             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2871                 break;
2872             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2873                 if (s[1] == '\\') {
2874                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2875                                    "Possible unintended interpolation of $\\ in regex");
2876                 }
2877                 break;          /* in regexp, $ might be tail anchor */
2878             }
2879         }
2880
2881         /* End of else if chain - OP_TRANS rejoin rest */
2882
2883         /* backslashes */
2884         if (*s == '\\' && s+1 < send) {
2885             char* e;    /* Can be used for ending '}', etc. */
2886
2887             s++;
2888
2889             /* warn on \1 - \9 in substitution replacements, but note that \11
2890              * is an octal; and \19 is \1 followed by '9' */
2891             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2892                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2893             {
2894                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2895                 *--s = '$';
2896                 break;
2897             }
2898
2899             /* string-change backslash escapes */
2900             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2901                 --s;
2902                 break;
2903             }
2904             /* In a pattern, process \N, but skip any other backslash escapes.
2905              * This is because we don't want to translate an escape sequence
2906              * into a meta symbol and have the regex compiler use the meta
2907              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2908              * in spite of this, we do have to process \N here while the proper
2909              * charnames handler is in scope.  See bugs #56444 and #62056.
2910              * There is a complication because \N in a pattern may also stand
2911              * for 'match a non-nl', and not mean a charname, in which case its
2912              * processing should be deferred to the regex compiler.  To be a
2913              * charname it must be followed immediately by a '{', and not look
2914              * like \N followed by a curly quantifier, i.e., not something like
2915              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2916              * quantifier */
2917             else if (PL_lex_inpat
2918                     && (*s != 'N'
2919                         || s[1] != '{'
2920                         || regcurly(s + 1)))
2921             {
2922                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2923                 goto default_action;
2924             }
2925
2926             switch (*s) {
2927
2928             /* quoted - in transliterations */
2929             case '-':
2930                 if (PL_lex_inwhat == OP_TRANS) {
2931                     *d++ = *s++;
2932                     continue;
2933                 }
2934                 /* FALL THROUGH */
2935             default:
2936                 {
2937                     if ((isALPHA(*s) || isDIGIT(*s)))
2938                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2939                                        "Unrecognized escape \\%c passed through",
2940                                        *s);
2941                     /* default action is to copy the quoted character */
2942                     goto default_action;
2943                 }
2944
2945             /* eg. \132 indicates the octal constant 0132 */
2946             case '0': case '1': case '2': case '3':
2947             case '4': case '5': case '6': case '7':
2948                 {
2949                     I32 flags = 0;
2950                     STRLEN len = 3;
2951                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2952                     s += len;
2953                 }
2954                 goto NUM_ESCAPE_INSERT;
2955
2956             /* eg. \o{24} indicates the octal constant \024 */
2957             case 'o':
2958                 {
2959                     STRLEN len;
2960                     const char* error;
2961
2962                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2963                     s += len;
2964                     if (! valid) {
2965                         yyerror(error);
2966                         continue;
2967                     }
2968                     goto NUM_ESCAPE_INSERT;
2969                 }
2970
2971             /* eg. \x24 indicates the hex constant 0x24 */
2972             case 'x':
2973                 ++s;
2974                 if (*s == '{') {
2975                     char* const e = strchr(s, '}');
2976                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2977                       PERL_SCAN_DISALLOW_PREFIX;
2978                     STRLEN len;
2979
2980                     ++s;
2981                     if (!e) {
2982                         yyerror("Missing right brace on \\x{}");
2983                         continue;
2984                     }
2985                     len = e - s;
2986                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2987                     s = e + 1;
2988                 }
2989                 else {
2990                     {
2991                         STRLEN len = 2;
2992                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2993                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2994                         s += len;
2995                     }
2996                 }
2997
2998               NUM_ESCAPE_INSERT:
2999                 /* Insert oct or hex escaped character.  There will always be
3000                  * enough room in sv since such escapes will be longer than any
3001                  * UTF-8 sequence they can end up as, except if they force us
3002                  * to recode the rest of the string into utf8 */
3003
3004                 /* Here uv is the ordinal of the next character being added in
3005                  * unicode (converted from native). */
3006                 if (!UNI_IS_INVARIANT(uv)) {
3007                     if (!has_utf8 && uv > 255) {
3008                         /* Might need to recode whatever we have accumulated so
3009                          * far if it contains any chars variant in utf8 or
3010                          * utf-ebcdic. */
3011
3012                         SvCUR_set(sv, d - SvPVX_const(sv));
3013                         SvPOK_on(sv);
3014                         *d = '\0';
3015                         /* See Note on sizing above.  */
3016                         sv_utf8_upgrade_flags_grow(sv,
3017                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3018                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3019                         d = SvPVX(sv) + SvCUR(sv);
3020                         has_utf8 = TRUE;
3021                     }
3022
3023                     if (has_utf8) {
3024                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3025                         if (PL_lex_inwhat == OP_TRANS &&
3026                             PL_sublex_info.sub_op) {
3027                             PL_sublex_info.sub_op->op_private |=
3028                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3029                                              : OPpTRANS_TO_UTF);
3030                         }
3031 #ifdef EBCDIC
3032                         if (uv > 255 && !dorange)
3033                             native_range = FALSE;
3034 #endif
3035                     }
3036                     else {
3037                         *d++ = (char)uv;
3038                     }
3039                 }
3040                 else {
3041                     *d++ = (char) uv;
3042                 }
3043                 continue;
3044
3045             case 'N':
3046                 /* In a non-pattern \N must be a named character, like \N{LATIN
3047                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3048                  * mean to match a non-newline.  For non-patterns, named
3049                  * characters are converted to their string equivalents. In
3050                  * patterns, named characters are not converted to their
3051                  * ultimate forms for the same reasons that other escapes
3052                  * aren't.  Instead, they are converted to the \N{U+...} form
3053                  * to get the value from the charnames that is in effect right
3054                  * now, while preserving the fact that it was a named character
3055                  * so that the regex compiler knows this */
3056
3057                 /* This section of code doesn't generally use the
3058                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3059                  * a close examination of this macro and determined it is a
3060                  * no-op except on utfebcdic variant characters.  Every
3061                  * character generated by this that would normally need to be
3062                  * enclosed by this macro is invariant, so the macro is not
3063                  * needed, and would complicate use of copy().  XXX There are
3064                  * other parts of this file where the macro is used
3065                  * inconsistently, but are saved by it being a no-op */
3066
3067                 /* The structure of this section of code (besides checking for
3068                  * errors and upgrading to utf8) is:
3069                  *  Further disambiguate between the two meanings of \N, and if
3070                  *      not a charname, go process it elsewhere
3071                  *  If of form \N{U+...}, pass it through if a pattern;
3072                  *      otherwise convert to utf8
3073                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3074                  *  pattern; otherwise convert to utf8 */
3075
3076                 /* Here, s points to the 'N'; the test below is guaranteed to
3077                  * succeed if we are being called on a pattern as we already
3078                  * know from a test above that the next character is a '{'.
3079                  * On a non-pattern \N must mean 'named sequence, which
3080                  * requires braces */
3081                 s++;
3082                 if (*s != '{') {
3083                     yyerror("Missing braces on \\N{}");
3084                     continue;
3085                 }
3086                 s++;
3087
3088                 /* If there is no matching '}', it is an error. */
3089                 if (! (e = strchr(s, '}'))) {
3090                     if (! PL_lex_inpat) {
3091                         yyerror("Missing right brace on \\N{}");
3092                     } else {
3093                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3094                     }
3095                     continue;
3096                 }
3097
3098                 /* Here it looks like a named character */
3099
3100                 if (PL_lex_inpat) {
3101
3102                     /* XXX This block is temporary code.  \N{} implies that the
3103                      * pattern is to have Unicode semantics, and therefore
3104                      * currently has to be encoded in utf8.  By putting it in
3105                      * utf8 now, we save a whole pass in the regular expression
3106                      * compiler.  Once that code is changed so Unicode
3107                      * semantics doesn't necessarily have to be in utf8, this
3108                      * block should be removed.  However, the code that parses
3109                      * the output of this would have to be changed to not
3110                      * necessarily expect utf8 */
3111                     if (!has_utf8) {
3112                         SvCUR_set(sv, d - SvPVX_const(sv));
3113                         SvPOK_on(sv);
3114                         *d = '\0';
3115                         /* See Note on sizing above.  */
3116                         sv_utf8_upgrade_flags_grow(sv,
3117                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3118                                         /* 5 = '\N{' + cur char + NUL */
3119                                         (STRLEN)(send - s) + 5);
3120                         d = SvPVX(sv) + SvCUR(sv);
3121                         has_utf8 = TRUE;
3122                     }
3123                 }
3124
3125                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3126                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3127                                 | PERL_SCAN_DISALLOW_PREFIX;
3128                     STRLEN len;
3129
3130                     /* For \N{U+...}, the '...' is a unicode value even on
3131                      * EBCDIC machines */
3132                     s += 2;         /* Skip to next char after the 'U+' */
3133                     len = e - s;
3134                     uv = grok_hex(s, &len, &flags, NULL);
3135                     if (len == 0 || len != (STRLEN)(e - s)) {
3136                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3137                         s = e + 1;
3138                         continue;
3139                     }
3140
3141                     if (PL_lex_inpat) {
3142
3143                         /* Pass through to the regex compiler unchanged.  The
3144                          * reason we evaluated the number above is to make sure
3145                          * there wasn't a syntax error. */
3146                         s -= 5;     /* Include the '\N{U+' */
3147                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3148                         d += e - s + 1;
3149                     }
3150                     else {  /* Not a pattern: convert the hex to string */
3151
3152                          /* If destination is not in utf8, unconditionally
3153                           * recode it to be so.  This is because \N{} implies
3154                           * Unicode semantics, and scalars have to be in utf8
3155                           * to guarantee those semantics */
3156                         if (! has_utf8) {
3157                             SvCUR_set(sv, d - SvPVX_const(sv));
3158                             SvPOK_on(sv);
3159                             *d = '\0';
3160                             /* See Note on sizing above.  */
3161                             sv_utf8_upgrade_flags_grow(
3162                                         sv,
3163                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3164                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3165                             d = SvPVX(sv) + SvCUR(sv);
3166                             has_utf8 = TRUE;
3167                         }
3168
3169                         /* Add the string to the output */
3170                         if (UNI_IS_INVARIANT(uv)) {
3171                             *d++ = (char) uv;
3172                         }
3173                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3174                     }
3175                 }
3176                 else { /* Here is \N{NAME} but not \N{U+...}. */
3177
3178                     SV *res;            /* result from charnames */
3179                     const char *str;    /* the string in 'res' */
3180                     STRLEN len;         /* its length */
3181
3182                     /* Get the value for NAME */
3183                     res = newSVpvn(s, e - s);
3184                     res = new_constant( NULL, 0, "charnames",
3185                                         /* includes all of: \N{...} */
3186                                         res, NULL, s - 3, e - s + 4 );
3187
3188                     /* Most likely res will be in utf8 already since the
3189                      * standard charnames uses pack U, but a custom translator
3190                      * can leave it otherwise, so make sure.  XXX This can be
3191                      * revisited to not have charnames use utf8 for characters
3192                      * that don't need it when regexes don't have to be in utf8
3193                      * for Unicode semantics.  If doing so, remember EBCDIC */
3194                     sv_utf8_upgrade(res);
3195                     str = SvPV_const(res, len);
3196
3197                     /* Don't accept malformed input */
3198                     if (! is_utf8_string((U8 *) str, len)) {
3199                         yyerror("Malformed UTF-8 returned by \\N");
3200                     }
3201                     else if (PL_lex_inpat) {
3202
3203                         if (! len) { /* The name resolved to an empty string */
3204                             Copy("\\N{}", d, 4, char);
3205                             d += 4;
3206                         }
3207                         else {
3208                             /* In order to not lose information for the regex
3209                             * compiler, pass the result in the specially made
3210                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3211                             * the code points in hex of each character
3212                             * returned by charnames */
3213
3214                             const char *str_end = str + len;
3215                             STRLEN char_length;     /* cur char's byte length */
3216                             STRLEN output_length;   /* and the number of bytes
3217                                                        after this is translated
3218                                                        into hex digits */
3219                             const STRLEN off = d - SvPVX_const(sv);
3220
3221                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3222                              * max('U+', '.'); and 1 for NUL */
3223                             char hex_string[2 * UTF8_MAXBYTES + 5];
3224
3225                             /* Get the first character of the result. */
3226                             U32 uv = utf8n_to_uvuni((U8 *) str,
3227                                                     len,
3228                                                     &char_length,
3229                                                     UTF8_ALLOW_ANYUV);
3230
3231                             /* The call to is_utf8_string() above hopefully
3232                              * guarantees that there won't be an error.  But
3233                              * it's easy here to make sure.  The function just
3234                              * above warns and returns 0 if invalid utf8, but
3235                              * it can also return 0 if the input is validly a
3236                              * NUL. Disambiguate */
3237                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3238                                 uv = UNICODE_REPLACEMENT;
3239                             }
3240
3241                             /* Convert first code point to hex, including the
3242                              * boiler plate before it */
3243                             output_length =
3244                                 my_snprintf(hex_string, sizeof(hex_string),
3245                                             "\\N{U+%X", (unsigned int) uv);
3246
3247                             /* Make sure there is enough space to hold it */
3248                             d = off + SvGROW(sv, off
3249                                                  + output_length
3250                                                  + (STRLEN)(send - e)
3251                                                  + 2);  /* '}' + NUL */
3252                             /* And output it */
3253                             Copy(hex_string, d, output_length, char);
3254                             d += output_length;
3255
3256                             /* For each subsequent character, append dot and
3257                              * its ordinal in hex */
3258                             while ((str += char_length) < str_end) {
3259                                 const STRLEN off = d - SvPVX_const(sv);
3260                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3261                                                         str_end - str,
3262                                                         &char_length,
3263                                                         UTF8_ALLOW_ANYUV);
3264                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3265                                     uv = UNICODE_REPLACEMENT;
3266                                 }
3267
3268                                 output_length =
3269                                     my_snprintf(hex_string, sizeof(hex_string),
3270                                                 ".%X", (unsigned int) uv);
3271
3272                                 d = off + SvGROW(sv, off
3273                                                      + output_length
3274                                                      + (STRLEN)(send - e)
3275                                                      + 2);      /* '}' +  NUL */
3276                                 Copy(hex_string, d, output_length, char);
3277                                 d += output_length;
3278                             }
3279
3280                             *d++ = '}'; /* Done.  Add the trailing brace */
3281                         }
3282                     }
3283                     else { /* Here, not in a pattern.  Convert the name to a
3284                             * string. */
3285
3286                          /* If destination is not in utf8, unconditionally
3287                           * recode it to be so.  This is because \N{} implies
3288                           * Unicode semantics, and scalars have to be in utf8
3289                           * to guarantee those semantics */
3290                         if (! has_utf8) {
3291                             SvCUR_set(sv, d - SvPVX_const(sv));
3292                             SvPOK_on(sv);
3293                             *d = '\0';
3294                             /* See Note on sizing above.  */
3295                             sv_utf8_upgrade_flags_grow(sv,
3296                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3297                                                 len + (STRLEN)(send - s) + 1);
3298                             d = SvPVX(sv) + SvCUR(sv);
3299                             has_utf8 = TRUE;
3300                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3301
3302                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3303                              * set correctly here). */
3304                             const STRLEN off = d - SvPVX_const(sv);
3305                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3306                         }
3307                         Copy(str, d, len, char);
3308                         d += len;
3309                     }
3310                     SvREFCNT_dec(res);
3311
3312                     /* Deprecate non-approved name syntax */
3313                     if (ckWARN_d(WARN_DEPRECATED)) {
3314                         bool problematic = FALSE;
3315                         char* i = s;
3316
3317                         /* For non-ut8 input, look to see that the first
3318                          * character is an alpha, then loop through the rest
3319                          * checking that each is a continuation */
3320                         if (! this_utf8) {
3321                             if (! isALPHAU(*i)) problematic = TRUE;
3322                             else for (i = s + 1; i < e; i++) {
3323                                 if (isCHARNAME_CONT(*i)) continue;
3324                                 problematic = TRUE;
3325                                 break;
3326                             }
3327                         }
3328                         else {
3329                             /* Similarly for utf8.  For invariants can check
3330                              * directly.  We accept anything above the latin1
3331                              * range because it is immaterial to Perl if it is
3332                              * correct or not, and is expensive to check.  But
3333                              * it is fairly easy in the latin1 range to convert
3334                              * the variants into a single character and check
3335                              * those */
3336                             if (UTF8_IS_INVARIANT(*i)) {
3337                                 if (! isALPHAU(*i)) problematic = TRUE;
3338                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3339                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3340                                                                             *(i+1)))))
3341                                 {
3342                                     problematic = TRUE;
3343                                 }
3344                             }
3345                             if (! problematic) for (i = s + UTF8SKIP(s);
3346                                                     i < e;
3347                                                     i+= UTF8SKIP(i))
3348                             {
3349                                 if (UTF8_IS_INVARIANT(*i)) {
3350                                     if (isCHARNAME_CONT(*i)) continue;
3351                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3352                                     continue;
3353                                 } else if (isCHARNAME_CONT(
3354                                             UNI_TO_NATIVE(
3355                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3356                                 {
3357                                     continue;
3358                                 }
3359                                 problematic = TRUE;
3360                                 break;
3361                             }
3362                         }
3363                         if (problematic) {
3364                             /* The e-i passed to the final %.*s makes sure that
3365                              * should the trailing NUL be missing that this
3366                              * print won't run off the end of the string */
3367                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3368                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3369                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3370                         }
3371                     }
3372                 } /* End \N{NAME} */
3373 #ifdef EBCDIC
3374                 if (!dorange)
3375                     native_range = FALSE; /* \N{} is defined to be Unicode */
3376 #endif
3377                 s = e + 1;  /* Point to just after the '}' */
3378                 continue;
3379
3380             /* \c is a control character */
3381             case 'c':
3382                 s++;
3383                 if (s < send) {
3384                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3385                 }
3386                 else {
3387                     yyerror("Missing control char name in \\c");
3388                 }
3389                 continue;
3390
3391             /* printf-style backslashes, formfeeds, newlines, etc */
3392             case 'b':
3393                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3394                 break;
3395             case 'n':
3396                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3397                 break;
3398             case 'r':
3399                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3400                 break;
3401             case 'f':
3402                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3403                 break;
3404             case 't':
3405                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3406                 break;
3407             case 'e':
3408                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3409                 break;
3410             case 'a':
3411                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3412                 break;
3413             } /* end switch */
3414
3415             s++;
3416             continue;
3417         } /* end if (backslash) */
3418 #ifdef EBCDIC
3419         else
3420             literal_endpoint++;
3421 #endif
3422
3423     default_action:
3424         /* If we started with encoded form, or already know we want it,
3425            then encode the next character */
3426         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3427             STRLEN len  = 1;
3428
3429
3430             /* One might think that it is wasted effort in the case of the
3431              * source being utf8 (this_utf8 == TRUE) to take the next character
3432              * in the source, convert it to an unsigned value, and then convert
3433              * it back again.  But the source has not been validated here.  The
3434              * routine that does the conversion checks for errors like
3435              * malformed utf8 */
3436
3437             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3438             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3439             if (!has_utf8) {
3440                 SvCUR_set(sv, d - SvPVX_const(sv));
3441                 SvPOK_on(sv);
3442                 *d = '\0';
3443                 /* See Note on sizing above.  */
3444                 sv_utf8_upgrade_flags_grow(sv,
3445                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3446                                         need + (STRLEN)(send - s) + 1);
3447                 d = SvPVX(sv) + SvCUR(sv);
3448                 has_utf8 = TRUE;
3449             } else if (need > len) {
3450                 /* encoded value larger than old, may need extra space (NOTE:
3451                  * SvCUR() is not set correctly here).   See Note on sizing
3452                  * above.  */
3453                 const STRLEN off = d - SvPVX_const(sv);
3454                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3455             }
3456             s += len;
3457
3458             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3459 #ifdef EBCDIC
3460             if (uv > 255 && !dorange)
3461                 native_range = FALSE;
3462 #endif
3463         }
3464         else {
3465             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3466         }
3467     } /* while loop to process each character */
3468
3469     /* terminate the string and set up the sv */
3470     *d = '\0';
3471     SvCUR_set(sv, d - SvPVX_const(sv));
3472     if (SvCUR(sv) >= SvLEN(sv))
3473         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3474
3475     SvPOK_on(sv);
3476     if (PL_encoding && !has_utf8) {
3477         sv_recode_to_utf8(sv, PL_encoding);
3478         if (SvUTF8(sv))
3479             has_utf8 = TRUE;
3480     }
3481     if (has_utf8) {
3482         SvUTF8_on(sv);
3483         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3484             PL_sublex_info.sub_op->op_private |=
3485                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3486         }
3487     }
3488
3489     /* shrink the sv if we allocated more than we used */
3490     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3491         SvPV_shrink_to_cur(sv);
3492     }
3493
3494     /* return the substring (via pl_yylval) only if we parsed anything */
3495     if (s > PL_bufptr) {
3496         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3497             const char *const key = PL_lex_inpat ? "qr" : "q";
3498             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3499             const char *type;
3500             STRLEN typelen;
3501
3502             if (PL_lex_inwhat == OP_TRANS) {
3503                 type = "tr";
3504                 typelen = 2;
3505             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3506                 type = "s";
3507                 typelen = 1;
3508             } else  {
3509                 type = "qq";
3510                 typelen = 2;
3511             }
3512
3513             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3514                                 type, typelen);
3515         }
3516         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3517     } else
3518         SvREFCNT_dec(sv);
3519     return s;
3520 }
3521
3522 /* S_intuit_more
3523  * Returns TRUE if there's more to the expression (e.g., a subscript),
3524  * FALSE otherwise.
3525  *
3526  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3527  *
3528  * ->[ and ->{ return TRUE
3529  * { and [ outside a pattern are always subscripts, so return TRUE
3530  * if we're outside a pattern and it's not { or [, then return FALSE
3531  * if we're in a pattern and the first char is a {
3532  *   {4,5} (any digits around the comma) returns FALSE
3533  * if we're in a pattern and the first char is a [
3534  *   [] returns FALSE
3535  *   [SOMETHING] has a funky algorithm to decide whether it's a
3536  *      character class or not.  It has to deal with things like
3537  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3538  * anything else returns TRUE
3539  */
3540
3541 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3542
3543 STATIC int
3544 S_intuit_more(pTHX_ register char *s)
3545 {
3546     dVAR;
3547
3548     PERL_ARGS_ASSERT_INTUIT_MORE;
3549
3550     if (PL_lex_brackets)
3551         return TRUE;
3552     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3553         return TRUE;
3554     if (*s != '{' && *s != '[')
3555         return FALSE;
3556     if (!PL_lex_inpat)
3557         return TRUE;
3558
3559     /* In a pattern, so maybe we have {n,m}. */
3560     if (*s == '{') {
3561         if (regcurly(s)) {
3562             return FALSE;
3563         }
3564         return TRUE;
3565     }
3566
3567     /* On the other hand, maybe we have a character class */
3568
3569     s++;
3570     if (*s == ']' || *s == '^')
3571         return FALSE;
3572     else {
3573         /* this is terrifying, and it works */
3574         int weight = 2;         /* let's weigh the evidence */
3575         char seen[256];
3576         unsigned char un_char = 255, last_un_char;
3577         const char * const send = strchr(s,']');
3578         char tmpbuf[sizeof PL_tokenbuf * 4];
3579
3580         if (!send)              /* has to be an expression */
3581             return TRUE;
3582
3583         Zero(seen,256,char);
3584         if (*s == '$')
3585             weight -= 3;
3586         else if (isDIGIT(*s)) {
3587             if (s[1] != ']') {
3588                 if (isDIGIT(s[1]) && s[2] == ']')
3589                     weight -= 10;
3590             }
3591             else
3592                 weight -= 100;
3593         }
3594         for (; s < send; s++) {
3595             last_un_char = un_char;
3596             un_char = (unsigned char)*s;
3597             switch (*s) {
3598             case '@':
3599             case '&':
3600             case '$':
3601                 weight -= seen[un_char] * 10;
3602                 if (isALNUM_lazy_if(s+1,UTF)) {
3603                     int len;
3604                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3605                     len = (int)strlen(tmpbuf);
3606                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3607                         weight -= 100;
3608                     else
3609                         weight -= 10;
3610                 }
3611                 else if (*s == '$' && s[1] &&
3612                   strchr("[#!%*<>()-=",s[1])) {
3613                     if (/*{*/ strchr("])} =",s[2]))
3614                         weight -= 10;
3615                     else
3616                         weight -= 1;
3617                 }
3618                 break;
3619             case '\\':
3620                 un_char = 254;
3621                 if (s[1]) {
3622                     if (strchr("wds]",s[1]))
3623                         weight += 100;
3624                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3625                         weight += 1;
3626                     else if (strchr("rnftbxcav",s[1]))
3627                         weight += 40;
3628                     else if (isDIGIT(s[1])) {
3629                         weight += 40;
3630                         while (s[1] && isDIGIT(s[1]))
3631                             s++;
3632                     }
3633                 }
3634                 else
3635                     weight += 100;
3636                 break;
3637             case '-':
3638                 if (s[1] == '\\')
3639                     weight += 50;
3640                 if (strchr("aA01! ",last_un_char))
3641                     weight += 30;
3642                 if (strchr("zZ79~",s[1]))
3643                     weight += 30;
3644                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3645                     weight -= 5;        /* cope with negative subscript */
3646                 break;
3647             default:
3648                 if (!isALNUM(last_un_char)
3649                     && !(last_un_char == '$' || last_un_char == '@'
3650                          || last_un_char == '&')
3651                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3652                     char *d = tmpbuf;
3653                     while (isALPHA(*s))
3654                         *d++ = *s++;
3655                     *d = '\0';
3656                     if (keyword(tmpbuf, d - tmpbuf, 0))
3657                         weight -= 150;
3658                 }
3659                 if (un_char == last_un_char + 1)
3660                     weight += 5;
3661                 weight -= seen[un_char];
3662                 break;
3663             }
3664             seen[un_char]++;
3665         }
3666         if (weight >= 0)        /* probably a character class */
3667             return FALSE;
3668     }
3669
3670     return TRUE;
3671 }
3672
3673 /*
3674  * S_intuit_method
3675  *
3676  * Does all the checking to disambiguate
3677  *   foo bar
3678  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3679  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3680  *
3681  * First argument is the stuff after the first token, e.g. "bar".
3682  *
3683  * Not a method if bar is a filehandle.
3684  * Not a method if foo is a subroutine prototyped to take a filehandle.
3685  * Not a method if it's really "Foo $bar"
3686  * Method if it's "foo $bar"
3687  * Not a method if it's really "print foo $bar"
3688  * Method if it's really "foo package::" (interpreted as package->foo)
3689  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3690  * Not a method if bar is a filehandle or package, but is quoted with
3691  *   =>
3692  */
3693
3694 STATIC int
3695 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3696 {
3697     dVAR;
3698     char *s = start + (*start == '$');
3699     char tmpbuf[sizeof PL_tokenbuf];
3700     STRLEN len;
3701     GV* indirgv;
3702 #ifdef PERL_MAD
3703     int soff;
3704 #endif
3705
3706     PERL_ARGS_ASSERT_INTUIT_METHOD;
3707
3708     if (gv) {
3709         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3710             return 0;
3711         if (cv) {
3712             if (SvPOK(cv)) {
3713                 const char *proto = SvPVX_const(cv);
3714                 if (proto) {
3715                     if (*proto == ';')
3716                         proto++;
3717                     if (*proto == '*')
3718                         return 0;
3719                 }
3720             }
3721         } else
3722             gv = NULL;
3723     }
3724     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3725     /* start is the beginning of the possible filehandle/object,
3726      * and s is the end of it
3727      * tmpbuf is a copy of it
3728      */
3729
3730     if (*start == '$') {
3731         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3732                 isUPPER(*PL_tokenbuf))
3733             return 0;
3734 #ifdef PERL_MAD
3735         len = start - SvPVX(PL_linestr);
3736 #endif
3737         s = PEEKSPACE(s);
3738 #ifdef PERL_MAD
3739         start = SvPVX(PL_linestr) + len;
3740 #endif
3741         PL_bufptr = start;
3742         PL_expect = XREF;
3743         return *s == '(' ? FUNCMETH : METHOD;
3744     }
3745     if (!keyword(tmpbuf, len, 0)) {
3746         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3747             len -= 2;
3748             tmpbuf[len] = '\0';
3749 #ifdef PERL_MAD
3750             soff = s - SvPVX(PL_linestr);
3751 #endif
3752             goto bare_package;
3753         }
3754         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3755         if (indirgv && GvCVu(indirgv))
3756             return 0;
3757         /* filehandle or package name makes it a method */
3758         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3759 #ifdef PERL_MAD
3760             soff = s - SvPVX(PL_linestr);
3761 #endif
3762             s = PEEKSPACE(s);
3763             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3764                 return 0;       /* no assumptions -- "=>" quotes bareword */
3765       bare_package:
3766             start_force(PL_curforce);
3767             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3768                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3769             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3770             if (PL_madskills)
3771                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3772             PL_expect = XTERM;
3773             force_next(WORD);
3774             PL_bufptr = s;
3775 #ifdef PERL_MAD
3776             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3777 #endif
3778             return *s == '(' ? FUNCMETH : METHOD;
3779         }
3780     }
3781     return 0;
3782 }
3783
3784 /* Encoded script support. filter_add() effectively inserts a
3785  * 'pre-processing' function into the current source input stream.
3786  * Note that the filter function only applies to the current source file
3787  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3788  *
3789  * The datasv parameter (which may be NULL) can be used to pass
3790  * private data to this instance of the filter. The filter function
3791  * can recover the SV using the FILTER_DATA macro and use it to
3792  * store private buffers and state information.
3793  *
3794  * The supplied datasv parameter is upgraded to a PVIO type
3795  * and the IoDIRP/IoANY field is used to store the function pointer,
3796  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3797  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3798  * private use must be set using malloc'd pointers.
3799  */
3800
3801 SV *
3802 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3803 {
3804     dVAR;
3805     if (!funcp)
3806         return NULL;
3807
3808     if (!PL_parser)
3809         return NULL;
3810
3811     if (!PL_rsfp_filters)
3812         PL_rsfp_filters = newAV();
3813     if (!datasv)
3814         datasv = newSV(0);
3815     SvUPGRADE(datasv, SVt_PVIO);
3816     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3817     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3818     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3819                           FPTR2DPTR(void *, IoANY(datasv)),
3820                           SvPV_nolen(datasv)));
3821     av_unshift(PL_rsfp_filters, 1);
3822     av_store(PL_rsfp_filters, 0, datasv) ;
3823     return(datasv);
3824 }
3825
3826
3827 /* Delete most recently added instance of this filter function. */
3828 void
3829 Perl_filter_del(pTHX_ filter_t funcp)
3830 {
3831     dVAR;
3832     SV *datasv;
3833
3834     PERL_ARGS_ASSERT_FILTER_DEL;
3835
3836 #ifdef DEBUGGING
3837     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3838                           FPTR2DPTR(void*, funcp)));
3839 #endif
3840     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3841         return;
3842     /* if filter is on top of stack (usual case) just pop it off */
3843     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3844     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3845         sv_free(av_pop(PL_rsfp_filters));
3846
3847         return;
3848     }
3849     /* we need to search for the correct entry and clear it     */
3850     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3851 }
3852
3853
3854 /* Invoke the idxth filter function for the current rsfp.        */
3855 /* maxlen 0 = read one text line */
3856 I32
3857 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3858 {
3859     dVAR;
3860     filter_t funcp;
3861     SV *datasv = NULL;
3862     /* This API is bad. It should have been using unsigned int for maxlen.
3863        Not sure if we want to change the API, but if not we should sanity
3864        check the value here.  */
3865     const unsigned int correct_length
3866         = maxlen < 0 ?
3867 #ifdef PERL_MICRO
3868         0x7FFFFFFF
3869 #else
3870         INT_MAX
3871 #endif
3872         : maxlen;
3873
3874     PERL_ARGS_ASSERT_FILTER_READ;
3875
3876     if (!PL_parser || !PL_rsfp_filters)
3877         return -1;
3878     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3879         /* Provide a default input filter to make life easy.    */
3880         /* Note that we append to the line. This is handy.      */
3881         DEBUG_P(PerlIO_printf(Perl_debug_log,
3882                               "filter_read %d: from rsfp\n", idx));
3883         if (correct_length) {
3884             /* Want a block */
3885             int len ;
3886             const int old_len = SvCUR(buf_sv);
3887
3888             /* ensure buf_sv is large enough */
3889             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3890             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3891                                    correct_length)) <= 0) {
3892                 if (PerlIO_error(PL_rsfp))
3893                     return -1;          /* error */
3894                 else
3895                     return 0 ;          /* end of file */
3896             }
3897             SvCUR_set(buf_sv, old_len + len) ;
3898             SvPVX(buf_sv)[old_len + len] = '\0';
3899         } else {
3900             /* Want a line */
3901             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3902                 if (PerlIO_error(PL_rsfp))
3903                     return -1;          /* error */
3904                 else
3905                     return 0 ;          /* end of file */
3906             }
3907         }
3908         return SvCUR(buf_sv);
3909     }
3910     /* Skip this filter slot if filter has been deleted */
3911     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3912         DEBUG_P(PerlIO_printf(Perl_debug_log,
3913                               "filter_read %d: skipped (filter deleted)\n",
3914                               idx));
3915         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3916     }
3917     /* Get function pointer hidden within datasv        */
3918     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3919     DEBUG_P(PerlIO_printf(Perl_debug_log,
3920                           "filter_read %d: via function %p (%s)\n",
3921                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3922     /* Call function. The function is expected to       */
3923     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3924     /* Return: <0:error, =0:eof, >0:not eof             */
3925     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3926 }
3927
3928 STATIC char *
3929 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3930 {
3931     dVAR;
3932
3933     PERL_ARGS_ASSERT_FILTER_GETS;
3934
3935 #ifdef PERL_CR_FILTER
3936     if (!PL_rsfp_filters) {
3937         filter_add(S_cr_textfilter,NULL);
3938     }
3939 #endif
3940     if (PL_rsfp_filters) {
3941         if (!append)
3942             SvCUR_set(sv, 0);   /* start with empty line        */
3943         if (FILTER_READ(0, sv, 0) > 0)
3944             return ( SvPVX(sv) ) ;
3945         else
3946             return NULL ;
3947     }
3948     else
3949         return (sv_gets(sv, PL_rsfp, append));
3950 }
3951
3952 STATIC HV *
3953 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3954 {
3955     dVAR;
3956     GV *gv;
3957
3958     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3959
3960     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3961         return PL_curstash;
3962
3963     if (len > 2 &&
3964         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3965         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3966     {
3967         return GvHV(gv);                        /* Foo:: */
3968     }
3969
3970     /* use constant CLASS => 'MyClass' */
3971     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3972     if (gv && GvCV(gv)) {
3973         SV * const sv = cv_const_sv(GvCV(gv));
3974         if (sv)
3975             pkgname = SvPV_const(sv, len);
3976     }
3977
3978     return gv_stashpvn(pkgname, len, 0);
3979 }
3980
3981 /*
3982  * S_readpipe_override
3983  * Check whether readpipe() is overridden, and generates the appropriate
3984  * optree, provided sublex_start() is called afterwards.
3985  */
3986 STATIC void
3987 S_readpipe_override(pTHX)
3988 {
3989     GV **gvp;
3990     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3991     pl_yylval.ival = OP_BACKTICK;
3992     if ((gv_readpipe
3993                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3994             ||
3995             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3996              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3997              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3998     {
3999         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4000             op_append_elem(OP_LIST,
4001                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4002                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4003     }
4004 }
4005
4006 #ifdef PERL_MAD
4007  /*
4008  * Perl_madlex
4009  * The intent of this yylex wrapper is to minimize the changes to the
4010  * tokener when we aren't interested in collecting madprops.  It remains
4011  * to be seen how successful this strategy will be...
4012  */
4013
4014 int
4015 Perl_madlex(pTHX)
4016 {
4017     int optype;
4018     char *s = PL_bufptr;
4019
4020     /* make sure PL_thiswhite is initialized */
4021     PL_thiswhite = 0;
4022     PL_thismad = 0;
4023
4024     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4025     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4026         return S_pending_ident(aTHX);
4027
4028     /* previous token ate up our whitespace? */
4029     if (!PL_lasttoke && PL_nextwhite) {
4030         PL_thiswhite = PL_nextwhite;
4031         PL_nextwhite = 0;
4032     }
4033
4034     /* isolate the token, and figure out where it is without whitespace */
4035     PL_realtokenstart = -1;
4036     PL_thistoken = 0;
4037     optype = yylex();
4038     s = PL_bufptr;
4039     assert(PL_curforce < 0);
4040
4041     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4042         if (!PL_thistoken) {
4043             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4044                 PL_thistoken = newSVpvs("");
4045             else {
4046                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4047                 PL_thistoken = newSVpvn(tstart, s - tstart);
4048             }
4049         }
4050         if (PL_thismad) /* install head */
4051             CURMAD('X', PL_thistoken);
4052     }
4053
4054     /* last whitespace of a sublex? */
4055     if (optype == ')' && PL_endwhite) {
4056         CURMAD('X', PL_endwhite);
4057     }
4058
4059     if (!PL_thismad) {
4060
4061         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4062         if (!PL_thiswhite && !PL_endwhite && !optype) {
4063             sv_free(PL_thistoken);
4064             PL_thistoken = 0;
4065             return 0;
4066         }
4067
4068         /* put off final whitespace till peg */
4069         if (optype == ';' && !PL_rsfp) {
4070             PL_nextwhite = PL_thiswhite;
4071             PL_thiswhite = 0;
4072         }
4073         else if (PL_thisopen) {
4074             CURMAD('q', PL_thisopen);
4075             if (PL_thistoken)
4076                 sv_free(PL_thistoken);
4077             PL_thistoken = 0;
4078         }
4079         else {
4080             /* Store actual token text as madprop X */
4081             CURMAD('X', PL_thistoken);
4082         }
4083
4084         if (PL_thiswhite) {
4085             /* add preceding whitespace as madprop _ */
4086             CURMAD('_', PL_thiswhite);
4087         }
4088
4089         if (PL_thisstuff) {
4090             /* add quoted material as madprop = */
4091             CURMAD('=', PL_thisstuff);
4092         }
4093
4094         if (PL_thisclose) {
4095             /* add terminating quote as madprop Q */
4096             CURMAD('Q', PL_thisclose);
4097         }
4098     }
4099
4100     /* special processing based on optype */
4101
4102     switch (optype) {
4103
4104     /* opval doesn't need a TOKEN since it can already store mp */
4105     case WORD:
4106     case METHOD:
4107     case FUNCMETH:
4108     case THING:
4109     case PMFUNC:
4110     case PRIVATEREF:
4111     case FUNC0SUB:
4112     case UNIOPSUB:
4113     case LSTOPSUB:
4114         if (pl_yylval.opval)
4115             append_madprops(PL_thismad, pl_yylval.opval, 0);
4116         PL_thismad = 0;
4117         return optype;
4118
4119     /* fake EOF */
4120     case 0:
4121         optype = PEG;
4122         if (PL_endwhite) {
4123             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4124             PL_endwhite = 0;
4125         }
4126         break;
4127
4128     case ']':
4129     case '}':
4130         if (PL_faketokens)
4131             break;
4132         /* remember any fake bracket that lexer is about to discard */
4133         if (PL_lex_brackets == 1 &&
4134             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4135         {
4136             s = PL_bufptr;
4137             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4138                 s++;
4139             if (*s == '}') {
4140                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4141                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4142                 PL_thiswhite = 0;
4143                 PL_bufptr = s - 1;
4144                 break;  /* don't bother looking for trailing comment */
4145             }
4146             else
4147                 s = PL_bufptr;
4148         }
4149         if (optype == ']')
4150             break;
4151         /* FALLTHROUGH */
4152
4153     /* attach a trailing comment to its statement instead of next token */
4154     case ';':
4155         if (PL_faketokens)
4156             break;
4157         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4158             s = PL_bufptr;
4159             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4160                 s++;
4161             if (*s == '\n' || *s == '#') {
4162                 while (s < PL_bufend && *s != '\n')
4163                     s++;
4164                 if (s < PL_bufend)
4165                     s++;
4166                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4167                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4168                 PL_thiswhite = 0;
4169                 PL_bufptr = s;
4170             }
4171         }
4172         break;
4173
4174     /* pval */
4175     case LABEL:
4176         break;
4177
4178     /* ival */
4179     default:
4180         break;
4181
4182     }
4183
4184     /* Create new token struct.  Note: opvals return early above. */
4185     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4186     PL_thismad = 0;
4187     return optype;
4188 }
4189 #endif
4190
4191 STATIC char *
4192 S_tokenize_use(pTHX_ int is_use, char *s) {
4193     dVAR;
4194
4195     PERL_ARGS_ASSERT_TOKENIZE_USE;
4196
4197     if (PL_expect != XSTATE)
4198         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4199                     is_use ? "use" : "no"));
4200     s = SKIPSPACE1(s);
4201     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4202         s = force_version(s, TRUE);
4203         if (*s == ';' || *s == '}'
4204                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4205             start_force(PL_curforce);
4206             NEXTVAL_NEXTTOKE.opval = NULL;
4207             force_next(WORD);
4208         }
4209         else if (*s == 'v') {
4210             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4211             s = force_version(s, FALSE);
4212         }
4213     }
4214     else {
4215         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4216         s = force_version(s, FALSE);
4217     }
4218     pl_yylval.ival = is_use;
4219     return s;
4220 }
4221 #ifdef DEBUGGING
4222     static const char* const exp_name[] =
4223         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4224           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4225         };
4226 #endif
4227
4228 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4229 STATIC bool
4230 S_word_takes_any_delimeter(char *p, STRLEN len)
4231 {
4232     return (len == 1 && strchr("msyq", p[0])) ||
4233            (len == 2 && (
4234             (p[0] == 't' && p[1] == 'r') ||
4235             (p[0] == 'q' && strchr("qwxr", p[1]))));
4236 }
4237
4238 /*
4239   yylex
4240
4241   Works out what to call the token just pulled out of the input
4242   stream.  The yacc parser takes care of taking the ops we return and
4243   stitching them into a tree.
4244
4245   Returns:
4246     PRIVATEREF
4247
4248   Structure:
4249       if read an identifier
4250           if we're in a my declaration
4251               croak if they tried to say my($foo::bar)
4252               build the ops for a my() declaration
4253           if it's an access to a my() variable
4254               are we in a sort block?
4255                   croak if my($a); $a <=> $b
4256               build ops for access to a my() variable
4257           if in a dq string, and they've said @foo and we can't find @foo
4258               croak
4259           build ops for a bareword
4260       if we already built the token before, use it.
4261 */
4262
4263
4264 #ifdef __SC__
4265 #pragma segment Perl_yylex
4266 #endif
4267 int
4268 Perl_yylex(pTHX)
4269 {
4270     dVAR;
4271     register char *s = PL_bufptr;
4272     register char *d;
4273     STRLEN len;
4274     bool bof = FALSE;
4275     U32 fake_eof = 0;
4276
4277     /* orig_keyword, gvp, and gv are initialized here because
4278      * jump to the label just_a_word_zero can bypass their
4279      * initialization later. */
4280     I32 orig_keyword = 0;
4281     GV *gv = NULL;
4282     GV **gvp = NULL;
4283
4284     DEBUG_T( {
4285         SV* tmp = newSVpvs("");
4286         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4287             (IV)CopLINE(PL_curcop),
4288             lex_state_names[PL_lex_state],
4289             exp_name[PL_expect],
4290             pv_display(tmp, s, strlen(s), 0, 60));
4291         SvREFCNT_dec(tmp);
4292     } );
4293     /* check if there's an identifier for us to look at */
4294     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4295         return REPORT(S_pending_ident(aTHX));
4296
4297     /* no identifier pending identification */
4298
4299     switch (PL_lex_state) {
4300 #ifdef COMMENTARY
4301     case LEX_NORMAL:            /* Some compilers will produce faster */
4302     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4303         break;
4304 #endif
4305
4306     /* when we've already built the next token, just pull it out of the queue */
4307     case LEX_KNOWNEXT:
4308 #ifdef PERL_MAD
4309         PL_lasttoke--;
4310         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4311         if (PL_madskills) {
4312             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4313             PL_nexttoke[PL_lasttoke].next_mad = 0;
4314             if (PL_thismad && PL_thismad->mad_key == '_') {
4315                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4316                 PL_thismad->mad_val = 0;
4317                 mad_free(PL_thismad);
4318                 PL_thismad = 0;
4319             }
4320         }
4321         if (!PL_lasttoke) {
4322             PL_lex_state = PL_lex_defer;
4323             PL_expect = PL_lex_expect;
4324             PL_lex_defer = LEX_NORMAL;
4325             if (!PL_nexttoke[PL_lasttoke].next_type)
4326                 return yylex();
4327         }
4328 #else
4329         PL_nexttoke--;
4330         pl_yylval = PL_nextval[PL_nexttoke];
4331         if (!PL_nexttoke) {
4332             PL_lex_state = PL_lex_defer;
4333             PL_expect = PL_lex_expect;
4334             PL_lex_defer = LEX_NORMAL;
4335         }
4336 #endif
4337         {
4338             I32 next_type;
4339 #ifdef PERL_MAD
4340             next_type = PL_nexttoke[PL_lasttoke].next_type;
4341 #else
4342             next_type = PL_nexttype[PL_nexttoke];
4343 #endif
4344             if (next_type & (7<<24)) {
4345                 if (next_type & (1<<24)) {
4346                     if (PL_lex_brackets > 100)
4347                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4348                     PL_lex_brackstack[PL_lex_brackets++] =
4349                         (char) ((next_type >> 16) & 0xff);
4350                 }
4351                 if (next_type & (2<<24))
4352                     PL_lex_allbrackets++;
4353                 if (next_type & (4<<24))
4354                     PL_lex_allbrackets--;
4355                 next_type &= 0xffff;
4356             }
4357 #ifdef PERL_MAD
4358             /* FIXME - can these be merged?  */
4359             return next_type;
4360 #else
4361             return REPORT(next_type);
4362 #endif
4363         }
4364
4365     /* interpolated case modifiers like \L \U, including \Q and \E.
4366        when we get here, PL_bufptr is at the \
4367     */
4368     case LEX_INTERPCASEMOD:
4369 #ifdef DEBUGGING
4370         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4371             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4372 #endif
4373         /* handle \E or end of string */
4374         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4375             /* if at a \E */
4376             if (PL_lex_casemods) {
4377                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4378                 PL_lex_casestack[PL_lex_casemods] = '\0';
4379
4380                 if (PL_bufptr != PL_bufend
4381                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4382                     PL_bufptr += 2;
4383                     PL_lex_state = LEX_INTERPCONCAT;
4384 #ifdef PERL_MAD
4385                     if (PL_madskills)
4386                         PL_thistoken = newSVpvs("\\E");
4387 #endif
4388                 }
4389                 PL_lex_allbrackets--;
4390                 return REPORT(')');
4391             }
4392 #ifdef PERL_MAD
4393             while (PL_bufptr != PL_bufend &&
4394               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4395                 if (!PL_thiswhite)
4396                     PL_thiswhite = newSVpvs("");
4397                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4398                 PL_bufptr += 2;
4399             }
4400 #else
4401             if (PL_bufptr != PL_bufend)
4402                 PL_bufptr += 2;
4403 #endif
4404             PL_lex_state = LEX_INTERPCONCAT;
4405             return yylex();
4406         }
4407         else {
4408             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4409               "### Saw case modifier\n"); });
4410             s = PL_bufptr + 1;
4411             if (s[1] == '\\' && s[2] == 'E') {
4412 #ifdef PERL_MAD
4413                 if (!PL_thiswhite)
4414                     PL_thiswhite = newSVpvs("");
4415                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4416 #endif
4417                 PL_bufptr = s + 3;
4418                 PL_lex_state = LEX_INTERPCONCAT;
4419                 return yylex();
4420             }
4421             else {
4422                 I32 tmp;
4423                 if (!PL_madskills) /* when just compiling don't need correct */
4424                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4425                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4426                 if ((*s == 'L' || *s == 'U') &&
4427                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4428                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4429                     PL_lex_allbrackets--;
4430                     return REPORT(')');
4431                 }
4432                 if (PL_lex_casemods > 10)
4433                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4434                 PL_lex_casestack[PL_lex_casemods++] = *s;
4435                 PL_lex_casestack[PL_lex_casemods] = '\0';
4436                 PL_lex_state = LEX_INTERPCONCAT;
4437                 start_force(PL_curforce);
4438                 NEXTVAL_NEXTTOKE.ival = 0;
4439                 force_next((2<<24)|'(');
4440                 start_force(PL_curforce);
4441                 if (*s == 'l')
4442                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4443                 else if (*s == 'u')
4444                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4445                 else if (*s == 'L')
4446                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4447                 else if (*s == 'U')
4448                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4449                 else if (*s == 'Q')
4450                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4451                 else
4452                     Perl_croak(aTHX_ "panic: yylex");
4453                 if (PL_madskills) {
4454                     SV* const tmpsv = newSVpvs("\\ ");
4455                     /* replace the space with the character we want to escape
4456                      */
4457                     SvPVX(tmpsv)[1] = *s;
4458                     curmad('_', tmpsv);
4459                 }
4460                 PL_bufptr = s + 1;
4461             }
4462             force_next(FUNC);
4463             if (PL_lex_starts) {
4464                 s = PL_bufptr;
4465                 PL_lex_starts = 0;
4466 #ifdef PERL_MAD
4467                 if (PL_madskills) {
4468                     if (PL_thistoken)
4469                         sv_free(PL_thistoken);
4470                     PL_thistoken = newSVpvs("");
4471                 }
4472 #endif
4473                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4474                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4475                     OPERATOR(',');
4476                 else
4477                     Aop(OP_CONCAT);
4478             }
4479             else
4480                 return yylex();
4481         }
4482
4483     case LEX_INTERPPUSH:
4484         return REPORT(sublex_push());
4485
4486     case LEX_INTERPSTART:
4487         if (PL_bufptr == PL_bufend)
4488             return REPORT(sublex_done());
4489         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4490               "### Interpolated variable\n"); });
4491         PL_expect = XTERM;
4492         PL_lex_dojoin = (*PL_bufptr == '@');
4493         PL_lex_state = LEX_INTERPNORMAL;
4494         if (PL_lex_dojoin) {
4495             start_force(PL_curforce);
4496             NEXTVAL_NEXTTOKE.ival = 0;
4497             force_next(',');
4498             start_force(PL_curforce);
4499             force_ident("\"", '$');
4500             start_force(PL_curforce);
4501             NEXTVAL_NEXTTOKE.ival = 0;
4502             force_next('$');
4503             start_force(PL_curforce);
4504             NEXTVAL_NEXTTOKE.ival = 0;
4505             force_next((2<<24)|'(');
4506             start_force(PL_curforce);
4507             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4508             force_next(FUNC);
4509         }
4510         if (PL_lex_starts++) {
4511             s = PL_bufptr;
4512 #ifdef PERL_MAD
4513             if (PL_madskills) {
4514                 if (PL_thistoken)
4515                     sv_free(PL_thistoken);
4516                 PL_thistoken = newSVpvs("");
4517             }
4518 #endif
4519             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4520             if (!PL_lex_casemods && PL_lex_inpat)
4521                 OPERATOR(',');
4522             else
4523                 Aop(OP_CONCAT);
4524         }
4525         return yylex();
4526
4527     case LEX_INTERPENDMAYBE:
4528         if (intuit_more(PL_bufptr)) {
4529             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4530             break;
4531         }
4532         /* FALL THROUGH */
4533
4534     case LEX_INTERPEND:
4535         if (PL_lex_dojoin) {
4536             PL_lex_dojoin = FALSE;
4537             PL_lex_state = LEX_INTERPCONCAT;
4538 #ifdef PERL_MAD
4539             if (PL_madskills) {
4540                 if (PL_thistoken)
4541                     sv_free(PL_thistoken);
4542                 PL_thistoken = newSVpvs("");
4543             }
4544 #endif
4545             PL_lex_allbrackets--;
4546             return REPORT(')');
4547         }
4548         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4549             && SvEVALED(PL_lex_repl))
4550         {
4551             if (PL_bufptr != PL_bufend)
4552                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4553             PL_lex_repl = NULL;
4554         }
4555         /* FALLTHROUGH */
4556     case LEX_INTERPCONCAT:
4557 #ifdef DEBUGGING
4558         if (PL_lex_brackets)
4559             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4560 #endif
4561         if (PL_bufptr == PL_bufend)
4562             return REPORT(sublex_done());
4563
4564         if (SvIVX(PL_linestr) == '\'') {
4565             SV *sv = newSVsv(PL_linestr);
4566             if (!PL_lex_inpat)
4567                 sv = tokeq(sv);
4568             else if ( PL_hints & HINT_NEW_RE )
4569                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4570             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4571             s = PL_bufend;
4572         }
4573         else {
4574             s = scan_const(PL_bufptr);
4575             if (*s == '\\')
4576                 PL_lex_state = LEX_INTERPCASEMOD;
4577             else
4578                 PL_lex_state = LEX_INTERPSTART;
4579         }
4580
4581         if (s != PL_bufptr) {
4582             start_force(PL_curforce);
4583             if (PL_madskills) {
4584                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4585             }
4586             NEXTVAL_NEXTTOKE = pl_yylval;
4587             PL_expect = XTERM;
4588             force_next(THING);
4589             if (PL_lex_starts++) {
4590 #ifdef PERL_MAD
4591                 if (PL_madskills) {
4592                     if (PL_thistoken)
4593                         sv_free(PL_thistoken);
4594                     PL_thistoken = newSVpvs("");
4595                 }
4596 #endif
4597                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4598                 if (!PL_lex_casemods && PL_lex_inpat)
4599                     OPERATOR(',');
4600                 else
4601                     Aop(OP_CONCAT);
4602             }
4603             else {
4604                 PL_bufptr = s;
4605                 return yylex();
4606             }
4607         }
4608
4609         return yylex();
4610     case LEX_FORMLINE:
4611         PL_lex_state = LEX_NORMAL;
4612         s = scan_formline(PL_bufptr);
4613         if (!PL_lex_formbrack)
4614             goto rightbracket;
4615         OPERATOR(';');
4616     }
4617
4618     s = PL_bufptr;
4619     PL_oldoldbufptr = PL_oldbufptr;
4620     PL_oldbufptr = s;
4621
4622   retry:
4623 #ifdef PERL_MAD
4624     if (PL_thistoken) {
4625         sv_free(PL_thistoken);
4626         PL_thistoken = 0;
4627     }
4628     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4629 #endif
4630     switch (*s) {
4631     default:
4632         if (isIDFIRST_lazy_if(s,UTF))
4633             goto keylookup;
4634         {
4635         unsigned char c = *s;
4636         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4637         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4638             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4639         } else {
4640             d = PL_linestart;
4641         }
4642         *s = '\0';
4643         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4644     }
4645     case 4:
4646     case 26:
4647         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4648     case 0:
4649 #ifdef PERL_MAD
4650         if (PL_madskills)
4651             PL_faketokens = 0;
4652 #endif
4653         if (!PL_rsfp) {
4654             PL_last_uni = 0;
4655             PL_last_lop = 0;
4656             if (PL_lex_brackets &&
4657                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4658                 yyerror((const char *)
4659                         (PL_lex_formbrack
4660                          ? "Format not terminated"
4661                          : "Missing right curly or square bracket"));
4662             }
4663             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4664                         "### Tokener got EOF\n");
4665             } );
4666             TOKEN(0);
4667         }
4668         if (s++ < PL_bufend)
4669             goto retry;                 /* ignore stray nulls */
4670         PL_last_uni = 0;
4671         PL_last_lop = 0;
4672         if (!PL_in_eval && !PL_preambled) {
4673             PL_preambled = TRUE;
4674 #ifdef PERL_MAD
4675             if (PL_madskills)
4676                 PL_faketokens = 1;
4677 #endif
4678             if (PL_perldb) {
4679                 /* Generate a string of Perl code to load the debugger.
4680                  * If PERL5DB is set, it will return the contents of that,
4681                  * otherwise a compile-time require of perl5db.pl.  */
4682
4683                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4684
4685                 if (pdb) {
4686                     sv_setpv(PL_linestr, pdb);
4687                     sv_catpvs(PL_linestr,";");
4688                 } else {
4689                     SETERRNO(0,SS_NORMAL);
4690                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4691                 }
4692             } else
4693                 sv_setpvs(PL_linestr,"");
4694             if (PL_preambleav) {
4695                 SV **svp = AvARRAY(PL_preambleav);
4696                 SV **const end = svp + AvFILLp(PL_preambleav);
4697                 while(svp <= end) {
4698                     sv_catsv(PL_linestr, *svp);
4699                     ++svp;
4700                     sv_catpvs(PL_linestr, ";");
4701                 }
4702                 sv_free(MUTABLE_SV(PL_preambleav));
4703                 PL_preambleav = NULL;
4704             }
4705             if (PL_minus_E)
4706                 sv_catpvs(PL_linestr,
4707                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4708             if (PL_minus_n || PL_minus_p) {
4709                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4710                 if (PL_minus_l)
4711                     sv_catpvs(PL_linestr,"chomp;");
4712                 if (PL_minus_a) {
4713                     if (PL_minus_F) {
4714                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4715                              || *PL_splitstr == '"')
4716                               && strchr(PL_splitstr + 1, *PL_splitstr))
4717                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4718                         else {
4719                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4720                                bytes can be used as quoting characters.  :-) */
4721                             const char *splits = PL_splitstr;
4722                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4723                             do {
4724                                 /* Need to \ \s  */
4725                                 if (*splits == '\\')
4726                                     sv_catpvn(PL_linestr, splits, 1);
4727                                 sv_catpvn(PL_linestr, splits, 1);
4728                             } while (*splits++);
4729                             /* This loop will embed the trailing NUL of
4730                                PL_linestr as the last thing it does before
4731                                terminating.  */
4732                             sv_catpvs(PL_linestr, ");");
4733                         }
4734                     }
4735                     else
4736                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4737                 }
4738             }
4739             sv_catpvs(PL_linestr, "\n");
4740             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4741             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4742             PL_last_lop = PL_last_uni = NULL;
4743             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4744                 update_debugger_info(PL_linestr, NULL, 0);
4745             goto retry;
4746         }
4747         do {
4748             fake_eof = 0;
4749             bof = PL_rsfp ? TRUE : FALSE;
4750             if (0) {
4751               fake_eof:
4752                 fake_eof = LEX_FAKE_EOF;
4753             }
4754             PL_bufptr = PL_bufend;
4755             CopLINE_inc(PL_curcop);
4756             if (!lex_next_chunk(fake_eof)) {
4757                 CopLINE_dec(PL_curcop);
4758                 s = PL_bufptr;
4759                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4760             }
4761             CopLINE_dec(PL_curcop);
4762 #ifdef PERL_MAD
4763             if (!PL_rsfp)
4764                 PL_realtokenstart = -1;
4765 #endif
4766             s = PL_bufptr;
4767             /* If it looks like the start of a BOM or raw UTF-16,
4768              * check if it in fact is. */
4769             if (bof && PL_rsfp &&
4770                      (*s == 0 ||
4771                       *(U8*)s == 0xEF ||
4772                       *(U8*)s >= 0xFE ||
4773                       s[1] == 0)) {
4774                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4775                 if (bof) {
4776                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4777                     s = swallow_bom((U8*)s);
4778                 }
4779             }
4780             if (PL_parser->in_pod) {
4781                 /* Incest with pod. */
4782 #ifdef PERL_MAD
4783                 if (PL_madskills)
4784                     sv_catsv(PL_thiswhite, PL_linestr);
4785 #endif
4786                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4787                     sv_setpvs(PL_linestr, "");
4788                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4789                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4790                     PL_last_lop = PL_last_uni = NULL;
4791                     PL_parser->in_pod = 0;
4792                 }
4793             }
4794             if (PL_rsfp)
4795                 incline(s);
4796         } while (PL_parser->in_pod);
4797         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4798         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4799         PL_last_lop = PL_last_uni = NULL;
4800         if (CopLINE(PL_curcop) == 1) {
4801             while (s < PL_bufend && isSPACE(*s))
4802                 s++;
4803             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4804                 s++;
4805 #ifdef PERL_MAD
4806             if (PL_madskills)
4807                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4808 #endif
4809             d = NULL;
4810             if (!PL_in_eval) {
4811                 if (*s == '#' && *(s+1) == '!')
4812                     d = s + 2;
4813 #ifdef ALTERNATE_SHEBANG
4814                 else {
4815                     static char const as[] = ALTERNATE_SHEBANG;
4816                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4817                         d = s + (sizeof(as) - 1);
4818                 }
4819 #endif /* ALTERNATE_SHEBANG */
4820             }
4821             if (d) {
4822                 char *ipath;
4823                 char *ipathend;
4824
4825                 while (isSPACE(*d))
4826                     d++;
4827                 ipath = d;
4828                 while (*d && !isSPACE(*d))
4829                     d++;
4830                 ipathend = d;
4831
4832 #ifdef ARG_ZERO_IS_SCRIPT
4833                 if (ipathend > ipath) {
4834                     /*
4835                      * HP-UX (at least) sets argv[0] to the script name,
4836                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4837                      * at least, set argv[0] to the basename of the Perl
4838                      * interpreter. So, having found "#!", we'll set it right.
4839                      */
4840                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4841                                                     SVt_PV)); /* $^X */
4842                     assert(SvPOK(x) || SvGMAGICAL(x));
4843                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4844                         sv_setpvn(x, ipath, ipathend - ipath);
4845                         SvSETMAGIC(x);
4846                     }
4847                     else {
4848                         STRLEN blen;
4849                         STRLEN llen;
4850                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4851                         const char * const lstart = SvPV_const(x,llen);
4852                         if (llen < blen) {
4853                             bstart += blen - llen;
4854                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4855                                 sv_setpvn(x, ipath, ipathend - ipath);
4856                                 SvSETMAGIC(x);
4857                             }
4858                         }
4859                     }
4860                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4861                 }
4862 #endif /* ARG_ZERO_IS_SCRIPT */
4863
4864                 /*
4865                  * Look for options.
4866                  */
4867                 d = instr(s,"perl -");
4868                 if (!d) {
4869                     d = instr(s,"perl");
4870 #if defined(DOSISH)
4871                     /* avoid getting into infinite loops when shebang
4872                      * line contains "Perl" rather than "perl" */
4873                     if (!d) {
4874                         for (d = ipathend-4; d >= ipath; --d) {
4875                             if ((*d == 'p' || *d == 'P')
4876                                 && !ibcmp(d, "perl", 4))
4877                             {
4878                                 break;
4879                             }
4880                         }
4881                         if (d < ipath)
4882                             d = NULL;
4883                     }
4884 #endif
4885                 }
4886 #ifdef ALTERNATE_SHEBANG
4887                 /*
4888                  * If the ALTERNATE_SHEBANG on this system starts with a
4889                  * character that can be part of a Perl expression, then if
4890                  * we see it but not "perl", we're probably looking at the
4891                  * start of Perl code, not a request to hand off to some
4892                  * other interpreter.  Similarly, if "perl" is there, but
4893                  * not in the first 'word' of the line, we assume the line
4894                  * contains the start of the Perl program.
4895                  */
4896                 if (d && *s != '#') {
4897                     const char *c = ipath;
4898                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4899                         c++;
4900                     if (c < d)
4901                         d = NULL;       /* "perl" not in first word; ignore */
4902                     else
4903                         *s = '#';       /* Don't try to parse shebang line */
4904                 }
4905 #endif /* ALTERNATE_SHEBANG */
4906                 if (!d &&
4907                     *s == '#' &&
4908                     ipathend > ipath &&
4909                     !PL_minus_c &&
4910                     !instr(s,"indir") &&
4911                     instr(PL_origargv[0],"perl"))
4912                 {
4913                     dVAR;
4914                     char **newargv;
4915
4916                     *ipathend = '\0';
4917                     s = ipathend + 1;
4918                     while (s < PL_bufend && isSPACE(*s))
4919                         s++;
4920                     if (s < PL_bufend) {
4921                         Newx(newargv,PL_origargc+3,char*);
4922                         newargv[1] = s;
4923                         while (s < PL_bufend && !isSPACE(*s))
4924                             s++;
4925                         *s = '\0';
4926                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4927                     }
4928                     else
4929                         newargv = PL_origargv;
4930                     newargv[0] = ipath;
4931                     PERL_FPU_PRE_EXEC
4932                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4933                     PERL_FPU_POST_EXEC
4934                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4935                 }
4936                 if (d) {
4937                     while (*d && !isSPACE(*d))
4938                         d++;
4939                     while (SPACE_OR_TAB(*d))
4940                         d++;
4941
4942                     if (*d++ == '-') {
4943                         const bool switches_done = PL_doswitches;
4944                         const U32 oldpdb = PL_perldb;
4945                         const bool oldn = PL_minus_n;
4946                         const bool oldp = PL_minus_p;
4947                         const char *d1 = d;
4948
4949                         do {
4950                             bool baduni = FALSE;
4951                             if (*d1 == 'C') {
4952                                 const char *d2 = d1 + 1;
4953                                 if (parse_unicode_opts((const char **)&d2)
4954                                     != PL_unicode)
4955                                     baduni = TRUE;
4956                             }
4957                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4958                                 const char * const m = d1;
4959                                 while (*d1 && !isSPACE(*d1))
4960                                     d1++;
4961                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4962                                       (int)(d1 - m), m);
4963                             }
4964                             d1 = moreswitches(d1);
4965                         } while (d1);
4966                         if (PL_doswitches && !switches_done) {
4967                             int argc = PL_origargc;
4968                             char **argv = PL_origargv;
4969                             do {
4970                                 argc--,argv++;
4971                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4972                             init_argv_symbols(argc,argv);
4973                         }
4974                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4975                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4976                               /* if we have already added "LINE: while (<>) {",
4977                                  we must not do it again */
4978                         {
4979                             sv_setpvs(PL_linestr, "");
4980                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4981                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4982                             PL_last_lop = PL_last_uni = NULL;
4983                             PL_preambled = FALSE;
4984                             if (PERLDB_LINE || PERLDB_SAVESRC)
4985                                 (void)gv_fetchfile(PL_origfilename);
4986                             goto retry;
4987                         }
4988                     }
4989                 }
4990             }
4991         }
4992         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4993             PL_bufptr = s;
4994             PL_lex_state = LEX_FORMLINE;
4995             return yylex();
4996         }
4997         goto retry;
4998     case '\r':
4999 #ifdef PERL_STRICT_CR
5000         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5001         Perl_croak(aTHX_
5002       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5003 #endif
5004     case ' ': case '\t': case '\f': case 013:
5005 #ifdef PERL_MAD
5006         PL_realtokenstart = -1;
5007         if (!PL_thiswhite)
5008             PL_thiswhite = newSVpvs("");
5009         sv_catpvn(PL_thiswhite, s, 1);
5010 #endif
5011         s++;
5012         goto retry;
5013     case '#':
5014     case '\n':
5015 #ifdef PERL_MAD
5016         PL_realtokenstart = -1;
5017         if (PL_madskills)
5018             PL_faketokens = 0;
5019 #endif
5020         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5021             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5022                 /* handle eval qq[#line 1 "foo"\n ...] */
5023                 CopLINE_dec(PL_curcop);
5024                 incline(s);
5025             }
5026             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5027                 s = SKIPSPACE0(s);
5028                 if (!PL_in_eval || PL_rsfp)
5029                     incline(s);
5030             }
5031             else {
5032                 d = s;
5033                 while (d < PL_bufend && *d != '\n')
5034                     d++;
5035                 if (d < PL_bufend)
5036                     d++;
5037                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5038                   Perl_croak(aTHX_ "panic: input overflow");
5039 #ifdef PERL_MAD
5040                 if (PL_madskills)
5041                     PL_thiswhite = newSVpvn(s, d - s);
5042 #endif
5043                 s = d;
5044                 incline(s);
5045             }
5046             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5047                 PL_bufptr = s;
5048                 PL_lex_state = LEX_FORMLINE;
5049                 return yylex();
5050             }
5051         }
5052         else {
5053 #ifdef PERL_MAD
5054             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5055                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5056                     PL_faketokens = 0;
5057                     s = SKIPSPACE0(s);
5058                     TOKEN(PEG); /* make sure any #! line is accessible */
5059                 }
5060                 s = SKIPSPACE0(s);
5061             }
5062             else {
5063 /*              if (PL_madskills && PL_lex_formbrack) { */
5064                     d = s;
5065                     while (d < PL_bufend && *d != '\n')
5066                         d++;
5067                     if (d < PL_bufend)
5068                         d++;
5069                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5070                       Perl_croak(aTHX_ "panic: input overflow");
5071                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5072                         if (!PL_thiswhite)
5073                             PL_thiswhite = newSVpvs("");
5074                         if (CopLINE(PL_curcop) == 1) {
5075                             sv_setpvs(PL_thiswhite, "");
5076                             PL_faketokens = 0;
5077                         }
5078                         sv_catpvn(PL_thiswhite, s, d - s);
5079                     }
5080                     s = d;
5081 /*              }
5082                 *s = '\0';
5083                 PL_bufend = s; */
5084             }
5085 #else
5086             *s = '\0';
5087             PL_bufend = s;
5088 #endif
5089         }
5090         goto retry;
5091     case '-':
5092         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5093             I32 ftst = 0;
5094             char tmp;
5095
5096             s++;
5097             PL_bufptr = s;
5098             tmp = *s++;
5099
5100             while (s < PL_bufend && SPACE_OR_TAB(*s))
5101                 s++;
5102
5103             if (strnEQ(s,"=>",2)) {
5104                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5105                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5106                 OPERATOR('-');          /* unary minus */
5107             }
5108             PL_last_uni = PL_oldbufptr;
5109             switch (tmp) {
5110             case 'r': ftst = OP_FTEREAD;        break;
5111             case 'w': ftst = OP_FTEWRITE;       break;
5112             case 'x': ftst = OP_FTEEXEC;        break;
5113             case 'o': ftst = OP_FTEOWNED;       break;
5114             case 'R': ftst = OP_FTRREAD;        break;
5115             case 'W': ftst = OP_FTRWRITE;       break;
5116             case 'X': ftst = OP_FTREXEC;        break;
5117             case 'O': ftst = OP_FTROWNED;       break;
5118             case 'e': ftst = OP_FTIS;           break;
5119             case 'z': ftst = OP_FTZERO;         break;
5120             case 's': ftst = OP_FTSIZE;         break;
5121             case 'f': ftst = OP_FTFILE;         break;
5122             case 'd': ftst = OP_FTDIR;          break;
5123             case 'l': ftst = OP_FTLINK;         break;
5124             case 'p': ftst = OP_FTPIPE;         break;
5125             case 'S': ftst = OP_FTSOCK;         break;
5126             case 'u': ftst = OP_FTSUID;         break;
5127             case 'g': ftst = OP_FTSGID;         break;
5128             case 'k': ftst = OP_FTSVTX;         break;
5129             case 'b': ftst = OP_FTBLK;          break;
5130             case 'c': ftst = OP_FTCHR;          break;
5131             case 't': ftst = OP_FTTTY;          break;
5132             case 'T': ftst = OP_FTTEXT;         break;
5133             case 'B': ftst = OP_FTBINARY;       break;
5134             case 'M': case 'A': case 'C':
5135                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5136                 switch (tmp) {
5137                 case 'M': ftst = OP_FTMTIME;    break;
5138                 case 'A': ftst = OP_FTATIME;    break;
5139                 case 'C': ftst = OP_FTCTIME;    break;
5140                 default:                        break;
5141                 }
5142                 break;
5143             default:
5144                 break;
5145             }
5146             if (ftst) {
5147                 PL_last_lop_op = (OPCODE)ftst;
5148                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5149                         "### Saw file test %c\n", (int)tmp);
5150                 } );
5151                 FTST(ftst);
5152             }
5153             else {
5154                 /* Assume it was a minus followed by a one-letter named
5155                  * subroutine call (or a -bareword), then. */
5156                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5157                         "### '-%c' looked like a file test but was not\n",
5158                         (int) tmp);
5159                 } );
5160                 s = --PL_bufptr;
5161             }
5162         }
5163         {
5164             const char tmp = *s++;
5165             if (*s == tmp) {
5166                 s++;
5167                 if (PL_expect == XOPERATOR)
5168                     TERM(POSTDEC);
5169                 else
5170                     OPERATOR(PREDEC);
5171             }
5172             else if (*s == '>') {
5173                 s++;
5174                 s = SKIPSPACE1(s);
5175                 if (isIDFIRST_lazy_if(s,UTF)) {
5176                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5177                     TOKEN(ARROW);
5178                 }
5179                 else if (*s == '$')
5180                     OPERATOR(ARROW);
5181                 else
5182                     TERM(ARROW);
5183             }
5184             if (PL_expect == XOPERATOR) {
5185                 if (*s == '=' && !PL_lex_allbrackets &&
5186                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5187                     s--;
5188                     TOKEN(0);
5189                 }
5190                 Aop(OP_SUBTRACT);
5191             }
5192             else {
5193                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5194                     check_uni();
5195                 OPERATOR('-');          /* unary minus */
5196             }
5197         }
5198
5199     case '+':
5200         {
5201             const char tmp = *s++;
5202             if (*s == tmp) {
5203                 s++;
5204                 if (PL_expect == XOPERATOR)
5205                     TERM(POSTINC);
5206                 else
5207                     OPERATOR(PREINC);
5208             }
5209             if (PL_expect == XOPERATOR) {
5210                 if (*s == '=' && !PL_lex_allbrackets &&
5211                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5212                     s--;
5213                     TOKEN(0);
5214                 }
5215                 Aop(OP_ADD);
5216             }
5217             else {
5218                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5219                     check_uni();
5220                 OPERATOR('+');
5221             }
5222         }
5223
5224     case '*':
5225         if (PL_expect != XOPERATOR) {
5226             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5227             PL_expect = XOPERATOR;
5228             force_ident(PL_tokenbuf, '*');
5229             if (!*PL_tokenbuf)
5230                 PREREF('*');
5231             TERM('*');
5232         }
5233         s++;
5234         if (*s == '*') {
5235             s++;
5236             if (*s == '=' && !PL_lex_allbrackets &&
5237                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5238                 s -= 2;
5239                 TOKEN(0);
5240             }
5241             PWop(OP_POW);
5242         }
5243         if (*s == '=' && !PL_lex_allbrackets &&
5244                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5245             s--;
5246             TOKEN(0);
5247         }
5248         Mop(OP_MULTIPLY);
5249
5250     case '%':
5251         if (PL_expect == XOPERATOR) {
5252             if (s[1] == '=' && !PL_lex_allbrackets &&
5253                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5254                 TOKEN(0);
5255             ++s;
5256             Mop(OP_MODULO);
5257         }
5258         PL_tokenbuf[0] = '%';
5259         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5260                 sizeof PL_tokenbuf - 1, FALSE);
5261         if (!PL_tokenbuf[1]) {
5262             PREREF('%');
5263         }
5264         PL_pending_ident = '%';
5265         TERM('%');
5266
5267     case '^':
5268         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5269                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5270             TOKEN(0);
5271         s++;
5272         BOop(OP_BIT_XOR);
5273     case '[':
5274         if (PL_lex_brackets > 100)
5275             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5276         PL_lex_brackstack[PL_lex_brackets++] = 0;
5277         PL_lex_allbrackets++;
5278         {
5279             const char tmp = *s++;
5280             OPERATOR(tmp);
5281         }
5282     case '~':
5283         if (s[1] == '~'
5284             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5285         {
5286             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5287                 TOKEN(0);
5288             s += 2;
5289             Eop(OP_SMARTMATCH);
5290         }
5291         s++;
5292         OPERATOR('~');
5293     case ',':
5294         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5295             TOKEN(0);
5296         s++;
5297         OPERATOR(',');
5298     case ':':
5299         if (s[1] == ':') {
5300             len = 0;
5301             goto just_a_word_zero_gv;
5302         }
5303         s++;
5304         switch (PL_expect) {
5305             OP *attrs;
5306 #ifdef PERL_MAD
5307             I32 stuffstart;
5308 #endif
5309         case XOPERATOR:
5310             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5311                 break;
5312             PL_bufptr = s;      /* update in case we back off */
5313             if (*s == '=') {
5314                 Perl_croak(aTHX_
5315                            "Use of := for an empty attribute list is not allowed");
5316             }
5317             goto grabattrs;
5318         case XATTRBLOCK:
5319             PL_expect = XBLOCK;
5320             goto grabattrs;
5321         case XATTRTERM:
5322             PL_expect = XTERMBLOCK;
5323          grabattrs:
5324 #ifdef PERL_MAD
5325             stuffstart = s - SvPVX(PL_linestr) - 1;
5326 #endif
5327             s = PEEKSPACE(s);
5328             attrs = NULL;
5329             while (isIDFIRST_lazy_if(s,UTF)) {
5330                 I32 tmp;
5331                 SV *sv;
5332                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5333                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5334                     if (tmp < 0) tmp = -tmp;
5335                     switch (tmp) {
5336                     case KEY_or:
5337                     case KEY_and:
5338                     case KEY_for:
5339                     case KEY_foreach:
5340                     case KEY_unless:
5341                     case KEY_if:
5342                     case KEY_while:
5343                     case KEY_until:
5344                         goto got_attrs;
5345                     default:
5346                         break;
5347                     }
5348                 }
5349                 sv = newSVpvn(s, len);
5350                 if (*d == '(') {
5351                     d = scan_str(d,TRUE,TRUE);
5352                     if (!d) {
5353                         /* MUST advance bufptr here to avoid bogus
5354                            "at end of line" context messages from yyerror().
5355                          */
5356                         PL_bufptr = s + len;
5357                         yyerror("Unterminated attribute parameter in attribute list");
5358                         if (attrs)
5359                             op_free(attrs);
5360                         sv_free(sv);
5361                         return REPORT(0);       /* EOF indicator */
5362                     }
5363                 }
5364                 if (PL_lex_stuff) {
5365                     sv_catsv(sv, PL_lex_stuff);
5366                     attrs = op_append_elem(OP_LIST, attrs,
5367                                         newSVOP(OP_CONST, 0, sv));
5368                     SvREFCNT_dec(PL_lex_stuff);
5369                     PL_lex_stuff = NULL;
5370                 }
5371                 else {
5372                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5373                         sv_free(sv);
5374                         if (PL_in_my == KEY_our) {
5375                             deprecate(":unique");
5376                         }
5377                         else
5378                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5379                     }
5380
5381                     /* NOTE: any CV attrs applied here need to be part of
5382                        the CVf_BUILTIN_ATTRS define in cv.h! */
5383                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5384                         sv_free(sv);
5385                         CvLVALUE_on(PL_compcv);
5386                     }
5387                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5388                         sv_free(sv);
5389                         deprecate(":locked");
5390                     }
5391                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5392                         sv_free(sv);
5393                         CvMETHOD_on(PL_compcv);
5394                     }
5395                     /* After we've set the flags, it could be argued that
5396                        we don't need to do the attributes.pm-based setting
5397                        process, and shouldn't bother appending recognized
5398                        flags.  To experiment with that, uncomment the
5399                        following "else".  (Note that's already been
5400                        uncommented.  That keeps the above-applied built-in
5401                        attributes from being intercepted (and possibly
5402                        rejected) by a package's attribute routines, but is
5403                        justified by the performance win for the common case
5404                        of applying only built-in attributes.) */
5405                     else
5406                         attrs = op_append_elem(OP_LIST, attrs,
5407                                             newSVOP(OP_CONST, 0,
5408                                                     sv));
5409                 }
5410                 s = PEEKSPACE(d);
5411                 if (*s == ':' && s[1] != ':')
5412                     s = PEEKSPACE(s+1);
5413                 else if (s == d)
5414                     break;      /* require real whitespace or :'s */
5415                 /* XXX losing whitespace on sequential attributes here */
5416             }
5417             {
5418                 const char tmp
5419                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5420                 if (*s != ';' && *s != '}' && *s != tmp
5421                     && (tmp != '=' || *s != ')')) {
5422                     const char q = ((*s == '\'') ? '"' : '\'');
5423                     /* If here for an expression, and parsed no attrs, back
5424                        off. */
5425                     if (tmp == '=' && !attrs) {
5426                         s = PL_bufptr;
5427                         break;
5428                     }
5429                     /* MUST advance bufptr here to avoid bogus "at end of line"
5430                        context messages from yyerror().
5431                     */
5432                     PL_bufptr = s;
5433                     yyerror( (const char *)
5434                              (*s
5435                               ? Perl_form(aTHX_ "Invalid separator character "
5436                                           "%c%c%c in attribute list", q, *s, q)
5437                               : "Unterminated attribute list" ) );
5438                     if (attrs)
5439                         op_free(attrs);
5440                     OPERATOR(':');
5441                 }
5442             }
5443         got_attrs:
5444             if (attrs) {
5445                 start_force(PL_curforce);
5446                 NEXTVAL_NEXTTOKE.opval = attrs;
5447                 CURMAD('_', PL_nextwhite);
5448                 force_next(THING);
5449             }
5450 #ifdef PERL_MAD
5451             if (PL_madskills) {
5452                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5453                                      (s - SvPVX(PL_linestr)) - stuffstart);
5454             }
5455 #endif
5456             TOKEN(COLONATTR);
5457         }
5458         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5459             s--;
5460             TOKEN(0);
5461         }
5462         PL_lex_allbrackets--;
5463         OPERATOR(':');
5464     case '(':
5465         s++;
5466         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5467             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5468         else
5469             PL_expect = XTERM;
5470         s = SKIPSPACE1(s);
5471         PL_lex_allbrackets++;
5472         TOKEN('(');
5473     case ';':
5474         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5475             TOKEN(0);
5476         CLINE;
5477         s++;
5478         OPERATOR(';');
5479     case ')':
5480         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5481             TOKEN(0);
5482         s++;
5483         PL_lex_allbrackets--;
5484         s = SKIPSPACE1(s);
5485         if (*s == '{')
5486             PREBLOCK(')');
5487         TERM(')');
5488     case ']':
5489         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5490             TOKEN(0);
5491         s++;
5492         if (PL_lex_brackets <= 0)
5493             yyerror("Unmatched right square bracket");
5494         else
5495             --PL_lex_brackets;
5496         PL_lex_allbrackets--;
5497         if (PL_lex_state == LEX_INTERPNORMAL) {
5498             if (PL_lex_brackets == 0) {
5499                 if (*s == '-' && s[1] == '>')
5500                     PL_lex_state = LEX_INTERPENDMAYBE;
5501                 else if (*s != '[' && *s != '{')
5502                     PL_lex_state = LEX_INTERPEND;
5503             }
5504         }
5505         TERM(']');
5506     case '{':
5507       leftbracket:
5508         s++;
5509         if (PL_lex_brackets > 100) {
5510             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5511         }
5512         switch (PL_expect) {
5513         case XTERM:
5514             if (PL_lex_formbrack) {
5515                 s--;
5516                 PRETERMBLOCK(DO);
5517             }
5518             if (PL_oldoldbufptr == PL_last_lop)
5519                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5520             else
5521                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5522             PL_lex_allbrackets++;
5523             OPERATOR(HASHBRACK);
5524         case XOPERATOR:
5525             while (s < PL_bufend && SPACE_OR_TAB(*s))
5526                 s++;
5527             d = s;
5528             PL_tokenbuf[0] = '\0';
5529             if (d < PL_bufend && *d == '-') {
5530                 PL_tokenbuf[0] = '-';
5531                 d++;
5532                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5533                     d++;
5534             }
5535             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5536                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5537                               FALSE, &len);
5538                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5539                     d++;
5540                 if (*d == '}') {
5541                     const char minus = (PL_tokenbuf[0] == '-');
5542                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5543                     if (minus)
5544                         force_next('-');
5545                 }
5546             }
5547             /* FALL THROUGH */
5548         case XATTRBLOCK:
5549         case XBLOCK:
5550             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5551             PL_lex_allbrackets++;
5552             PL_expect = XSTATE;
5553             break;
5554         case XATTRTERM:
5555         case XTERMBLOCK:
5556             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5557             PL_lex_allbrackets++;
5558             PL_expect = XSTATE;
5559             break;
5560         default: {
5561                 const char *t;
5562                 if (PL_oldoldbufptr == PL_last_lop)
5563                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5564                 else
5565                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5566                 PL_lex_allbrackets++;
5567                 s = SKIPSPACE1(s);
5568                 if (*s == '}') {
5569                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5570                         PL_expect = XTERM;
5571                         /* This hack is to get the ${} in the message. */
5572                         PL_bufptr = s+1;
5573                         yyerror("syntax error");
5574                         break;
5575                     }
5576                     OPERATOR(HASHBRACK);
5577                 }
5578                 /* This hack serves to disambiguate a pair of curlies
5579                  * as being a block or an anon hash.  Normally, expectation
5580                  * determines that, but in cases where we're not in a
5581                  * position to expect anything in particular (like inside
5582                  * eval"") we have to resolve the ambiguity.  This code
5583                  * covers the case where the first term in the curlies is a
5584                  * quoted string.  Most other cases need to be explicitly
5585                  * disambiguated by prepending a "+" before the opening
5586                  * curly in order to force resolution as an anon hash.
5587                  *
5588                  * XXX should probably propagate the outer expectation
5589                  * into eval"" to rely less on this hack, but that could
5590                  * potentially break current behavior of eval"".
5591                  * GSAR 97-07-21
5592                  */
5593                 t = s;
5594                 if (*s == '\'' || *s == '"' || *s == '`') {
5595                     /* common case: get past first string, handling escapes */
5596                     for (t++; t < PL_bufend && *t != *s;)
5597                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5598                             t++;
5599                     t++;
5600                 }
5601                 else if (*s == 'q') {
5602                     if (++t < PL_bufend
5603                         && (!isALNUM(*t)
5604                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5605                                 && !isALNUM(*t))))
5606                     {
5607                         /* skip q//-like construct */
5608                         const char *tmps;
5609                         char open, close, term;
5610                         I32 brackets = 1;
5611
5612                         while (t < PL_bufend && isSPACE(*t))
5613                             t++;
5614                         /* check for q => */
5615                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5616                             OPERATOR(HASHBRACK);
5617                         }
5618                         term = *t;
5619                         open = term;
5620                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5621                             term = tmps[5];
5622                         close = term;
5623                         if (open == close)
5624                             for (t++; t < PL_bufend; t++) {
5625                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5626                                     t++;
5627                                 else if (*t == open)
5628                                     break;
5629                             }
5630                         else {
5631                             for (t++; t < PL_bufend; t++) {
5632                                 if (*t == '\\' && t+1 < PL_bufend)
5633                                     t++;
5634                                 else if (*t == close && --brackets <= 0)
5635                                     break;
5636                                 else if (*t == open)
5637                                     brackets++;
5638                             }
5639                         }
5640                         t++;
5641                     }
5642                     else
5643                         /* skip plain q word */
5644                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5645                              t += UTF8SKIP(t);
5646                 }
5647                 else if (isALNUM_lazy_if(t,UTF)) {
5648                     t += UTF8SKIP(t);
5649                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5650                          t += UTF8SKIP(t);
5651                 }
5652                 while (t < PL_bufend && isSPACE(*t))
5653                     t++;
5654                 /* if comma follows first term, call it an anon hash */
5655                 /* XXX it could be a comma expression with loop modifiers */
5656                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5657                                    || (*t == '=' && t[1] == '>')))
5658                     OPERATOR(HASHBRACK);
5659                 if (PL_expect == XREF)
5660                     PL_expect = XTERM;
5661                 else {
5662                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5663                     PL_expect = XSTATE;
5664                 }
5665             }
5666             break;
5667         }
5668         pl_yylval.ival = CopLINE(PL_curcop);
5669         if (isSPACE(*s) || *s == '#')
5670             PL_copline = NOLINE;   /* invalidate current command line number */
5671         TOKEN('{');
5672     case '}':
5673         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5674             TOKEN(0);
5675       rightbracket:
5676         s++;
5677         if (PL_lex_brackets <= 0)
5678             yyerror("Unmatched right curly bracket");
5679         else
5680             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5681         PL_lex_allbrackets--;
5682         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5683             PL_lex_formbrack = 0;
5684         if (PL_lex_state == LEX_INTERPNORMAL) {
5685             if (PL_lex_brackets == 0) {
5686                 if (PL_expect & XFAKEBRACK) {
5687                     PL_expect &= XENUMMASK;
5688                     PL_lex_state = LEX_INTERPEND;
5689                     PL_bufptr = s;
5690 #if 0
5691                     if (PL_madskills) {
5692                         if (!PL_thiswhite)
5693                             PL_thiswhite = newSVpvs("");
5694                         sv_catpvs(PL_thiswhite,"}");
5695                     }
5696 #endif
5697                     return yylex();     /* ignore fake brackets */
5698                 }
5699                 if (*s == '-' && s[1] == '>')
5700                     PL_lex_state = LEX_INTERPENDMAYBE;
5701                 else if (*s != '[' && *s != '{')
5702                     PL_lex_state = LEX_INTERPEND;
5703             }
5704         }
5705         if (PL_expect & XFAKEBRACK) {
5706             PL_expect &= XENUMMASK;
5707             PL_bufptr = s;
5708             return yylex();             /* ignore fake brackets */
5709         }
5710         start_force(PL_curforce);
5711         if (PL_madskills) {
5712             curmad('X', newSVpvn(s-1,1));
5713             CURMAD('_', PL_thiswhite);
5714         }
5715         force_next('}');
5716 #ifdef PERL_MAD
5717         if (!PL_thistoken)
5718             PL_thistoken = newSVpvs("");
5719 #endif
5720         TOKEN(';');
5721     case '&':
5722         s++;
5723         if (*s++ == '&') {
5724             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5725                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5726                 s -= 2;
5727                 TOKEN(0);
5728             }
5729             AOPERATOR(ANDAND);
5730         }
5731         s--;
5732         if (PL_expect == XOPERATOR) {
5733             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5734                 && isIDFIRST_lazy_if(s,UTF))
5735             {
5736                 CopLINE_dec(PL_curcop);
5737                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5738                 CopLINE_inc(PL_curcop);
5739             }
5740             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5741                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5742                 s--;
5743                 TOKEN(0);
5744             }
5745             BAop(OP_BIT_AND);
5746         }
5747
5748         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5749         if (*PL_tokenbuf) {
5750             PL_expect = XOPERATOR;
5751             force_ident(PL_tokenbuf, '&');
5752         }
5753         else
5754             PREREF('&');
5755         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5756         TERM('&');
5757
5758     case '|':
5759         s++;
5760         if (*s++ == '|') {
5761             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5762                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5763                 s -= 2;
5764                 TOKEN(0);
5765             }
5766             AOPERATOR(OROR);
5767         }
5768         s--;
5769         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5770                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5771             s--;
5772             TOKEN(0);
5773         }
5774         BOop(OP_BIT_OR);
5775     case '=':
5776         s++;
5777         {
5778             const char tmp = *s++;
5779             if (tmp == '=') {
5780                 if (!PL_lex_allbrackets &&
5781                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5782                     s -= 2;
5783                     TOKEN(0);
5784                 }
5785                 Eop(OP_EQ);
5786             }
5787             if (tmp == '>') {
5788                 if (!PL_lex_allbrackets &&
5789                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5790                     s -= 2;
5791                     TOKEN(0);
5792                 }
5793                 OPERATOR(',');
5794             }
5795             if (tmp == '~')
5796                 PMop(OP_MATCH);
5797             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5798                 && strchr("+-*/%.^&|<",tmp))
5799                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5800                             "Reversed %c= operator",(int)tmp);
5801             s--;
5802             if (PL_expect == XSTATE && isALPHA(tmp) &&
5803                 (s == PL_linestart+1 || s[-2] == '\n') )
5804                 {
5805                     if (PL_in_eval && !PL_rsfp) {
5806                         d = PL_bufend;
5807                         while (s < d) {
5808                             if (*s++ == '\n') {
5809                                 incline(s);
5810                                 if (strnEQ(s,"=cut",4)) {
5811                                     s = strchr(s,'\n');
5812                                     if (s)
5813                                         s++;
5814                                     else
5815                                         s = d;
5816                                     incline(s);
5817                                     goto retry;
5818                                 }
5819                             }
5820                         }
5821                         goto retry;
5822                     }
5823 #ifdef PERL_MAD
5824                     if (PL_madskills) {
5825                         if (!PL_thiswhite)
5826                             PL_thiswhite = newSVpvs("");
5827                         sv_catpvn(PL_thiswhite, PL_linestart,
5828                                   PL_bufend - PL_linestart);
5829                     }
5830 #endif
5831                     s = PL_bufend;
5832                     PL_parser->in_pod = 1;
5833                     goto retry;
5834                 }
5835         }
5836         if (PL_lex_brackets < PL_lex_formbrack) {
5837             const char *t = s;
5838 #ifdef PERL_STRICT_CR
5839             while (SPACE_OR_TAB(*t))
5840 #else
5841             while (SPACE_OR_TAB(*t) || *t == '\r')
5842 #endif
5843                 t++;
5844             if (*t == '\n' || *t == '#') {
5845                 s--;
5846                 PL_expect = XBLOCK;
5847                 goto leftbracket;
5848             }
5849         }
5850         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5851             s--;
5852             TOKEN(0);
5853         }
5854         pl_yylval.ival = 0;
5855         OPERATOR(ASSIGNOP);
5856     case '!':
5857         s++;
5858         {
5859             const char tmp = *s++;
5860             if (tmp == '=') {
5861                 /* was this !=~ where !~ was meant?
5862                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5863
5864                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5865                     const char *t = s+1;
5866
5867                     while (t < PL_bufend && isSPACE(*t))
5868                         ++t;
5869
5870                     if (*t == '/' || *t == '?' ||
5871                         ((*t == 'm' || *t == 's' || *t == 'y')
5872                          && !isALNUM(t[1])) ||
5873                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5874                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5875                                     "!=~ should be !~");
5876                 }
5877                 if (!PL_lex_allbrackets &&
5878                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5879                     s -= 2;
5880                     TOKEN(0);
5881                 }
5882                 Eop(OP_NE);
5883             }
5884             if (tmp == '~')
5885                 PMop(OP_NOT);
5886         }
5887         s--;
5888         OPERATOR('!');
5889     case '<':
5890         if (PL_expect != XOPERATOR) {
5891             if (s[1] != '<' && !strchr(s,'>'))
5892                 check_uni();
5893             if (s[1] == '<')
5894                 s = scan_heredoc(s);
5895             else
5896                 s = scan_inputsymbol(s);
5897             TERM(sublex_start());
5898         }
5899         s++;
5900         {
5901             char tmp = *s++;
5902             if (tmp == '<') {
5903                 if (*s == '=' && !PL_lex_allbrackets &&
5904                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5905                     s -= 2;
5906                     TOKEN(0);
5907                 }
5908                 SHop(OP_LEFT_SHIFT);
5909             }
5910             if (tmp == '=') {
5911                 tmp = *s++;
5912                 if (tmp == '>') {
5913                     if (!PL_lex_allbrackets &&
5914                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5915                         s -= 3;
5916                         TOKEN(0);
5917                     }
5918                     Eop(OP_NCMP);
5919                 }
5920                 s--;
5921                 if (!PL_lex_allbrackets &&
5922                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5923                     s -= 2;
5924                     TOKEN(0);
5925                 }
5926                 Rop(OP_LE);
5927             }
5928         }
5929         s--;
5930         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5931             s--;
5932             TOKEN(0);
5933         }
5934         Rop(OP_LT);
5935     case '>':
5936         s++;
5937         {
5938             const char tmp = *s++;
5939             if (tmp == '>') {
5940                 if (*s == '=' && !PL_lex_allbrackets &&
5941                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5942                     s -= 2;
5943                     TOKEN(0);
5944                 }
5945                 SHop(OP_RIGHT_SHIFT);
5946             }
5947             else if (tmp == '=') {
5948                 if (!PL_lex_allbrackets &&
5949                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5950                     s -= 2;
5951                     TOKEN(0);
5952                 }
5953                 Rop(OP_GE);
5954             }
5955         }
5956         s--;
5957         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5958             s--;
5959             TOKEN(0);
5960         }
5961         Rop(OP_GT);
5962
5963     case '$':
5964         CLINE;
5965
5966         if (PL_expect == XOPERATOR) {
5967             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5968                 return deprecate_commaless_var_list();
5969             }
5970         }
5971
5972         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5973             PL_tokenbuf[0] = '@';
5974             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5975                            sizeof PL_tokenbuf - 1, FALSE);
5976             if (PL_expect == XOPERATOR)
5977                 no_op("Array length", s);
5978             if (!PL_tokenbuf[1])
5979                 PREREF(DOLSHARP);
5980             PL_expect = XOPERATOR;
5981             PL_pending_ident = '#';
5982             TOKEN(DOLSHARP);
5983         }
5984
5985         PL_tokenbuf[0] = '$';
5986         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5987                        sizeof PL_tokenbuf - 1, FALSE);
5988         if (PL_expect == XOPERATOR)
5989             no_op("Scalar", s);
5990         if (!PL_tokenbuf[1]) {
5991             if (s == PL_bufend)
5992                 yyerror("Final $ should be \\$ or $name");
5993             PREREF('$');
5994         }
5995
5996         /* This kludge not intended to be bulletproof. */
5997         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5998             pl_yylval.opval = newSVOP(OP_CONST, 0,
5999                                    newSViv(CopARYBASE_get(&PL_compiling)));
6000             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
6001             TERM(THING);
6002         }
6003
6004         d = s;
6005         {
6006             const char tmp = *s;
6007             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6008                 s = SKIPSPACE1(s);
6009
6010             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6011                 && intuit_more(s)) {
6012                 if (*s == '[') {
6013                     PL_tokenbuf[0] = '@';
6014                     if (ckWARN(WARN_SYNTAX)) {
6015                         char *t = s+1;
6016
6017                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6018                             t++;
6019                         if (*t++ == ',') {
6020                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6021                             while (t < PL_bufend && *t != ']')
6022                                 t++;
6023                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6024                                         "Multidimensional syntax %.*s not supported",
6025                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6026                         }
6027                     }
6028                 }
6029                 else if (*s == '{') {
6030                     char *t;
6031                     PL_tokenbuf[0] = '%';
6032                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6033                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6034                         {
6035                             char tmpbuf[sizeof PL_tokenbuf];
6036                             do {
6037                                 t++;
6038                             } while (isSPACE(*t));
6039                             if (isIDFIRST_lazy_if(t,UTF)) {
6040                                 STRLEN len;
6041                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6042                                               &len);
6043                                 while (isSPACE(*t))
6044                                     t++;
6045                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6046                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6047                                                 "You need to quote \"%s\"",
6048                                                 tmpbuf);
6049                             }
6050                         }
6051                 }
6052             }
6053
6054             PL_expect = XOPERATOR;
6055             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6056                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6057                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6058                     PL_expect = XOPERATOR;
6059                 else if (strchr("$@\"'`q", *s))
6060                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6061                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6062                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6063                 else if (isIDFIRST_lazy_if(s,UTF)) {
6064                     char tmpbuf[sizeof PL_tokenbuf];
6065                     int t2;
6066                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6067                     if ((t2 = keyword(tmpbuf, len, 0))) {
6068                         /* binary operators exclude handle interpretations */
6069                         switch (t2) {
6070                         case -KEY_x:
6071                         case -KEY_eq:
6072                         case -KEY_ne:
6073                         case -KEY_gt:
6074                         case -KEY_lt:
6075                         case -KEY_ge:
6076                         case -KEY_le:
6077                         case -KEY_cmp:
6078                             break;
6079                         default:
6080                             PL_expect = XTERM;  /* e.g. print $fh length() */
6081                             break;
6082                         }
6083                     }
6084                     else {
6085                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6086                     }
6087                 }
6088                 else if (isDIGIT(*s))
6089                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6090                 else if (*s == '.' && isDIGIT(s[1]))
6091                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6092                 else if ((*s == '?' || *s == '-' || *s == '+')
6093                          && !isSPACE(s[1]) && s[1] != '=')
6094                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6095                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6096                          && s[1] != '/')
6097                     PL_expect = XTERM;          /* e.g. print $fh /.../
6098                                                    XXX except DORDOR operator
6099                                                 */
6100                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6101                          && s[2] != '=')
6102                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6103             }
6104         }
6105         PL_pending_ident = '$';
6106         TOKEN('$');
6107
6108     case '@':
6109         if (PL_expect == XOPERATOR)
6110             no_op("Array", s);
6111         PL_tokenbuf[0] = '@';
6112         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6113         if (!PL_tokenbuf[1]) {
6114             PREREF('@');
6115         }
6116         if (PL_lex_state == LEX_NORMAL)
6117             s = SKIPSPACE1(s);
6118         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6119             if (*s == '{')
6120                 PL_tokenbuf[0] = '%';
6121
6122             /* Warn about @ where they meant $. */
6123             if (*s == '[' || *s == '{') {
6124                 if (ckWARN(WARN_SYNTAX)) {
6125                     const char *t = s + 1;
6126                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6127                         t++;
6128                     if (*t == '}' || *t == ']') {
6129                         t++;
6130                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6131                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6132                             "Scalar value %.*s better written as $%.*s",
6133                             (int)(t-PL_bufptr), PL_bufptr,
6134                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6135                     }
6136                 }
6137             }
6138         }
6139         PL_pending_ident = '@';
6140         TERM('@');
6141
6142      case '/':                  /* may be division, defined-or, or pattern */
6143         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6144             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6145                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6146                 TOKEN(0);
6147             s += 2;
6148             AOPERATOR(DORDOR);
6149         }
6150      case '?':                  /* may either be conditional or pattern */
6151         if (PL_expect == XOPERATOR) {
6152              char tmp = *s++;
6153              if(tmp == '?') {
6154                 if (!PL_lex_allbrackets &&
6155                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6156                     s--;
6157                     TOKEN(0);
6158                 }
6159                 PL_lex_allbrackets++;
6160                 OPERATOR('?');
6161              }
6162              else {
6163                  tmp = *s++;
6164                  if(tmp == '/') {
6165                      /* A // operator. */
6166                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6167                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6168                                             LEX_FAKEEOF_LOGIC)) {
6169                         s -= 2;
6170                         TOKEN(0);
6171                     }
6172                     AOPERATOR(DORDOR);
6173                  }
6174                  else {
6175                      s--;
6176                      if (*s == '=' && !PL_lex_allbrackets &&
6177                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6178                          s--;
6179                          TOKEN(0);
6180                      }
6181                      Mop(OP_DIVIDE);
6182                  }
6183              }
6184          }
6185          else {
6186              /* Disable warning on "study /blah/" */
6187              if (PL_oldoldbufptr == PL_last_uni
6188               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6189                   || memNE(PL_last_uni, "study", 5)
6190                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6191               ))
6192                  check_uni();
6193              if (*s == '?')
6194                  deprecate("?PATTERN? without explicit operator");
6195              s = scan_pat(s,OP_MATCH);
6196              TERM(sublex_start());
6197          }
6198
6199     case '.':
6200         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6201 #ifdef PERL_STRICT_CR
6202             && s[1] == '\n'
6203 #else
6204             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6205 #endif
6206             && (s == PL_linestart || s[-1] == '\n') )
6207         {
6208             PL_lex_formbrack = 0;
6209             PL_expect = XSTATE;
6210             goto rightbracket;
6211         }
6212         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6213             s += 3;
6214             OPERATOR(YADAYADA);
6215         }
6216         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6217             char tmp = *s++;
6218             if (*s == tmp) {
6219                 if (!PL_lex_allbrackets &&
6220                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6221                     s--;
6222                     TOKEN(0);
6223                 }
6224                 s++;
6225                 if (*s == tmp) {
6226                     s++;
6227                     pl_yylval.ival = OPf_SPECIAL;
6228                 }
6229                 else
6230                     pl_yylval.ival = 0;
6231                 OPERATOR(DOTDOT);
6232             }
6233             if (*s == '=' && !PL_lex_allbrackets &&
6234                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6235                 s--;
6236                 TOKEN(0);
6237             }
6238             Aop(OP_CONCAT);
6239         }
6240         /* FALL THROUGH */
6241     case '0': case '1': case '2': case '3': case '4':
6242     case '5': case '6': case '7': case '8': case '9':
6243         s = scan_num(s, &pl_yylval);
6244         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6245         if (PL_expect == XOPERATOR)
6246             no_op("Number",s);
6247         TERM(THING);
6248
6249     case '\'':
6250         s = scan_str(s,!!PL_madskills,FALSE);
6251         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6252         if (PL_expect == XOPERATOR) {
6253             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6254                 return deprecate_commaless_var_list();
6255             }
6256             else
6257                 no_op("String",s);
6258         }
6259         if (!s)
6260             missingterm(NULL);
6261         pl_yylval.ival = OP_CONST;
6262         TERM(sublex_start());
6263
6264     case '"':
6265         s = scan_str(s,!!PL_madskills,FALSE);
6266         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6267         if (PL_expect == XOPERATOR) {
6268             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6269                 return deprecate_commaless_var_list();
6270             }
6271             else
6272                 no_op("String",s);
6273         }
6274         if (!s)
6275             missingterm(NULL);
6276         pl_yylval.ival = OP_CONST;
6277         /* FIXME. I think that this can be const if char *d is replaced by
6278            more localised variables.  */
6279         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6280             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6281                 pl_yylval.ival = OP_STRINGIFY;
6282                 break;
6283             }
6284         }
6285         TERM(sublex_start());
6286
6287     case '`':
6288         s = scan_str(s,!!PL_madskills,FALSE);
6289         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6290         if (PL_expect == XOPERATOR)
6291             no_op("Backticks",s);
6292         if (!s)
6293             missingterm(NULL);
6294         readpipe_override();
6295         TERM(sublex_start());
6296
6297     case '\\':
6298         s++;
6299         if (PL_lex_inwhat && isDIGIT(*s))
6300             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6301                            *s, *s);
6302         if (PL_expect == XOPERATOR)
6303             no_op("Backslash",s);
6304         OPERATOR(REFGEN);
6305
6306     case 'v':
6307         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6308             char *start = s + 2;
6309             while (isDIGIT(*start) || *start == '_')
6310                 start++;
6311             if (*start == '.' && isDIGIT(start[1])) {
6312                 s = scan_num(s, &pl_yylval);
6313                 TERM(THING);
6314             }
6315             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6316             else if (!isALPHA(*start) && (PL_expect == XTERM
6317                         || PL_expect == XREF || PL_expect == XSTATE
6318                         || PL_expect == XTERMORDORDOR)) {
6319                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6320                 if (!gv) {
6321                     s = scan_num(s, &pl_yylval);
6322                     TERM(THING);
6323                 }
6324             }
6325         }
6326         goto keylookup;
6327     case 'x':
6328         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6329             s++;
6330             Mop(OP_REPEAT);
6331         }
6332         goto keylookup;
6333
6334     case '_':
6335     case 'a': case 'A':
6336     case 'b': case 'B':
6337     case 'c': case 'C':
6338     case 'd': case 'D':
6339     case 'e': case 'E':
6340     case 'f': case 'F':
6341     case 'g': case 'G':
6342     case 'h': case 'H':
6343     case 'i': case 'I':
6344     case 'j': case 'J':
6345     case 'k': case 'K':
6346     case 'l': case 'L':
6347     case 'm': case 'M':
6348     case 'n': case 'N':
6349     case 'o': case 'O':
6350     case 'p': case 'P':
6351     case 'q': case 'Q':
6352     case 'r': case 'R':
6353     case 's': case 'S':
6354     case 't': case 'T':
6355     case 'u': case 'U':
6356               case 'V':
6357     case 'w': case 'W':
6358               case 'X':
6359     case 'y': case 'Y':
6360     case 'z': case 'Z':
6361
6362       keylookup: {
6363         bool anydelim;
6364         I32 tmp;
6365
6366         orig_keyword = 0;
6367         gv = NULL;
6368         gvp = NULL;
6369
6370         PL_bufptr = s;
6371         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6372
6373         /* Some keywords can be followed by any delimiter, including ':' */
6374         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6375
6376         /* x::* is just a word, unless x is "CORE" */
6377         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6378             goto just_a_word;
6379
6380         d = s;
6381         while (d < PL_bufend && isSPACE(*d))
6382                 d++;    /* no comments skipped here, or s### is misparsed */
6383
6384         /* Is this a word before a => operator? */
6385         if (*d == '=' && d[1] == '>') {
6386             CLINE;
6387             pl_yylval.opval
6388                 = (OP*)newSVOP(OP_CONST, 0,
6389                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6390             pl_yylval.opval->op_private = OPpCONST_BARE;
6391             TERM(WORD);
6392         }
6393
6394         /* Check for plugged-in keyword */
6395         {
6396             OP *o;
6397             int result;
6398             char *saved_bufptr = PL_bufptr;
6399             PL_bufptr = s;
6400             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6401             s = PL_bufptr;
6402             if (result == KEYWORD_PLUGIN_DECLINE) {
6403                 /* not a plugged-in keyword */
6404                 PL_bufptr = saved_bufptr;
6405             } else if (result == KEYWORD_PLUGIN_STMT) {
6406                 pl_yylval.opval = o;
6407                 CLINE;
6408                 PL_expect = XSTATE;
6409                 return REPORT(PLUGSTMT);
6410             } else if (result == KEYWORD_PLUGIN_EXPR) {
6411                 pl_yylval.opval = o;
6412                 CLINE;
6413                 PL_expect = XOPERATOR;
6414                 return REPORT(PLUGEXPR);
6415             } else {
6416                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6417                                         PL_tokenbuf);
6418             }
6419         }
6420
6421         /* Check for built-in keyword */
6422         tmp = keyword(PL_tokenbuf, len, 0);
6423
6424         /* Is this a label? */
6425         if (!anydelim && PL_expect == XSTATE
6426               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6427             s = d + 1;
6428             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6429             CLINE;
6430             TOKEN(LABEL);
6431         }
6432
6433         if (tmp < 0) {                  /* second-class keyword? */
6434             GV *ogv = NULL;     /* override (winner) */
6435             GV *hgv = NULL;     /* hidden (loser) */
6436             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6437                 CV *cv;
6438                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6439                     (cv = GvCVu(gv)))
6440                 {
6441                     if (GvIMPORTED_CV(gv))
6442                         ogv = gv;
6443                     else if (! CvMETHOD(cv))
6444                         hgv = gv;
6445                 }
6446                 if (!ogv &&
6447                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6448                     (gv = *gvp) && isGV_with_GP(gv) &&
6449                     GvCVu(gv) && GvIMPORTED_CV(gv))
6450                 {
6451                     ogv = gv;
6452                 }
6453             }
6454             if (ogv) {
6455                 orig_keyword = tmp;
6456                 tmp = 0;                /* overridden by import or by GLOBAL */
6457             }
6458             else if (gv && !gvp
6459                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6460                      && GvCVu(gv))
6461             {
6462                 tmp = 0;                /* any sub overrides "weak" keyword */
6463             }
6464             else {                      /* no override */
6465                 tmp = -tmp;
6466                 if (tmp == KEY_dump) {
6467                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6468                                    "dump() better written as CORE::dump()");
6469                 }
6470                 gv = NULL;
6471                 gvp = 0;
6472                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6473                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6474                                    "Ambiguous call resolved as CORE::%s(), "
6475                                    "qualify as such or use &",
6476                                    GvENAME(hgv));
6477             }
6478         }
6479
6480       reserved_word:
6481         switch (tmp) {
6482
6483         default:                        /* not a keyword */
6484             /* Trade off - by using this evil construction we can pull the
6485                variable gv into the block labelled keylookup. If not, then
6486                we have to give it function scope so that the goto from the
6487                earlier ':' case doesn't bypass the initialisation.  */
6488             if (0) {
6489             just_a_word_zero_gv:
6490                 gv = NULL;
6491                 gvp = NULL;
6492                 orig_keyword = 0;
6493             }
6494           just_a_word: {
6495                 SV *sv;
6496                 int pkgname = 0;
6497                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6498                 OP *rv2cv_op;
6499                 CV *cv;
6500 #ifdef PERL_MAD
6501                 SV *nextPL_nextwhite = 0;
6502 #endif
6503
6504
6505                 /* Get the rest if it looks like a package qualifier */
6506
6507                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6508                     STRLEN morelen;
6509                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6510                                   TRUE, &morelen);
6511                     if (!morelen)
6512                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6513                                 *s == '\'' ? "'" : "::");
6514                     len += morelen;
6515                     pkgname = 1;
6516                 }
6517
6518                 if (PL_expect == XOPERATOR) {
6519                     if (PL_bufptr == PL_linestart) {
6520                         CopLINE_dec(PL_curcop);
6521                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6522                         CopLINE_inc(PL_curcop);
6523                     }
6524                     else
6525                         no_op("Bareword",s);
6526                 }
6527
6528                 /* Look for a subroutine with this name in current package,
6529                    unless name is "Foo::", in which case Foo is a bareword
6530                    (and a package name). */
6531
6532                 if (len > 2 && !PL_madskills &&
6533                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6534                 {
6535                     if (ckWARN(WARN_BAREWORD)
6536                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6537                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6538                             "Bareword \"%s\" refers to nonexistent package",
6539                              PL_tokenbuf);
6540                     len -= 2;
6541                     PL_tokenbuf[len] = '\0';
6542                     gv = NULL;
6543                     gvp = 0;
6544                 }
6545                 else {
6546                     if (!gv) {
6547                         /* Mustn't actually add anything to a symbol table.
6548                            But also don't want to "initialise" any placeholder
6549                            constants that might already be there into full
6550                            blown PVGVs with attached PVCV.  */
6551                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6552                                                GV_NOADD_NOINIT, SVt_PVCV);
6553                     }
6554                     len = 0;
6555                 }
6556
6557                 /* if we saw a global override before, get the right name */
6558
6559                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6560                     len ? len : strlen(PL_tokenbuf));
6561                 if (gvp) {
6562                     SV * const tmp_sv = sv;
6563                     sv = newSVpvs("CORE::GLOBAL::");
6564                     sv_catsv(sv, tmp_sv);
6565                     SvREFCNT_dec(tmp_sv);
6566                 }
6567
6568 #ifdef PERL_MAD
6569                 if (PL_madskills && !PL_thistoken) {
6570                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6571                     PL_thistoken = newSVpvn(start,s - start);
6572                     PL_realtokenstart = s - SvPVX(PL_linestr);
6573                 }
6574 #endif
6575
6576                 /* Presume this is going to be a bareword of some sort. */
6577                 CLINE;
6578                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6579                 pl_yylval.opval->op_private = OPpCONST_BARE;
6580
6581                 /* And if "Foo::", then that's what it certainly is. */
6582                 if (len)
6583                     goto safe_bareword;
6584
6585                 {
6586                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6587                     const_op->op_private = OPpCONST_BARE;
6588                     rv2cv_op = newCVREF(0, const_op);
6589                 }
6590                 cv = rv2cv_op_cv(rv2cv_op, 0);
6591
6592                 /* See if it's the indirect object for a list operator. */
6593
6594                 if (PL_oldoldbufptr &&
6595                     PL_oldoldbufptr < PL_bufptr &&
6596                     (PL_oldoldbufptr == PL_last_lop
6597                      || PL_oldoldbufptr == PL_last_uni) &&
6598                     /* NO SKIPSPACE BEFORE HERE! */
6599                     (PL_expect == XREF ||
6600                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6601                 {
6602                     bool immediate_paren = *s == '(';
6603
6604                     /* (Now we can afford to cross potential line boundary.) */
6605                     s = SKIPSPACE2(s,nextPL_nextwhite);
6606 #ifdef PERL_MAD
6607                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6608 #endif
6609
6610                     /* Two barewords in a row may indicate method call. */
6611
6612                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6613                         (tmp = intuit_method(s, gv, cv))) {
6614                         op_free(rv2cv_op);
6615                         if (tmp == METHOD && !PL_lex_allbrackets &&
6616                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6617                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6618                         return REPORT(tmp);
6619                     }
6620
6621                     /* If not a declared subroutine, it's an indirect object. */
6622                     /* (But it's an indir obj regardless for sort.) */
6623                     /* Also, if "_" follows a filetest operator, it's a bareword */
6624
6625                     if (
6626                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6627                          (!cv &&
6628                         (PL_last_lop_op != OP_MAPSTART &&
6629                          PL_last_lop_op != OP_GREPSTART))))
6630                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6631                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6632                        )
6633                     {
6634                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6635                         goto bareword;
6636                     }
6637                 }
6638
6639                 PL_expect = XOPERATOR;
6640 #ifdef PERL_MAD
6641                 if (isSPACE(*s))
6642                     s = SKIPSPACE2(s,nextPL_nextwhite);
6643                 PL_nextwhite = nextPL_nextwhite;
6644 #else
6645                 s = skipspace(s);
6646 #endif
6647
6648                 /* Is this a word before a => operator? */
6649                 if (*s == '=' && s[1] == '>' && !pkgname) {
6650                     op_free(rv2cv_op);
6651                     CLINE;
6652                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6653                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6654                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6655                     TERM(WORD);
6656                 }
6657
6658                 /* If followed by a paren, it's certainly a subroutine. */
6659                 if (*s == '(') {
6660                     CLINE;
6661                     if (cv) {
6662                         d = s + 1;
6663                         while (SPACE_OR_TAB(*d))
6664                             d++;
6665                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6666                             s = d + 1;
6667                             goto its_constant;
6668                         }
6669                     }
6670 #ifdef PERL_MAD
6671                     if (PL_madskills) {
6672                         PL_nextwhite = PL_thiswhite;
6673                         PL_thiswhite = 0;
6674                     }
6675                     start_force(PL_curforce);
6676 #endif
6677                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6678                     PL_expect = XOPERATOR;
6679 #ifdef PERL_MAD
6680                     if (PL_madskills) {
6681                         PL_nextwhite = nextPL_nextwhite;
6682                         curmad('X', PL_thistoken);
6683                         PL_thistoken = newSVpvs("");
6684                     }
6685 #endif
6686                     op_free(rv2cv_op);
6687                     force_next(WORD);
6688                     pl_yylval.ival = 0;
6689                     TOKEN('&');
6690                 }
6691
6692                 /* If followed by var or block, call it a method (unless sub) */
6693
6694                 if ((*s == '$' || *s == '{') && !cv) {
6695                     op_free(rv2cv_op);
6696                     PL_last_lop = PL_oldbufptr;
6697                     PL_last_lop_op = OP_METHOD;
6698                     if (!PL_lex_allbrackets &&
6699                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6700                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6701                     PREBLOCK(METHOD);
6702                 }
6703
6704                 /* If followed by a bareword, see if it looks like indir obj. */
6705
6706                 if (!orig_keyword
6707                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6708                         && (tmp = intuit_method(s, gv, cv))) {
6709                     op_free(rv2cv_op);
6710                     if (tmp == METHOD && !PL_lex_allbrackets &&
6711                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6712                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6713                     return REPORT(tmp);
6714                 }
6715
6716                 /* Not a method, so call it a subroutine (if defined) */
6717
6718                 if (cv) {
6719                     if (lastchar == '-')
6720                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6721                                          "Ambiguous use of -%s resolved as -&%s()",
6722                                          PL_tokenbuf, PL_tokenbuf);
6723                     /* Check for a constant sub */
6724                     if ((sv = cv_const_sv(cv))) {
6725                   its_constant:
6726                         op_free(rv2cv_op);
6727                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6728                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6729                         pl_yylval.opval->op_private = 0;
6730                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6731                         TOKEN(WORD);
6732                     }
6733
6734                     op_free(pl_yylval.opval);
6735                     pl_yylval.opval = rv2cv_op;
6736                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6737                     PL_last_lop = PL_oldbufptr;
6738                     PL_last_lop_op = OP_ENTERSUB;
6739                     /* Is there a prototype? */
6740                     if (
6741 #ifdef PERL_MAD
6742                         cv &&
6743 #endif
6744                         SvPOK(cv))
6745                     {
6746                         STRLEN protolen;
6747                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6748                         if (!protolen)
6749                             TERM(FUNC0SUB);
6750                         while (*proto == ';')
6751                             proto++;
6752                         if (
6753                             (
6754                                 (
6755                                     *proto == '$' || *proto == '_'
6756                                  || *proto == '*' || *proto == '+'
6757                                 )
6758                              && proto[1] == '\0'
6759                             )
6760                          || (
6761                              *proto == '\\' && proto[1] && proto[2] == '\0'
6762                             )
6763                         )
6764                             OPERATOR(UNIOPSUB);
6765                         if (*proto == '\\' && proto[1] == '[') {
6766                             const char *p = proto + 2;
6767                             while(*p && *p != ']')
6768                                 ++p;
6769                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6770                         }
6771                         if (*proto == '&' && *s == '{') {
6772                             if (PL_curstash)
6773                                 sv_setpvs(PL_subname, "__ANON__");
6774                             else
6775                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6776                             if (!PL_lex_allbrackets &&
6777                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6778                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6779                             PREBLOCK(LSTOPSUB);
6780                         }
6781                     }
6782 #ifdef PERL_MAD
6783                     {
6784                         if (PL_madskills) {
6785                             PL_nextwhite = PL_thiswhite;
6786                             PL_thiswhite = 0;
6787                         }
6788                         start_force(PL_curforce);
6789                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6790                         PL_expect = XTERM;
6791                         if (PL_madskills) {
6792                             PL_nextwhite = nextPL_nextwhite;
6793                             curmad('X', PL_thistoken);
6794                             PL_thistoken = newSVpvs("");
6795                         }
6796                         force_next(WORD);
6797                         if (!PL_lex_allbrackets &&
6798                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6799                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6800                         TOKEN(NOAMP);
6801                     }
6802                 }
6803
6804                 /* Guess harder when madskills require "best effort". */
6805                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6806                     int probable_sub = 0;
6807                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6808                         probable_sub = 1;
6809                     else if (isALPHA(*s)) {
6810                         char tmpbuf[1024];
6811                         STRLEN tmplen;
6812                         d = s;
6813                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6814                         if (!keyword(tmpbuf, tmplen, 0))
6815                             probable_sub = 1;
6816                         else {
6817                             while (d < PL_bufend && isSPACE(*d))
6818                                 d++;
6819                             if (*d == '=' && d[1] == '>')
6820                                 probable_sub = 1;
6821                         }
6822                     }
6823                     if (probable_sub) {
6824                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6825                         op_free(pl_yylval.opval);
6826                         pl_yylval.opval = rv2cv_op;
6827                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6828                         PL_last_lop = PL_oldbufptr;
6829                         PL_last_lop_op = OP_ENTERSUB;
6830                         PL_nextwhite = PL_thiswhite;
6831                         PL_thiswhite = 0;
6832                         start_force(PL_curforce);
6833                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6834                         PL_expect = XTERM;
6835                         PL_nextwhite = nextPL_nextwhite;
6836                         curmad('X', PL_thistoken);
6837                         PL_thistoken = newSVpvs("");
6838                         force_next(WORD);
6839                         if (!PL_lex_allbrackets &&
6840                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6841                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6842                         TOKEN(NOAMP);
6843                     }
6844 #else
6845                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6846                     PL_expect = XTERM;
6847                     force_next(WORD);
6848                     if (!PL_lex_allbrackets &&
6849                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6850                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6851                     TOKEN(NOAMP);
6852 #endif
6853                 }
6854
6855                 /* Call it a bare word */
6856
6857                 if (PL_hints & HINT_STRICT_SUBS)
6858                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6859                 else {
6860                 bareword:
6861                     /* after "print" and similar functions (corresponding to
6862                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6863                      * a filehandle should be subject to "strict subs".
6864                      * Likewise for the optional indirect-object argument to system
6865                      * or exec, which can't be a bareword */
6866                     if ((PL_last_lop_op == OP_PRINT
6867                             || PL_last_lop_op == OP_PRTF
6868                             || PL_last_lop_op == OP_SAY
6869                             || PL_last_lop_op == OP_SYSTEM
6870                             || PL_last_lop_op == OP_EXEC)
6871                             && (PL_hints & HINT_STRICT_SUBS))
6872                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6873                     if (lastchar != '-') {
6874                         if (ckWARN(WARN_RESERVED)) {
6875                             d = PL_tokenbuf;
6876                             while (isLOWER(*d))
6877                                 d++;
6878                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6879                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6880                                        PL_tokenbuf);
6881                         }
6882                     }
6883                 }
6884                 op_free(rv2cv_op);
6885
6886             safe_bareword:
6887                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6888                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6889                                      "Operator or semicolon missing before %c%s",
6890                                      lastchar, PL_tokenbuf);
6891                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6892                                      "Ambiguous use of %c resolved as operator %c",
6893                                      lastchar, lastchar);
6894                 }
6895                 TOKEN(WORD);
6896             }
6897
6898         case KEY___FILE__:
6899             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6900                                         newSVpv(CopFILE(PL_curcop),0));
6901             TERM(THING);
6902
6903         case KEY___LINE__:
6904             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6905                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6906             TERM(THING);
6907
6908         case KEY___PACKAGE__:
6909             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6910                                         (PL_curstash
6911                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6912                                          : &PL_sv_undef));
6913             TERM(THING);
6914
6915         case KEY___DATA__:
6916         case KEY___END__: {
6917             GV *gv;
6918             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6919                 const char *pname = "main";
6920                 if (PL_tokenbuf[2] == 'D')
6921                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6922                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6923                                 SVt_PVIO);
6924                 GvMULTI_on(gv);
6925                 if (!GvIO(gv))
6926                     GvIOp(gv) = newIO();
6927                 IoIFP(GvIOp(gv)) = PL_rsfp;
6928 #if defined(HAS_FCNTL) && defined(F_SETFD)
6929                 {
6930                     const int fd = PerlIO_fileno(PL_rsfp);
6931                     fcntl(fd,F_SETFD,fd >= 3);
6932                 }
6933 #endif
6934                 /* Mark this internal pseudo-handle as clean */
6935                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6936                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6937                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6938                 else
6939                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6940 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6941                 /* if the script was opened in binmode, we need to revert
6942                  * it to text mode for compatibility; but only iff it has CRs
6943                  * XXX this is a questionable hack at best. */
6944                 if (PL_bufend-PL_bufptr > 2
6945                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6946                 {
6947                     Off_t loc = 0;
6948                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6949                         loc = PerlIO_tell(PL_rsfp);
6950                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6951                     }
6952 #ifdef NETWARE
6953                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6954 #else
6955                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6956 #endif  /* NETWARE */
6957 #ifdef PERLIO_IS_STDIO /* really? */
6958 #  if defined(__BORLANDC__)
6959                         /* XXX see note in do_binmode() */
6960                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6961 #  endif
6962 #endif
6963                         if (loc > 0)
6964                             PerlIO_seek(PL_rsfp, loc, 0);
6965                     }
6966                 }
6967 #endif
6968 #ifdef PERLIO_LAYERS
6969                 if (!IN_BYTES) {
6970                     if (UTF)
6971                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6972                     else if (PL_encoding) {
6973                         SV *name;
6974                         dSP;
6975                         ENTER;
6976                         SAVETMPS;
6977                         PUSHMARK(sp);
6978                         EXTEND(SP, 1);
6979                         XPUSHs(PL_encoding);
6980                         PUTBACK;
6981                         call_method("name", G_SCALAR);
6982                         SPAGAIN;
6983                         name = POPs;
6984                         PUTBACK;
6985                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6986                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6987                                                       SVfARG(name)));
6988                         FREETMPS;
6989                         LEAVE;
6990                     }
6991                 }
6992 #endif
6993 #ifdef PERL_MAD
6994                 if (PL_madskills) {
6995                     if (PL_realtokenstart >= 0) {
6996                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6997                         if (!PL_endwhite)
6998                             PL_endwhite = newSVpvs("");
6999                         sv_catsv(PL_endwhite, PL_thiswhite);
7000                         PL_thiswhite = 0;
7001                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7002                         PL_realtokenstart = -1;
7003                     }
7004                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7005                            != NULL) ;
7006                 }
7007 #endif
7008                 PL_rsfp = NULL;
7009             }
7010             goto fake_eof;
7011         }
7012
7013         case KEY_AUTOLOAD:
7014         case KEY_DESTROY:
7015         case KEY_BEGIN:
7016         case KEY_UNITCHECK:
7017         case KEY_CHECK:
7018         case KEY_INIT:
7019         case KEY_END:
7020             if (PL_expect == XSTATE) {
7021                 s = PL_bufptr;
7022                 goto really_sub;
7023             }
7024             goto just_a_word;
7025
7026         case KEY_CORE:
7027             if (*s == ':' && s[1] == ':') {
7028                 s += 2;
7029                 d = s;
7030                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7031                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
7032                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7033                 if (tmp < 0)
7034                     tmp = -tmp;
7035                 else if (tmp == KEY_require || tmp == KEY_do)
7036                     /* that's a way to remember we saw "CORE::" */
7037                     orig_keyword = tmp;
7038                 goto reserved_word;
7039             }
7040             goto just_a_word;
7041
7042         case KEY_abs:
7043             UNI(OP_ABS);
7044
7045         case KEY_alarm:
7046             UNI(OP_ALARM);
7047
7048         case KEY_accept:
7049             LOP(OP_ACCEPT,XTERM);
7050
7051         case KEY_and:
7052             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7053                 return REPORT(0);
7054             OPERATOR(ANDOP);
7055
7056         case KEY_atan2:
7057             LOP(OP_ATAN2,XTERM);
7058
7059         case KEY_bind:
7060             LOP(OP_BIND,XTERM);
7061
7062         case KEY_binmode:
7063             LOP(OP_BINMODE,XTERM);
7064
7065         case KEY_bless:
7066             LOP(OP_BLESS,XTERM);
7067
7068         case KEY_break:
7069             FUN0(OP_BREAK);
7070
7071         case KEY_chop:
7072             UNI(OP_CHOP);
7073
7074         case KEY_continue:
7075             /* When 'use switch' is in effect, continue has a dual
7076                life as a control operator. */
7077             {
7078                 if (!FEATURE_IS_ENABLED("switch"))
7079                     PREBLOCK(CONTINUE);
7080                 else {
7081                     /* We have to disambiguate the two senses of
7082                       "continue". If the next token is a '{' then
7083                       treat it as the start of a continue block;
7084                       otherwise treat it as a control operator.
7085                      */
7086                     s = skipspace(s);
7087                     if (*s == '{')
7088             PREBLOCK(CONTINUE);
7089                     else
7090                         FUN0(OP_CONTINUE);
7091                 }
7092             }
7093
7094         case KEY_chdir:
7095             /* may use HOME */
7096             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7097             UNI(OP_CHDIR);
7098
7099         case KEY_close:
7100             UNI(OP_CLOSE);
7101
7102         case KEY_closedir:
7103             UNI(OP_CLOSEDIR);
7104
7105         case KEY_cmp:
7106             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7107                 return REPORT(0);
7108             Eop(OP_SCMP);
7109
7110         case KEY_caller:
7111             UNI(OP_CALLER);
7112
7113         case KEY_crypt:
7114 #ifdef FCRYPT
7115             if (!PL_cryptseen) {
7116                 PL_cryptseen = TRUE;
7117                 init_des();
7118             }
7119 #endif
7120             LOP(OP_CRYPT,XTERM);
7121
7122         case KEY_chmod:
7123             LOP(OP_CHMOD,XTERM);
7124
7125         case KEY_chown:
7126             LOP(OP_CHOWN,XTERM);
7127
7128         case KEY_connect:
7129             LOP(OP_CONNECT,XTERM);
7130
7131         case KEY_chr:
7132             UNI(OP_CHR);
7133
7134         case KEY_cos:
7135             UNI(OP_COS);
7136
7137         case KEY_chroot:
7138             UNI(OP_CHROOT);
7139
7140         case KEY_default:
7141             PREBLOCK(DEFAULT);
7142
7143         case KEY_do:
7144             s = SKIPSPACE1(s);
7145             if (*s == '{')
7146                 PRETERMBLOCK(DO);
7147             if (*s != '\'')
7148                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7149             if (orig_keyword == KEY_do) {
7150                 orig_keyword = 0;
7151                 pl_yylval.ival = 1;
7152             }
7153             else
7154                 pl_yylval.ival = 0;
7155             OPERATOR(DO);
7156
7157         case KEY_die:
7158             PL_hints |= HINT_BLOCK_SCOPE;
7159             LOP(OP_DIE,XTERM);
7160
7161         case KEY_defined:
7162             UNI(OP_DEFINED);
7163
7164         case KEY_delete:
7165             UNI(OP_DELETE);
7166
7167         case KEY_dbmopen:
7168             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7169                               STR_WITH_LEN("NDBM_File::"),
7170                               STR_WITH_LEN("DB_File::"),
7171                               STR_WITH_LEN("GDBM_File::"),
7172                               STR_WITH_LEN("SDBM_File::"),
7173                               STR_WITH_LEN("ODBM_File::"),
7174                               NULL);
7175             LOP(OP_DBMOPEN,XTERM);
7176
7177         case KEY_dbmclose:
7178             UNI(OP_DBMCLOSE);
7179
7180         case KEY_dump:
7181             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7182             LOOPX(OP_DUMP);
7183
7184         case KEY_else:
7185             PREBLOCK(ELSE);
7186
7187         case KEY_elsif:
7188             pl_yylval.ival = CopLINE(PL_curcop);
7189             OPERATOR(ELSIF);
7190
7191         case KEY_eq:
7192             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7193                 return REPORT(0);
7194             Eop(OP_SEQ);
7195
7196         case KEY_exists:
7197             UNI(OP_EXISTS);
7198
7199         case KEY_exit:
7200             if (PL_madskills)
7201                 UNI(OP_INT);
7202             UNI(OP_EXIT);
7203
7204         case KEY_eval:
7205             s = SKIPSPACE1(s);
7206             if (*s == '{') { /* block eval */
7207                 PL_expect = XTERMBLOCK;
7208                 UNIBRACK(OP_ENTERTRY);
7209             }
7210             else { /* string eval */
7211                 PL_expect = XTERM;
7212                 UNIBRACK(OP_ENTEREVAL);
7213             }
7214
7215         case KEY_eof:
7216             UNI(OP_EOF);
7217
7218         case KEY_exp:
7219             UNI(OP_EXP);
7220
7221         case KEY_each:
7222             UNI(OP_EACH);
7223
7224         case KEY_exec:
7225             LOP(OP_EXEC,XREF);
7226
7227         case KEY_endhostent:
7228             FUN0(OP_EHOSTENT);
7229
7230         case KEY_endnetent:
7231             FUN0(OP_ENETENT);
7232
7233         case KEY_endservent:
7234             FUN0(OP_ESERVENT);
7235
7236         case KEY_endprotoent:
7237             FUN0(OP_EPROTOENT);
7238
7239         case KEY_endpwent:
7240             FUN0(OP_EPWENT);
7241
7242         case KEY_endgrent:
7243             FUN0(OP_EGRENT);
7244
7245         case KEY_for:
7246         case KEY_foreach:
7247             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7248                 return REPORT(0);
7249             pl_yylval.ival = CopLINE(PL_curcop);
7250             s = SKIPSPACE1(s);
7251             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7252                 char *p = s;
7253 #ifdef PERL_MAD
7254                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7255 #endif
7256
7257                 if ((PL_bufend - p) >= 3 &&
7258                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7259                     p += 2;
7260                 else if ((PL_bufend - p) >= 4 &&
7261                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7262                     p += 3;
7263                 p = PEEKSPACE(p);
7264                 if (isIDFIRST_lazy_if(p,UTF)) {
7265                     p = scan_ident(p, PL_bufend,
7266                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7267                     p = PEEKSPACE(p);
7268                 }
7269                 if (*p != '$')
7270                     Perl_croak(aTHX_ "Missing $ on loop variable");
7271 #ifdef PERL_MAD
7272                 s = SvPVX(PL_linestr) + soff;
7273 #endif
7274             }
7275             OPERATOR(FOR);
7276
7277         case KEY_formline:
7278             LOP(OP_FORMLINE,XTERM);
7279
7280         case KEY_fork:
7281             FUN0(OP_FORK);
7282
7283         case KEY_fcntl:
7284             LOP(OP_FCNTL,XTERM);
7285
7286         case KEY_fileno:
7287             UNI(OP_FILENO);
7288
7289         case KEY_flock:
7290             LOP(OP_FLOCK,XTERM);
7291
7292         case KEY_gt:
7293             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7294                 return REPORT(0);
7295             Rop(OP_SGT);
7296
7297         case KEY_ge:
7298             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7299                 return REPORT(0);
7300             Rop(OP_SGE);
7301
7302         case KEY_grep:
7303             LOP(OP_GREPSTART, XREF);
7304
7305         case KEY_goto:
7306             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7307             LOOPX(OP_GOTO);
7308
7309         case KEY_gmtime:
7310             UNI(OP_GMTIME);
7311
7312         case KEY_getc:
7313             UNIDOR(OP_GETC);
7314
7315         case KEY_getppid:
7316             FUN0(OP_GETPPID);
7317
7318         case KEY_getpgrp:
7319             UNI(OP_GETPGRP);
7320
7321         case KEY_getpriority:
7322             LOP(OP_GETPRIORITY,XTERM);
7323
7324         case KEY_getprotobyname:
7325             UNI(OP_GPBYNAME);
7326
7327         case KEY_getprotobynumber:
7328             LOP(OP_GPBYNUMBER,XTERM);
7329
7330         case KEY_getprotoent:
7331             FUN0(OP_GPROTOENT);
7332
7333         case KEY_getpwent:
7334             FUN0(OP_GPWENT);
7335
7336         case KEY_getpwnam:
7337             UNI(OP_GPWNAM);
7338
7339         case KEY_getpwuid:
7340             UNI(OP_GPWUID);
7341
7342         case KEY_getpeername:
7343             UNI(OP_GETPEERNAME);
7344
7345         case KEY_gethostbyname:
7346             UNI(OP_GHBYNAME);
7347
7348         case KEY_gethostbyaddr:
7349             LOP(OP_GHBYADDR,XTERM);
7350
7351         case KEY_gethostent:
7352             FUN0(OP_GHOSTENT);
7353
7354         case KEY_getnetbyname:
7355             UNI(OP_GNBYNAME);
7356
7357         case KEY_getnetbyaddr:
7358             LOP(OP_GNBYADDR,XTERM);
7359
7360         case KEY_getnetent:
7361             FUN0(OP_GNETENT);
7362
7363         case KEY_getservbyname:
7364             LOP(OP_GSBYNAME,XTERM);
7365
7366         case KEY_getservbyport:
7367             LOP(OP_GSBYPORT,XTERM);
7368
7369         case KEY_getservent:
7370             FUN0(OP_GSERVENT);
7371
7372         case KEY_getsockname:
7373             UNI(OP_GETSOCKNAME);
7374
7375         case KEY_getsockopt:
7376             LOP(OP_GSOCKOPT,XTERM);
7377
7378         case KEY_getgrent:
7379             FUN0(OP_GGRENT);
7380
7381         case KEY_getgrnam:
7382             UNI(OP_GGRNAM);
7383
7384         case KEY_getgrgid:
7385             UNI(OP_GGRGID);
7386
7387         case KEY_getlogin:
7388             FUN0(OP_GETLOGIN);
7389
7390         case KEY_given:
7391             pl_yylval.ival = CopLINE(PL_curcop);
7392             OPERATOR(GIVEN);
7393
7394         case KEY_glob:
7395             LOP(OP_GLOB,XTERM);
7396
7397         case KEY_hex:
7398             UNI(OP_HEX);
7399
7400         case KEY_if:
7401             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7402                 return REPORT(0);
7403             pl_yylval.ival = CopLINE(PL_curcop);
7404             OPERATOR(IF);
7405
7406         case KEY_index:
7407             LOP(OP_INDEX,XTERM);
7408
7409         case KEY_int:
7410             UNI(OP_INT);
7411
7412         case KEY_ioctl:
7413             LOP(OP_IOCTL,XTERM);
7414
7415         case KEY_join:
7416             LOP(OP_JOIN,XTERM);
7417
7418         case KEY_keys:
7419             UNI(OP_KEYS);
7420
7421         case KEY_kill:
7422             LOP(OP_KILL,XTERM);
7423
7424         case KEY_last:
7425             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7426             LOOPX(OP_LAST);
7427
7428         case KEY_lc:
7429             UNI(OP_LC);
7430
7431         case KEY_lcfirst:
7432             UNI(OP_LCFIRST);
7433
7434         case KEY_local:
7435             pl_yylval.ival = 0;
7436             OPERATOR(LOCAL);
7437
7438         case KEY_length:
7439             UNI(OP_LENGTH);
7440
7441         case KEY_lt:
7442             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7443                 return REPORT(0);
7444             Rop(OP_SLT);
7445
7446         case KEY_le:
7447             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7448                 return REPORT(0);
7449             Rop(OP_SLE);
7450
7451         case KEY_localtime:
7452             UNI(OP_LOCALTIME);
7453
7454         case KEY_log:
7455             UNI(OP_LOG);
7456
7457         case KEY_link:
7458             LOP(OP_LINK,XTERM);
7459
7460         case KEY_listen:
7461             LOP(OP_LISTEN,XTERM);
7462
7463         case KEY_lock:
7464             UNI(OP_LOCK);
7465
7466         case KEY_lstat:
7467             UNI(OP_LSTAT);
7468
7469         case KEY_m:
7470             s = scan_pat(s,OP_MATCH);
7471             TERM(sublex_start());
7472
7473         case KEY_map:
7474             LOP(OP_MAPSTART, XREF);
7475
7476         case KEY_mkdir:
7477             LOP(OP_MKDIR,XTERM);
7478
7479         case KEY_msgctl:
7480             LOP(OP_MSGCTL,XTERM);
7481
7482         case KEY_msgget:
7483             LOP(OP_MSGGET,XTERM);
7484
7485         case KEY_msgrcv:
7486             LOP(OP_MSGRCV,XTERM);
7487
7488         case KEY_msgsnd:
7489             LOP(OP_MSGSND,XTERM);
7490
7491         case KEY_our:
7492         case KEY_my:
7493         case KEY_state:
7494             PL_in_my = (U16)tmp;
7495             s = SKIPSPACE1(s);
7496             if (isIDFIRST_lazy_if(s,UTF)) {
7497 #ifdef PERL_MAD
7498                 char* start = s;
7499 #endif
7500                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7501                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7502                     goto really_sub;
7503                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7504                 if (!PL_in_my_stash) {
7505                     char tmpbuf[1024];
7506                     PL_bufptr = s;
7507                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7508                     yyerror(tmpbuf);
7509                 }
7510 #ifdef PERL_MAD
7511                 if (PL_madskills) {     /* just add type to declarator token */
7512                     sv_catsv(PL_thistoken, PL_nextwhite);
7513                     PL_nextwhite = 0;
7514                     sv_catpvn(PL_thistoken, start, s - start);
7515                 }
7516 #endif
7517             }
7518             pl_yylval.ival = 1;
7519             OPERATOR(MY);
7520
7521         case KEY_next:
7522             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7523             LOOPX(OP_NEXT);
7524
7525         case KEY_ne:
7526             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7527                 return REPORT(0);
7528             Eop(OP_SNE);
7529
7530         case KEY_no:
7531             s = tokenize_use(0, s);
7532             OPERATOR(USE);
7533
7534         case KEY_not:
7535             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7536                 FUN1(OP_NOT);
7537             else {
7538                 if (!PL_lex_allbrackets &&
7539                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7540                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7541                 OPERATOR(NOTOP);
7542             }
7543
7544         case KEY_open:
7545             s = SKIPSPACE1(s);
7546             if (isIDFIRST_lazy_if(s,UTF)) {
7547                 const char *t;
7548                 for (d = s; isALNUM_lazy_if(d,UTF);)
7549                     d++;
7550                 for (t=d; isSPACE(*t);)
7551                     t++;
7552                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7553                     /* [perl #16184] */
7554                     && !(t[0] == '=' && t[1] == '>')
7555                 ) {
7556                     int parms_len = (int)(d-s);
7557                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7558                            "Precedence problem: open %.*s should be open(%.*s)",
7559                             parms_len, s, parms_len, s);
7560                 }
7561             }
7562             LOP(OP_OPEN,XTERM);
7563
7564         case KEY_or:
7565             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7566                 return REPORT(0);
7567             pl_yylval.ival = OP_OR;
7568             OPERATOR(OROP);
7569
7570         case KEY_ord:
7571             UNI(OP_ORD);
7572
7573         case KEY_oct:
7574             UNI(OP_OCT);
7575
7576         case KEY_opendir:
7577             LOP(OP_OPEN_DIR,XTERM);
7578
7579         case KEY_print:
7580             checkcomma(s,PL_tokenbuf,"filehandle");
7581             LOP(OP_PRINT,XREF);
7582
7583         case KEY_printf:
7584             checkcomma(s,PL_tokenbuf,"filehandle");
7585             LOP(OP_PRTF,XREF);
7586
7587         case KEY_prototype:
7588             UNI(OP_PROTOTYPE);
7589
7590         case KEY_push:
7591             LOP(OP_PUSH,XTERM);
7592
7593         case KEY_pop:
7594             UNIDOR(OP_POP);
7595
7596         case KEY_pos:
7597             UNIDOR(OP_POS);
7598
7599         case KEY_pack:
7600             LOP(OP_PACK,XTERM);
7601
7602         case KEY_package:
7603             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7604             s = SKIPSPACE1(s);
7605             s = force_strict_version(s);
7606             PL_lex_expect = XBLOCK;
7607             OPERATOR(PACKAGE);
7608
7609         case KEY_pipe:
7610             LOP(OP_PIPE_OP,XTERM);
7611
7612         case KEY_q:
7613             s = scan_str(s,!!PL_madskills,FALSE);
7614             if (!s)
7615                 missingterm(NULL);
7616             pl_yylval.ival = OP_CONST;
7617             TERM(sublex_start());
7618
7619         case KEY_quotemeta:
7620             UNI(OP_QUOTEMETA);
7621
7622         case KEY_qw: {
7623             OP *words = NULL;
7624             s = scan_str(s,!!PL_madskills,FALSE);
7625             if (!s)
7626                 missingterm(NULL);
7627             PL_expect = XOPERATOR;
7628             if (SvCUR(PL_lex_stuff)) {
7629                 int warned = 0;
7630                 d = SvPV_force(PL_lex_stuff, len);
7631                 while (len) {
7632                     for (; isSPACE(*d) && len; --len, ++d)
7633                         /**/;
7634                     if (len) {
7635                         SV *sv;
7636                         const char *b = d;
7637                         if (!warned && ckWARN(WARN_QW)) {
7638                             for (; !isSPACE(*d) && len; --len, ++d) {
7639                                 if (*d == ',') {
7640                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7641                                         "Possible attempt to separate words with commas");
7642                                     ++warned;
7643                                 }
7644                                 else if (*d == '#') {
7645                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7646                                         "Possible attempt to put comments in qw() list");
7647                                     ++warned;
7648                                 }
7649                             }
7650                         }
7651                         else {
7652                             for (; !isSPACE(*d) && len; --len, ++d)
7653                                 /**/;
7654                         }
7655                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7656                         words = op_append_elem(OP_LIST, words,
7657                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7658                     }
7659                 }
7660             }
7661             if (!words)
7662                 words = newNULLLIST();
7663             if (PL_lex_stuff) {
7664                 SvREFCNT_dec(PL_lex_stuff);
7665                 PL_lex_stuff = NULL;
7666             }
7667             PL_expect = XOPERATOR;
7668             pl_yylval.opval = sawparens(words);
7669             TOKEN(QWLIST);
7670         }
7671
7672         case KEY_qq:
7673             s = scan_str(s,!!PL_madskills,FALSE);
7674             if (!s)
7675                 missingterm(NULL);
7676             pl_yylval.ival = OP_STRINGIFY;
7677             if (SvIVX(PL_lex_stuff) == '\'')
7678                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7679             TERM(sublex_start());
7680
7681         case KEY_qr:
7682             s = scan_pat(s,OP_QR);
7683             TERM(sublex_start());
7684
7685         case KEY_qx:
7686             s = scan_str(s,!!PL_madskills,FALSE);
7687             if (!s)
7688                 missingterm(NULL);
7689             readpipe_override();
7690             TERM(sublex_start());
7691
7692         case KEY_return:
7693             OLDLOP(OP_RETURN);
7694
7695         case KEY_require:
7696             s = SKIPSPACE1(s);
7697             if (isDIGIT(*s)) {
7698                 s = force_version(s, FALSE);
7699             }
7700             else if (*s != 'v' || !isDIGIT(s[1])
7701                     || (s = force_version(s, TRUE), *s == 'v'))
7702             {
7703                 *PL_tokenbuf = '\0';
7704                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7705                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7706                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7707                 else if (*s == '<')
7708                     yyerror("<> should be quotes");
7709             }
7710             if (orig_keyword == KEY_require) {
7711                 orig_keyword = 0;
7712                 pl_yylval.ival = 1;
7713             }
7714             else
7715                 pl_yylval.ival = 0;
7716             PL_expect = XTERM;
7717             PL_bufptr = s;
7718             PL_last_uni = PL_oldbufptr;
7719             PL_last_lop_op = OP_REQUIRE;
7720             s = skipspace(s);
7721             return REPORT( (int)REQUIRE );
7722
7723         case KEY_reset:
7724             UNI(OP_RESET);
7725
7726         case KEY_redo:
7727             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7728             LOOPX(OP_REDO);
7729
7730         case KEY_rename:
7731             LOP(OP_RENAME,XTERM);
7732
7733         case KEY_rand:
7734             UNI(OP_RAND);
7735
7736         case KEY_rmdir:
7737             UNI(OP_RMDIR);
7738
7739         case KEY_rindex:
7740             LOP(OP_RINDEX,XTERM);
7741
7742         case KEY_read:
7743             LOP(OP_READ,XTERM);
7744
7745         case KEY_readdir:
7746             UNI(OP_READDIR);
7747
7748         case KEY_readline:
7749             UNIDOR(OP_READLINE);
7750
7751         case KEY_readpipe:
7752             UNIDOR(OP_BACKTICK);
7753
7754         case KEY_rewinddir:
7755             UNI(OP_REWINDDIR);
7756
7757         case KEY_recv:
7758             LOP(OP_RECV,XTERM);
7759
7760         case KEY_reverse:
7761             LOP(OP_REVERSE,XTERM);
7762
7763         case KEY_readlink:
7764             UNIDOR(OP_READLINK);
7765
7766         case KEY_ref:
7767             UNI(OP_REF);
7768
7769         case KEY_s:
7770             s = scan_subst(s);
7771             if (pl_yylval.opval)
7772                 TERM(sublex_start());
7773             else
7774                 TOKEN(1);       /* force error */
7775
7776         case KEY_say:
7777             checkcomma(s,PL_tokenbuf,"filehandle");
7778             LOP(OP_SAY,XREF);
7779
7780         case KEY_chomp:
7781             UNI(OP_CHOMP);
7782
7783         case KEY_scalar:
7784             UNI(OP_SCALAR);
7785
7786         case KEY_select:
7787             LOP(OP_SELECT,XTERM);
7788
7789         case KEY_seek:
7790             LOP(OP_SEEK,XTERM);
7791
7792         case KEY_semctl:
7793             LOP(OP_SEMCTL,XTERM);
7794
7795         case KEY_semget:
7796             LOP(OP_SEMGET,XTERM);
7797
7798         case KEY_semop:
7799             LOP(OP_SEMOP,XTERM);
7800
7801         case KEY_send:
7802             LOP(OP_SEND,XTERM);
7803
7804         case KEY_setpgrp:
7805             LOP(OP_SETPGRP,XTERM);
7806
7807         case KEY_setpriority:
7808             LOP(OP_SETPRIORITY,XTERM);
7809
7810         case KEY_sethostent:
7811             UNI(OP_SHOSTENT);
7812
7813         case KEY_setnetent:
7814             UNI(OP_SNETENT);
7815
7816         case KEY_setservent:
7817             UNI(OP_SSERVENT);
7818
7819         case KEY_setprotoent:
7820             UNI(OP_SPROTOENT);
7821
7822         case KEY_setpwent:
7823             FUN0(OP_SPWENT);
7824
7825         case KEY_setgrent:
7826             FUN0(OP_SGRENT);
7827
7828         case KEY_seekdir:
7829             LOP(OP_SEEKDIR,XTERM);
7830
7831         case KEY_setsockopt:
7832             LOP(OP_SSOCKOPT,XTERM);
7833
7834         case KEY_shift:
7835             UNIDOR(OP_SHIFT);
7836
7837         case KEY_shmctl:
7838             LOP(OP_SHMCTL,XTERM);
7839
7840         case KEY_shmget:
7841             LOP(OP_SHMGET,XTERM);
7842
7843         case KEY_shmread:
7844             LOP(OP_SHMREAD,XTERM);
7845
7846         case KEY_shmwrite:
7847             LOP(OP_SHMWRITE,XTERM);
7848
7849         case KEY_shutdown:
7850             LOP(OP_SHUTDOWN,XTERM);
7851
7852         case KEY_sin:
7853             UNI(OP_SIN);
7854
7855         case KEY_sleep:
7856             UNI(OP_SLEEP);
7857
7858         case KEY_socket:
7859             LOP(OP_SOCKET,XTERM);
7860
7861         case KEY_socketpair:
7862             LOP(OP_SOCKPAIR,XTERM);
7863
7864         case KEY_sort:
7865             checkcomma(s,PL_tokenbuf,"subroutine name");
7866             s = SKIPSPACE1(s);
7867             if (*s == ';' || *s == ')')         /* probably a close */
7868                 Perl_croak(aTHX_ "sort is now a reserved word");
7869             PL_expect = XTERM;
7870             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7871             LOP(OP_SORT,XREF);
7872
7873         case KEY_split:
7874             LOP(OP_SPLIT,XTERM);
7875
7876         case KEY_sprintf:
7877             LOP(OP_SPRINTF,XTERM);
7878
7879         case KEY_splice:
7880             LOP(OP_SPLICE,XTERM);
7881
7882         case KEY_sqrt:
7883             UNI(OP_SQRT);
7884
7885         case KEY_srand:
7886             UNI(OP_SRAND);
7887
7888         case KEY_stat:
7889             UNI(OP_STAT);
7890
7891         case KEY_study:
7892             UNI(OP_STUDY);
7893
7894         case KEY_substr:
7895             LOP(OP_SUBSTR,XTERM);
7896
7897         case KEY_format:
7898         case KEY_sub:
7899           really_sub:
7900             {
7901                 char tmpbuf[sizeof PL_tokenbuf];
7902                 SSize_t tboffset = 0;
7903                 expectation attrful;
7904                 bool have_name, have_proto;
7905                 const int key = tmp;
7906
7907 #ifdef PERL_MAD
7908                 SV *tmpwhite = 0;
7909
7910                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7911                 SV *subtoken = newSVpvn(tstart, s - tstart);
7912                 PL_thistoken = 0;
7913
7914                 d = s;
7915                 s = SKIPSPACE2(s,tmpwhite);
7916 #else
7917                 s = skipspace(s);
7918 #endif
7919
7920                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7921                     (*s == ':' && s[1] == ':'))
7922                 {
7923 #ifdef PERL_MAD
7924                     SV *nametoke = NULL;
7925 #endif
7926
7927                     PL_expect = XBLOCK;
7928                     attrful = XATTRBLOCK;
7929                     /* remember buffer pos'n for later force_word */
7930                     tboffset = s - PL_oldbufptr;
7931                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7932 #ifdef PERL_MAD
7933                     if (PL_madskills)
7934                         nametoke = newSVpvn(s, d - s);
7935 #endif
7936                     if (memchr(tmpbuf, ':', len))
7937                         sv_setpvn(PL_subname, tmpbuf, len);
7938                     else {
7939                         sv_setsv(PL_subname,PL_curstname);
7940                         sv_catpvs(PL_subname,"::");
7941                         sv_catpvn(PL_subname,tmpbuf,len);
7942                     }
7943                     have_name = TRUE;
7944
7945 #ifdef PERL_MAD
7946
7947                     start_force(0);
7948                     CURMAD('X', nametoke);
7949                     CURMAD('_', tmpwhite);
7950                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7951                                       FALSE, TRUE, TRUE);
7952
7953                     s = SKIPSPACE2(d,tmpwhite);
7954 #else
7955                     s = skipspace(d);
7956 #endif
7957                 }
7958                 else {
7959                     if (key == KEY_my)
7960                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7961                     PL_expect = XTERMBLOCK;
7962                     attrful = XATTRTERM;
7963                     sv_setpvs(PL_subname,"?");
7964                     have_name = FALSE;
7965                 }
7966
7967                 if (key == KEY_format) {
7968                     if (*s == '=')
7969                         PL_lex_formbrack = PL_lex_brackets + 1;
7970 #ifdef PERL_MAD
7971                     PL_thistoken = subtoken;
7972                     s = d;
7973 #else
7974                     if (have_name)
7975                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7976                                           FALSE, TRUE, TRUE);
7977 #endif
7978                     OPERATOR(FORMAT);
7979                 }
7980
7981                 /* Look for a prototype */
7982                 if (*s == '(') {
7983                     char *p;
7984                     bool bad_proto = FALSE;
7985                     bool in_brackets = FALSE;
7986                     char greedy_proto = ' ';
7987                     bool proto_after_greedy_proto = FALSE;
7988                     bool must_be_last = FALSE;
7989                     bool underscore = FALSE;
7990                     bool seen_underscore = FALSE;
7991                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7992
7993                     s = scan_str(s,!!PL_madskills,FALSE);
7994                     if (!s)
7995                         Perl_croak(aTHX_ "Prototype not terminated");
7996                     /* strip spaces and check for bad characters */
7997                     d = SvPVX(PL_lex_stuff);
7998                     tmp = 0;
7999                     for (p = d; *p; ++p) {
8000                         if (!isSPACE(*p)) {
8001                             d[tmp++] = *p;
8002
8003                             if (warnillegalproto) {
8004                                 if (must_be_last)
8005                                     proto_after_greedy_proto = TRUE;
8006                                 if (!strchr("$@%*;[]&\\_+", *p)) {
8007                                     bad_proto = TRUE;
8008                                 }
8009                                 else {
8010                                     if ( underscore ) {
8011                                         if ( *p != ';' )
8012                                             bad_proto = TRUE;
8013                                         underscore = FALSE;
8014                                     }
8015                                     if ( *p == '[' ) {
8016                                         in_brackets = TRUE;
8017                                     }
8018                                     else if ( *p == ']' ) {
8019                                         in_brackets = FALSE;
8020                                     }
8021                                     else if ( (*p == '@' || *p == '%') &&
8022                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8023                                          !in_brackets ) {
8024                                         must_be_last = TRUE;
8025                                         greedy_proto = *p;
8026                                     }
8027                                     else if ( *p == '_' ) {
8028                                         underscore = seen_underscore = TRUE;
8029                                     }
8030                                 }
8031                             }
8032                         }
8033                     }
8034                     d[tmp] = '\0';
8035                     if (proto_after_greedy_proto)
8036                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8037                                     "Prototype after '%c' for %"SVf" : %s",
8038                                     greedy_proto, SVfARG(PL_subname), d);
8039                     if (bad_proto)
8040                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8041                                     "Illegal character %sin prototype for %"SVf" : %s",
8042                                     seen_underscore ? "after '_' " : "",
8043                                     SVfARG(PL_subname), d);
8044                     SvCUR_set(PL_lex_stuff, tmp);
8045                     have_proto = TRUE;
8046
8047 #ifdef PERL_MAD
8048                     start_force(0);
8049                     CURMAD('q', PL_thisopen);
8050                     CURMAD('_', tmpwhite);
8051                     CURMAD('=', PL_thisstuff);
8052                     CURMAD('Q', PL_thisclose);
8053                     NEXTVAL_NEXTTOKE.opval =
8054                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8055                     PL_lex_stuff = NULL;
8056                     force_next(THING);
8057
8058                     s = SKIPSPACE2(s,tmpwhite);
8059 #else
8060                     s = skipspace(s);
8061 #endif
8062                 }
8063                 else
8064                     have_proto = FALSE;
8065
8066                 if (*s == ':' && s[1] != ':')
8067                     PL_expect = attrful;
8068                 else if (*s != '{' && key == KEY_sub) {
8069                     if (!have_name)
8070                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8071                     else if (*s != ';' && *s != '}')
8072                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8073                 }
8074
8075 #ifdef PERL_MAD
8076                 start_force(0);
8077                 if (tmpwhite) {
8078                     if (PL_madskills)
8079                         curmad('^', newSVpvs(""));
8080                     CURMAD('_', tmpwhite);
8081                 }
8082                 force_next(0);
8083
8084                 PL_thistoken = subtoken;
8085 #else
8086                 if (have_proto) {
8087                     NEXTVAL_NEXTTOKE.opval =
8088                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8089                     PL_lex_stuff = NULL;
8090                     force_next(THING);
8091                 }
8092 #endif
8093                 if (!have_name) {
8094                     if (PL_curstash)
8095                         sv_setpvs(PL_subname, "__ANON__");
8096                     else
8097                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8098                     TOKEN(ANONSUB);
8099                 }
8100 #ifndef PERL_MAD
8101                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8102                                   FALSE, TRUE, TRUE);
8103 #endif
8104                 if (key == KEY_my)
8105                     TOKEN(MYSUB);
8106                 TOKEN(SUB);
8107             }
8108
8109         case KEY_system:
8110             LOP(OP_SYSTEM,XREF);
8111
8112         case KEY_symlink:
8113             LOP(OP_SYMLINK,XTERM);
8114
8115         case KEY_syscall:
8116             LOP(OP_SYSCALL,XTERM);
8117
8118         case KEY_sysopen:
8119             LOP(OP_SYSOPEN,XTERM);
8120
8121         case KEY_sysseek:
8122             LOP(OP_SYSSEEK,XTERM);
8123
8124         case KEY_sysread:
8125             LOP(OP_SYSREAD,XTERM);
8126
8127         case KEY_syswrite:
8128             LOP(OP_SYSWRITE,XTERM);
8129
8130         case KEY_tr:
8131             s = scan_trans(s);
8132             TERM(sublex_start());
8133
8134         case KEY_tell:
8135             UNI(OP_TELL);
8136
8137         case KEY_telldir:
8138             UNI(OP_TELLDIR);
8139
8140         case KEY_tie:
8141             LOP(OP_TIE,XTERM);
8142
8143         case KEY_tied:
8144             UNI(OP_TIED);
8145
8146         case KEY_time:
8147             FUN0(OP_TIME);
8148
8149         case KEY_times:
8150             FUN0(OP_TMS);
8151
8152         case KEY_truncate:
8153             LOP(OP_TRUNCATE,XTERM);
8154
8155         case KEY_uc:
8156             UNI(OP_UC);
8157
8158         case KEY_ucfirst:
8159             UNI(OP_UCFIRST);
8160
8161         case KEY_untie:
8162             UNI(OP_UNTIE);
8163
8164         case KEY_until:
8165             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8166                 return REPORT(0);
8167             pl_yylval.ival = CopLINE(PL_curcop);
8168             OPERATOR(UNTIL);
8169
8170         case KEY_unless:
8171             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8172                 return REPORT(0);
8173             pl_yylval.ival = CopLINE(PL_curcop);
8174             OPERATOR(UNLESS);
8175
8176         case KEY_unlink:
8177             LOP(OP_UNLINK,XTERM);
8178
8179         case KEY_undef:
8180             UNIDOR(OP_UNDEF);
8181
8182         case KEY_unpack:
8183             LOP(OP_UNPACK,XTERM);
8184
8185         case KEY_utime:
8186             LOP(OP_UTIME,XTERM);
8187
8188         case KEY_umask:
8189             UNIDOR(OP_UMASK);
8190
8191         case KEY_unshift:
8192             LOP(OP_UNSHIFT,XTERM);
8193
8194         case KEY_use:
8195             s = tokenize_use(1, s);
8196             OPERATOR(USE);
8197
8198         case KEY_values:
8199             UNI(OP_VALUES);
8200
8201         case KEY_vec:
8202             LOP(OP_VEC,XTERM);
8203
8204         case KEY_when:
8205             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8206                 return REPORT(0);
8207             pl_yylval.ival = CopLINE(PL_curcop);
8208             OPERATOR(WHEN);
8209
8210         case KEY_while:
8211             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8212                 return REPORT(0);
8213             pl_yylval.ival = CopLINE(PL_curcop);
8214             OPERATOR(WHILE);
8215
8216         case KEY_warn:
8217             PL_hints |= HINT_BLOCK_SCOPE;
8218             LOP(OP_WARN,XTERM);
8219
8220         case KEY_wait:
8221             FUN0(OP_WAIT);
8222
8223         case KEY_waitpid:
8224             LOP(OP_WAITPID,XTERM);
8225
8226         case KEY_wantarray:
8227             FUN0(OP_WANTARRAY);
8228
8229         case KEY_write:
8230 #ifdef EBCDIC
8231         {
8232             char ctl_l[2];
8233             ctl_l[0] = toCTRL('L');
8234             ctl_l[1] = '\0';
8235             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8236         }
8237 #else
8238             /* Make sure $^L is defined */
8239             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8240 #endif
8241             UNI(OP_ENTERWRITE);
8242
8243         case KEY_x:
8244             if (PL_expect == XOPERATOR) {
8245                 if (*s == '=' && !PL_lex_allbrackets &&
8246                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8247                     return REPORT(0);
8248                 Mop(OP_REPEAT);
8249             }
8250             check_uni();
8251             goto just_a_word;
8252
8253         case KEY_xor:
8254             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8255                 return REPORT(0);
8256             pl_yylval.ival = OP_XOR;
8257             OPERATOR(OROP);
8258
8259         case KEY_y:
8260             s = scan_trans(s);
8261             TERM(sublex_start());
8262         }
8263     }}
8264 }
8265 #ifdef __SC__
8266 #pragma segment Main
8267 #endif
8268
8269 static int
8270 S_pending_ident(pTHX)
8271 {
8272     dVAR;
8273     register char *d;
8274     PADOFFSET tmp = 0;
8275     /* pit holds the identifier we read and pending_ident is reset */
8276     char pit = PL_pending_ident;
8277     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8278     /* All routes through this function want to know if there is a colon.  */
8279     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8280     PL_pending_ident = 0;
8281
8282     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8283     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8284           "### Pending identifier '%s'\n", PL_tokenbuf); });
8285
8286     /* if we're in a my(), we can't allow dynamics here.
8287        $foo'bar has already been turned into $foo::bar, so
8288        just check for colons.
8289
8290        if it's a legal name, the OP is a PADANY.
8291     */
8292     if (PL_in_my) {
8293         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8294             if (has_colon)
8295                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8296                                   "variable %s in \"our\"",
8297                                   PL_tokenbuf));
8298             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8299         }
8300         else {
8301             if (has_colon)
8302                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8303                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8304
8305             pl_yylval.opval = newOP(OP_PADANY, 0);
8306             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8307             return PRIVATEREF;
8308         }
8309     }
8310
8311     /*
8312        build the ops for accesses to a my() variable.
8313
8314        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8315        then used in a comparison.  This catches most, but not
8316        all cases.  For instance, it catches
8317            sort { my($a); $a <=> $b }
8318        but not
8319            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8320        (although why you'd do that is anyone's guess).
8321     */
8322
8323     if (!has_colon) {
8324         if (!PL_in_my)
8325             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8326         if (tmp != NOT_IN_PAD) {
8327             /* might be an "our" variable" */
8328             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8329                 /* build ops for a bareword */
8330                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8331                 HEK * const stashname = HvNAME_HEK(stash);
8332                 SV *  const sym = newSVhek(stashname);
8333                 sv_catpvs(sym, "::");
8334                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8335                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8336                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8337                 gv_fetchsv(sym,
8338                     (PL_in_eval
8339                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8340                         : GV_ADDMULTI
8341                     ),
8342                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8343                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8344                      : SVt_PVHV));
8345                 return WORD;
8346             }
8347
8348             /* if it's a sort block and they're naming $a or $b */
8349             if (PL_last_lop_op == OP_SORT &&
8350                 PL_tokenbuf[0] == '$' &&
8351                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8352                 && !PL_tokenbuf[2])
8353             {
8354                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8355                      d < PL_bufend && *d != '\n';
8356                      d++)
8357                 {
8358                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8359                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8360                               PL_tokenbuf);
8361                     }
8362                 }
8363             }
8364
8365             pl_yylval.opval = newOP(OP_PADANY, 0);
8366             pl_yylval.opval->op_targ = tmp;
8367             return PRIVATEREF;
8368         }
8369     }
8370
8371     /*
8372        Whine if they've said @foo in a doublequoted string,
8373        and @foo isn't a variable we can find in the symbol
8374        table.
8375     */
8376     if (ckWARN(WARN_AMBIGUOUS) &&
8377         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8378         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8379                                          SVt_PVAV);
8380         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8381                 /* DO NOT warn for @- and @+ */
8382                 && !( PL_tokenbuf[2] == '\0' &&
8383                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8384            )
8385         {
8386             /* Downgraded from fatal to warning 20000522 mjd */
8387             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8388                         "Possible unintended interpolation of %s in string",
8389                         PL_tokenbuf);
8390         }
8391     }
8392
8393     /* build ops for a bareword */
8394     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8395                                                       tokenbuf_len - 1));
8396     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8397     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8398                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8399                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8400                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8401                       : SVt_PVHV));
8402     return WORD;
8403 }
8404
8405 STATIC void
8406 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8407 {
8408     dVAR;
8409
8410     PERL_ARGS_ASSERT_CHECKCOMMA;
8411
8412     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8413         if (ckWARN(WARN_SYNTAX)) {
8414             int level = 1;
8415             const char *w;
8416             for (w = s+2; *w && level; w++) {
8417                 if (*w == '(')
8418                     ++level;
8419                 else if (*w == ')')
8420                     --level;
8421             }
8422             while (isSPACE(*w))
8423                 ++w;
8424             /* the list of chars below is for end of statements or
8425              * block / parens, boolean operators (&&, ||, //) and branch
8426              * constructs (or, and, if, until, unless, while, err, for).
8427              * Not a very solid hack... */
8428             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8429                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8430                             "%s (...) interpreted as function",name);
8431         }
8432     }
8433     while (s < PL_bufend && isSPACE(*s))
8434         s++;
8435     if (*s == '(')
8436         s++;
8437     while (s < PL_bufend && isSPACE(*s))
8438         s++;
8439     if (isIDFIRST_lazy_if(s,UTF)) {
8440         const char * const w = s++;
8441         while (isALNUM_lazy_if(s,UTF))
8442             s++;
8443         while (s < PL_bufend && isSPACE(*s))
8444             s++;
8445         if (*s == ',') {
8446             GV* gv;
8447             if (keyword(w, s - w, 0))
8448                 return;
8449
8450             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8451             if (gv && GvCVu(gv))
8452                 return;
8453             Perl_croak(aTHX_ "No comma allowed after %s", what);
8454         }
8455     }
8456 }
8457
8458 /* Either returns sv, or mortalizes sv and returns a new SV*.
8459    Best used as sv=new_constant(..., sv, ...).
8460    If s, pv are NULL, calls subroutine with one argument,
8461    and type is used with error messages only. */
8462
8463 STATIC SV *
8464 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8465                SV *sv, SV *pv, const char *type, STRLEN typelen)
8466 {
8467     dVAR; dSP;
8468     HV * const table = GvHV(PL_hintgv);          /* ^H */
8469     SV *res;
8470     SV **cvp;
8471     SV *cv, *typesv;
8472     const char *why1 = "", *why2 = "", *why3 = "";
8473
8474     PERL_ARGS_ASSERT_NEW_CONSTANT;
8475
8476     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8477         SV *msg;
8478
8479         why2 = (const char *)
8480             (strEQ(key,"charnames")
8481              ? "(possibly a missing \"use charnames ...\")"
8482              : "");
8483         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8484                             (type ? type: "undef"), why2);
8485
8486         /* This is convoluted and evil ("goto considered harmful")
8487          * but I do not understand the intricacies of all the different
8488          * failure modes of %^H in here.  The goal here is to make
8489          * the most probable error message user-friendly. --jhi */
8490
8491         goto msgdone;
8492
8493     report:
8494         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8495                             (type ? type: "undef"), why1, why2, why3);
8496     msgdone:
8497         yyerror(SvPVX_const(msg));
8498         SvREFCNT_dec(msg);
8499         return sv;
8500     }
8501
8502     /* charnames doesn't work well if there have been errors found */
8503     if (PL_error_count > 0 && strEQ(key,"charnames"))
8504         return &PL_sv_undef;
8505
8506     cvp = hv_fetch(table, key, keylen, FALSE);
8507     if (!cvp || !SvOK(*cvp)) {
8508         why1 = "$^H{";
8509         why2 = key;
8510         why3 = "} is not defined";
8511         goto report;
8512     }
8513     sv_2mortal(sv);                     /* Parent created it permanently */
8514     cv = *cvp;
8515     if (!pv && s)
8516         pv = newSVpvn_flags(s, len, SVs_TEMP);
8517     if (type && pv)
8518         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8519     else
8520         typesv = &PL_sv_undef;
8521
8522     PUSHSTACKi(PERLSI_OVERLOAD);
8523     ENTER ;
8524     SAVETMPS;
8525
8526     PUSHMARK(SP) ;
8527     EXTEND(sp, 3);
8528     if (pv)
8529         PUSHs(pv);
8530     PUSHs(sv);
8531     if (pv)
8532         PUSHs(typesv);
8533     PUTBACK;
8534     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8535
8536     SPAGAIN ;
8537
8538     /* Check the eval first */
8539     if (!PL_in_eval && SvTRUE(ERRSV)) {
8540         sv_catpvs(ERRSV, "Propagated");
8541         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8542         (void)POPs;
8543         res = SvREFCNT_inc_simple(sv);
8544     }
8545     else {
8546         res = POPs;
8547         SvREFCNT_inc_simple_void(res);
8548     }
8549
8550     PUTBACK ;
8551     FREETMPS ;
8552     LEAVE ;
8553     POPSTACK;
8554
8555     if (!SvOK(res)) {
8556         why1 = "Call to &{$^H{";
8557         why2 = key;
8558         why3 = "}} did not return a defined value";
8559         sv = res;
8560         goto report;
8561     }
8562
8563     return res;
8564 }
8565
8566 /* Returns a NUL terminated string, with the length of the string written to
8567    *slp
8568    */
8569 STATIC char *
8570 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8571 {
8572     dVAR;
8573     register char *d = dest;
8574     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8575
8576     PERL_ARGS_ASSERT_SCAN_WORD;
8577
8578     for (;;) {
8579         if (d >= e)
8580             Perl_croak(aTHX_ ident_too_long);
8581         if (isALNUM(*s))        /* UTF handled below */
8582             *d++ = *s++;
8583         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8584             *d++ = ':';
8585             *d++ = ':';
8586             s++;
8587         }
8588         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8589             *d++ = *s++;
8590             *d++ = *s++;
8591         }
8592         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8593             char *t = s + UTF8SKIP(s);
8594             size_t len;
8595             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8596                 t += UTF8SKIP(t);
8597             len = t - s;
8598             if (d + len > e)
8599                 Perl_croak(aTHX_ ident_too_long);
8600             Copy(s, d, len, char);
8601             d += len;
8602             s = t;
8603         }
8604         else {
8605             *d = '\0';
8606             *slp = d - dest;
8607             return s;
8608         }
8609     }
8610 }
8611
8612 STATIC char *
8613 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8614 {
8615     dVAR;
8616     char *bracket = NULL;
8617     char funny = *s++;
8618     register char *d = dest;
8619     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8620
8621     PERL_ARGS_ASSERT_SCAN_IDENT;
8622
8623     if (isSPACE(*s))
8624         s = PEEKSPACE(s);
8625     if (isDIGIT(*s)) {
8626         while (isDIGIT(*s)) {
8627             if (d >= e)
8628                 Perl_croak(aTHX_ ident_too_long);
8629             *d++ = *s++;
8630         }
8631     }
8632     else {
8633         for (;;) {
8634             if (d >= e)
8635                 Perl_croak(aTHX_ ident_too_long);
8636             if (isALNUM(*s))    /* UTF handled below */
8637                 *d++ = *s++;
8638             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8639                 *d++ = ':';
8640                 *d++ = ':';
8641                 s++;
8642             }
8643             else if (*s == ':' && s[1] == ':') {
8644                 *d++ = *s++;
8645                 *d++ = *s++;
8646             }
8647             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8648                 char *t = s + UTF8SKIP(s);
8649                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8650                     t += UTF8SKIP(t);
8651                 if (d + (t - s) > e)
8652                     Perl_croak(aTHX_ ident_too_long);
8653                 Copy(s, d, t - s, char);
8654                 d += t - s;
8655                 s = t;
8656             }
8657             else
8658                 break;
8659         }
8660     }
8661     *d = '\0';
8662     d = dest;
8663     if (*d) {
8664         if (PL_lex_state != LEX_NORMAL)
8665             PL_lex_state = LEX_INTERPENDMAYBE;
8666         return s;
8667     }
8668     if (*s == '$' && s[1] &&
8669         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8670     {
8671         return s;
8672     }
8673     if (*s == '{') {
8674         bracket = s;
8675         s++;
8676     }
8677     else if (ck_uni)
8678         check_uni();
8679     if (s < send)
8680         *d = *s++;
8681     d[1] = '\0';
8682     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8683         *d = toCTRL(*s);
8684         s++;
8685     }
8686     if (bracket) {
8687         if (isSPACE(s[-1])) {
8688             while (s < send) {
8689                 const char ch = *s++;
8690                 if (!SPACE_OR_TAB(ch)) {
8691                     *d = ch;
8692                     break;
8693                 }
8694             }
8695         }
8696         if (isIDFIRST_lazy_if(d,UTF)) {
8697             d++;
8698             if (UTF) {
8699                 char *end = s;
8700                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8701                     end += UTF8SKIP(end);
8702                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8703                         end += UTF8SKIP(end);
8704                 }
8705                 Copy(s, d, end - s, char);
8706                 d += end - s;
8707                 s = end;
8708             }
8709             else {
8710                 while ((isALNUM(*s) || *s == ':') && d < e)
8711                     *d++ = *s++;
8712                 if (d >= e)
8713                     Perl_croak(aTHX_ ident_too_long);
8714             }
8715             *d = '\0';
8716             while (s < send && SPACE_OR_TAB(*s))
8717                 s++;
8718             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8719                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8720                     const char * const brack =
8721                         (const char *)
8722                         ((*s == '[') ? "[...]" : "{...}");
8723    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8724                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8725                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8726                         funny, dest, brack, funny, dest, brack);
8727                 }
8728                 bracket++;
8729                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8730                 PL_lex_allbrackets++;
8731                 return s;
8732             }
8733         }
8734         /* Handle extended ${^Foo} variables
8735          * 1999-02-27 mjd-perl-patch@plover.com */
8736         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8737                  && isALNUM(*s))
8738         {
8739             d++;
8740             while (isALNUM(*s) && d < e) {
8741                 *d++ = *s++;
8742             }
8743             if (d >= e)
8744                 Perl_croak(aTHX_ ident_too_long);
8745             *d = '\0';
8746         }
8747         if (*s == '}') {
8748             s++;
8749             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8750                 PL_lex_state = LEX_INTERPEND;
8751                 PL_expect = XREF;
8752             }
8753             if (PL_lex_state == LEX_NORMAL) {
8754                 if (ckWARN(WARN_AMBIGUOUS) &&
8755                     (keyword(dest, d - dest, 0)
8756                      || get_cvn_flags(dest, d - dest, 0)))
8757                 {
8758                     if (funny == '#')
8759                         funny = '@';
8760                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8761                         "Ambiguous use of %c{%s} resolved to %c%s",
8762                         funny, dest, funny, dest);
8763                 }
8764             }
8765         }
8766         else {
8767             s = bracket;                /* let the parser handle it */
8768             *dest = '\0';
8769         }
8770     }
8771     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8772         PL_lex_state = LEX_INTERPEND;
8773     return s;
8774 }
8775
8776 static bool
8777 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8778
8779     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8780      * the parse starting at 's', based on the subset that are valid in this
8781      * context input to this routine in 'valid_flags'. Advances s.  Returns
8782      * TRUE if the input was a valid flag, so the next char may be as well;
8783      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8784      * current regex.  This routine will set it to any charset modifier found.
8785      * The caller shouldn't change it.  This way, another charset modifier
8786      * encountered in the parse can be detected as an error, as we have decided
8787      * allow only one */
8788
8789     const char c = **s;
8790
8791     if (! strchr(valid_flags, c)) {
8792         if (isALNUM(c)) {
8793             goto deprecate;
8794         }
8795         return FALSE;
8796     }
8797
8798     switch (c) {
8799
8800         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8801         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8802         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8803         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8804         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8805         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8806         case LOCALE_PAT_MOD:
8807
8808             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8809              * can't be regex modifiers.
8810              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8811              * much as easily done.  s///lei, for example, has to mean regex
8812              * modifiers if it's not an error (as does any word character
8813              * following the 'e').  Otherwise, we resolve to the backwards-
8814              * compatible, but less likely 's/// le ...', i.e. as meaning
8815              * less-than-or-equal.  The reason it's not likely is that s//
8816              * returns a number for code in the field (/r returns a string, but
8817              * that wasn't added until the 5.13 series), and so '<=' should be
8818              * used for comparing, not 'le'. */
8819             if (*((*s) + 1) == 't') {
8820                 goto deprecate;
8821             }
8822             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8823
8824                 /* 'e' is valid only for substitutes, s///e.  If it is not
8825                  * valid in the current context, then 'm//le' must mean the
8826                  * comparison operator, so use the regular deprecation message.
8827                  */
8828                 if (! strchr(valid_flags, 'e')) {
8829                     goto deprecate;
8830                 }
8831                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8832                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8833                 return FALSE;
8834             }
8835             if (*charset) {
8836                 goto multiple_charsets;
8837             }
8838             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8839             *charset = c;
8840             break;
8841         case UNICODE_PAT_MOD:
8842             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8843              * 'n' means they can't be regex modifiers */
8844             if (*((*s) + 1) == 'n') {
8845                 goto deprecate;
8846             }
8847             if (*charset) {
8848                 goto multiple_charsets;
8849             }
8850             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8851             *charset = c;
8852             break;
8853         case ASCII_RESTRICT_PAT_MOD:
8854             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8855              * can't be regex modifiers */
8856             if (*((*s) + 1) == 'n') {
8857                 goto deprecate;
8858             }
8859             if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
8860                 /* Doubled modifier implies more restricted */
8861                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8862                 (*s)++;
8863             }
8864             else {
8865                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8866             }
8867             if (*charset) { /* Do this after the increment of *s in /aa, so
8868                                the return advances the ptr correctly */
8869                 goto multiple_charsets;
8870             }
8871             *charset = c;
8872             break;
8873         case DEPENDS_PAT_MOD:
8874             if (*charset) {
8875                 goto multiple_charsets;
8876             }
8877             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8878             *charset = c;
8879             break;
8880     }
8881
8882     (*s)++;
8883     return TRUE;
8884
8885     deprecate:
8886         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8887             "Having no space between pattern and following word is deprecated");
8888         return FALSE;
8889
8890     multiple_charsets:
8891         if (*charset != c) {
8892             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8893         }
8894         else {
8895             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8896         }
8897
8898         /* Pretend that it worked, so will continue processing before dieing */
8899         (*s)++;
8900         return TRUE;
8901 }
8902
8903 STATIC char *
8904 S_scan_pat(pTHX_ char *start, I32 type)
8905 {
8906     dVAR;
8907     PMOP *pm;
8908     char *s = scan_str(start,!!PL_madskills,FALSE);
8909     const char * const valid_flags =
8910         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8911     char charset = '\0';    /* character set modifier */
8912 #ifdef PERL_MAD
8913     char *modstart;
8914 #endif
8915
8916     PERL_ARGS_ASSERT_SCAN_PAT;
8917
8918     if (!s) {
8919         const char * const delimiter = skipspace(start);
8920         Perl_croak(aTHX_
8921                    (const char *)
8922                    (*delimiter == '?'
8923                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8924                     : "Search pattern not terminated" ));
8925     }
8926
8927     pm = (PMOP*)newPMOP(type, 0);
8928     if (PL_multi_open == '?') {
8929         /* This is the only point in the code that sets PMf_ONCE:  */
8930         pm->op_pmflags |= PMf_ONCE;
8931
8932         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8933            allows us to restrict the list needed by reset to just the ??
8934            matches.  */
8935         assert(type != OP_TRANS);
8936         if (PL_curstash) {
8937             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8938             U32 elements;
8939             if (!mg) {
8940                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8941                                  0);
8942             }
8943             elements = mg->mg_len / sizeof(PMOP**);
8944             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8945             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8946             mg->mg_len = elements * sizeof(PMOP**);
8947             PmopSTASH_set(pm,PL_curstash);
8948         }
8949     }
8950 #ifdef PERL_MAD
8951     modstart = s;
8952 #endif
8953     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8954 #ifdef PERL_MAD
8955     if (PL_madskills && modstart != s) {
8956         SV* tmptoken = newSVpvn(modstart, s - modstart);
8957         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8958     }
8959 #endif
8960     /* issue a warning if /c is specified,but /g is not */
8961     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8962     {
8963         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8964                        "Use of /c modifier is meaningless without /g" );
8965     }
8966
8967     PL_lex_op = (OP*)pm;
8968     pl_yylval.ival = OP_MATCH;
8969     return s;
8970 }
8971
8972 STATIC char *
8973 S_scan_subst(pTHX_ char *start)
8974 {
8975     dVAR;
8976     char *s;
8977     register PMOP *pm;
8978     I32 first_start;
8979     I32 es = 0;
8980     char charset = '\0';    /* character set modifier */
8981 #ifdef PERL_MAD
8982     char *modstart;
8983 #endif
8984
8985     PERL_ARGS_ASSERT_SCAN_SUBST;
8986
8987     pl_yylval.ival = OP_NULL;
8988
8989     s = scan_str(start,!!PL_madskills,FALSE);
8990
8991     if (!s)
8992         Perl_croak(aTHX_ "Substitution pattern not terminated");
8993
8994     if (s[-1] == PL_multi_open)
8995         s--;
8996 #ifdef PERL_MAD
8997     if (PL_madskills) {
8998         CURMAD('q', PL_thisopen);
8999         CURMAD('_', PL_thiswhite);
9000         CURMAD('E', PL_thisstuff);
9001         CURMAD('Q', PL_thisclose);
9002         PL_realtokenstart = s - SvPVX(PL_linestr);
9003     }
9004 #endif
9005
9006     first_start = PL_multi_start;
9007     s = scan_str(s,!!PL_madskills,FALSE);
9008     if (!s) {
9009         if (PL_lex_stuff) {
9010             SvREFCNT_dec(PL_lex_stuff);
9011             PL_lex_stuff = NULL;
9012         }
9013         Perl_croak(aTHX_ "Substitution replacement not terminated");
9014     }
9015     PL_multi_start = first_start;       /* so whole substitution is taken together */
9016
9017     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9018
9019 #ifdef PERL_MAD
9020     if (PL_madskills) {
9021         CURMAD('z', PL_thisopen);
9022         CURMAD('R', PL_thisstuff);
9023         CURMAD('Z', PL_thisclose);
9024     }
9025     modstart = s;
9026 #endif
9027
9028     while (*s) {
9029         if (*s == EXEC_PAT_MOD) {
9030             s++;
9031             es++;
9032         }
9033         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9034         {
9035             break;
9036         }
9037     }
9038
9039 #ifdef PERL_MAD
9040     if (PL_madskills) {
9041         if (modstart != s)
9042             curmad('m', newSVpvn(modstart, s - modstart));
9043         append_madprops(PL_thismad, (OP*)pm, 0);
9044         PL_thismad = 0;
9045     }
9046 #endif
9047     if ((pm->op_pmflags & PMf_CONTINUE)) {
9048         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9049     }
9050
9051     if (es) {
9052         SV * const repl = newSVpvs("");
9053
9054         PL_sublex_info.super_bufptr = s;
9055         PL_sublex_info.super_bufend = PL_bufend;
9056         PL_multi_end = 0;
9057         pm->op_pmflags |= PMf_EVAL;
9058         while (es-- > 0) {
9059             if (es)
9060                 sv_catpvs(repl, "eval ");
9061             else
9062                 sv_catpvs(repl, "do ");
9063         }
9064         sv_catpvs(repl, "{");
9065         sv_catsv(repl, PL_lex_repl);
9066         if (strchr(SvPVX(PL_lex_repl), '#'))
9067             sv_catpvs(repl, "\n");
9068         sv_catpvs(repl, "}");
9069         SvEVALED_on(repl);
9070         SvREFCNT_dec(PL_lex_repl);
9071         PL_lex_repl = repl;
9072     }
9073
9074     PL_lex_op = (OP*)pm;
9075     pl_yylval.ival = OP_SUBST;
9076     return s;
9077 }
9078
9079 STATIC char *
9080 S_scan_trans(pTHX_ char *start)
9081 {
9082     dVAR;
9083     register char* s;
9084     OP *o;
9085     short *tbl;
9086     U8 squash;
9087     U8 del;
9088     U8 complement;
9089     bool nondestruct = 0;
9090 #ifdef PERL_MAD
9091     char *modstart;
9092 #endif
9093
9094     PERL_ARGS_ASSERT_SCAN_TRANS;
9095
9096     pl_yylval.ival = OP_NULL;
9097
9098     s = scan_str(start,!!PL_madskills,FALSE);
9099     if (!s)
9100         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9101
9102     if (s[-1] == PL_multi_open)
9103         s--;
9104 #ifdef PERL_MAD
9105     if (PL_madskills) {
9106         CURMAD('q', PL_thisopen);
9107         CURMAD('_', PL_thiswhite);
9108         CURMAD('E', PL_thisstuff);
9109         CURMAD('Q', PL_thisclose);
9110         PL_realtokenstart = s - SvPVX(PL_linestr);
9111     }
9112 #endif
9113
9114     s = scan_str(s,!!PL_madskills,FALSE);
9115     if (!s) {
9116         if (PL_lex_stuff) {
9117             SvREFCNT_dec(PL_lex_stuff);
9118             PL_lex_stuff = NULL;
9119         }
9120         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9121     }
9122     if (PL_madskills) {
9123         CURMAD('z', PL_thisopen);
9124         CURMAD('R', PL_thisstuff);
9125         CURMAD('Z', PL_thisclose);
9126     }
9127
9128     complement = del = squash = 0;
9129 #ifdef PERL_MAD
9130     modstart = s;
9131 #endif
9132     while (1) {
9133         switch (*s) {
9134         case 'c':
9135             complement = OPpTRANS_COMPLEMENT;
9136             break;
9137         case 'd':
9138             del = OPpTRANS_DELETE;
9139             break;
9140         case 's':
9141             squash = OPpTRANS_SQUASH;
9142             break;
9143         case 'r':
9144             nondestruct = 1;
9145             break;
9146         default:
9147             goto no_more;
9148         }
9149         s++;
9150     }
9151   no_more:
9152
9153     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9154     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9155     o->op_private &= ~OPpTRANS_ALL;
9156     o->op_private |= del|squash|complement|
9157       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9158       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9159
9160     PL_lex_op = o;
9161     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9162
9163 #ifdef PERL_MAD
9164     if (PL_madskills) {
9165         if (modstart != s)
9166             curmad('m', newSVpvn(modstart, s - modstart));
9167         append_madprops(PL_thismad, o, 0);
9168         PL_thismad = 0;
9169     }
9170 #endif
9171
9172     return s;
9173 }
9174
9175 STATIC char *
9176 S_scan_heredoc(pTHX_ register char *s)
9177 {
9178     dVAR;
9179     SV *herewas;
9180     I32 op_type = OP_SCALAR;
9181     I32 len;
9182     SV *tmpstr;
9183     char term;
9184     const char *found_newline;
9185     register char *d;
9186     register char *e;
9187     char *peek;
9188     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9189 #ifdef PERL_MAD
9190     I32 stuffstart = s - SvPVX(PL_linestr);
9191     char *tstart;
9192
9193     PL_realtokenstart = -1;
9194 #endif
9195
9196     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9197
9198     s += 2;
9199     d = PL_tokenbuf;
9200     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9201     if (!outer)
9202         *d++ = '\n';
9203     peek = s;
9204     while (SPACE_OR_TAB(*peek))
9205         peek++;
9206     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9207         s = peek;
9208         term = *s++;
9209         s = delimcpy(d, e, s, PL_bufend, term, &len);
9210         d += len;
9211         if (s < PL_bufend)
9212             s++;
9213     }
9214     else {
9215         if (*s == '\\')
9216             s++, term = '\'';
9217         else
9218             term = '"';
9219         if (!isALNUM_lazy_if(s,UTF))
9220             deprecate("bare << to mean <<\"\"");
9221         for (; isALNUM_lazy_if(s,UTF); s++) {
9222             if (d < e)
9223                 *d++ = *s;
9224         }
9225     }
9226     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9227         Perl_croak(aTHX_ "Delimiter for here document is too long");
9228     *d++ = '\n';
9229     *d = '\0';
9230     len = d - PL_tokenbuf;
9231
9232 #ifdef PERL_MAD
9233     if (PL_madskills) {
9234         tstart = PL_tokenbuf + !outer;
9235         PL_thisclose = newSVpvn(tstart, len - !outer);
9236         tstart = SvPVX(PL_linestr) + stuffstart;
9237         PL_thisopen = newSVpvn(tstart, s - tstart);
9238         stuffstart = s - SvPVX(PL_linestr);
9239     }
9240 #endif
9241 #ifndef PERL_STRICT_CR
9242     d = strchr(s, '\r');
9243     if (d) {
9244         char * const olds = s;
9245         s = d;
9246         while (s < PL_bufend) {
9247             if (*s == '\r') {
9248                 *d++ = '\n';
9249                 if (*++s == '\n')
9250                     s++;
9251             }
9252             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9253                 *d++ = *s++;
9254                 s++;
9255             }
9256             else
9257                 *d++ = *s++;
9258         }
9259         *d = '\0';
9260         PL_bufend = d;
9261         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9262         s = olds;
9263     }
9264 #endif
9265 #ifdef PERL_MAD
9266     found_newline = 0;
9267 #endif
9268     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9269         herewas = newSVpvn(s,PL_bufend-s);
9270     }
9271     else {
9272 #ifdef PERL_MAD
9273         herewas = newSVpvn(s-1,found_newline-s+1);
9274 #else
9275         s--;
9276         herewas = newSVpvn(s,found_newline-s);
9277 #endif
9278     }
9279 #ifdef PERL_MAD
9280     if (PL_madskills) {
9281         tstart = SvPVX(PL_linestr) + stuffstart;
9282         if (PL_thisstuff)
9283             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9284         else
9285             PL_thisstuff = newSVpvn(tstart, s - tstart);
9286     }
9287 #endif
9288     s += SvCUR(herewas);
9289
9290 #ifdef PERL_MAD
9291     stuffstart = s - SvPVX(PL_linestr);
9292
9293     if (found_newline)
9294         s--;
9295 #endif
9296
9297     tmpstr = newSV_type(SVt_PVIV);
9298     SvGROW(tmpstr, 80);
9299     if (term == '\'') {
9300         op_type = OP_CONST;
9301         SvIV_set(tmpstr, -1);
9302     }
9303     else if (term == '`') {
9304         op_type = OP_BACKTICK;
9305         SvIV_set(tmpstr, '\\');
9306     }
9307
9308     CLINE;
9309     PL_multi_start = CopLINE(PL_curcop);
9310     PL_multi_open = PL_multi_close = '<';
9311     term = *PL_tokenbuf;
9312     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9313         char * const bufptr = PL_sublex_info.super_bufptr;
9314         char * const bufend = PL_sublex_info.super_bufend;
9315         char * const olds = s - SvCUR(herewas);
9316         s = strchr(bufptr, '\n');
9317         if (!s)
9318             s = bufend;
9319         d = s;
9320         while (s < bufend &&
9321           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9322             if (*s++ == '\n')
9323                 CopLINE_inc(PL_curcop);
9324         }
9325         if (s >= bufend) {
9326             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9327             missingterm(PL_tokenbuf);
9328         }
9329         sv_setpvn(herewas,bufptr,d-bufptr+1);
9330         sv_setpvn(tmpstr,d+1,s-d);
9331         s += len - 1;
9332         sv_catpvn(herewas,s,bufend-s);
9333         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9334
9335         s = olds;
9336         goto retval;
9337     }
9338     else if (!outer) {
9339         d = s;
9340         while (s < PL_bufend &&
9341           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9342             if (*s++ == '\n')
9343                 CopLINE_inc(PL_curcop);
9344         }
9345         if (s >= PL_bufend) {
9346             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9347             missingterm(PL_tokenbuf);
9348         }
9349         sv_setpvn(tmpstr,d+1,s-d);
9350 #ifdef PERL_MAD
9351         if (PL_madskills) {
9352             if (PL_thisstuff)
9353                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9354             else
9355                 PL_thisstuff = newSVpvn(d + 1, s - d);
9356             stuffstart = s - SvPVX(PL_linestr);
9357         }
9358 #endif
9359         s += len - 1;
9360         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9361
9362         sv_catpvn(herewas,s,PL_bufend-s);
9363         sv_setsv(PL_linestr,herewas);
9364         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9365         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9366         PL_last_lop = PL_last_uni = NULL;
9367     }
9368     else
9369         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9370     while (s >= PL_bufend) {    /* multiple line string? */
9371 #ifdef PERL_MAD
9372         if (PL_madskills) {
9373             tstart = SvPVX(PL_linestr) + stuffstart;
9374             if (PL_thisstuff)
9375                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9376             else
9377                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9378         }
9379 #endif
9380         PL_bufptr = s;
9381         CopLINE_inc(PL_curcop);
9382         if (!outer || !lex_next_chunk(0)) {
9383             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9384             missingterm(PL_tokenbuf);
9385         }
9386         CopLINE_dec(PL_curcop);
9387         s = PL_bufptr;
9388 #ifdef PERL_MAD
9389         stuffstart = s - SvPVX(PL_linestr);
9390 #endif
9391         CopLINE_inc(PL_curcop);
9392         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9393         PL_last_lop = PL_last_uni = NULL;
9394 #ifndef PERL_STRICT_CR
9395         if (PL_bufend - PL_linestart >= 2) {
9396             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9397                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9398             {
9399                 PL_bufend[-2] = '\n';
9400                 PL_bufend--;
9401                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9402             }
9403             else if (PL_bufend[-1] == '\r')
9404                 PL_bufend[-1] = '\n';
9405         }
9406         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9407             PL_bufend[-1] = '\n';
9408 #endif
9409         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9410             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9411             *(SvPVX(PL_linestr) + off ) = ' ';
9412             sv_catsv(PL_linestr,herewas);
9413             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9414             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9415         }
9416         else {
9417             s = PL_bufend;
9418             sv_catsv(tmpstr,PL_linestr);
9419         }
9420     }
9421     s++;
9422 retval:
9423     PL_multi_end = CopLINE(PL_curcop);
9424     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9425         SvPV_shrink_to_cur(tmpstr);
9426     }
9427     SvREFCNT_dec(herewas);
9428     if (!IN_BYTES) {
9429         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9430             SvUTF8_on(tmpstr);
9431         else if (PL_encoding)
9432             sv_recode_to_utf8(tmpstr, PL_encoding);
9433     }
9434     PL_lex_stuff = tmpstr;
9435     pl_yylval.ival = op_type;
9436     return s;
9437 }
9438
9439 /* scan_inputsymbol
9440    takes: current position in input buffer
9441    returns: new position in input buffer
9442    side-effects: pl_yylval and lex_op are set.
9443
9444    This code handles:
9445
9446    <>           read from ARGV
9447    <FH>         read from filehandle
9448    <pkg::FH>    read from package qualified filehandle
9449    <pkg'FH>     read from package qualified filehandle
9450    <$fh>        read from filehandle in $fh
9451    <*.h>        filename glob
9452
9453 */
9454
9455 STATIC char *
9456 S_scan_inputsymbol(pTHX_ char *start)
9457 {
9458     dVAR;
9459     register char *s = start;           /* current position in buffer */
9460     char *end;
9461     I32 len;
9462     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9463     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9464
9465     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9466
9467     end = strchr(s, '\n');
9468     if (!end)
9469         end = PL_bufend;
9470     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9471
9472     /* die if we didn't have space for the contents of the <>,
9473        or if it didn't end, or if we see a newline
9474     */
9475
9476     if (len >= (I32)sizeof PL_tokenbuf)
9477         Perl_croak(aTHX_ "Excessively long <> operator");
9478     if (s >= end)
9479         Perl_croak(aTHX_ "Unterminated <> operator");
9480
9481     s++;
9482
9483     /* check for <$fh>
9484        Remember, only scalar variables are interpreted as filehandles by
9485        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9486        treated as a glob() call.
9487        This code makes use of the fact that except for the $ at the front,
9488        a scalar variable and a filehandle look the same.
9489     */
9490     if (*d == '$' && d[1]) d++;
9491
9492     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9493     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9494         d++;
9495
9496     /* If we've tried to read what we allow filehandles to look like, and
9497        there's still text left, then it must be a glob() and not a getline.
9498        Use scan_str to pull out the stuff between the <> and treat it
9499        as nothing more than a string.
9500     */
9501
9502     if (d - PL_tokenbuf != len) {
9503         pl_yylval.ival = OP_GLOB;
9504         s = scan_str(start,!!PL_madskills,FALSE);
9505         if (!s)
9506            Perl_croak(aTHX_ "Glob not terminated");
9507         return s;
9508     }
9509     else {
9510         bool readline_overriden = FALSE;
9511         GV *gv_readline;
9512         GV **gvp;
9513         /* we're in a filehandle read situation */
9514         d = PL_tokenbuf;
9515
9516         /* turn <> into <ARGV> */
9517         if (!len)
9518             Copy("ARGV",d,5,char);
9519
9520         /* Check whether readline() is overriden */
9521         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9522         if ((gv_readline
9523                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9524                 ||
9525                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9526                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9527                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9528             readline_overriden = TRUE;
9529
9530         /* if <$fh>, create the ops to turn the variable into a
9531            filehandle
9532         */
9533         if (*d == '$') {
9534             /* try to find it in the pad for this block, otherwise find
9535                add symbol table ops
9536             */
9537             const PADOFFSET tmp = pad_findmy(d, len, 0);
9538             if (tmp != NOT_IN_PAD) {
9539                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9540                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9541                     HEK * const stashname = HvNAME_HEK(stash);
9542                     SV * const sym = sv_2mortal(newSVhek(stashname));
9543                     sv_catpvs(sym, "::");
9544                     sv_catpv(sym, d+1);
9545                     d = SvPVX(sym);
9546                     goto intro_sym;
9547                 }
9548                 else {
9549                     OP * const o = newOP(OP_PADSV, 0);
9550                     o->op_targ = tmp;
9551                     PL_lex_op = readline_overriden
9552                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9553                                 op_append_elem(OP_LIST, o,
9554                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9555                         : (OP*)newUNOP(OP_READLINE, 0, o);
9556                 }
9557             }
9558             else {
9559                 GV *gv;
9560                 ++d;
9561 intro_sym:
9562                 gv = gv_fetchpv(d,
9563                                 (PL_in_eval
9564                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9565                                  : GV_ADDMULTI),
9566                                 SVt_PV);
9567                 PL_lex_op = readline_overriden
9568                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9569                             op_append_elem(OP_LIST,
9570                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9571                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9572                     : (OP*)newUNOP(OP_READLINE, 0,
9573                             newUNOP(OP_RV2SV, 0,
9574                                 newGVOP(OP_GV, 0, gv)));
9575             }
9576             if (!readline_overriden)
9577                 PL_lex_op->op_flags |= OPf_SPECIAL;
9578             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9579             pl_yylval.ival = OP_NULL;
9580         }
9581
9582         /* If it's none of the above, it must be a literal filehandle
9583            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9584         else {
9585             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9586             PL_lex_op = readline_overriden
9587                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9588                         op_append_elem(OP_LIST,
9589                             newGVOP(OP_GV, 0, gv),
9590                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9591                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9592             pl_yylval.ival = OP_NULL;
9593         }
9594     }
9595
9596     return s;
9597 }
9598
9599
9600 /* scan_str
9601    takes: start position in buffer
9602           keep_quoted preserve \ on the embedded delimiter(s)
9603           keep_delims preserve the delimiters around the string
9604    returns: position to continue reading from buffer
9605    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9606         updates the read buffer.
9607
9608    This subroutine pulls a string out of the input.  It is called for:
9609         q               single quotes           q(literal text)
9610         '               single quotes           'literal text'
9611         qq              double quotes           qq(interpolate $here please)
9612         "               double quotes           "interpolate $here please"
9613         qx              backticks               qx(/bin/ls -l)
9614         `               backticks               `/bin/ls -l`
9615         qw              quote words             @EXPORT_OK = qw( func() $spam )
9616         m//             regexp match            m/this/
9617         s///            regexp substitute       s/this/that/
9618         tr///           string transliterate    tr/this/that/
9619         y///            string transliterate    y/this/that/
9620         ($*@)           sub prototypes          sub foo ($)
9621         (stuff)         sub attr parameters     sub foo : attr(stuff)
9622         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9623
9624    In most of these cases (all but <>, patterns and transliterate)
9625    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9626    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9627    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9628    calls scan_str().
9629
9630    It skips whitespace before the string starts, and treats the first
9631    character as the delimiter.  If the delimiter is one of ([{< then
9632    the corresponding "close" character )]}> is used as the closing
9633    delimiter.  It allows quoting of delimiters, and if the string has
9634    balanced delimiters ([{<>}]) it allows nesting.
9635
9636    On success, the SV with the resulting string is put into lex_stuff or,
9637    if that is already non-NULL, into lex_repl. The second case occurs only
9638    when parsing the RHS of the special constructs s/// and tr/// (y///).
9639    For convenience, the terminating delimiter character is stuffed into
9640    SvIVX of the SV.
9641 */
9642
9643 STATIC char *
9644 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9645 {
9646     dVAR;
9647     SV *sv;                             /* scalar value: string */
9648     const char *tmps;                   /* temp string, used for delimiter matching */
9649     register char *s = start;           /* current position in the buffer */
9650     register char term;                 /* terminating character */
9651     register char *to;                  /* current position in the sv's data */
9652     I32 brackets = 1;                   /* bracket nesting level */
9653     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9654     I32 termcode;                       /* terminating char. code */
9655     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9656     STRLEN termlen;                     /* length of terminating string */
9657     int last_off = 0;                   /* last position for nesting bracket */
9658 #ifdef PERL_MAD
9659     int stuffstart;
9660     char *tstart;
9661 #endif
9662
9663     PERL_ARGS_ASSERT_SCAN_STR;
9664
9665     /* skip space before the delimiter */
9666     if (isSPACE(*s)) {
9667         s = PEEKSPACE(s);
9668     }
9669
9670 #ifdef PERL_MAD
9671     if (PL_realtokenstart >= 0) {
9672         stuffstart = PL_realtokenstart;
9673         PL_realtokenstart = -1;
9674     }
9675     else
9676         stuffstart = start - SvPVX(PL_linestr);
9677 #endif
9678     /* mark where we are, in case we need to report errors */
9679     CLINE;
9680
9681     /* after skipping whitespace, the next character is the terminator */
9682     term = *s;
9683     if (!UTF) {
9684         termcode = termstr[0] = term;
9685         termlen = 1;
9686     }
9687     else {
9688         termcode = utf8_to_uvchr((U8*)s, &termlen);
9689         Copy(s, termstr, termlen, U8);
9690         if (!UTF8_IS_INVARIANT(term))
9691             has_utf8 = TRUE;
9692     }
9693
9694     /* mark where we are */
9695     PL_multi_start = CopLINE(PL_curcop);
9696     PL_multi_open = term;
9697
9698     /* find corresponding closing delimiter */
9699     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9700         termcode = termstr[0] = term = tmps[5];
9701
9702     PL_multi_close = term;
9703
9704     /* create a new SV to hold the contents.  79 is the SV's initial length.
9705        What a random number. */
9706     sv = newSV_type(SVt_PVIV);
9707     SvGROW(sv, 80);
9708     SvIV_set(sv, termcode);
9709     (void)SvPOK_only(sv);               /* validate pointer */
9710
9711     /* move past delimiter and try to read a complete string */
9712     if (keep_delims)
9713         sv_catpvn(sv, s, termlen);
9714     s += termlen;
9715 #ifdef PERL_MAD
9716     tstart = SvPVX(PL_linestr) + stuffstart;
9717     if (!PL_thisopen && !keep_delims) {
9718         PL_thisopen = newSVpvn(tstart, s - tstart);
9719         stuffstart = s - SvPVX(PL_linestr);
9720     }
9721 #endif
9722     for (;;) {
9723         if (PL_encoding && !UTF) {
9724             bool cont = TRUE;
9725
9726             while (cont) {
9727                 int offset = s - SvPVX_const(PL_linestr);
9728                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9729                                            &offset, (char*)termstr, termlen);
9730                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9731                 char * const svlast = SvEND(sv) - 1;
9732
9733                 for (; s < ns; s++) {
9734                     if (*s == '\n' && !PL_rsfp)
9735                         CopLINE_inc(PL_curcop);
9736                 }
9737                 if (!found)
9738                     goto read_more_line;
9739                 else {
9740                     /* handle quoted delimiters */
9741                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9742                         const char *t;
9743                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9744                             t--;
9745                         if ((svlast-1 - t) % 2) {
9746                             if (!keep_quoted) {
9747                                 *(svlast-1) = term;
9748                                 *svlast = '\0';
9749                                 SvCUR_set(sv, SvCUR(sv) - 1);
9750                             }
9751                             continue;
9752                         }
9753                     }
9754                     if (PL_multi_open == PL_multi_close) {
9755                         cont = FALSE;
9756                     }
9757                     else {
9758                         const char *t;
9759                         char *w;
9760                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9761                             /* At here, all closes are "was quoted" one,
9762                                so we don't check PL_multi_close. */
9763                             if (*t == '\\') {
9764                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9765                                     t++;
9766                                 else
9767                                     *w++ = *t++;
9768                             }
9769                             else if (*t == PL_multi_open)
9770                                 brackets++;
9771
9772                             *w = *t;
9773                         }
9774                         if (w < t) {
9775                             *w++ = term;
9776                             *w = '\0';
9777                             SvCUR_set(sv, w - SvPVX_const(sv));
9778                         }
9779                         last_off = w - SvPVX(sv);
9780                         if (--brackets <= 0)
9781                             cont = FALSE;
9782                     }
9783                 }
9784             }
9785             if (!keep_delims) {
9786                 SvCUR_set(sv, SvCUR(sv) - 1);
9787                 *SvEND(sv) = '\0';
9788             }
9789             break;
9790         }
9791
9792         /* extend sv if need be */
9793         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9794         /* set 'to' to the next character in the sv's string */
9795         to = SvPVX(sv)+SvCUR(sv);
9796
9797         /* if open delimiter is the close delimiter read unbridle */
9798         if (PL_multi_open == PL_multi_close) {
9799             for (; s < PL_bufend; s++,to++) {
9800                 /* embedded newlines increment the current line number */
9801                 if (*s == '\n' && !PL_rsfp)
9802                     CopLINE_inc(PL_curcop);
9803                 /* handle quoted delimiters */
9804                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9805                     if (!keep_quoted && s[1] == term)
9806                         s++;
9807                 /* any other quotes are simply copied straight through */
9808                     else
9809                         *to++ = *s++;
9810                 }
9811                 /* terminate when run out of buffer (the for() condition), or
9812                    have found the terminator */
9813                 else if (*s == term) {
9814                     if (termlen == 1)
9815                         break;
9816                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9817                         break;
9818                 }
9819                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9820                     has_utf8 = TRUE;
9821                 *to = *s;
9822             }
9823         }
9824
9825         /* if the terminator isn't the same as the start character (e.g.,
9826            matched brackets), we have to allow more in the quoting, and
9827            be prepared for nested brackets.
9828         */
9829         else {
9830             /* read until we run out of string, or we find the terminator */
9831             for (; s < PL_bufend; s++,to++) {
9832                 /* embedded newlines increment the line count */
9833                 if (*s == '\n' && !PL_rsfp)
9834                     CopLINE_inc(PL_curcop);
9835                 /* backslashes can escape the open or closing characters */
9836                 if (*s == '\\' && s+1 < PL_bufend) {
9837                     if (!keep_quoted &&
9838                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9839                         s++;
9840                     else
9841                         *to++ = *s++;
9842                 }
9843                 /* allow nested opens and closes */
9844                 else if (*s == PL_multi_close && --brackets <= 0)
9845                     break;
9846                 else if (*s == PL_multi_open)
9847                     brackets++;
9848                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9849                     has_utf8 = TRUE;
9850                 *to = *s;
9851             }
9852         }
9853         /* terminate the copied string and update the sv's end-of-string */
9854         *to = '\0';
9855         SvCUR_set(sv, to - SvPVX_const(sv));
9856
9857         /*
9858          * this next chunk reads more into the buffer if we're not done yet
9859          */
9860
9861         if (s < PL_bufend)
9862             break;              /* handle case where we are done yet :-) */
9863
9864 #ifndef PERL_STRICT_CR
9865         if (to - SvPVX_const(sv) >= 2) {
9866             if ((to[-2] == '\r' && to[-1] == '\n') ||
9867                 (to[-2] == '\n' && to[-1] == '\r'))
9868             {
9869                 to[-2] = '\n';
9870                 to--;
9871                 SvCUR_set(sv, to - SvPVX_const(sv));
9872             }
9873             else if (to[-1] == '\r')
9874                 to[-1] = '\n';
9875         }
9876         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9877             to[-1] = '\n';
9878 #endif
9879
9880      read_more_line:
9881         /* if we're out of file, or a read fails, bail and reset the current
9882            line marker so we can report where the unterminated string began
9883         */
9884 #ifdef PERL_MAD
9885         if (PL_madskills) {
9886             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9887             if (PL_thisstuff)
9888                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9889             else
9890                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9891         }
9892 #endif
9893         CopLINE_inc(PL_curcop);
9894         PL_bufptr = PL_bufend;
9895         if (!lex_next_chunk(0)) {
9896             sv_free(sv);
9897             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9898             return NULL;
9899         }
9900         s = PL_bufptr;
9901 #ifdef PERL_MAD
9902         stuffstart = 0;
9903 #endif
9904     }
9905
9906     /* at this point, we have successfully read the delimited string */
9907
9908     if (!PL_encoding || UTF) {
9909 #ifdef PERL_MAD
9910         if (PL_madskills) {
9911             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9912             const int len = s - tstart;
9913             if (PL_thisstuff)
9914                 sv_catpvn(PL_thisstuff, tstart, len);
9915             else
9916                 PL_thisstuff = newSVpvn(tstart, len);
9917             if (!PL_thisclose && !keep_delims)
9918                 PL_thisclose = newSVpvn(s,termlen);
9919         }
9920 #endif
9921
9922         if (keep_delims)
9923             sv_catpvn(sv, s, termlen);
9924         s += termlen;
9925     }
9926 #ifdef PERL_MAD
9927     else {
9928         if (PL_madskills) {
9929             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9930             const int len = s - tstart - termlen;
9931             if (PL_thisstuff)
9932                 sv_catpvn(PL_thisstuff, tstart, len);
9933             else
9934                 PL_thisstuff = newSVpvn(tstart, len);
9935             if (!PL_thisclose && !keep_delims)
9936                 PL_thisclose = newSVpvn(s - termlen,termlen);
9937         }
9938     }
9939 #endif
9940     if (has_utf8 || PL_encoding)
9941         SvUTF8_on(sv);
9942
9943     PL_multi_end = CopLINE(PL_curcop);
9944
9945     /* if we allocated too much space, give some back */
9946     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9947         SvLEN_set(sv, SvCUR(sv) + 1);
9948         SvPV_renew(sv, SvLEN(sv));
9949     }
9950
9951     /* decide whether this is the first or second quoted string we've read
9952        for this op
9953     */
9954
9955     if (PL_lex_stuff)
9956         PL_lex_repl = sv;
9957     else
9958         PL_lex_stuff = sv;
9959     return s;
9960 }
9961
9962 /*
9963   scan_num
9964   takes: pointer to position in buffer
9965   returns: pointer to new position in buffer
9966   side-effects: builds ops for the constant in pl_yylval.op
9967
9968   Read a number in any of the formats that Perl accepts:
9969
9970   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9971   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9972   0b[01](_?[01])*
9973   0[0-7](_?[0-7])*
9974   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9975
9976   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9977   thing it reads.
9978
9979   If it reads a number without a decimal point or an exponent, it will
9980   try converting the number to an integer and see if it can do so
9981   without loss of precision.
9982 */
9983
9984 char *
9985 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9986 {
9987     dVAR;
9988     register const char *s = start;     /* current position in buffer */
9989     register char *d;                   /* destination in temp buffer */
9990     register char *e;                   /* end of temp buffer */
9991     NV nv;                              /* number read, as a double */
9992     SV *sv = NULL;                      /* place to put the converted number */
9993     bool floatit;                       /* boolean: int or float? */
9994     const char *lastub = NULL;          /* position of last underbar */
9995     static char const number_too_long[] = "Number too long";
9996
9997     PERL_ARGS_ASSERT_SCAN_NUM;
9998
9999     /* We use the first character to decide what type of number this is */
10000
10001     switch (*s) {
10002     default:
10003       Perl_croak(aTHX_ "panic: scan_num");
10004
10005     /* if it starts with a 0, it could be an octal number, a decimal in
10006        0.13 disguise, or a hexadecimal number, or a binary number. */
10007     case '0':
10008         {
10009           /* variables:
10010              u          holds the "number so far"
10011              shift      the power of 2 of the base
10012                         (hex == 4, octal == 3, binary == 1)
10013              overflowed was the number more than we can hold?
10014
10015              Shift is used when we add a digit.  It also serves as an "are
10016              we in octal/hex/binary?" indicator to disallow hex characters
10017              when in octal mode.
10018            */
10019             NV n = 0.0;
10020             UV u = 0;
10021             I32 shift;
10022             bool overflowed = FALSE;
10023             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10024             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10025             static const char* const bases[5] =
10026               { "", "binary", "", "octal", "hexadecimal" };
10027             static const char* const Bases[5] =
10028               { "", "Binary", "", "Octal", "Hexadecimal" };
10029             static const char* const maxima[5] =
10030               { "",
10031                 "0b11111111111111111111111111111111",
10032                 "",
10033                 "037777777777",
10034                 "0xffffffff" };
10035             const char *base, *Base, *max;
10036
10037             /* check for hex */
10038             if (s[1] == 'x' || s[1] == 'X') {
10039                 shift = 4;
10040                 s += 2;
10041                 just_zero = FALSE;
10042             } else if (s[1] == 'b' || s[1] == 'B') {
10043                 shift = 1;
10044                 s += 2;
10045                 just_zero = FALSE;
10046             }
10047             /* check for a decimal in disguise */
10048             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10049                 goto decimal;
10050             /* so it must be octal */
10051             else {
10052                 shift = 3;
10053                 s++;
10054             }
10055
10056             if (*s == '_') {
10057                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10058                                "Misplaced _ in number");
10059                lastub = s++;
10060             }
10061
10062             base = bases[shift];
10063             Base = Bases[shift];
10064             max  = maxima[shift];
10065
10066             /* read the rest of the number */
10067             for (;;) {
10068                 /* x is used in the overflow test,
10069                    b is the digit we're adding on. */
10070                 UV x, b;
10071
10072                 switch (*s) {
10073
10074                 /* if we don't mention it, we're done */
10075                 default:
10076                     goto out;
10077
10078                 /* _ are ignored -- but warned about if consecutive */
10079                 case '_':
10080                     if (lastub && s == lastub + 1)
10081                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10082                                        "Misplaced _ in number");
10083                     lastub = s++;
10084                     break;
10085
10086                 /* 8 and 9 are not octal */
10087                 case '8': case '9':
10088                     if (shift == 3)
10089                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10090                     /* FALL THROUGH */
10091
10092                 /* octal digits */
10093                 case '2': case '3': case '4':
10094                 case '5': case '6': case '7':
10095                     if (shift == 1)
10096                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10097                     /* FALL THROUGH */
10098
10099                 case '0': case '1':
10100                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10101                     goto digit;
10102
10103                 /* hex digits */
10104                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10105                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10106                     /* make sure they said 0x */
10107                     if (shift != 4)
10108                         goto out;
10109                     b = (*s++ & 7) + 9;
10110
10111                     /* Prepare to put the digit we have onto the end
10112                        of the number so far.  We check for overflows.
10113                     */
10114
10115                   digit:
10116                     just_zero = FALSE;
10117                     if (!overflowed) {
10118                         x = u << shift; /* make room for the digit */
10119
10120                         if ((x >> shift) != u
10121                             && !(PL_hints & HINT_NEW_BINARY)) {
10122                             overflowed = TRUE;
10123                             n = (NV) u;
10124                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10125                                              "Integer overflow in %s number",
10126                                              base);
10127                         } else
10128                             u = x | b;          /* add the digit to the end */
10129                     }
10130                     if (overflowed) {
10131                         n *= nvshift[shift];
10132                         /* If an NV has not enough bits in its
10133                          * mantissa to represent an UV this summing of
10134                          * small low-order numbers is a waste of time
10135                          * (because the NV cannot preserve the
10136                          * low-order bits anyway): we could just
10137                          * remember when did we overflow and in the
10138                          * end just multiply n by the right
10139                          * amount. */
10140                         n += (NV) b;
10141                     }
10142                     break;
10143                 }
10144             }
10145
10146           /* if we get here, we had success: make a scalar value from
10147              the number.
10148           */
10149           out:
10150
10151             /* final misplaced underbar check */
10152             if (s[-1] == '_') {
10153                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10154             }
10155
10156             if (overflowed) {
10157                 if (n > 4294967295.0)
10158                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10159                                    "%s number > %s non-portable",
10160                                    Base, max);
10161                 sv = newSVnv(n);
10162             }
10163             else {
10164 #if UVSIZE > 4
10165                 if (u > 0xffffffff)
10166                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10167                                    "%s number > %s non-portable",
10168                                    Base, max);
10169 #endif
10170                 sv = newSVuv(u);
10171             }
10172             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10173                 sv = new_constant(start, s - start, "integer",
10174                                   sv, NULL, NULL, 0);
10175             else if (PL_hints & HINT_NEW_BINARY)
10176                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10177         }
10178         break;
10179
10180     /*
10181       handle decimal numbers.
10182       we're also sent here when we read a 0 as the first digit
10183     */
10184     case '1': case '2': case '3': case '4': case '5':
10185     case '6': case '7': case '8': case '9': case '.':
10186       decimal:
10187         d = PL_tokenbuf;
10188         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10189         floatit = FALSE;
10190
10191         /* read next group of digits and _ and copy into d */
10192         while (isDIGIT(*s) || *s == '_') {
10193             /* skip underscores, checking for misplaced ones
10194                if -w is on
10195             */
10196             if (*s == '_') {
10197                 if (lastub && s == lastub + 1)
10198                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10199                                    "Misplaced _ in number");
10200                 lastub = s++;
10201             }
10202             else {
10203                 /* check for end of fixed-length buffer */
10204                 if (d >= e)
10205                     Perl_croak(aTHX_ number_too_long);
10206                 /* if we're ok, copy the character */
10207                 *d++ = *s++;
10208             }
10209         }
10210
10211         /* final misplaced underbar check */
10212         if (lastub && s == lastub + 1) {
10213             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10214         }
10215
10216         /* read a decimal portion if there is one.  avoid
10217            3..5 being interpreted as the number 3. followed
10218            by .5
10219         */
10220         if (*s == '.' && s[1] != '.') {
10221             floatit = TRUE;
10222             *d++ = *s++;
10223
10224             if (*s == '_') {
10225                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10226                                "Misplaced _ in number");
10227                 lastub = s;
10228             }
10229
10230             /* copy, ignoring underbars, until we run out of digits.
10231             */
10232             for (; isDIGIT(*s) || *s == '_'; s++) {
10233                 /* fixed length buffer check */
10234                 if (d >= e)
10235                     Perl_croak(aTHX_ number_too_long);
10236                 if (*s == '_') {
10237                    if (lastub && s == lastub + 1)
10238                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10239                                       "Misplaced _ in number");
10240                    lastub = s;
10241                 }
10242                 else
10243                     *d++ = *s;
10244             }
10245             /* fractional part ending in underbar? */
10246             if (s[-1] == '_') {
10247                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10248                                "Misplaced _ in number");
10249             }
10250             if (*s == '.' && isDIGIT(s[1])) {
10251                 /* oops, it's really a v-string, but without the "v" */
10252                 s = start;
10253                 goto vstring;
10254             }
10255         }
10256
10257         /* read exponent part, if present */
10258         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10259             floatit = TRUE;
10260             s++;
10261
10262             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10263             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10264
10265             /* stray preinitial _ */
10266             if (*s == '_') {
10267                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10268                                "Misplaced _ in number");
10269                 lastub = s++;
10270             }
10271
10272             /* allow positive or negative exponent */
10273             if (*s == '+' || *s == '-')
10274                 *d++ = *s++;
10275
10276             /* stray initial _ */
10277             if (*s == '_') {
10278                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10279                                "Misplaced _ in number");
10280                 lastub = s++;
10281             }
10282
10283             /* read digits of exponent */
10284             while (isDIGIT(*s) || *s == '_') {
10285                 if (isDIGIT(*s)) {
10286                     if (d >= e)
10287                         Perl_croak(aTHX_ number_too_long);
10288                     *d++ = *s++;
10289                 }
10290                 else {
10291                    if (((lastub && s == lastub + 1) ||
10292                         (!isDIGIT(s[1]) && s[1] != '_')))
10293                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10294                                       "Misplaced _ in number");
10295                    lastub = s++;
10296                 }
10297             }
10298         }
10299
10300
10301         /*
10302            We try to do an integer conversion first if no characters
10303            indicating "float" have been found.
10304          */
10305
10306         if (!floatit) {
10307             UV uv;
10308             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10309
10310             if (flags == IS_NUMBER_IN_UV) {
10311               if (uv <= IV_MAX)
10312                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10313               else
10314                 sv = newSVuv(uv);
10315             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10316               if (uv <= (UV) IV_MIN)
10317                 sv = newSViv(-(IV)uv);
10318               else
10319                 floatit = TRUE;
10320             } else
10321               floatit = TRUE;
10322         }
10323         if (floatit) {
10324             /* terminate the string */
10325             *d = '\0';
10326             nv = Atof(PL_tokenbuf);
10327             sv = newSVnv(nv);
10328         }
10329
10330         if ( floatit
10331              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10332             const char *const key = floatit ? "float" : "integer";
10333             const STRLEN keylen = floatit ? 5 : 7;
10334             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10335                                 key, keylen, sv, NULL, NULL, 0);
10336         }
10337         break;
10338
10339     /* if it starts with a v, it could be a v-string */
10340     case 'v':
10341 vstring:
10342                 sv = newSV(5); /* preallocate storage space */
10343                 s = scan_vstring(s, PL_bufend, sv);
10344         break;
10345     }
10346
10347     /* make the op for the constant and return */
10348
10349     if (sv)
10350         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10351     else
10352         lvalp->opval = NULL;
10353
10354     return (char *)s;
10355 }
10356
10357 STATIC char *
10358 S_scan_formline(pTHX_ register char *s)
10359 {
10360     dVAR;
10361     register char *eol;
10362     register char *t;
10363     SV * const stuff = newSVpvs("");
10364     bool needargs = FALSE;
10365     bool eofmt = FALSE;
10366 #ifdef PERL_MAD
10367     char *tokenstart = s;
10368     SV* savewhite = NULL;
10369
10370     if (PL_madskills) {
10371         savewhite = PL_thiswhite;
10372         PL_thiswhite = 0;
10373     }
10374 #endif
10375
10376     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10377
10378     while (!needargs) {
10379         if (*s == '.') {
10380             t = s+1;
10381 #ifdef PERL_STRICT_CR
10382             while (SPACE_OR_TAB(*t))
10383                 t++;
10384 #else
10385             while (SPACE_OR_TAB(*t) || *t == '\r')
10386                 t++;
10387 #endif
10388             if (*t == '\n' || t == PL_bufend) {
10389                 eofmt = TRUE;
10390                 break;
10391             }
10392         }
10393         if (PL_in_eval && !PL_rsfp) {
10394             eol = (char *) memchr(s,'\n',PL_bufend-s);
10395             if (!eol++)
10396                 eol = PL_bufend;
10397         }
10398         else
10399             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10400         if (*s != '#') {
10401             for (t = s; t < eol; t++) {
10402                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10403                     needargs = FALSE;
10404                     goto enough;        /* ~~ must be first line in formline */
10405                 }
10406                 if (*t == '@' || *t == '^')
10407                     needargs = TRUE;
10408             }
10409             if (eol > s) {
10410                 sv_catpvn(stuff, s, eol-s);
10411 #ifndef PERL_STRICT_CR
10412                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10413                     char *end = SvPVX(stuff) + SvCUR(stuff);
10414                     end[-2] = '\n';
10415                     end[-1] = '\0';
10416                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10417                 }
10418 #endif
10419             }
10420             else
10421               break;
10422         }
10423         s = (char*)eol;
10424         if (PL_rsfp) {
10425             bool got_some;
10426 #ifdef PERL_MAD
10427             if (PL_madskills) {
10428                 if (PL_thistoken)
10429                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10430                 else
10431                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10432             }
10433 #endif
10434             PL_bufptr = PL_bufend;
10435             CopLINE_inc(PL_curcop);
10436             got_some = lex_next_chunk(0);
10437             CopLINE_dec(PL_curcop);
10438             s = PL_bufptr;
10439 #ifdef PERL_MAD
10440             tokenstart = PL_bufptr;
10441 #endif
10442             if (!got_some)
10443                 break;
10444         }
10445         incline(s);
10446     }
10447   enough:
10448     if (SvCUR(stuff)) {
10449         PL_expect = XTERM;
10450         if (needargs) {
10451             PL_lex_state = LEX_NORMAL;
10452             start_force(PL_curforce);
10453             NEXTVAL_NEXTTOKE.ival = 0;
10454             force_next(',');
10455         }
10456         else
10457             PL_lex_state = LEX_FORMLINE;
10458         if (!IN_BYTES) {
10459             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10460                 SvUTF8_on(stuff);
10461             else if (PL_encoding)
10462                 sv_recode_to_utf8(stuff, PL_encoding);
10463         }
10464         start_force(PL_curforce);
10465         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10466         force_next(THING);
10467         start_force(PL_curforce);
10468         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10469         force_next(LSTOP);
10470     }
10471     else {
10472         SvREFCNT_dec(stuff);
10473         if (eofmt)
10474             PL_lex_formbrack = 0;
10475         PL_bufptr = s;
10476     }
10477 #ifdef PERL_MAD
10478     if (PL_madskills) {
10479         if (PL_thistoken)
10480             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10481         else
10482             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10483         PL_thiswhite = savewhite;
10484     }
10485 #endif
10486     return s;
10487 }
10488
10489 I32
10490 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10491 {
10492     dVAR;
10493     const I32 oldsavestack_ix = PL_savestack_ix;
10494     CV* const outsidecv = PL_compcv;
10495
10496     if (PL_compcv) {
10497         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10498     }
10499     SAVEI32(PL_subline);
10500     save_item(PL_subname);
10501     SAVESPTR(PL_compcv);
10502
10503     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10504     CvFLAGS(PL_compcv) |= flags;
10505
10506     PL_subline = CopLINE(PL_curcop);
10507     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10508     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10509     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10510
10511     return oldsavestack_ix;
10512 }
10513
10514 #ifdef __SC__
10515 #pragma segment Perl_yylex
10516 #endif
10517 static int
10518 S_yywarn(pTHX_ const char *const s)
10519 {
10520     dVAR;
10521
10522     PERL_ARGS_ASSERT_YYWARN;
10523
10524     PL_in_eval |= EVAL_WARNONLY;
10525     yyerror(s);
10526     PL_in_eval &= ~EVAL_WARNONLY;
10527     return 0;
10528 }
10529
10530 int
10531 Perl_yyerror(pTHX_ const char *const s)
10532 {
10533     dVAR;
10534     const char *where = NULL;
10535     const char *context = NULL;
10536     int contlen = -1;
10537     SV *msg;
10538     int yychar  = PL_parser->yychar;
10539
10540     PERL_ARGS_ASSERT_YYERROR;
10541
10542     if (!yychar || (yychar == ';' && !PL_rsfp))
10543         where = "at EOF";
10544     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10545       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10546       PL_oldbufptr != PL_bufptr) {
10547         /*
10548                 Only for NetWare:
10549                 The code below is removed for NetWare because it abends/crashes on NetWare
10550                 when the script has error such as not having the closing quotes like:
10551                     if ($var eq "value)
10552                 Checking of white spaces is anyway done in NetWare code.
10553         */
10554 #ifndef NETWARE
10555         while (isSPACE(*PL_oldoldbufptr))
10556             PL_oldoldbufptr++;
10557 #endif
10558         context = PL_oldoldbufptr;
10559         contlen = PL_bufptr - PL_oldoldbufptr;
10560     }
10561     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10562       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10563         /*
10564                 Only for NetWare:
10565                 The code below is removed for NetWare because it abends/crashes on NetWare
10566                 when the script has error such as not having the closing quotes like:
10567                     if ($var eq "value)
10568                 Checking of white spaces is anyway done in NetWare code.
10569         */
10570 #ifndef NETWARE
10571         while (isSPACE(*PL_oldbufptr))
10572             PL_oldbufptr++;
10573 #endif
10574         context = PL_oldbufptr;
10575         contlen = PL_bufptr - PL_oldbufptr;
10576     }
10577     else if (yychar > 255)
10578         where = "next token ???";
10579     else if (yychar == -2) { /* YYEMPTY */
10580         if (PL_lex_state == LEX_NORMAL ||
10581            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10582             where = "at end of line";
10583         else if (PL_lex_inpat)
10584             where = "within pattern";
10585         else
10586             where = "within string";
10587     }
10588     else {
10589         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10590         if (yychar < 32)
10591             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10592         else if (isPRINT_LC(yychar)) {
10593             const char string = yychar;
10594             sv_catpvn(where_sv, &string, 1);
10595         }
10596         else
10597             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10598         where = SvPVX_const(where_sv);
10599     }
10600     msg = sv_2mortal(newSVpv(s, 0));
10601     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10602         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10603     if (context)
10604         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10605     else
10606         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10607     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10608         Perl_sv_catpvf(aTHX_ msg,
10609         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10610                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10611         PL_multi_end = 0;
10612     }
10613     if (PL_in_eval & EVAL_WARNONLY) {
10614         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10615     }
10616     else
10617         qerror(msg);
10618     if (PL_error_count >= 10) {
10619         if (PL_in_eval && SvCUR(ERRSV))
10620             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10621                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10622         else
10623             Perl_croak(aTHX_ "%s has too many errors.\n",
10624             OutCopFILE(PL_curcop));
10625     }
10626     PL_in_my = 0;
10627     PL_in_my_stash = NULL;
10628     return 0;
10629 }
10630 #ifdef __SC__
10631 #pragma segment Main
10632 #endif
10633
10634 STATIC char*
10635 S_swallow_bom(pTHX_ U8 *s)
10636 {
10637     dVAR;
10638     const STRLEN slen = SvCUR(PL_linestr);
10639
10640     PERL_ARGS_ASSERT_SWALLOW_BOM;
10641
10642     switch (s[0]) {
10643     case 0xFF:
10644         if (s[1] == 0xFE) {
10645             /* UTF-16 little-endian? (or UTF-32LE?) */
10646             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10647                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10648 #ifndef PERL_NO_UTF16_FILTER
10649             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10650             s += 2;
10651             if (PL_bufend > (char*)s) {
10652                 s = add_utf16_textfilter(s, TRUE);
10653             }
10654 #else
10655             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10656 #endif
10657         }
10658         break;
10659     case 0xFE:
10660         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10661 #ifndef PERL_NO_UTF16_FILTER
10662             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10663             s += 2;
10664             if (PL_bufend > (char *)s) {
10665                 s = add_utf16_textfilter(s, FALSE);
10666             }
10667 #else
10668             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10669 #endif
10670         }
10671         break;
10672     case 0xEF:
10673         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10674             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10675             s += 3;                      /* UTF-8 */
10676         }
10677         break;
10678     case 0:
10679         if (slen > 3) {
10680              if (s[1] == 0) {
10681                   if (s[2] == 0xFE && s[3] == 0xFF) {
10682                        /* UTF-32 big-endian */
10683                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10684                   }
10685              }
10686              else if (s[2] == 0 && s[3] != 0) {
10687                   /* Leading bytes
10688                    * 00 xx 00 xx
10689                    * are a good indicator of UTF-16BE. */
10690 #ifndef PERL_NO_UTF16_FILTER
10691                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10692                   s = add_utf16_textfilter(s, FALSE);
10693 #else
10694                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10695 #endif
10696              }
10697         }
10698 #ifdef EBCDIC
10699     case 0xDD:
10700         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10701             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10702             s += 4;                      /* UTF-8 */
10703         }
10704         break;
10705 #endif
10706
10707     default:
10708          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10709                   /* Leading bytes
10710                    * xx 00 xx 00
10711                    * are a good indicator of UTF-16LE. */
10712 #ifndef PERL_NO_UTF16_FILTER
10713               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10714               s = add_utf16_textfilter(s, TRUE);
10715 #else
10716               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10717 #endif
10718          }
10719     }
10720     return (char*)s;
10721 }
10722
10723
10724 #ifndef PERL_NO_UTF16_FILTER
10725 static I32
10726 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10727 {
10728     dVAR;
10729     SV *const filter = FILTER_DATA(idx);
10730     /* We re-use this each time round, throwing the contents away before we
10731        return.  */
10732     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10733     SV *const utf8_buffer = filter;
10734     IV status = IoPAGE(filter);
10735     const bool reverse = cBOOL(IoLINES(filter));
10736     I32 retval;
10737
10738     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10739
10740     /* As we're automatically added, at the lowest level, and hence only called
10741        from this file, we can be sure that we're not called in block mode. Hence
10742        don't bother writing code to deal with block mode.  */
10743     if (maxlen) {
10744         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10745     }
10746     if (status < 0) {
10747         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10748     }
10749     DEBUG_P(PerlIO_printf(Perl_debug_log,
10750                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10751                           FPTR2DPTR(void *, S_utf16_textfilter),
10752                           reverse ? 'l' : 'b', idx, maxlen, status,
10753                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10754
10755     while (1) {
10756         STRLEN chars;
10757         STRLEN have;
10758         I32 newlen;
10759         U8 *end;
10760         /* First, look in our buffer of existing UTF-8 data:  */
10761         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10762
10763         if (nl) {
10764             ++nl;
10765         } else if (status == 0) {
10766             /* EOF */
10767             IoPAGE(filter) = 0;
10768             nl = SvEND(utf8_buffer);
10769         }
10770         if (nl) {
10771             STRLEN got = nl - SvPVX(utf8_buffer);
10772             /* Did we have anything to append?  */
10773             retval = got != 0;
10774             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10775             /* Everything else in this code works just fine if SVp_POK isn't
10776                set.  This, however, needs it, and we need it to work, else
10777                we loop infinitely because the buffer is never consumed.  */
10778             sv_chop(utf8_buffer, nl);
10779             break;
10780         }
10781
10782         /* OK, not a complete line there, so need to read some more UTF-16.
10783            Read an extra octect if the buffer currently has an odd number. */
10784         while (1) {
10785             if (status <= 0)
10786                 break;
10787             if (SvCUR(utf16_buffer) >= 2) {
10788                 /* Location of the high octet of the last complete code point.
10789                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10790                    *coupled* with all the benefits of partial reads and
10791                    endianness.  */
10792                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10793                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10794
10795                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10796                     break;
10797                 }
10798
10799                 /* We have the first half of a surrogate. Read more.  */
10800                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10801             }
10802
10803             status = FILTER_READ(idx + 1, utf16_buffer,
10804                                  160 + (SvCUR(utf16_buffer) & 1));
10805             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10806             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10807             if (status < 0) {
10808                 /* Error */
10809                 IoPAGE(filter) = status;
10810                 return status;
10811             }
10812         }
10813
10814         chars = SvCUR(utf16_buffer) >> 1;
10815         have = SvCUR(utf8_buffer);
10816         SvGROW(utf8_buffer, have + chars * 3 + 1);
10817
10818         if (reverse) {
10819             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10820                                          (U8*)SvPVX_const(utf8_buffer) + have,
10821                                          chars * 2, &newlen);
10822         } else {
10823             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10824                                 (U8*)SvPVX_const(utf8_buffer) + have,
10825                                 chars * 2, &newlen);
10826         }
10827         SvCUR_set(utf8_buffer, have + newlen);
10828         *end = '\0';
10829
10830         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10831            it's private to us, and utf16_to_utf8{,reversed} take a
10832            (pointer,length) pair, rather than a NUL-terminated string.  */
10833         if(SvCUR(utf16_buffer) & 1) {
10834             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10835             SvCUR_set(utf16_buffer, 1);
10836         } else {
10837             SvCUR_set(utf16_buffer, 0);
10838         }
10839     }
10840     DEBUG_P(PerlIO_printf(Perl_debug_log,
10841                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10842                           status,
10843                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10844     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10845     return retval;
10846 }
10847
10848 static U8 *
10849 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10850 {
10851     SV *filter = filter_add(S_utf16_textfilter, NULL);
10852
10853     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10854
10855     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10856     sv_setpvs(filter, "");
10857     IoLINES(filter) = reversed;
10858     IoPAGE(filter) = 1; /* Not EOF */
10859
10860     /* Sadly, we have to return a valid pointer, come what may, so we have to
10861        ignore any error return from this.  */
10862     SvCUR_set(PL_linestr, 0);
10863     if (FILTER_READ(0, PL_linestr, 0)) {
10864         SvUTF8_on(PL_linestr);
10865     } else {
10866         SvUTF8_on(PL_linestr);
10867     }
10868     PL_bufend = SvEND(PL_linestr);
10869     return (U8*)SvPVX(PL_linestr);
10870 }
10871 #endif
10872
10873 /*
10874 Returns a pointer to the next character after the parsed
10875 vstring, as well as updating the passed in sv.
10876
10877 Function must be called like
10878
10879         sv = newSV(5);
10880         s = scan_vstring(s,e,sv);
10881
10882 where s and e are the start and end of the string.
10883 The sv should already be large enough to store the vstring
10884 passed in, for performance reasons.
10885
10886 */
10887
10888 char *
10889 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10890 {
10891     dVAR;
10892     const char *pos = s;
10893     const char *start = s;
10894
10895     PERL_ARGS_ASSERT_SCAN_VSTRING;
10896
10897     if (*pos == 'v') pos++;  /* get past 'v' */
10898     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10899         pos++;
10900     if ( *pos != '.') {
10901         /* this may not be a v-string if followed by => */
10902         const char *next = pos;
10903         while (next < e && isSPACE(*next))
10904             ++next;
10905         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10906             /* return string not v-string */
10907             sv_setpvn(sv,(char *)s,pos-s);
10908             return (char *)pos;
10909         }
10910     }
10911
10912     if (!isALPHA(*pos)) {
10913         U8 tmpbuf[UTF8_MAXBYTES+1];
10914
10915         if (*s == 'v')
10916             s++;  /* get past 'v' */
10917
10918         sv_setpvs(sv, "");
10919
10920         for (;;) {
10921             /* this is atoi() that tolerates underscores */
10922             U8 *tmpend;
10923             UV rev = 0;
10924             const char *end = pos;
10925             UV mult = 1;
10926             while (--end >= s) {
10927                 if (*end != '_') {
10928                     const UV orev = rev;
10929                     rev += (*end - '0') * mult;
10930                     mult *= 10;
10931                     if (orev > rev)
10932                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10933                                          "Integer overflow in decimal number");
10934                 }
10935             }
10936 #ifdef EBCDIC
10937             if (rev > 0x7FFFFFFF)
10938                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10939 #endif
10940             /* Append native character for the rev point */
10941             tmpend = uvchr_to_utf8(tmpbuf, rev);
10942             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10943             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10944                  SvUTF8_on(sv);
10945             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10946                  s = ++pos;
10947             else {
10948                  s = pos;
10949                  break;
10950             }
10951             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10952                  pos++;
10953         }
10954         SvPOK_on(sv);
10955         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10956         SvRMAGICAL_on(sv);
10957     }
10958     return (char *)s;
10959 }
10960
10961 int
10962 Perl_keyword_plugin_standard(pTHX_
10963         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10964 {
10965     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10966     PERL_UNUSED_CONTEXT;
10967     PERL_UNUSED_ARG(keyword_ptr);
10968     PERL_UNUSED_ARG(keyword_len);
10969     PERL_UNUSED_ARG(op_ptr);
10970     return KEYWORD_PLUGIN_DECLINE;
10971 }
10972
10973 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10974 static void
10975 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
10976 {
10977     SAVEI32(PL_lex_brackets);
10978     if (PL_lex_brackets > 100)
10979         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10980     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
10981     SAVEI32(PL_lex_allbrackets);
10982     PL_lex_allbrackets = 0;
10983     SAVEI8(PL_lex_fakeeof);
10984     PL_lex_fakeeof = (U8)fakeeof;
10985     if(yyparse(gramtype) && !PL_parser->error_count)
10986         qerror(Perl_mess(aTHX_ "Parse error"));
10987 }
10988
10989 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
10990 static OP *
10991 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
10992 {
10993     OP *o;
10994     ENTER;
10995     SAVEVPTR(PL_eval_root);
10996     PL_eval_root = NULL;
10997     parse_recdescent(gramtype, fakeeof);
10998     o = PL_eval_root;
10999     LEAVE;
11000     return o;
11001 }
11002
11003 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11004 static OP *
11005 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11006 {
11007     OP *exprop;
11008     if (flags & ~PARSE_OPTIONAL)
11009         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11010     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11011     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11012         if (!PL_parser->error_count)
11013             qerror(Perl_mess(aTHX_ "Parse error"));
11014         exprop = newOP(OP_NULL, 0);
11015     }
11016     return exprop;
11017 }
11018
11019 /*
11020 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11021
11022 Parse a Perl arithmetic expression.  This may contain operators of precedence
11023 down to the bit shift operators.  The expression must be followed (and thus
11024 terminated) either by a comparison or lower-precedence operator or by
11025 something that would normally terminate an expression such as semicolon.
11026 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11027 otherwise it is mandatory.  It is up to the caller to ensure that the
11028 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11029 the source of the code to be parsed and the lexical context for the
11030 expression.
11031
11032 The op tree representing the expression is returned.  If an optional
11033 expression is absent, a null pointer is returned, otherwise the pointer
11034 will be non-null.
11035
11036 If an error occurs in parsing or compilation, in most cases a valid op
11037 tree is returned anyway.  The error is reflected in the parser state,
11038 normally resulting in a single exception at the top level of parsing
11039 which covers all the compilation errors that occurred.  Some compilation
11040 errors, however, will throw an exception immediately.
11041
11042 =cut
11043 */
11044
11045 OP *
11046 Perl_parse_arithexpr(pTHX_ U32 flags)
11047 {
11048     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11049 }
11050
11051 /*
11052 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11053
11054 Parse a Perl term expression.  This may contain operators of precedence
11055 down to the assignment operators.  The expression must be followed (and thus
11056 terminated) either by a comma or lower-precedence operator or by
11057 something that would normally terminate an expression such as semicolon.
11058 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11059 otherwise it is mandatory.  It is up to the caller to ensure that the
11060 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11061 the source of the code to be parsed and the lexical context for the
11062 expression.
11063
11064 The op tree representing the expression is returned.  If an optional
11065 expression is absent, a null pointer is returned, otherwise the pointer
11066 will be non-null.
11067
11068 If an error occurs in parsing or compilation, in most cases a valid op
11069 tree is returned anyway.  The error is reflected in the parser state,
11070 normally resulting in a single exception at the top level of parsing
11071 which covers all the compilation errors that occurred.  Some compilation
11072 errors, however, will throw an exception immediately.
11073
11074 =cut
11075 */
11076
11077 OP *
11078 Perl_parse_termexpr(pTHX_ U32 flags)
11079 {
11080     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11081 }
11082
11083 /*
11084 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11085
11086 Parse a Perl list expression.  This may contain operators of precedence
11087 down to the comma operator.  The expression must be followed (and thus
11088 terminated) either by a low-precedence logic operator such as C<or> or by
11089 something that would normally terminate an expression such as semicolon.
11090 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11091 otherwise it is mandatory.  It is up to the caller to ensure that the
11092 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11093 the source of the code to be parsed and the lexical context for the
11094 expression.
11095
11096 The op tree representing the expression is returned.  If an optional
11097 expression is absent, a null pointer is returned, otherwise the pointer
11098 will be non-null.
11099
11100 If an error occurs in parsing or compilation, in most cases a valid op
11101 tree is returned anyway.  The error is reflected in the parser state,
11102 normally resulting in a single exception at the top level of parsing
11103 which covers all the compilation errors that occurred.  Some compilation
11104 errors, however, will throw an exception immediately.
11105
11106 =cut
11107 */
11108
11109 OP *
11110 Perl_parse_listexpr(pTHX_ U32 flags)
11111 {
11112     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11113 }
11114
11115 /*
11116 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11117
11118 Parse a single complete Perl expression.  This allows the full
11119 expression grammar, including the lowest-precedence operators such
11120 as C<or>.  The expression must be followed (and thus terminated) by a
11121 token that an expression would normally be terminated by: end-of-file,
11122 closing bracketing punctuation, semicolon, or one of the keywords that
11123 signals a postfix expression-statement modifier.  If I<flags> includes
11124 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11125 mandatory.  It is up to the caller to ensure that the dynamic parser
11126 state (L</PL_parser> et al) is correctly set to reflect the source of
11127 the code to be parsed and the lexical context for the expression.
11128
11129 The op tree representing the expression is returned.  If an optional
11130 expression is absent, a null pointer is returned, otherwise the pointer
11131 will be non-null.
11132
11133 If an error occurs in parsing or compilation, in most cases a valid op
11134 tree is returned anyway.  The error is reflected in the parser state,
11135 normally resulting in a single exception at the top level of parsing
11136 which covers all the compilation errors that occurred.  Some compilation
11137 errors, however, will throw an exception immediately.
11138
11139 =cut
11140 */
11141
11142 OP *
11143 Perl_parse_fullexpr(pTHX_ U32 flags)
11144 {
11145     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11146 }
11147
11148 /*
11149 =for apidoc Amx|OP *|parse_block|U32 flags
11150
11151 Parse a single complete Perl code block.  This consists of an opening
11152 brace, a sequence of statements, and a closing brace.  The block
11153 constitutes a lexical scope, so C<my> variables and various compile-time
11154 effects can be contained within it.  It is up to the caller to ensure
11155 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11156 reflect the source of the code to be parsed and the lexical context for
11157 the statement.
11158
11159 The op tree representing the code block is returned.  This is always a
11160 real op, never a null pointer.  It will normally be a C<lineseq> list,
11161 including C<nextstate> or equivalent ops.  No ops to construct any kind
11162 of runtime scope are included by virtue of it being a block.
11163
11164 If an error occurs in parsing or compilation, in most cases a valid op
11165 tree (most likely null) is returned anyway.  The error is reflected in
11166 the parser state, normally resulting in a single exception at the top
11167 level of parsing which covers all the compilation errors that occurred.
11168 Some compilation errors, however, will throw an exception immediately.
11169
11170 The I<flags> parameter is reserved for future use, and must always
11171 be zero.
11172
11173 =cut
11174 */
11175
11176 OP *
11177 Perl_parse_block(pTHX_ U32 flags)
11178 {
11179     if (flags)
11180         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11181     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11182 }
11183
11184 /*
11185 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11186
11187 Parse a single unadorned Perl statement.  This may be a normal imperative
11188 statement or a declaration that has compile-time effect.  It does not
11189 include any label or other affixture.  It is up to the caller to ensure
11190 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11191 reflect the source of the code to be parsed and the lexical context for
11192 the statement.
11193
11194 The op tree representing the statement is returned.  This may be a
11195 null pointer if the statement is null, for example if it was actually
11196 a subroutine definition (which has compile-time side effects).  If not
11197 null, it will be ops directly implementing the statement, suitable to
11198 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11199 equivalent op (except for those embedded in a scope contained entirely
11200 within the statement).
11201
11202 If an error occurs in parsing or compilation, in most cases a valid op
11203 tree (most likely null) is returned anyway.  The error is reflected in
11204 the parser state, normally resulting in a single exception at the top
11205 level of parsing which covers all the compilation errors that occurred.
11206 Some compilation errors, however, will throw an exception immediately.
11207
11208 The I<flags> parameter is reserved for future use, and must always
11209 be zero.
11210
11211 =cut
11212 */
11213
11214 OP *
11215 Perl_parse_barestmt(pTHX_ U32 flags)
11216 {
11217     if (flags)
11218         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11219     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11220 }
11221
11222 /*
11223 =for apidoc Amx|SV *|parse_label|U32 flags
11224
11225 Parse a single label, possibly optional, of the type that may prefix a
11226 Perl statement.  It is up to the caller to ensure that the dynamic parser
11227 state (L</PL_parser> et al) is correctly set to reflect the source of
11228 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11229 label is optional, otherwise it is mandatory.
11230
11231 The name of the label is returned in the form of a fresh scalar.  If an
11232 optional label is absent, a null pointer is returned.
11233
11234 If an error occurs in parsing, which can only occur if the label is
11235 mandatory, a valid label is returned anyway.  The error is reflected in
11236 the parser state, normally resulting in a single exception at the top
11237 level of parsing which covers all the compilation errors that occurred.
11238
11239 =cut
11240 */
11241
11242 SV *
11243 Perl_parse_label(pTHX_ U32 flags)
11244 {
11245     if (flags & ~PARSE_OPTIONAL)
11246         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11247     if (PL_lex_state == LEX_KNOWNEXT) {
11248         PL_parser->yychar = yylex();
11249         if (PL_parser->yychar == LABEL) {
11250             char *lpv = pl_yylval.pval;
11251             STRLEN llen = strlen(lpv);
11252             SV *lsv;
11253             PL_parser->yychar = YYEMPTY;
11254             lsv = newSV_type(SVt_PV);
11255             SvPV_set(lsv, lpv);
11256             SvCUR_set(lsv, llen);
11257             SvLEN_set(lsv, llen+1);
11258             SvPOK_on(lsv);
11259             return lsv;
11260         } else {
11261             yyunlex();
11262             goto no_label;
11263         }
11264     } else {
11265         char *s, *t;
11266         U8 c;
11267         STRLEN wlen, bufptr_pos;
11268         lex_read_space(0);
11269         t = s = PL_bufptr;
11270         c = (U8)*s;
11271         if (!isIDFIRST_A(c))
11272             goto no_label;
11273         do {
11274             c = (U8)*++t;
11275         } while(isWORDCHAR_A(c));
11276         wlen = t - s;
11277         if (word_takes_any_delimeter(s, wlen))
11278             goto no_label;
11279         bufptr_pos = s - SvPVX(PL_linestr);
11280         PL_bufptr = t;
11281         lex_read_space(LEX_KEEP_PREVIOUS);
11282         t = PL_bufptr;
11283         s = SvPVX(PL_linestr) + bufptr_pos;
11284         if (t[0] == ':' && t[1] != ':') {
11285             PL_oldoldbufptr = PL_oldbufptr;
11286             PL_oldbufptr = s;
11287             PL_bufptr = t+1;
11288             return newSVpvn(s, wlen);
11289         } else {
11290             PL_bufptr = s;
11291             no_label:
11292             if (flags & PARSE_OPTIONAL) {
11293                 return NULL;
11294             } else {
11295                 qerror(Perl_mess(aTHX_ "Parse error"));
11296                 return newSVpvs("x");
11297             }
11298         }
11299     }
11300 }
11301
11302 /*
11303 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11304
11305 Parse a single complete Perl statement.  This may be a normal imperative
11306 statement or a declaration that has compile-time effect, and may include
11307 optional labels.  It is up to the caller to ensure that the dynamic
11308 parser state (L</PL_parser> et al) is correctly set to reflect the source
11309 of the code to be parsed and the lexical context for the statement.
11310
11311 The op tree representing the statement is returned.  This may be a
11312 null pointer if the statement is null, for example if it was actually
11313 a subroutine definition (which has compile-time side effects).  If not
11314 null, it will be the result of a L</newSTATEOP> call, normally including
11315 a C<nextstate> or equivalent op.
11316
11317 If an error occurs in parsing or compilation, in most cases a valid op
11318 tree (most likely null) is returned anyway.  The error is reflected in
11319 the parser state, normally resulting in a single exception at the top
11320 level of parsing which covers all the compilation errors that occurred.
11321 Some compilation errors, however, will throw an exception immediately.
11322
11323 The I<flags> parameter is reserved for future use, and must always
11324 be zero.
11325
11326 =cut
11327 */
11328
11329 OP *
11330 Perl_parse_fullstmt(pTHX_ U32 flags)
11331 {
11332     if (flags)
11333         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11334     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11335 }
11336
11337 /*
11338 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11339
11340 Parse a sequence of zero or more Perl statements.  These may be normal
11341 imperative statements, including optional labels, or declarations
11342 that have compile-time effect, or any mixture thereof.  The statement
11343 sequence ends when a closing brace or end-of-file is encountered in a
11344 place where a new statement could have validly started.  It is up to
11345 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11346 is correctly set to reflect the source of the code to be parsed and the
11347 lexical context for the statements.
11348
11349 The op tree representing the statement sequence is returned.  This may
11350 be a null pointer if the statements were all null, for example if there
11351 were no statements or if there were only subroutine definitions (which
11352 have compile-time side effects).  If not null, it will be a C<lineseq>
11353 list, normally including C<nextstate> or equivalent ops.
11354
11355 If an error occurs in parsing or compilation, in most cases a valid op
11356 tree is returned anyway.  The error is reflected in the parser state,
11357 normally resulting in a single exception at the top level of parsing
11358 which covers all the compilation errors that occurred.  Some compilation
11359 errors, however, will throw an exception immediately.
11360
11361 The I<flags> parameter is reserved for future use, and must always
11362 be zero.
11363
11364 =cut
11365 */
11366
11367 OP *
11368 Perl_parse_stmtseq(pTHX_ U32 flags)
11369 {
11370     OP *stmtseqop;
11371     I32 c;
11372     if (flags)
11373         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11374     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11375     c = lex_peek_unichar(0);
11376     if (c != -1 && c != /*{*/'}')
11377         qerror(Perl_mess(aTHX_ "Parse error"));
11378     return stmtseqop;
11379 }
11380
11381 void
11382 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11383 {
11384     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11385     deprecate("qw(...) as parentheses");
11386     force_next((4<<24)|')');
11387     if (qwlist->op_type == OP_STUB) {
11388         op_free(qwlist);
11389     }
11390     else {
11391         start_force(PL_curforce);
11392         NEXTVAL_NEXTTOKE.opval = qwlist;
11393         force_next(THING);
11394     }
11395     force_next((2<<24)|'(');
11396 }
11397
11398 /*
11399  * Local variables:
11400  * c-indentation-style: bsd
11401  * c-basic-offset: 4
11402  * indent-tabs-mode: t
11403  * End:
11404  *
11405  * ex: set ts=8 sts=4 sw=4 noet:
11406  */