toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN1         : not used, except for not, which isn't a UNIOP
 228  * BOop         : bitwise or or xor
 229  * BAop         : bitwise and
 230  * SHop         : shift operator
 231  * PWop         : power operator
 232  * PMop         : pattern-matching operator
 233  * Aop          : addition-level operator
 234  * Mop          : multiplication-level operator
 235  * Eop          : equality-testing operator
 236  * Rop          : relational operator <= != gt
 237  *
 238  * Also see LOP and lop() below.
 239  */
 240
 241 #ifdef DEBUGGING /* Serve -DT. */
 242 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 243 #else
 244 #   define REPORT(retval) (retval)
 245 #endif
 246
 247 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 248 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 249 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 250 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 253 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 254 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 255 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 256 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 257 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 258 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 259 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 260 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 261 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 262 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 263 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 264 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 265 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 266 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 267
 268 /* This bit of chicanery makes a unary function followed by
 269  * a parenthesis into a function with one argument, highest precedence.
 270  * The UNIDOR macro is for unary functions that can be followed by the //
 271  * operator (such as C<shift // 0>).
 272  */
 273 #define UNI2(f,x) { \
 274         pl_yylval.ival = f; \
 275         PL_expect = x; \
 276         PL_bufptr = s; \
 277         PL_last_uni = PL_oldbufptr; \
 278         PL_last_lop_op = f; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284 #define UNI(f)    UNI2(f,XTERM)
 285 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 286
 287 #define UNIBRACK(f) { \
 288         pl_yylval.ival = f; \
 289         PL_bufptr = s; \
 290         PL_last_uni = PL_oldbufptr; \
 291         if (*s == '(') \
 292             return REPORT( (int)FUNC1 ); \
 293         s = PEEKSPACE(s); \
 294         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 295         }
 296
 297 /* grandfather return to old style */
 298 #define OLDLOP(f) \
 299         do { \
 300             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 301                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 302             pl_yylval.ival = (f); \
 303             PL_expect = XTERM; \
 304             PL_bufptr = s; \
 305             return (int)LSTOP; \
 306         } while(0)
 307
 308 #ifdef DEBUGGING
 309
 310 /* how to interpret the pl_yylval associated with the token */
 311 enum token_type {
 312     TOKENTYPE_NONE,
 313     TOKENTYPE_IVAL,
 314     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 315     TOKENTYPE_PVAL,
 316     TOKENTYPE_OPVAL,
 317     TOKENTYPE_GVVAL
 318 };
 319
 320 static struct debug_tokens {
 321     const int token;
 322     enum token_type type;
 323     const char *name;
 324 } const debug_tokens[] =
 325 {
 326     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 327     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 328     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 329     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 330     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 331     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 332     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 333     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 334     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 335     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 336     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 337     { DO,               TOKENTYPE_NONE,         "DO" },
 338     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 339     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 340     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 341     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 342     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 343     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 344     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 345     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 346     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 347     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 348     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 349     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 350     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 351     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 352     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 353     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 354     { IF,               TOKENTYPE_IVAL,         "IF" },
 355     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 356     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 357     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 358     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 359     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 360     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 361     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 362     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 363     { MY,               TOKENTYPE_IVAL,         "MY" },
 364     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 365     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 366     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 367     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 368     { OROR,             TOKENTYPE_NONE,         "OROR" },
 369     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 370     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 371     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 372     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 373     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 374     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 375     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 376     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 377     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 378     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 379     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 380     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 381     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 382     { SUB,              TOKENTYPE_NONE,         "SUB" },
 383     { THING,            TOKENTYPE_OPVAL,        "THING" },
 384     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 385     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 386     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 387     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 388     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 389     { USE,              TOKENTYPE_IVAL,         "USE" },
 390     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 391     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 392     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 393     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 394     { 0,                TOKENTYPE_NONE,         NULL }
 395 };
 396
 397 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 398
 399 STATIC int
 400 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 401 {
 402     dVAR;
 403
 404     PERL_ARGS_ASSERT_TOKEREPORT;
 405
 406     if (DEBUG_T_TEST) {
 407         const char *name = NULL;
 408         enum token_type type = TOKENTYPE_NONE;
 409         const struct debug_tokens *p;
 410         SV* const report = newSVpvs("<== ");
 411
 412         for (p = debug_tokens; p->token; p++) {
 413             if (p->token == (int)rv) {
 414                 name = p->name;
 415                 type = p->type;
 416                 break;
 417             }
 418         }
 419         if (name)
 420             Perl_sv_catpv(aTHX_ report, name);
 421         else if ((char)rv > ' ' && (char)rv < '~')
 422             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 423         else if (!rv)
 424             sv_catpvs(report, "EOF");
 425         else
 426             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 427         switch (type) {
 428         case TOKENTYPE_NONE:
 429         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 430             break;
 431         case TOKENTYPE_IVAL:
 432             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 433             break;
 434         case TOKENTYPE_OPNUM:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 436                                     PL_op_name[lvalp->ival]);
 437             break;
 438         case TOKENTYPE_PVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 440             break;
 441         case TOKENTYPE_OPVAL:
 442             if (lvalp->opval) {
 443                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 444                                     PL_op_name[lvalp->opval->op_type]);
 445                 if (lvalp->opval->op_type == OP_CONST) {
 446                     Perl_sv_catpvf(aTHX_ report, " %s",
 447                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 448                 }
 449
 450             }
 451             else
 452                 sv_catpvs(report, "(opval=null)");
 453             break;
 454         }
 455         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 456     };
 457     return (int)rv;
 458 }
 459
 460
 461 /* print the buffer with suitable escapes */
 462
 463 STATIC void
 464 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 465 {
 466     SV* const tmp = newSVpvs("");
 467
 468     PERL_ARGS_ASSERT_PRINTBUF;
 469
 470     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 471     SvREFCNT_dec(tmp);
 472 }
 473
 474 #endif
 475
 476 static int
 477 S_deprecate_commaless_var_list(pTHX) {
 478     PL_expect = XTERM;
 479     deprecate("comma-less variable list");
 480     return REPORT(','); /* grandfather non-comma-format format */
 481 }
 482
 483 /*
 484  * S_ao
 485  *
 486  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 487  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 488  */
 489
 490 STATIC int
 491 S_ao(pTHX_ int toketype)
 492 {
 493     dVAR;
 494     if (*PL_bufptr == '=') {
 495         PL_bufptr++;
 496         if (toketype == ANDAND)
 497             pl_yylval.ival = OP_ANDASSIGN;
 498         else if (toketype == OROR)
 499             pl_yylval.ival = OP_ORASSIGN;
 500         else if (toketype == DORDOR)
 501             pl_yylval.ival = OP_DORASSIGN;
 502         toketype = ASSIGNOP;
 503     }
 504     return toketype;
 505 }
 506
 507 /*
 508  * S_no_op
 509  * When Perl expects an operator and finds something else, no_op
 510  * prints the warning.  It always prints "<something> found where
 511  * operator expected.  It prints "Missing semicolon on previous line?"
 512  * if the surprise occurs at the start of the line.  "do you need to
 513  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 514  * where the compiler doesn't know if foo is a method call or a function.
 515  * It prints "Missing operator before end of line" if there's nothing
 516  * after the missing operator, or "... before <...>" if there is something
 517  * after the missing operator.
 518  */
 519
 520 STATIC void
 521 S_no_op(pTHX_ const char *const what, char *s)
 522 {
 523     dVAR;
 524     char * const oldbp = PL_bufptr;
 525     const bool is_first = (PL_oldbufptr == PL_linestart);
 526
 527     PERL_ARGS_ASSERT_NO_OP;
 528
 529     if (!s)
 530         s = oldbp;
 531     else
 532         PL_bufptr = s;
 533     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 534     if (ckWARN_d(WARN_SYNTAX)) {
 535         if (is_first)
 536             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 537                     "\t(Missing semicolon on previous line?)\n");
 538         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 539             const char *t;
 540             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 541                 NOOP;
 542             if (t < PL_bufptr && isSPACE(*t))
 543                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                         "\t(Do you need to predeclare %.*s?)\n",
 545                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 546         }
 547         else {
 548             assert(s >= oldbp);
 549             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 550                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 551         }
 552     }
 553     PL_bufptr = oldbp;
 554 }
 555
 556 /*
 557  * S_missingterm
 558  * Complain about missing quote/regexp/heredoc terminator.
 559  * If it's called with NULL then it cauterizes the line buffer.
 560  * If we're in a delimited string and the delimiter is a control
 561  * character, it's reformatted into a two-char sequence like ^C.
 562  * This is fatal.
 563  */
 564
 565 STATIC void
 566 S_missingterm(pTHX_ char *s)
 567 {
 568     dVAR;
 569     char tmpbuf[3];
 570     char q;
 571     if (s) {
 572         char * const nl = strrchr(s,'\n');
 573         if (nl)
 574             *nl = '\0';
 575     }
 576     else if (isCNTRL(PL_multi_close)) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581     }
 582     else {
 583         *tmpbuf = (char)PL_multi_close;
 584         tmpbuf[1] = '\0';
 585         s = tmpbuf;
 586     }
 587     q = strchr(s,'"') ? '\'' : '"';
 588     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 589 }
 590
 591 /*
 592  * Check whether the named feature is enabled.
 593  */
 594 bool
 595 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 596 {
 597     dVAR;
 598     HV * const hinthv = GvHV(PL_hintgv);
 599     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 600
 601     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 602
 603     if (namelen > MAX_FEATURE_LEN)
 604         return FALSE;
 605     memcpy(&he_name[8], name, namelen);
 606
 607     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 608 }
 609
 610 /*
 611  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 612  * utf16-to-utf8-reversed.
 613  */
 614
 615 #ifdef PERL_CR_FILTER
 616 static void
 617 strip_return(SV *sv)
 618 {
 619     register const char *s = SvPVX_const(sv);
 620     register const char * const e = s + SvCUR(sv);
 621
 622     PERL_ARGS_ASSERT_STRIP_RETURN;
 623
 624     /* outer loop optimized to do nothing if there are no CR-LFs */
 625     while (s < e) {
 626         if (*s++ == '\r' && *s == '\n') {
 627             /* hit a CR-LF, need to copy the rest */
 628             register char *d = s - 1;
 629             *d++ = *s++;
 630             while (s < e) {
 631                 if (*s == '\r' && s[1] == '\n')
 632                     s++;
 633                 *d++ = *s++;
 634             }
 635             SvCUR(sv) -= s - d;
 636             return;
 637         }
 638     }
 639 }
 640
 641 STATIC I32
 642 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 643 {
 644     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 645     if (count > 0 && !maxlen)
 646         strip_return(sv);
 647     return count;
 648 }
 649 #endif
 650
 651 /*
 652 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 653
 654 Creates and initialises a new lexer/parser state object, supplying
 655 a context in which to lex and parse from a new source of Perl code.
 656 A pointer to the new state object is placed in L</PL_parser>.  An entry
 657 is made on the save stack so that upon unwinding the new state object
 658 will be destroyed and the former value of L</PL_parser> will be restored.
 659 Nothing else need be done to clean up the parsing context.
 660
 661 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 662 non-null, provides a string (in SV form) containing code to be parsed.
 663 A copy of the string is made, so subsequent modification of I<line>
 664 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 665 from which code will be read to be parsed.  If both are non-null, the
 666 code in I<line> comes first and must consist of complete lines of input,
 667 and I<rsfp> supplies the remainder of the source.
 668
 669 The I<flags> parameter is reserved for future use, and must always
 670 be zero, except for one flag that is currently reserved for perl's internal
 671 use.
 672
 673 =cut
 674 */
 675
 676 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 677    can share filters with the current parser. */
 678
 679 void
 680 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 681 {
 682     dVAR;
 683     const char *s = NULL;
 684     STRLEN len;
 685     yy_parser *parser, *oparser;
 686     if (flags && flags != LEX_START_SAME_FILTER)
 687         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 688
 689     /* create and initialise a parser */
 690
 691     Newxz(parser, 1, yy_parser);
 692     parser->old_parser = oparser = PL_parser;
 693     PL_parser = parser;
 694
 695     parser->stack = NULL;
 696     parser->ps = NULL;
 697     parser->stack_size = 0;
 698
 699     /* on scope exit, free this parser and restore any outer one */
 700     SAVEPARSER(parser);
 701     parser->saved_curcop = PL_curcop;
 702
 703     /* initialise lexer state */
 704
 705 #ifdef PERL_MAD
 706     parser->curforce = -1;
 707 #else
 708     parser->nexttoke = 0;
 709 #endif
 710     parser->error_count = oparser ? oparser->error_count : 0;
 711     parser->copline = NOLINE;
 712     parser->lex_state = LEX_NORMAL;
 713     parser->expect = XSTATE;
 714     parser->rsfp = rsfp;
 715     parser->rsfp_filters =
 716       !(flags & LEX_START_SAME_FILTER) || !oparser
 717         ? newAV()
 718         : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 719
 720     Newx(parser->lex_brackstack, 120, char);
 721     Newx(parser->lex_casestack, 12, char);
 722     *parser->lex_casestack = '\0';
 723
 724     if (line) {
 725         s = SvPV_const(line, len);
 726     } else {
 727         len = 0;
 728     }
 729
 730     if (!len) {
 731         parser->linestr = newSVpvs("\n;");
 732     } else {
 733         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 734         if (s[len-1] != ';')
 735             sv_catpvs(parser->linestr, "\n;");
 736     }
 737     parser->oldoldbufptr =
 738         parser->oldbufptr =
 739         parser->bufptr =
 740         parser->linestart = SvPVX(parser->linestr);
 741     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 742     parser->last_lop = parser->last_uni = NULL;
 743
 744     parser->in_pod = 0;
 745 }
 746
 747
 748 /* delete a parser object */
 749
 750 void
 751 Perl_parser_free(pTHX_  const yy_parser *parser)
 752 {
 753     PERL_ARGS_ASSERT_PARSER_FREE;
 754
 755     PL_curcop = parser->saved_curcop;
 756     SvREFCNT_dec(parser->linestr);
 757
 758     if (parser->rsfp == PerlIO_stdin())
 759         PerlIO_clearerr(parser->rsfp);
 760     else if (parser->rsfp && (!parser->old_parser ||
 761                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 762         PerlIO_close(parser->rsfp);
 763     SvREFCNT_dec(parser->rsfp_filters);
 764
 765     Safefree(parser->lex_brackstack);
 766     Safefree(parser->lex_casestack);
 767     PL_parser = parser->old_parser;
 768     Safefree(parser);
 769 }
 770
 771
 772 /*
 773 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 774
 775 Buffer scalar containing the chunk currently under consideration of the
 776 text currently being lexed.  This is always a plain string scalar (for
 777 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 778 normal scalar means; instead refer to the buffer directly by the pointer
 779 variables described below.
 780
 781 The lexer maintains various C<char*> pointers to things in the
 782 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 783 reallocated, all of these pointers must be updated.  Don't attempt to
 784 do this manually, but rather use L</lex_grow_linestr> if you need to
 785 reallocate the buffer.
 786
 787 The content of the text chunk in the buffer is commonly exactly one
 788 complete line of input, up to and including a newline terminator,
 789 but there are situations where it is otherwise.  The octets of the
 790 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 791 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 792 flag on this scalar, which may disagree with it.
 793
 794 For direct examination of the buffer, the variable
 795 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 796 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 797 of these pointers is usually preferable to examination of the scalar
 798 through normal scalar means.
 799
 800 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 801
 802 Direct pointer to the end of the chunk of text currently being lexed, the
 803 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 804 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 805 always located at the end of the buffer, and does not count as part of
 806 the buffer's contents.
 807
 808 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 809
 810 Points to the current position of lexing inside the lexer buffer.
 811 Characters around this point may be freely examined, within
 812 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 813 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 814 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 815
 816 Lexing code (whether in the Perl core or not) moves this pointer past
 817 the characters that it consumes.  It is also expected to perform some
 818 bookkeeping whenever a newline character is consumed.  This movement
 819 can be more conveniently performed by the function L</lex_read_to>,
 820 which handles newlines appropriately.
 821
 822 Interpretation of the buffer's octets can be abstracted out by
 823 using the slightly higher-level functions L</lex_peek_unichar> and
 824 L</lex_read_unichar>.
 825
 826 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 827
 828 Points to the start of the current line inside the lexer buffer.
 829 This is useful for indicating at which column an error occurred, and
 830 not much else.  This must be updated by any lexing code that consumes
 831 a newline; the function L</lex_read_to> handles this detail.
 832
 833 =cut
 834 */
 835
 836 /*
 837 =for apidoc Amx|bool|lex_bufutf8
 838
 839 Indicates whether the octets in the lexer buffer
 840 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 841 of Unicode characters.  If not, they should be interpreted as Latin-1
 842 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 843
 844 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 845 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 846 encoding.
 847
 848 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 849 is significant, but not the whole story regarding the input character
 850 encoding.  Normally, when a file is being read, the scalar contains octets
 851 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 852 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 853 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 854 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 855 is in effect.  This logic may change in the future; use this function
 856 instead of implementing the logic yourself.
 857
 858 =cut
 859 */
 860
 861 bool
 862 Perl_lex_bufutf8(pTHX)
 863 {
 864     return UTF;
 865 }
 866
 867 /*
 868 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 869
 870 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 871 at least I<len> octets (including terminating NUL).  Returns a
 872 pointer to the reallocated buffer.  This is necessary before making
 873 any direct modification of the buffer that would increase its length.
 874 L</lex_stuff_pvn> provides a more convenient way to insert text into
 875 the buffer.
 876
 877 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 878 this function updates all of the lexer's variables that point directly
 879 into the buffer.
 880
 881 =cut
 882 */
 883
 884 char *
 885 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 886 {
 887     SV *linestr;
 888     char *buf;
 889     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 890     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 891     linestr = PL_parser->linestr;
 892     buf = SvPVX(linestr);
 893     if (len <= SvLEN(linestr))
 894         return buf;
 895     bufend_pos = PL_parser->bufend - buf;
 896     bufptr_pos = PL_parser->bufptr - buf;
 897     oldbufptr_pos = PL_parser->oldbufptr - buf;
 898     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 899     linestart_pos = PL_parser->linestart - buf;
 900     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 901     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 902     buf = sv_grow(linestr, len);
 903     PL_parser->bufend = buf + bufend_pos;
 904     PL_parser->bufptr = buf + bufptr_pos;
 905     PL_parser->oldbufptr = buf + oldbufptr_pos;
 906     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 907     PL_parser->linestart = buf + linestart_pos;
 908     if (PL_parser->last_uni)
 909         PL_parser->last_uni = buf + last_uni_pos;
 910     if (PL_parser->last_lop)
 911         PL_parser->last_lop = buf + last_lop_pos;
 912     return buf;
 913 }
 914
 915 /*
 916 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 917
 918 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 919 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 920 reallocating the buffer if necessary.  This means that lexing code that
 921 runs later will see the characters as if they had appeared in the input.
 922 It is not recommended to do this as part of normal parsing, and most
 923 uses of this facility run the risk of the inserted characters being
 924 interpreted in an unintended manner.
 925
 926 The string to be inserted is represented by I<len> octets starting
 927 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 928 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 929 The characters are recoded for the lexer buffer, according to how the
 930 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 931 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 932 function is more convenient.
 933
 934 =cut
 935 */
 936
 937 void
 938 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 939 {
 940     dVAR;
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             const char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             SvCUR_set(PL_parser->linestr,
 959                 SvCUR(PL_parser->linestr) + len+highhalf);
 960             PL_parser->bufend += len+highhalf;
 961             for (p = pv; p != e; p++) {
 962                 U8 c = (U8)*p;
 963                 if (c & 0x80) {
 964                     *bufptr++ = (char)(0xc0 | (c >> 6));
 965                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 966                 } else {
 967                     *bufptr++ = (char)c;
 968                 }
 969             }
 970         }
 971     } else {
 972         if (flags & LEX_STUFF_UTF8) {
 973             STRLEN highhalf = 0;
 974             const char *p, *e = pv+len;
 975             for (p = pv; p != e; p++) {
 976                 U8 c = (U8)*p;
 977                 if (c >= 0xc4) {
 978                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 979                                 "non-Latin-1 character into Latin-1 input");
 980                 } else if (c >= 0xc2 && p+1 != e &&
 981                             (((U8)p[1]) & 0xc0) == 0x80) {
 982                     p++;
 983                     highhalf++;
 984                 } else if (c >= 0x80) {
 985                     /* malformed UTF-8 */
 986                     ENTER;
 987                     SAVESPTR(PL_warnhook);
 988                     PL_warnhook = PERL_WARNHOOK_FATAL;
 989                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 990                     LEAVE;
 991                 }
 992             }
 993             if (!highhalf)
 994                 goto plain_copy;
 995             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 996             bufptr = PL_parser->bufptr;
 997             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 998             SvCUR_set(PL_parser->linestr,
 999                 SvCUR(PL_parser->linestr) + len-highhalf);
1000             PL_parser->bufend += len-highhalf;
1001             for (p = pv; p != e; p++) {
1002                 U8 c = (U8)*p;
1003                 if (c & 0x80) {
1004                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1005                     p++;
1006                 } else {
1007                     *bufptr++ = (char)c;
1008                 }
1009             }
1010         } else {
1011             plain_copy:
1012             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1013             bufptr = PL_parser->bufptr;
1014             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1015             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1016             PL_parser->bufend += len;
1017             Copy(pv, bufptr, len, char);
1018         }
1019     }
1020 }
1021
1022 /*
1023 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1024
1025 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1026 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1027 reallocating the buffer if necessary.  This means that lexing code that
1028 runs later will see the characters as if they had appeared in the input.
1029 It is not recommended to do this as part of normal parsing, and most
1030 uses of this facility run the risk of the inserted characters being
1031 interpreted in an unintended manner.
1032
1033 The string to be inserted is represented by octets starting at I<pv>
1034 and continuing to the first nul.  These octets are interpreted as either
1035 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1036 in I<flags>.  The characters are recoded for the lexer buffer, according
1037 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1038 If it is not convenient to nul-terminate a string to be inserted, the
1039 L</lex_stuff_pvn> function is more appropriate.
1040
1041 =cut
1042 */
1043
1044 void
1045 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1046 {
1047     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1048     lex_stuff_pvn(pv, strlen(pv), flags);
1049 }
1050
1051 /*
1052 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1053
1054 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1055 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1056 reallocating the buffer if necessary.  This means that lexing code that
1057 runs later will see the characters as if they had appeared in the input.
1058 It is not recommended to do this as part of normal parsing, and most
1059 uses of this facility run the risk of the inserted characters being
1060 interpreted in an unintended manner.
1061
1062 The string to be inserted is the string value of I<sv>.  The characters
1063 are recoded for the lexer buffer, according to how the buffer is currently
1064 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1065 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1066 need to construct a scalar.
1067
1068 =cut
1069 */
1070
1071 void
1072 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1073 {
1074     char *pv;
1075     STRLEN len;
1076     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1077     if (flags)
1078         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1079     pv = SvPV(sv, len);
1080     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_unstuff|char *ptr
1085
1086 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1087 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1088 This hides the discarded text from any lexing code that runs later,
1089 as if the text had never appeared.
1090
1091 This is not the normal way to consume lexed text.  For that, use
1092 L</lex_read_to>.
1093
1094 =cut
1095 */
1096
1097 void
1098 Perl_lex_unstuff(pTHX_ char *ptr)
1099 {
1100     char *buf, *bufend;
1101     STRLEN unstuff_len;
1102     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1103     buf = PL_parser->bufptr;
1104     if (ptr < buf)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106     if (ptr == buf)
1107         return;
1108     bufend = PL_parser->bufend;
1109     if (ptr > bufend)
1110         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1111     unstuff_len = ptr - buf;
1112     Move(ptr, buf, bufend+1-ptr, char);
1113     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1114     PL_parser->bufend = bufend - unstuff_len;
1115 }
1116
1117 /*
1118 =for apidoc Amx|void|lex_read_to|char *ptr
1119
1120 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1121 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1122 performing the correct bookkeeping whenever a newline character is passed.
1123 This is the normal way to consume lexed text.
1124
1125 Interpretation of the buffer's octets can be abstracted out by
1126 using the slightly higher-level functions L</lex_peek_unichar> and
1127 L</lex_read_unichar>.
1128
1129 =cut
1130 */
1131
1132 void
1133 Perl_lex_read_to(pTHX_ char *ptr)
1134 {
1135     char *s;
1136     PERL_ARGS_ASSERT_LEX_READ_TO;
1137     s = PL_parser->bufptr;
1138     if (ptr < s || ptr > PL_parser->bufend)
1139         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1140     for (; s != ptr; s++)
1141         if (*s == '\n') {
1142             CopLINE_inc(PL_curcop);
1143             PL_parser->linestart = s+1;
1144         }
1145     PL_parser->bufptr = ptr;
1146 }
1147
1148 /*
1149 =for apidoc Amx|void|lex_discard_to|char *ptr
1150
1151 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1152 up to I<ptr>.  The remaining content of the buffer will be moved, and
1153 all pointers into the buffer updated appropriately.  I<ptr> must not
1154 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1155 it is not permitted to discard text that has yet to be lexed.
1156
1157 Normally it is not necessarily to do this directly, because it suffices to
1158 use the implicit discarding behaviour of L</lex_next_chunk> and things
1159 based on it.  However, if a token stretches across multiple lines,
1160 and the lexing code has kept multiple lines of text in the buffer for
1161 that purpose, then after completion of the token it would be wise to
1162 explicitly discard the now-unneeded earlier lines, to avoid future
1163 multi-line tokens growing the buffer without bound.
1164
1165 =cut
1166 */
1167
1168 void
1169 Perl_lex_discard_to(pTHX_ char *ptr)
1170 {
1171     char *buf;
1172     STRLEN discard_len;
1173     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1174     buf = SvPVX(PL_parser->linestr);
1175     if (ptr < buf)
1176         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1177     if (ptr == buf)
1178         return;
1179     if (ptr > PL_parser->bufptr)
1180         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1181     discard_len = ptr - buf;
1182     if (PL_parser->oldbufptr < ptr)
1183         PL_parser->oldbufptr = ptr;
1184     if (PL_parser->oldoldbufptr < ptr)
1185         PL_parser->oldoldbufptr = ptr;
1186     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1187         PL_parser->last_uni = NULL;
1188     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1189         PL_parser->last_lop = NULL;
1190     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1191     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1192     PL_parser->bufend -= discard_len;
1193     PL_parser->bufptr -= discard_len;
1194     PL_parser->oldbufptr -= discard_len;
1195     PL_parser->oldoldbufptr -= discard_len;
1196     if (PL_parser->last_uni)
1197         PL_parser->last_uni -= discard_len;
1198     if (PL_parser->last_lop)
1199         PL_parser->last_lop -= discard_len;
1200 }
1201
1202 /*
1203 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1204
1205 Reads in the next chunk of text to be lexed, appending it to
1206 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1207 looked to the end of the current chunk and wants to know more.  It is
1208 usual, but not necessary, for lexing to have consumed the entirety of
1209 the current chunk at this time.
1210
1211 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1212 chunk (i.e., the current chunk has been entirely consumed), normally the
1213 current chunk will be discarded at the same time that the new chunk is
1214 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1215 will not be discarded.  If the current chunk has not been entirely
1216 consumed, then it will not be discarded regardless of the flag.
1217
1218 Returns true if some new text was added to the buffer, or false if the
1219 buffer has reached the end of the input text.
1220
1221 =cut
1222 */
1223
1224 #define LEX_FAKE_EOF 0x80000000
1225
1226 bool
1227 Perl_lex_next_chunk(pTHX_ U32 flags)
1228 {
1229     SV *linestr;
1230     char *buf;
1231     STRLEN old_bufend_pos, new_bufend_pos;
1232     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1233     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1234     bool got_some_for_debugger = 0;
1235     bool got_some;
1236     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1237         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1238     linestr = PL_parser->linestr;
1239     buf = SvPVX(linestr);
1240     if (!(flags & LEX_KEEP_PREVIOUS) &&
1241             PL_parser->bufptr == PL_parser->bufend) {
1242         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1243         linestart_pos = 0;
1244         if (PL_parser->last_uni != PL_parser->bufend)
1245             PL_parser->last_uni = NULL;
1246         if (PL_parser->last_lop != PL_parser->bufend)
1247             PL_parser->last_lop = NULL;
1248         last_uni_pos = last_lop_pos = 0;
1249         *buf = 0;
1250         SvCUR(linestr) = 0;
1251     } else {
1252         old_bufend_pos = PL_parser->bufend - buf;
1253         bufptr_pos = PL_parser->bufptr - buf;
1254         oldbufptr_pos = PL_parser->oldbufptr - buf;
1255         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1256         linestart_pos = PL_parser->linestart - buf;
1257         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1258         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1259     }
1260     if (flags & LEX_FAKE_EOF) {
1261         goto eof;
1262     } else if (!PL_parser->rsfp) {
1263         got_some = 0;
1264     } else if (filter_gets(linestr, old_bufend_pos)) {
1265         got_some = 1;
1266         got_some_for_debugger = 1;
1267     } else {
1268         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1269             sv_setpvs(linestr, "");
1270         eof:
1271         /* End of real input.  Close filehandle (unless it was STDIN),
1272          * then add implicit termination.
1273          */
1274         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1275             PerlIO_clearerr(PL_parser->rsfp);
1276         else if (PL_parser->rsfp)
1277             (void)PerlIO_close(PL_parser->rsfp);
1278         PL_parser->rsfp = NULL;
1279         PL_parser->in_pod = 0;
1280 #ifdef PERL_MAD
1281         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1282             PL_faketokens = 1;
1283 #endif
1284         if (!PL_in_eval && PL_minus_p) {
1285             sv_catpvs(linestr,
1286                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1287             PL_minus_n = PL_minus_p = 0;
1288         } else if (!PL_in_eval && PL_minus_n) {
1289             sv_catpvs(linestr, /*{*/";}");
1290             PL_minus_n = 0;
1291         } else
1292             sv_catpvs(linestr, ";");
1293         got_some = 1;
1294     }
1295     buf = SvPVX(linestr);
1296     new_bufend_pos = SvCUR(linestr);
1297     PL_parser->bufend = buf + new_bufend_pos;
1298     PL_parser->bufptr = buf + bufptr_pos;
1299     PL_parser->oldbufptr = buf + oldbufptr_pos;
1300     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1301     PL_parser->linestart = buf + linestart_pos;
1302     if (PL_parser->last_uni)
1303         PL_parser->last_uni = buf + last_uni_pos;
1304     if (PL_parser->last_lop)
1305         PL_parser->last_lop = buf + last_lop_pos;
1306     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1307             PL_curstash != PL_debstash) {
1308         /* debugger active and we're not compiling the debugger code,
1309          * so store the line into the debugger's array of lines
1310          */
1311         update_debugger_info(NULL, buf+old_bufend_pos,
1312             new_bufend_pos-old_bufend_pos);
1313     }
1314     return got_some;
1315 }
1316
1317 /*
1318 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1319
1320 Looks ahead one (Unicode) character in the text currently being lexed.
1321 Returns the codepoint (unsigned integer value) of the next character,
1322 or -1 if lexing has reached the end of the input text.  To consume the
1323 peeked character, use L</lex_read_unichar>.
1324
1325 If the next character is in (or extends into) the next chunk of input
1326 text, the next chunk will be read in.  Normally the current chunk will be
1327 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1328 then the current chunk will not be discarded.
1329
1330 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1331 is encountered, an exception is generated.
1332
1333 =cut
1334 */
1335
1336 I32
1337 Perl_lex_peek_unichar(pTHX_ U32 flags)
1338 {
1339     dVAR;
1340     char *s, *bufend;
1341     if (flags & ~(LEX_KEEP_PREVIOUS))
1342         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1343     s = PL_parser->bufptr;
1344     bufend = PL_parser->bufend;
1345     if (UTF) {
1346         U8 head;
1347         I32 unichar;
1348         STRLEN len, retlen;
1349         if (s == bufend) {
1350             if (!lex_next_chunk(flags))
1351                 return -1;
1352             s = PL_parser->bufptr;
1353             bufend = PL_parser->bufend;
1354         }
1355         head = (U8)*s;
1356         if (!(head & 0x80))
1357             return head;
1358         if (head & 0x40) {
1359             len = PL_utf8skip[head];
1360             while ((STRLEN)(bufend-s) < len) {
1361                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1362                     break;
1363                 s = PL_parser->bufptr;
1364                 bufend = PL_parser->bufend;
1365             }
1366         }
1367         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1368         if (retlen == (STRLEN)-1) {
1369             /* malformed UTF-8 */
1370             ENTER;
1371             SAVESPTR(PL_warnhook);
1372             PL_warnhook = PERL_WARNHOOK_FATAL;
1373             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1374             LEAVE;
1375         }
1376         return unichar;
1377     } else {
1378         if (s == bufend) {
1379             if (!lex_next_chunk(flags))
1380                 return -1;
1381             s = PL_parser->bufptr;
1382         }
1383         return (U8)*s;
1384     }
1385 }
1386
1387 /*
1388 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1389
1390 Reads the next (Unicode) character in the text currently being lexed.
1391 Returns the codepoint (unsigned integer value) of the character read,
1392 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1393 if lexing has reached the end of the input text.  To non-destructively
1394 examine the next character, use L</lex_peek_unichar> instead.
1395
1396 If the next character is in (or extends into) the next chunk of input
1397 text, the next chunk will be read in.  Normally the current chunk will be
1398 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1399 then the current chunk will not be discarded.
1400
1401 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1402 is encountered, an exception is generated.
1403
1404 =cut
1405 */
1406
1407 I32
1408 Perl_lex_read_unichar(pTHX_ U32 flags)
1409 {
1410     I32 c;
1411     if (flags & ~(LEX_KEEP_PREVIOUS))
1412         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1413     c = lex_peek_unichar(flags);
1414     if (c != -1) {
1415         if (c == '\n')
1416             CopLINE_inc(PL_curcop);
1417         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1418     }
1419     return c;
1420 }
1421
1422 /*
1423 =for apidoc Amx|void|lex_read_space|U32 flags
1424
1425 Reads optional spaces, in Perl style, in the text currently being
1426 lexed.  The spaces may include ordinary whitespace characters and
1427 Perl-style comments.  C<#line> directives are processed if encountered.
1428 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1429 at a non-space character (or the end of the input text).
1430
1431 If spaces extend into the next chunk of input text, the next chunk will
1432 be read in.  Normally the current chunk will be discarded at the same
1433 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1434 chunk will not be discarded.
1435
1436 =cut
1437 */
1438
1439 #define LEX_NO_NEXT_CHUNK 0x80000000
1440
1441 void
1442 Perl_lex_read_space(pTHX_ U32 flags)
1443 {
1444     char *s, *bufend;
1445     bool need_incline = 0;
1446     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1447         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1448 #ifdef PERL_MAD
1449     if (PL_skipwhite) {
1450         sv_free(PL_skipwhite);
1451         PL_skipwhite = NULL;
1452     }
1453     if (PL_madskills)
1454         PL_skipwhite = newSVpvs("");
1455 #endif /* PERL_MAD */
1456     s = PL_parser->bufptr;
1457     bufend = PL_parser->bufend;
1458     while (1) {
1459         char c = *s;
1460         if (c == '#') {
1461             do {
1462                 c = *++s;
1463             } while (!(c == '\n' || (c == 0 && s == bufend)));
1464         } else if (c == '\n') {
1465             s++;
1466             PL_parser->linestart = s;
1467             if (s == bufend)
1468                 need_incline = 1;
1469             else
1470                 incline(s);
1471         } else if (isSPACE(c)) {
1472             s++;
1473         } else if (c == 0 && s == bufend) {
1474             bool got_more;
1475 #ifdef PERL_MAD
1476             if (PL_madskills)
1477                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1478 #endif /* PERL_MAD */
1479             if (flags & LEX_NO_NEXT_CHUNK)
1480                 break;
1481             PL_parser->bufptr = s;
1482             CopLINE_inc(PL_curcop);
1483             got_more = lex_next_chunk(flags);
1484             CopLINE_dec(PL_curcop);
1485             s = PL_parser->bufptr;
1486             bufend = PL_parser->bufend;
1487             if (!got_more)
1488                 break;
1489             if (need_incline && PL_parser->rsfp) {
1490                 incline(s);
1491                 need_incline = 0;
1492             }
1493         } else {
1494             break;
1495         }
1496     }
1497 #ifdef PERL_MAD
1498     if (PL_madskills)
1499         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1500 #endif /* PERL_MAD */
1501     PL_parser->bufptr = s;
1502 }
1503
1504 /*
1505  * S_incline
1506  * This subroutine has nothing to do with tilting, whether at windmills
1507  * or pinball tables.  Its name is short for "increment line".  It
1508  * increments the current line number in CopLINE(PL_curcop) and checks
1509  * to see whether the line starts with a comment of the form
1510  *    # line 500 "foo.pm"
1511  * If so, it sets the current line number and file to the values in the comment.
1512  */
1513
1514 STATIC void
1515 S_incline(pTHX_ const char *s)
1516 {
1517     dVAR;
1518     const char *t;
1519     const char *n;
1520     const char *e;
1521     line_t line_num;
1522
1523     PERL_ARGS_ASSERT_INCLINE;
1524
1525     CopLINE_inc(PL_curcop);
1526     if (*s++ != '#')
1527         return;
1528     while (SPACE_OR_TAB(*s))
1529         s++;
1530     if (strnEQ(s, "line", 4))
1531         s += 4;
1532     else
1533         return;
1534     if (SPACE_OR_TAB(*s))
1535         s++;
1536     else
1537         return;
1538     while (SPACE_OR_TAB(*s))
1539         s++;
1540     if (!isDIGIT(*s))
1541         return;
1542
1543     n = s;
1544     while (isDIGIT(*s))
1545         s++;
1546     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1547         return;
1548     while (SPACE_OR_TAB(*s))
1549         s++;
1550     if (*s == '"' && (t = strchr(s+1, '"'))) {
1551         s++;
1552         e = t + 1;
1553     }
1554     else {
1555         t = s;
1556         while (!isSPACE(*t))
1557             t++;
1558         e = t;
1559     }
1560     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1561         e++;
1562     if (*e != '\n' && *e != '\0')
1563         return;         /* false alarm */
1564
1565     line_num = atoi(n)-1;
1566
1567     if (t - s > 0) {
1568         const STRLEN len = t - s;
1569         SV *const temp_sv = CopFILESV(PL_curcop);
1570         const char *cf;
1571         STRLEN tmplen;
1572
1573         if (temp_sv) {
1574             cf = SvPVX(temp_sv);
1575             tmplen = SvCUR(temp_sv);
1576         } else {
1577             cf = NULL;
1578             tmplen = 0;
1579         }
1580
1581         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1582             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1583              * to *{"::_<newfilename"} */
1584             /* However, the long form of evals is only turned on by the
1585                debugger - usually they're "(eval %lu)" */
1586             char smallbuf[128];
1587             char *tmpbuf;
1588             GV **gvp;
1589             STRLEN tmplen2 = len;
1590             if (tmplen + 2 <= sizeof smallbuf)
1591                 tmpbuf = smallbuf;
1592             else
1593                 Newx(tmpbuf, tmplen + 2, char);
1594             tmpbuf[0] = '_';
1595             tmpbuf[1] = '<';
1596             memcpy(tmpbuf + 2, cf, tmplen);
1597             tmplen += 2;
1598             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1599             if (gvp) {
1600                 char *tmpbuf2;
1601                 GV *gv2;
1602
1603                 if (tmplen2 + 2 <= sizeof smallbuf)
1604                     tmpbuf2 = smallbuf;
1605                 else
1606                     Newx(tmpbuf2, tmplen2 + 2, char);
1607
1608                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1609                     /* Either they malloc'd it, or we malloc'd it,
1610                        so no prefix is present in ours.  */
1611                     tmpbuf2[0] = '_';
1612                     tmpbuf2[1] = '<';
1613                 }
1614
1615                 memcpy(tmpbuf2 + 2, s, tmplen2);
1616                 tmplen2 += 2;
1617
1618                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1619                 if (!isGV(gv2)) {
1620                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1621                     /* adjust ${"::_<newfilename"} to store the new file name */
1622                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1623                     /* The line number may differ. If that is the case,
1624                        alias the saved lines that are in the array.
1625                        Otherwise alias the whole array. */
1626                     if (CopLINE(PL_curcop) == line_num) {
1627                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1628                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1629                     }
1630                     else if (GvAV(*gvp)) {
1631                         AV * const av = GvAV(*gvp);
1632                         const I32 start = CopLINE(PL_curcop)+1;
1633                         I32 items = AvFILLp(av) - start;
1634                         if (items > 0) {
1635                             AV * const av2 = GvAVn(gv2);
1636                             SV **svp = AvARRAY(av) + start;
1637                             I32 l = (I32)line_num+1;
1638                             while (items--)
1639                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1640                         }
1641                     }
1642                 }
1643
1644                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1645             }
1646             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1647         }
1648         CopFILE_free(PL_curcop);
1649         CopFILE_setn(PL_curcop, s, len);
1650     }
1651     CopLINE_set(PL_curcop, line_num);
1652 }
1653
1654 #ifdef PERL_MAD
1655 /* skip space before PL_thistoken */
1656
1657 STATIC char *
1658 S_skipspace0(pTHX_ register char *s)
1659 {
1660     PERL_ARGS_ASSERT_SKIPSPACE0;
1661
1662     s = skipspace(s);
1663     if (!PL_madskills)
1664         return s;
1665     if (PL_skipwhite) {
1666         if (!PL_thiswhite)
1667             PL_thiswhite = newSVpvs("");
1668         sv_catsv(PL_thiswhite, PL_skipwhite);
1669         sv_free(PL_skipwhite);
1670         PL_skipwhite = 0;
1671     }
1672     PL_realtokenstart = s - SvPVX(PL_linestr);
1673     return s;
1674 }
1675
1676 /* skip space after PL_thistoken */
1677
1678 STATIC char *
1679 S_skipspace1(pTHX_ register char *s)
1680 {
1681     const char *start = s;
1682     I32 startoff = start - SvPVX(PL_linestr);
1683
1684     PERL_ARGS_ASSERT_SKIPSPACE1;
1685
1686     s = skipspace(s);
1687     if (!PL_madskills)
1688         return s;
1689     start = SvPVX(PL_linestr) + startoff;
1690     if (!PL_thistoken && PL_realtokenstart >= 0) {
1691         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1692         PL_thistoken = newSVpvn(tstart, start - tstart);
1693     }
1694     PL_realtokenstart = -1;
1695     if (PL_skipwhite) {
1696         if (!PL_nextwhite)
1697             PL_nextwhite = newSVpvs("");
1698         sv_catsv(PL_nextwhite, PL_skipwhite);
1699         sv_free(PL_skipwhite);
1700         PL_skipwhite = 0;
1701     }
1702     return s;
1703 }
1704
1705 STATIC char *
1706 S_skipspace2(pTHX_ register char *s, SV **svp)
1707 {
1708     char *start;
1709     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1710     const I32 startoff = s - SvPVX(PL_linestr);
1711
1712     PERL_ARGS_ASSERT_SKIPSPACE2;
1713
1714     s = skipspace(s);
1715     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1716     if (!PL_madskills || !svp)
1717         return s;
1718     start = SvPVX(PL_linestr) + startoff;
1719     if (!PL_thistoken && PL_realtokenstart >= 0) {
1720         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1721         PL_thistoken = newSVpvn(tstart, start - tstart);
1722         PL_realtokenstart = -1;
1723     }
1724     if (PL_skipwhite) {
1725         if (!*svp)
1726             *svp = newSVpvs("");
1727         sv_setsv(*svp, PL_skipwhite);
1728         sv_free(PL_skipwhite);
1729         PL_skipwhite = 0;
1730     }
1731
1732     return s;
1733 }
1734 #endif
1735
1736 STATIC void
1737 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1738 {
1739     AV *av = CopFILEAVx(PL_curcop);
1740     if (av) {
1741         SV * const sv = newSV_type(SVt_PVMG);
1742         if (orig_sv)
1743             sv_setsv(sv, orig_sv);
1744         else
1745             sv_setpvn(sv, buf, len);
1746         (void)SvIOK_on(sv);
1747         SvIV_set(sv, 0);
1748         av_store(av, (I32)CopLINE(PL_curcop), sv);
1749     }
1750 }
1751
1752 /*
1753  * S_skipspace
1754  * Called to gobble the appropriate amount and type of whitespace.
1755  * Skips comments as well.
1756  */
1757
1758 STATIC char *
1759 S_skipspace(pTHX_ register char *s)
1760 {
1761 #ifdef PERL_MAD
1762     char *start = s;
1763 #endif /* PERL_MAD */
1764     PERL_ARGS_ASSERT_SKIPSPACE;
1765 #ifdef PERL_MAD
1766     if (PL_skipwhite) {
1767         sv_free(PL_skipwhite);
1768         PL_skipwhite = NULL;
1769     }
1770 #endif /* PERL_MAD */
1771     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1772         while (s < PL_bufend && SPACE_OR_TAB(*s))
1773             s++;
1774     } else {
1775         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1776         PL_bufptr = s;
1777         lex_read_space(LEX_KEEP_PREVIOUS |
1778                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1779                     LEX_NO_NEXT_CHUNK : 0));
1780         s = PL_bufptr;
1781         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1782         if (PL_linestart > PL_bufptr)
1783             PL_bufptr = PL_linestart;
1784         return s;
1785     }
1786 #ifdef PERL_MAD
1787     if (PL_madskills)
1788         PL_skipwhite = newSVpvn(start, s-start);
1789 #endif /* PERL_MAD */
1790     return s;
1791 }
1792
1793 /*
1794  * S_check_uni
1795  * Check the unary operators to ensure there's no ambiguity in how they're
1796  * used.  An ambiguous piece of code would be:
1797  *     rand + 5
1798  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1799  * the +5 is its argument.
1800  */
1801
1802 STATIC void
1803 S_check_uni(pTHX)
1804 {
1805     dVAR;
1806     const char *s;
1807     const char *t;
1808
1809     if (PL_oldoldbufptr != PL_last_uni)
1810         return;
1811     while (isSPACE(*PL_last_uni))
1812         PL_last_uni++;
1813     s = PL_last_uni;
1814     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1815         s++;
1816     if ((t = strchr(s, '(')) && t < PL_bufptr)
1817         return;
1818
1819     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1820                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1821                      (int)(s - PL_last_uni), PL_last_uni);
1822 }
1823
1824 /*
1825  * LOP : macro to build a list operator.  Its behaviour has been replaced
1826  * with a subroutine, S_lop() for which LOP is just another name.
1827  */
1828
1829 #define LOP(f,x) return lop(f,x,s)
1830
1831 /*
1832  * S_lop
1833  * Build a list operator (or something that might be one).  The rules:
1834  *  - if we have a next token, then it's a list operator [why?]
1835  *  - if the next thing is an opening paren, then it's a function
1836  *  - else it's a list operator
1837  */
1838
1839 STATIC I32
1840 S_lop(pTHX_ I32 f, int x, char *s)
1841 {
1842     dVAR;
1843
1844     PERL_ARGS_ASSERT_LOP;
1845
1846     pl_yylval.ival = f;
1847     CLINE;
1848     PL_expect = x;
1849     PL_bufptr = s;
1850     PL_last_lop = PL_oldbufptr;
1851     PL_last_lop_op = (OPCODE)f;
1852 #ifdef PERL_MAD
1853     if (PL_lasttoke)
1854         goto lstop;
1855 #else
1856     if (PL_nexttoke)
1857         goto lstop;
1858 #endif
1859     if (*s == '(')
1860         return REPORT(FUNC);
1861     s = PEEKSPACE(s);
1862     if (*s == '(')
1863         return REPORT(FUNC);
1864     else {
1865         lstop:
1866         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1867             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1868         return REPORT(LSTOP);
1869     }
1870 }
1871
1872 #ifdef PERL_MAD
1873  /*
1874  * S_start_force
1875  * Sets up for an eventual force_next().  start_force(0) basically does
1876  * an unshift, while start_force(-1) does a push.  yylex removes items
1877  * on the "pop" end.
1878  */
1879
1880 STATIC void
1881 S_start_force(pTHX_ int where)
1882 {
1883     int i;
1884
1885     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1886         where = PL_lasttoke;
1887     assert(PL_curforce < 0 || PL_curforce == where);
1888     if (PL_curforce != where) {
1889         for (i = PL_lasttoke; i > where; --i) {
1890             PL_nexttoke[i] = PL_nexttoke[i-1];
1891         }
1892         PL_lasttoke++;
1893     }
1894     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1895         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1896     PL_curforce = where;
1897     if (PL_nextwhite) {
1898         if (PL_madskills)
1899             curmad('^', newSVpvs(""));
1900         CURMAD('_', PL_nextwhite);
1901     }
1902 }
1903
1904 STATIC void
1905 S_curmad(pTHX_ char slot, SV *sv)
1906 {
1907     MADPROP **where;
1908
1909     if (!sv)
1910         return;
1911     if (PL_curforce < 0)
1912         where = &PL_thismad;
1913     else
1914         where = &PL_nexttoke[PL_curforce].next_mad;
1915
1916     if (PL_faketokens)
1917         sv_setpvs(sv, "");
1918     else {
1919         if (!IN_BYTES) {
1920             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1921                 SvUTF8_on(sv);
1922             else if (PL_encoding) {
1923                 sv_recode_to_utf8(sv, PL_encoding);
1924             }
1925         }
1926     }
1927
1928     /* keep a slot open for the head of the list? */
1929     if (slot != '_' && *where && (*where)->mad_key == '^') {
1930         (*where)->mad_key = slot;
1931         sv_free(MUTABLE_SV(((*where)->mad_val)));
1932         (*where)->mad_val = (void*)sv;
1933     }
1934     else
1935         addmad(newMADsv(slot, sv), where, 0);
1936 }
1937 #else
1938 #  define start_force(where)    NOOP
1939 #  define curmad(slot, sv)      NOOP
1940 #endif
1941
1942 /*
1943  * S_force_next
1944  * When the lexer realizes it knows the next token (for instance,
1945  * it is reordering tokens for the parser) then it can call S_force_next
1946  * to know what token to return the next time the lexer is called.  Caller
1947  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1948  * and possibly PL_expect to ensure the lexer handles the token correctly.
1949  */
1950
1951 STATIC void
1952 S_force_next(pTHX_ I32 type)
1953 {
1954     dVAR;
1955 #ifdef DEBUGGING
1956     if (DEBUG_T_TEST) {
1957         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1958         tokereport(type, &NEXTVAL_NEXTTOKE);
1959     }
1960 #endif
1961 #ifdef PERL_MAD
1962     if (PL_curforce < 0)
1963         start_force(PL_lasttoke);
1964     PL_nexttoke[PL_curforce].next_type = type;
1965     if (PL_lex_state != LEX_KNOWNEXT)
1966         PL_lex_defer = PL_lex_state;
1967     PL_lex_state = LEX_KNOWNEXT;
1968     PL_lex_expect = PL_expect;
1969     PL_curforce = -1;
1970 #else
1971     PL_nexttype[PL_nexttoke] = type;
1972     PL_nexttoke++;
1973     if (PL_lex_state != LEX_KNOWNEXT) {
1974         PL_lex_defer = PL_lex_state;
1975         PL_lex_expect = PL_expect;
1976         PL_lex_state = LEX_KNOWNEXT;
1977     }
1978 #endif
1979 }
1980
1981 void
1982 Perl_yyunlex(pTHX)
1983 {
1984     int yyc = PL_parser->yychar;
1985     if (yyc != YYEMPTY) {
1986         if (yyc) {
1987             start_force(-1);
1988             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1989             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1990                 PL_lex_allbrackets--;
1991                 PL_lex_brackets--;
1992                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1993             } else if (yyc == '('/*)*/) {
1994                 PL_lex_allbrackets--;
1995                 yyc |= (2<<24);
1996             }
1997             force_next(yyc);
1998         }
1999         PL_parser->yychar = YYEMPTY;
2000     }
2001 }
2002
2003 STATIC SV *
2004 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2005 {
2006     dVAR;
2007     SV * const sv = newSVpvn_utf8(start, len,
2008                                   !IN_BYTES
2009                                   && UTF
2010                                   && !is_ascii_string((const U8*)start, len)
2011                                   && is_utf8_string((const U8*)start, len));
2012     return sv;
2013 }
2014
2015 /*
2016  * S_force_word
2017  * When the lexer knows the next thing is a word (for instance, it has
2018  * just seen -> and it knows that the next char is a word char, then
2019  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2020  * lookahead.
2021  *
2022  * Arguments:
2023  *   char *start : buffer position (must be within PL_linestr)
2024  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2025  *   int check_keyword : if true, Perl checks to make sure the word isn't
2026  *       a keyword (do this if the word is a label, e.g. goto FOO)
2027  *   int allow_pack : if true, : characters will also be allowed (require,
2028  *       use, etc. do this)
2029  *   int allow_initial_tick : used by the "sub" lexer only.
2030  */
2031
2032 STATIC char *
2033 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2034 {
2035     dVAR;
2036     register char *s;
2037     STRLEN len;
2038
2039     PERL_ARGS_ASSERT_FORCE_WORD;
2040
2041     start = SKIPSPACE1(start);
2042     s = start;
2043     if (isIDFIRST_lazy_if(s,UTF) ||
2044         (allow_pack && *s == ':') ||
2045         (allow_initial_tick && *s == '\'') )
2046     {
2047         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2048         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2049             return start;
2050         start_force(PL_curforce);
2051         if (PL_madskills)
2052             curmad('X', newSVpvn(start,s-start));
2053         if (token == METHOD) {
2054             s = SKIPSPACE1(s);
2055             if (*s == '(')
2056                 PL_expect = XTERM;
2057             else {
2058                 PL_expect = XOPERATOR;
2059             }
2060         }
2061         if (PL_madskills)
2062             curmad('g', newSVpvs( "forced" ));
2063         NEXTVAL_NEXTTOKE.opval
2064             = (OP*)newSVOP(OP_CONST,0,
2065                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2066         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2067         force_next(token);
2068     }
2069     return s;
2070 }
2071
2072 /*
2073  * S_force_ident
2074  * Called when the lexer wants $foo *foo &foo etc, but the program
2075  * text only contains the "foo" portion.  The first argument is a pointer
2076  * to the "foo", and the second argument is the type symbol to prefix.
2077  * Forces the next token to be a "WORD".
2078  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2079  */
2080
2081 STATIC void
2082 S_force_ident(pTHX_ register const char *s, int kind)
2083 {
2084     dVAR;
2085
2086     PERL_ARGS_ASSERT_FORCE_IDENT;
2087
2088     if (*s) {
2089         const STRLEN len = strlen(s);
2090         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2091         start_force(PL_curforce);
2092         NEXTVAL_NEXTTOKE.opval = o;
2093         force_next(WORD);
2094         if (kind) {
2095             o->op_private = OPpCONST_ENTERED;
2096             /* XXX see note in pp_entereval() for why we forgo typo
2097                warnings if the symbol must be introduced in an eval.
2098                GSAR 96-10-12 */
2099             gv_fetchpvn_flags(s, len,
2100                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2101                               : GV_ADD,
2102                               kind == '$' ? SVt_PV :
2103                               kind == '@' ? SVt_PVAV :
2104                               kind == '%' ? SVt_PVHV :
2105                               SVt_PVGV
2106                               );
2107         }
2108     }
2109 }
2110
2111 NV
2112 Perl_str_to_version(pTHX_ SV *sv)
2113 {
2114     NV retval = 0.0;
2115     NV nshift = 1.0;
2116     STRLEN len;
2117     const char *start = SvPV_const(sv,len);
2118     const char * const end = start + len;
2119     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2120
2121     PERL_ARGS_ASSERT_STR_TO_VERSION;
2122
2123     while (start < end) {
2124         STRLEN skip;
2125         UV n;
2126         if (utf)
2127             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2128         else {
2129             n = *(U8*)start;
2130             skip = 1;
2131         }
2132         retval += ((NV)n)/nshift;
2133         start += skip;
2134         nshift *= 1000;
2135     }
2136     return retval;
2137 }
2138
2139 /*
2140  * S_force_version
2141  * Forces the next token to be a version number.
2142  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2143  * and if "guessing" is TRUE, then no new token is created (and the caller
2144  * must use an alternative parsing method).
2145  */
2146
2147 STATIC char *
2148 S_force_version(pTHX_ char *s, int guessing)
2149 {
2150     dVAR;
2151     OP *version = NULL;
2152     char *d;
2153 #ifdef PERL_MAD
2154     I32 startoff = s - SvPVX(PL_linestr);
2155 #endif
2156
2157     PERL_ARGS_ASSERT_FORCE_VERSION;
2158
2159     s = SKIPSPACE1(s);
2160
2161     d = s;
2162     if (*d == 'v')
2163         d++;
2164     if (isDIGIT(*d)) {
2165         while (isDIGIT(*d) || *d == '_' || *d == '.')
2166             d++;
2167 #ifdef PERL_MAD
2168         if (PL_madskills) {
2169             start_force(PL_curforce);
2170             curmad('X', newSVpvn(s,d-s));
2171         }
2172 #endif
2173         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2174             SV *ver;
2175 #ifdef USE_LOCALE_NUMERIC
2176             char *loc = setlocale(LC_NUMERIC, "C");
2177 #endif
2178             s = scan_num(s, &pl_yylval);
2179 #ifdef USE_LOCALE_NUMERIC
2180             setlocale(LC_NUMERIC, loc);
2181 #endif
2182             version = pl_yylval.opval;
2183             ver = cSVOPx(version)->op_sv;
2184             if (SvPOK(ver) && !SvNIOK(ver)) {
2185                 SvUPGRADE(ver, SVt_PVNV);
2186                 SvNV_set(ver, str_to_version(ver));
2187                 SvNOK_on(ver);          /* hint that it is a version */
2188             }
2189         }
2190         else if (guessing) {
2191 #ifdef PERL_MAD
2192             if (PL_madskills) {
2193                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2194                 PL_nextwhite = 0;
2195                 s = SvPVX(PL_linestr) + startoff;
2196             }
2197 #endif
2198             return s;
2199         }
2200     }
2201
2202 #ifdef PERL_MAD
2203     if (PL_madskills && !version) {
2204         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2205         PL_nextwhite = 0;
2206         s = SvPVX(PL_linestr) + startoff;
2207     }
2208 #endif
2209     /* NOTE: The parser sees the package name and the VERSION swapped */
2210     start_force(PL_curforce);
2211     NEXTVAL_NEXTTOKE.opval = version;
2212     force_next(WORD);
2213
2214     return s;
2215 }
2216
2217 /*
2218  * S_force_strict_version
2219  * Forces the next token to be a version number using strict syntax rules.
2220  */
2221
2222 STATIC char *
2223 S_force_strict_version(pTHX_ char *s)
2224 {
2225     dVAR;
2226     OP *version = NULL;
2227 #ifdef PERL_MAD
2228     I32 startoff = s - SvPVX(PL_linestr);
2229 #endif
2230     const char *errstr = NULL;
2231
2232     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2233
2234     while (isSPACE(*s)) /* leading whitespace */
2235         s++;
2236
2237     if (is_STRICT_VERSION(s,&errstr)) {
2238         SV *ver = newSV(0);
2239         s = (char *)scan_version(s, ver, 0);
2240         version = newSVOP(OP_CONST, 0, ver);
2241     }
2242     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2243             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2244     {
2245         PL_bufptr = s;
2246         if (errstr)
2247             yyerror(errstr); /* version required */
2248         return s;
2249     }
2250
2251 #ifdef PERL_MAD
2252     if (PL_madskills && !version) {
2253         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2254         PL_nextwhite = 0;
2255         s = SvPVX(PL_linestr) + startoff;
2256     }
2257 #endif
2258     /* NOTE: The parser sees the package name and the VERSION swapped */
2259     start_force(PL_curforce);
2260     NEXTVAL_NEXTTOKE.opval = version;
2261     force_next(WORD);
2262
2263     return s;
2264 }
2265
2266 /*
2267  * S_tokeq
2268  * Tokenize a quoted string passed in as an SV.  It finds the next
2269  * chunk, up to end of string or a backslash.  It may make a new
2270  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2271  * turns \\ into \.
2272  */
2273
2274 STATIC SV *
2275 S_tokeq(pTHX_ SV *sv)
2276 {
2277     dVAR;
2278     register char *s;
2279     register char *send;
2280     register char *d;
2281     STRLEN len = 0;
2282     SV *pv = sv;
2283
2284     PERL_ARGS_ASSERT_TOKEQ;
2285
2286     if (!SvLEN(sv))
2287         goto finish;
2288
2289     s = SvPV_force(sv, len);
2290     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2291         goto finish;
2292     send = s + len;
2293     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2294     while (s < send && !(*s == '\\' && s[1] == '\\'))
2295         s++;
2296     if (s == send)
2297         goto finish;
2298     d = s;
2299     if ( PL_hints & HINT_NEW_STRING ) {
2300         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2301     }
2302     while (s < send) {
2303         if (*s == '\\') {
2304             if (s + 1 < send && (s[1] == '\\'))
2305                 s++;            /* all that, just for this */
2306         }
2307         *d++ = *s++;
2308     }
2309     *d = '\0';
2310     SvCUR_set(sv, d - SvPVX_const(sv));
2311   finish:
2312     if ( PL_hints & HINT_NEW_STRING )
2313        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2314     return sv;
2315 }
2316
2317 /*
2318  * Now come three functions related to double-quote context,
2319  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2320  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2321  * interact with PL_lex_state, and create fake ( ... ) argument lists
2322  * to handle functions and concatenation.
2323  * They assume that whoever calls them will be setting up a fake
2324  * join call, because each subthing puts a ',' after it.  This lets
2325  *   "lower \luPpEr"
2326  * become
2327  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2328  *
2329  * (I'm not sure whether the spurious commas at the end of lcfirst's
2330  * arguments and join's arguments are created or not).
2331  */
2332
2333 /*
2334  * S_sublex_start
2335  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2336  *
2337  * Pattern matching will set PL_lex_op to the pattern-matching op to
2338  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2339  *
2340  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2341  *
2342  * Everything else becomes a FUNC.
2343  *
2344  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2345  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2346  * call to S_sublex_push().
2347  */
2348
2349 STATIC I32
2350 S_sublex_start(pTHX)
2351 {
2352     dVAR;
2353     register const I32 op_type = pl_yylval.ival;
2354
2355     if (op_type == OP_NULL) {
2356         pl_yylval.opval = PL_lex_op;
2357         PL_lex_op = NULL;
2358         return THING;
2359     }
2360     if (op_type == OP_CONST || op_type == OP_READLINE) {
2361         SV *sv = tokeq(PL_lex_stuff);
2362
2363         if (SvTYPE(sv) == SVt_PVIV) {
2364             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2365             STRLEN len;
2366             const char * const p = SvPV_const(sv, len);
2367             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2368             SvREFCNT_dec(sv);
2369             sv = nsv;
2370         }
2371         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2372         PL_lex_stuff = NULL;
2373         /* Allow <FH> // "foo" */
2374         if (op_type == OP_READLINE)
2375             PL_expect = XTERMORDORDOR;
2376         return THING;
2377     }
2378     else if (op_type == OP_BACKTICK && PL_lex_op) {
2379         /* readpipe() vas overriden */
2380         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2381         pl_yylval.opval = PL_lex_op;
2382         PL_lex_op = NULL;
2383         PL_lex_stuff = NULL;
2384         return THING;
2385     }
2386
2387     PL_sublex_info.super_state = PL_lex_state;
2388     PL_sublex_info.sub_inwhat = (U16)op_type;
2389     PL_sublex_info.sub_op = PL_lex_op;
2390     PL_lex_state = LEX_INTERPPUSH;
2391
2392     PL_expect = XTERM;
2393     if (PL_lex_op) {
2394         pl_yylval.opval = PL_lex_op;
2395         PL_lex_op = NULL;
2396         return PMFUNC;
2397     }
2398     else
2399         return FUNC;
2400 }
2401
2402 /*
2403  * S_sublex_push
2404  * Create a new scope to save the lexing state.  The scope will be
2405  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2406  * to the uc, lc, etc. found before.
2407  * Sets PL_lex_state to LEX_INTERPCONCAT.
2408  */
2409
2410 STATIC I32
2411 S_sublex_push(pTHX)
2412 {
2413     dVAR;
2414     ENTER;
2415
2416     PL_lex_state = PL_sublex_info.super_state;
2417     SAVEBOOL(PL_lex_dojoin);
2418     SAVEI32(PL_lex_brackets);
2419     SAVEI32(PL_lex_allbrackets);
2420     SAVEI8(PL_lex_fakeeof);
2421     SAVEI32(PL_lex_casemods);
2422     SAVEI32(PL_lex_starts);
2423     SAVEI8(PL_lex_state);
2424     SAVEVPTR(PL_lex_inpat);
2425     SAVEI16(PL_lex_inwhat);
2426     SAVECOPLINE(PL_curcop);
2427     SAVEPPTR(PL_bufptr);
2428     SAVEPPTR(PL_bufend);
2429     SAVEPPTR(PL_oldbufptr);
2430     SAVEPPTR(PL_oldoldbufptr);
2431     SAVEPPTR(PL_last_lop);
2432     SAVEPPTR(PL_last_uni);
2433     SAVEPPTR(PL_linestart);
2434     SAVESPTR(PL_linestr);
2435     SAVEGENERICPV(PL_lex_brackstack);
2436     SAVEGENERICPV(PL_lex_casestack);
2437
2438     PL_linestr = PL_lex_stuff;
2439     PL_lex_stuff = NULL;
2440
2441     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2442         = SvPVX(PL_linestr);
2443     PL_bufend += SvCUR(PL_linestr);
2444     PL_last_lop = PL_last_uni = NULL;
2445     SAVEFREESV(PL_linestr);
2446
2447     PL_lex_dojoin = FALSE;
2448     PL_lex_brackets = 0;
2449     PL_lex_allbrackets = 0;
2450     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2451     Newx(PL_lex_brackstack, 120, char);
2452     Newx(PL_lex_casestack, 12, char);
2453     PL_lex_casemods = 0;
2454     *PL_lex_casestack = '\0';
2455     PL_lex_starts = 0;
2456     PL_lex_state = LEX_INTERPCONCAT;
2457     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2458
2459     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2460     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2461     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2462         PL_lex_inpat = PL_sublex_info.sub_op;
2463     else
2464         PL_lex_inpat = NULL;
2465
2466     return '(';
2467 }
2468
2469 /*
2470  * S_sublex_done
2471  * Restores lexer state after a S_sublex_push.
2472  */
2473
2474 STATIC I32
2475 S_sublex_done(pTHX)
2476 {
2477     dVAR;
2478     if (!PL_lex_starts++) {
2479         SV * const sv = newSVpvs("");
2480         if (SvUTF8(PL_linestr))
2481             SvUTF8_on(sv);
2482         PL_expect = XOPERATOR;
2483         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2484         return THING;
2485     }
2486
2487     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2488         PL_lex_state = LEX_INTERPCASEMOD;
2489         return yylex();
2490     }
2491
2492     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2493     assert(PL_lex_inwhat != OP_TRANSR);
2494     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2495         PL_linestr = PL_lex_repl;
2496         PL_lex_inpat = 0;
2497         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2498         PL_bufend += SvCUR(PL_linestr);
2499         PL_last_lop = PL_last_uni = NULL;
2500         SAVEFREESV(PL_linestr);
2501         PL_lex_dojoin = FALSE;
2502         PL_lex_brackets = 0;
2503         PL_lex_allbrackets = 0;
2504         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2505         PL_lex_casemods = 0;
2506         *PL_lex_casestack = '\0';
2507         PL_lex_starts = 0;
2508         if (SvEVALED(PL_lex_repl)) {
2509             PL_lex_state = LEX_INTERPNORMAL;
2510             PL_lex_starts++;
2511             /*  we don't clear PL_lex_repl here, so that we can check later
2512                 whether this is an evalled subst; that means we rely on the
2513                 logic to ensure sublex_done() is called again only via the
2514                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2515         }
2516         else {
2517             PL_lex_state = LEX_INTERPCONCAT;
2518             PL_lex_repl = NULL;
2519         }
2520         return ',';
2521     }
2522     else {
2523 #ifdef PERL_MAD
2524         if (PL_madskills) {
2525             if (PL_thiswhite) {
2526                 if (!PL_endwhite)
2527                     PL_endwhite = newSVpvs("");
2528                 sv_catsv(PL_endwhite, PL_thiswhite);
2529                 PL_thiswhite = 0;
2530             }
2531             if (PL_thistoken)
2532                 sv_setpvs(PL_thistoken,"");
2533             else
2534                 PL_realtokenstart = -1;
2535         }
2536 #endif
2537         LEAVE;
2538         PL_bufend = SvPVX(PL_linestr);
2539         PL_bufend += SvCUR(PL_linestr);
2540         PL_expect = XOPERATOR;
2541         PL_sublex_info.sub_inwhat = 0;
2542         return ')';
2543     }
2544 }
2545
2546 /*
2547   scan_const
2548
2549   Extracts a pattern, double-quoted string, or transliteration.  This
2550   is terrifying code.
2551
2552   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2553   processing a pattern (PL_lex_inpat is true), a transliteration
2554   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2555
2556   Returns a pointer to the character scanned up to. If this is
2557   advanced from the start pointer supplied (i.e. if anything was
2558   successfully parsed), will leave an OP for the substring scanned
2559   in pl_yylval. Caller must intuit reason for not parsing further
2560   by looking at the next characters herself.
2561
2562   In patterns:
2563     backslashes:
2564       constants: \N{NAME} only
2565       case and quoting: \U \Q \E
2566     stops on @ and $, but not for $ as tail anchor
2567
2568   In transliterations:
2569     characters are VERY literal, except for - not at the start or end
2570     of the string, which indicates a range. If the range is in bytes,
2571     scan_const expands the range to the full set of intermediate
2572     characters. If the range is in utf8, the hyphen is replaced with
2573     a certain range mark which will be handled by pmtrans() in op.c.
2574
2575   In double-quoted strings:
2576     backslashes:
2577       double-quoted style: \r and \n
2578       constants: \x31, etc.
2579       deprecated backrefs: \1 (in substitution replacements)
2580       case and quoting: \U \Q \E
2581     stops on @ and $
2582
2583   scan_const does *not* construct ops to handle interpolated strings.
2584   It stops processing as soon as it finds an embedded $ or @ variable
2585   and leaves it to the caller to work out what's going on.
2586
2587   embedded arrays (whether in pattern or not) could be:
2588       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2589
2590   $ in double-quoted strings must be the symbol of an embedded scalar.
2591
2592   $ in pattern could be $foo or could be tail anchor.  Assumption:
2593   it's a tail anchor if $ is the last thing in the string, or if it's
2594   followed by one of "()| \r\n\t"
2595
2596   \1 (backreferences) are turned into $1
2597
2598   The structure of the code is
2599       while (there's a character to process) {
2600           handle transliteration ranges
2601           skip regexp comments /(?#comment)/ and codes /(?{code})/
2602           skip #-initiated comments in //x patterns
2603           check for embedded arrays
2604           check for embedded scalars
2605           if (backslash) {
2606               deprecate \1 in substitution replacements
2607               handle string-changing backslashes \l \U \Q \E, etc.
2608               switch (what was escaped) {
2609                   handle \- in a transliteration (becomes a literal -)
2610                   if a pattern and not \N{, go treat as regular character
2611                   handle \132 (octal characters)
2612                   handle \x15 and \x{1234} (hex characters)
2613                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2614                   handle \cV (control characters)
2615                   handle printf-style backslashes (\f, \r, \n, etc)
2616               } (end switch)
2617               continue
2618           } (end if backslash)
2619           handle regular character
2620     } (end while character to read)
2621
2622 */
2623
2624 STATIC char *
2625 S_scan_const(pTHX_ char *start)
2626 {
2627     dVAR;
2628     register char *send = PL_bufend;            /* end of the constant */
2629     SV *sv = newSV(send - start);               /* sv for the constant.  See
2630                                                    note below on sizing. */
2631     register char *s = start;                   /* start of the constant */
2632     register char *d = SvPVX(sv);               /* destination for copies */
2633     bool dorange = FALSE;                       /* are we in a translit range? */
2634     bool didrange = FALSE;                      /* did we just finish a range? */
2635     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2636     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2637                                                    to be UTF8?  But, this can
2638                                                    show as true when the source
2639                                                    isn't utf8, as for example
2640                                                    when it is entirely composed
2641                                                    of hex constants */
2642
2643     /* Note on sizing:  The scanned constant is placed into sv, which is
2644      * initialized by newSV() assuming one byte of output for every byte of
2645      * input.  This routine expects newSV() to allocate an extra byte for a
2646      * trailing NUL, which this routine will append if it gets to the end of
2647      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2648      * CAPITAL LETTER A}), or more output than input if the constant ends up
2649      * recoded to utf8, but each time a construct is found that might increase
2650      * the needed size, SvGROW() is called.  Its size parameter each time is
2651      * based on the best guess estimate at the time, namely the length used so
2652      * far, plus the length the current construct will occupy, plus room for
2653      * the trailing NUL, plus one byte for every input byte still unscanned */
2654
2655     UV uv;
2656 #ifdef EBCDIC
2657     UV literal_endpoint = 0;
2658     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2659 #endif
2660
2661     PERL_ARGS_ASSERT_SCAN_CONST;
2662
2663     assert(PL_lex_inwhat != OP_TRANSR);
2664     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2665         /* If we are doing a trans and we know we want UTF8 set expectation */
2666         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2667         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2668     }
2669
2670
2671     while (s < send || dorange) {
2672
2673         /* get transliterations out of the way (they're most literal) */
2674         if (PL_lex_inwhat == OP_TRANS) {
2675             /* expand a range A-Z to the full set of characters.  AIE! */
2676             if (dorange) {
2677                 I32 i;                          /* current expanded character */
2678                 I32 min;                        /* first character in range */
2679                 I32 max;                        /* last character in range */
2680
2681 #ifdef EBCDIC
2682                 UV uvmax = 0;
2683 #endif
2684
2685                 if (has_utf8
2686 #ifdef EBCDIC
2687                     && !native_range
2688 #endif
2689                     ) {
2690                     char * const c = (char*)utf8_hop((U8*)d, -1);
2691                     char *e = d++;
2692                     while (e-- > c)
2693                         *(e + 1) = *e;
2694                     *c = (char)UTF_TO_NATIVE(0xff);
2695                     /* mark the range as done, and continue */
2696                     dorange = FALSE;
2697                     didrange = TRUE;
2698                     continue;
2699                 }
2700
2701                 i = d - SvPVX_const(sv);                /* remember current offset */
2702 #ifdef EBCDIC
2703                 SvGROW(sv,
2704                        SvLEN(sv) + (has_utf8 ?
2705                                     (512 - UTF_CONTINUATION_MARK +
2706                                      UNISKIP(0x100))
2707                                     : 256));
2708                 /* How many two-byte within 0..255: 128 in UTF-8,
2709                  * 96 in UTF-8-mod. */
2710 #else
2711                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2712 #endif
2713                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2714 #ifdef EBCDIC
2715                 if (has_utf8) {
2716                     int j;
2717                     for (j = 0; j <= 1; j++) {
2718                         char * const c = (char*)utf8_hop((U8*)d, -1);
2719                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2720                         if (j)
2721                             min = (U8)uv;
2722                         else if (uv < 256)
2723                             max = (U8)uv;
2724                         else {
2725                             max = (U8)0xff; /* only to \xff */
2726                             uvmax = uv; /* \x{100} to uvmax */
2727                         }
2728                         d = c; /* eat endpoint chars */
2729                      }
2730                 }
2731                else {
2732 #endif
2733                    d -= 2;              /* eat the first char and the - */
2734                    min = (U8)*d;        /* first char in range */
2735                    max = (U8)d[1];      /* last char in range  */
2736 #ifdef EBCDIC
2737                }
2738 #endif
2739
2740                 if (min > max) {
2741                     Perl_croak(aTHX_
2742                                "Invalid range \"%c-%c\" in transliteration operator",
2743                                (char)min, (char)max);
2744                 }
2745
2746 #ifdef EBCDIC
2747                 if (literal_endpoint == 2 &&
2748                     ((isLOWER(min) && isLOWER(max)) ||
2749                      (isUPPER(min) && isUPPER(max)))) {
2750                     if (isLOWER(min)) {
2751                         for (i = min; i <= max; i++)
2752                             if (isLOWER(i))
2753                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2754                     } else {
2755                         for (i = min; i <= max; i++)
2756                             if (isUPPER(i))
2757                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2758                     }
2759                 }
2760                 else
2761 #endif
2762                     for (i = min; i <= max; i++)
2763 #ifdef EBCDIC
2764                         if (has_utf8) {
2765                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2766                             if (UNI_IS_INVARIANT(ch))
2767                                 *d++ = (U8)i;
2768                             else {
2769                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2770                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2771                             }
2772                         }
2773                         else
2774 #endif
2775                             *d++ = (char)i;
2776
2777 #ifdef EBCDIC
2778                 if (uvmax) {
2779                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2780                     if (uvmax > 0x101)
2781                         *d++ = (char)UTF_TO_NATIVE(0xff);
2782                     if (uvmax > 0x100)
2783                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2784                 }
2785 #endif
2786
2787                 /* mark the range as done, and continue */
2788                 dorange = FALSE;
2789                 didrange = TRUE;
2790 #ifdef EBCDIC
2791                 literal_endpoint = 0;
2792 #endif
2793                 continue;
2794             }
2795
2796             /* range begins (ignore - as first or last char) */
2797             else if (*s == '-' && s+1 < send  && s != start) {
2798                 if (didrange) {
2799                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2800                 }
2801                 if (has_utf8
2802 #ifdef EBCDIC
2803                     && !native_range
2804 #endif
2805                     ) {
2806                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2807                     s++;
2808                     continue;
2809                 }
2810                 dorange = TRUE;
2811                 s++;
2812             }
2813             else {
2814                 didrange = FALSE;
2815 #ifdef EBCDIC
2816                 literal_endpoint = 0;
2817                 native_range = TRUE;
2818 #endif
2819             }
2820         }
2821
2822         /* if we get here, we're not doing a transliteration */
2823
2824         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2825            except for the last char, which will be done separately. */
2826         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2827             if (s[2] == '#') {
2828                 while (s+1 < send && *s != ')')
2829                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2830             }
2831             else if (s[2] == '{' /* This should match regcomp.c */
2832                     || (s[2] == '?' && s[3] == '{'))
2833             {
2834                 I32 count = 1;
2835                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2836                 char c;
2837
2838                 while (count && (c = *regparse)) {
2839                     if (c == '\\' && regparse[1])
2840                         regparse++;
2841                     else if (c == '{')
2842                         count++;
2843                     else if (c == '}')
2844                         count--;
2845                     regparse++;
2846                 }
2847                 if (*regparse != ')')
2848                     regparse--;         /* Leave one char for continuation. */
2849                 while (s < regparse)
2850                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2851             }
2852         }
2853
2854         /* likewise skip #-initiated comments in //x patterns */
2855         else if (*s == '#' && PL_lex_inpat &&
2856           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2857             while (s+1 < send && *s != '\n')
2858                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2859         }
2860
2861         /* check for embedded arrays
2862            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2863            */
2864         else if (*s == '@' && s[1]) {
2865             if (isALNUM_lazy_if(s+1,UTF))
2866                 break;
2867             if (strchr(":'{$", s[1]))
2868                 break;
2869             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2870                 break; /* in regexp, neither @+ nor @- are interpolated */
2871         }
2872
2873         /* check for embedded scalars.  only stop if we're sure it's a
2874            variable.
2875         */
2876         else if (*s == '$') {
2877             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2878                 break;
2879             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2880                 if (s[1] == '\\') {
2881                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2882                                    "Possible unintended interpolation of $\\ in regex");
2883                 }
2884                 break;          /* in regexp, $ might be tail anchor */
2885             }
2886         }
2887
2888         /* End of else if chain - OP_TRANS rejoin rest */
2889
2890         /* backslashes */
2891         if (*s == '\\' && s+1 < send) {
2892             char* e;    /* Can be used for ending '}', etc. */
2893
2894             s++;
2895
2896             /* warn on \1 - \9 in substitution replacements, but note that \11
2897              * is an octal; and \19 is \1 followed by '9' */
2898             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2899                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2900             {
2901                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2902                 *--s = '$';
2903                 break;
2904             }
2905
2906             /* string-change backslash escapes */
2907             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2908                 --s;
2909                 break;
2910             }
2911             /* In a pattern, process \N, but skip any other backslash escapes.
2912              * This is because we don't want to translate an escape sequence
2913              * into a meta symbol and have the regex compiler use the meta
2914              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2915              * in spite of this, we do have to process \N here while the proper
2916              * charnames handler is in scope.  See bugs #56444 and #62056.
2917              * There is a complication because \N in a pattern may also stand
2918              * for 'match a non-nl', and not mean a charname, in which case its
2919              * processing should be deferred to the regex compiler.  To be a
2920              * charname it must be followed immediately by a '{', and not look
2921              * like \N followed by a curly quantifier, i.e., not something like
2922              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2923              * quantifier */
2924             else if (PL_lex_inpat
2925                     && (*s != 'N'
2926                         || s[1] != '{'
2927                         || regcurly(s + 1)))
2928             {
2929                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2930                 goto default_action;
2931             }
2932
2933             switch (*s) {
2934
2935             /* quoted - in transliterations */
2936             case '-':
2937                 if (PL_lex_inwhat == OP_TRANS) {
2938                     *d++ = *s++;
2939                     continue;
2940                 }
2941                 /* FALL THROUGH */
2942             default:
2943                 {
2944                     if ((isALPHA(*s) || isDIGIT(*s)))
2945                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2946                                        "Unrecognized escape \\%c passed through",
2947                                        *s);
2948                     /* default action is to copy the quoted character */
2949                     goto default_action;
2950                 }
2951
2952             /* eg. \132 indicates the octal constant 0132 */
2953             case '0': case '1': case '2': case '3':
2954             case '4': case '5': case '6': case '7':
2955                 {
2956                     I32 flags = 0;
2957                     STRLEN len = 3;
2958                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2959                     s += len;
2960                 }
2961                 goto NUM_ESCAPE_INSERT;
2962
2963             /* eg. \o{24} indicates the octal constant \024 */
2964             case 'o':
2965                 {
2966                     STRLEN len;
2967                     const char* error;
2968
2969                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2970                     s += len;
2971                     if (! valid) {
2972                         yyerror(error);
2973                         continue;
2974                     }
2975                     goto NUM_ESCAPE_INSERT;
2976                 }
2977
2978             /* eg. \x24 indicates the hex constant 0x24 */
2979             case 'x':
2980                 ++s;
2981                 if (*s == '{') {
2982                     char* const e = strchr(s, '}');
2983                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2984                       PERL_SCAN_DISALLOW_PREFIX;
2985                     STRLEN len;
2986
2987                     ++s;
2988                     if (!e) {
2989                         yyerror("Missing right brace on \\x{}");
2990                         continue;
2991                     }
2992                     len = e - s;
2993                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2994                     s = e + 1;
2995                 }
2996                 else {
2997                     {
2998                         STRLEN len = 2;
2999                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3000                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3001                         s += len;
3002                     }
3003                 }
3004
3005               NUM_ESCAPE_INSERT:
3006                 /* Insert oct or hex escaped character.  There will always be
3007                  * enough room in sv since such escapes will be longer than any
3008                  * UTF-8 sequence they can end up as, except if they force us
3009                  * to recode the rest of the string into utf8 */
3010
3011                 /* Here uv is the ordinal of the next character being added in
3012                  * unicode (converted from native). */
3013                 if (!UNI_IS_INVARIANT(uv)) {
3014                     if (!has_utf8 && uv > 255) {
3015                         /* Might need to recode whatever we have accumulated so
3016                          * far if it contains any chars variant in utf8 or
3017                          * utf-ebcdic. */
3018
3019                         SvCUR_set(sv, d - SvPVX_const(sv));
3020                         SvPOK_on(sv);
3021                         *d = '\0';
3022                         /* See Note on sizing above.  */
3023                         sv_utf8_upgrade_flags_grow(sv,
3024                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3025                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3026                         d = SvPVX(sv) + SvCUR(sv);
3027                         has_utf8 = TRUE;
3028                     }
3029
3030                     if (has_utf8) {
3031                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3032                         if (PL_lex_inwhat == OP_TRANS &&
3033                             PL_sublex_info.sub_op) {
3034                             PL_sublex_info.sub_op->op_private |=
3035                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3036                                              : OPpTRANS_TO_UTF);
3037                         }
3038 #ifdef EBCDIC
3039                         if (uv > 255 && !dorange)
3040                             native_range = FALSE;
3041 #endif
3042                     }
3043                     else {
3044                         *d++ = (char)uv;
3045                     }
3046                 }
3047                 else {
3048                     *d++ = (char) uv;
3049                 }
3050                 continue;
3051
3052             case 'N':
3053                 /* In a non-pattern \N must be a named character, like \N{LATIN
3054                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3055                  * mean to match a non-newline.  For non-patterns, named
3056                  * characters are converted to their string equivalents. In
3057                  * patterns, named characters are not converted to their
3058                  * ultimate forms for the same reasons that other escapes
3059                  * aren't.  Instead, they are converted to the \N{U+...} form
3060                  * to get the value from the charnames that is in effect right
3061                  * now, while preserving the fact that it was a named character
3062                  * so that the regex compiler knows this */
3063
3064                 /* This section of code doesn't generally use the
3065                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3066                  * a close examination of this macro and determined it is a
3067                  * no-op except on utfebcdic variant characters.  Every
3068                  * character generated by this that would normally need to be
3069                  * enclosed by this macro is invariant, so the macro is not
3070                  * needed, and would complicate use of copy().  XXX There are
3071                  * other parts of this file where the macro is used
3072                  * inconsistently, but are saved by it being a no-op */
3073
3074                 /* The structure of this section of code (besides checking for
3075                  * errors and upgrading to utf8) is:
3076                  *  Further disambiguate between the two meanings of \N, and if
3077                  *      not a charname, go process it elsewhere
3078                  *  If of form \N{U+...}, pass it through if a pattern;
3079                  *      otherwise convert to utf8
3080                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3081                  *  pattern; otherwise convert to utf8 */
3082
3083                 /* Here, s points to the 'N'; the test below is guaranteed to
3084                  * succeed if we are being called on a pattern as we already
3085                  * know from a test above that the next character is a '{'.
3086                  * On a non-pattern \N must mean 'named sequence, which
3087                  * requires braces */
3088                 s++;
3089                 if (*s != '{') {
3090                     yyerror("Missing braces on \\N{}");
3091                     continue;
3092                 }
3093                 s++;
3094
3095                 /* If there is no matching '}', it is an error. */
3096                 if (! (e = strchr(s, '}'))) {
3097                     if (! PL_lex_inpat) {
3098                         yyerror("Missing right brace on \\N{}");
3099                     } else {
3100                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3101                     }
3102                     continue;
3103                 }
3104
3105                 /* Here it looks like a named character */
3106
3107                 if (PL_lex_inpat) {
3108
3109                     /* XXX This block is temporary code.  \N{} implies that the
3110                      * pattern is to have Unicode semantics, and therefore
3111                      * currently has to be encoded in utf8.  By putting it in
3112                      * utf8 now, we save a whole pass in the regular expression
3113                      * compiler.  Once that code is changed so Unicode
3114                      * semantics doesn't necessarily have to be in utf8, this
3115                      * block should be removed.  However, the code that parses
3116                      * the output of this would have to be changed to not
3117                      * necessarily expect utf8 */
3118                     if (!has_utf8) {
3119                         SvCUR_set(sv, d - SvPVX_const(sv));
3120                         SvPOK_on(sv);
3121                         *d = '\0';
3122                         /* See Note on sizing above.  */
3123                         sv_utf8_upgrade_flags_grow(sv,
3124                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3125                                         /* 5 = '\N{' + cur char + NUL */
3126                                         (STRLEN)(send - s) + 5);
3127                         d = SvPVX(sv) + SvCUR(sv);
3128                         has_utf8 = TRUE;
3129                     }
3130                 }
3131
3132                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3133                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3134                                 | PERL_SCAN_DISALLOW_PREFIX;
3135                     STRLEN len;
3136
3137                     /* For \N{U+...}, the '...' is a unicode value even on
3138                      * EBCDIC machines */
3139                     s += 2;         /* Skip to next char after the 'U+' */
3140                     len = e - s;
3141                     uv = grok_hex(s, &len, &flags, NULL);
3142                     if (len == 0 || len != (STRLEN)(e - s)) {
3143                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3144                         s = e + 1;
3145                         continue;
3146                     }
3147
3148                     if (PL_lex_inpat) {
3149
3150                         /* On non-EBCDIC platforms, pass through to the regex
3151                          * compiler unchanged.  The reason we evaluated the
3152                          * number above is to make sure there wasn't a syntax
3153                          * error.  But on EBCDIC we convert to native so
3154                          * downstream code can continue to assume it's native
3155                          */
3156                         s -= 5;     /* Include the '\N{U+' */
3157 #ifdef EBCDIC
3158                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3159                                                                and the \0 */
3160                                     "\\N{U+%X}",
3161                                     (unsigned int) UNI_TO_NATIVE(uv));
3162 #else
3163                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3164                         d += e - s + 1;
3165 #endif
3166                     }
3167                     else {  /* Not a pattern: convert the hex to string */
3168
3169                          /* If destination is not in utf8, unconditionally
3170                           * recode it to be so.  This is because \N{} implies
3171                           * Unicode semantics, and scalars have to be in utf8
3172                           * to guarantee those semantics */
3173                         if (! has_utf8) {
3174                             SvCUR_set(sv, d - SvPVX_const(sv));
3175                             SvPOK_on(sv);
3176                             *d = '\0';
3177                             /* See Note on sizing above.  */
3178                             sv_utf8_upgrade_flags_grow(
3179                                         sv,
3180                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3181                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3182                             d = SvPVX(sv) + SvCUR(sv);
3183                             has_utf8 = TRUE;
3184                         }
3185
3186                         /* Add the string to the output */
3187                         if (UNI_IS_INVARIANT(uv)) {
3188                             *d++ = (char) uv;
3189                         }
3190                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3191                     }
3192                 }
3193                 else { /* Here is \N{NAME} but not \N{U+...}. */
3194
3195                     SV *res;            /* result from charnames */
3196                     const char *str;    /* the string in 'res' */
3197                     STRLEN len;         /* its length */
3198
3199                     /* Get the value for NAME */
3200                     res = newSVpvn(s, e - s);
3201                     res = new_constant( NULL, 0, "charnames",
3202                                         /* includes all of: \N{...} */
3203                                         res, NULL, s - 3, e - s + 4 );
3204
3205                     /* Most likely res will be in utf8 already since the
3206                      * standard charnames uses pack U, but a custom translator
3207                      * can leave it otherwise, so make sure.  XXX This can be
3208                      * revisited to not have charnames use utf8 for characters
3209                      * that don't need it when regexes don't have to be in utf8
3210                      * for Unicode semantics.  If doing so, remember EBCDIC */
3211                     sv_utf8_upgrade(res);
3212                     str = SvPV_const(res, len);
3213
3214                     /* Don't accept malformed input */
3215                     if (! is_utf8_string((U8 *) str, len)) {
3216                         yyerror("Malformed UTF-8 returned by \\N");
3217                     }
3218                     else if (PL_lex_inpat) {
3219
3220                         if (! len) { /* The name resolved to an empty string */
3221                             Copy("\\N{}", d, 4, char);
3222                             d += 4;
3223                         }
3224                         else {
3225                             /* In order to not lose information for the regex
3226                             * compiler, pass the result in the specially made
3227                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3228                             * the code points in hex of each character
3229                             * returned by charnames */
3230
3231                             const char *str_end = str + len;
3232                             STRLEN char_length;     /* cur char's byte length */
3233                             STRLEN output_length;   /* and the number of bytes
3234                                                        after this is translated
3235                                                        into hex digits */
3236                             const STRLEN off = d - SvPVX_const(sv);
3237
3238                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3239                              * max('U+', '.'); and 1 for NUL */
3240                             char hex_string[2 * UTF8_MAXBYTES + 5];
3241
3242                             /* Get the first character of the result. */
3243                             U32 uv = utf8n_to_uvuni((U8 *) str,
3244                                                     len,
3245                                                     &char_length,
3246                                                     UTF8_ALLOW_ANYUV);
3247
3248                             /* The call to is_utf8_string() above hopefully
3249                              * guarantees that there won't be an error.  But
3250                              * it's easy here to make sure.  The function just
3251                              * above warns and returns 0 if invalid utf8, but
3252                              * it can also return 0 if the input is validly a
3253                              * NUL. Disambiguate */
3254                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3255                                 uv = UNICODE_REPLACEMENT;
3256                             }
3257
3258                             /* Convert first code point to hex, including the
3259                              * boiler plate before it.  For all these, we
3260                              * convert to native format so that downstream code
3261                              * can continue to assume the input is native */
3262                             output_length =
3263                                 my_snprintf(hex_string, sizeof(hex_string),
3264                                             "\\N{U+%X",
3265                                             (unsigned int) UNI_TO_NATIVE(uv));
3266
3267                             /* Make sure there is enough space to hold it */
3268                             d = off + SvGROW(sv, off
3269                                                  + output_length
3270                                                  + (STRLEN)(send - e)
3271                                                  + 2);  /* '}' + NUL */
3272                             /* And output it */
3273                             Copy(hex_string, d, output_length, char);
3274                             d += output_length;
3275
3276                             /* For each subsequent character, append dot and
3277                              * its ordinal in hex */
3278                             while ((str += char_length) < str_end) {
3279                                 const STRLEN off = d - SvPVX_const(sv);
3280                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3281                                                         str_end - str,
3282                                                         &char_length,
3283                                                         UTF8_ALLOW_ANYUV);
3284                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3285                                     uv = UNICODE_REPLACEMENT;
3286                                 }
3287
3288                                 output_length =
3289                                     my_snprintf(hex_string, sizeof(hex_string),
3290                                             ".%X",
3291                                             (unsigned int) UNI_TO_NATIVE(uv));
3292
3293                                 d = off + SvGROW(sv, off
3294                                                      + output_length
3295                                                      + (STRLEN)(send - e)
3296                                                      + 2);      /* '}' +  NUL */
3297                                 Copy(hex_string, d, output_length, char);
3298                                 d += output_length;
3299                             }
3300
3301                             *d++ = '}'; /* Done.  Add the trailing brace */
3302                         }
3303                     }
3304                     else { /* Here, not in a pattern.  Convert the name to a
3305                             * string. */
3306
3307                          /* If destination is not in utf8, unconditionally
3308                           * recode it to be so.  This is because \N{} implies
3309                           * Unicode semantics, and scalars have to be in utf8
3310                           * to guarantee those semantics */
3311                         if (! has_utf8) {
3312                             SvCUR_set(sv, d - SvPVX_const(sv));
3313                             SvPOK_on(sv);
3314                             *d = '\0';
3315                             /* See Note on sizing above.  */
3316                             sv_utf8_upgrade_flags_grow(sv,
3317                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3318                                                 len + (STRLEN)(send - s) + 1);
3319                             d = SvPVX(sv) + SvCUR(sv);
3320                             has_utf8 = TRUE;
3321                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3322
3323                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3324                              * set correctly here). */
3325                             const STRLEN off = d - SvPVX_const(sv);
3326                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3327                         }
3328                         Copy(str, d, len, char);
3329                         d += len;
3330                     }
3331                     SvREFCNT_dec(res);
3332
3333                     /* Deprecate non-approved name syntax */
3334                     if (ckWARN_d(WARN_DEPRECATED)) {
3335                         bool problematic = FALSE;
3336                         char* i = s;
3337
3338                         /* For non-ut8 input, look to see that the first
3339                          * character is an alpha, then loop through the rest
3340                          * checking that each is a continuation */
3341                         if (! this_utf8) {
3342                             if (! isALPHAU(*i)) problematic = TRUE;
3343                             else for (i = s + 1; i < e; i++) {
3344                                 if (isCHARNAME_CONT(*i)) continue;
3345                                 problematic = TRUE;
3346                                 break;
3347                             }
3348                         }
3349                         else {
3350                             /* Similarly for utf8.  For invariants can check
3351                              * directly.  We accept anything above the latin1
3352                              * range because it is immaterial to Perl if it is
3353                              * correct or not, and is expensive to check.  But
3354                              * it is fairly easy in the latin1 range to convert
3355                              * the variants into a single character and check
3356                              * those */
3357                             if (UTF8_IS_INVARIANT(*i)) {
3358                                 if (! isALPHAU(*i)) problematic = TRUE;
3359                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3360                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3361                                                                             *(i+1)))))
3362                                 {
3363                                     problematic = TRUE;
3364                                 }
3365                             }
3366                             if (! problematic) for (i = s + UTF8SKIP(s);
3367                                                     i < e;
3368                                                     i+= UTF8SKIP(i))
3369                             {
3370                                 if (UTF8_IS_INVARIANT(*i)) {
3371                                     if (isCHARNAME_CONT(*i)) continue;
3372                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3373                                     continue;
3374                                 } else if (isCHARNAME_CONT(
3375                                             UNI_TO_NATIVE(
3376                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3377                                 {
3378                                     continue;
3379                                 }
3380                                 problematic = TRUE;
3381                                 break;
3382                             }
3383                         }
3384                         if (problematic) {
3385                             /* The e-i passed to the final %.*s makes sure that
3386                              * should the trailing NUL be missing that this
3387                              * print won't run off the end of the string */
3388                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3389                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3390                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3391                         }
3392                     }
3393                 } /* End \N{NAME} */
3394 #ifdef EBCDIC
3395                 if (!dorange)
3396                     native_range = FALSE; /* \N{} is defined to be Unicode */
3397 #endif
3398                 s = e + 1;  /* Point to just after the '}' */
3399                 continue;
3400
3401             /* \c is a control character */
3402             case 'c':
3403                 s++;
3404                 if (s < send) {
3405                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3406                 }
3407                 else {
3408                     yyerror("Missing control char name in \\c");
3409                 }
3410                 continue;
3411
3412             /* printf-style backslashes, formfeeds, newlines, etc */
3413             case 'b':
3414                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3415                 break;
3416             case 'n':
3417                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3418                 break;
3419             case 'r':
3420                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3421                 break;
3422             case 'f':
3423                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3424                 break;
3425             case 't':
3426                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3427                 break;
3428             case 'e':
3429                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3430                 break;
3431             case 'a':
3432                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3433                 break;
3434             } /* end switch */
3435
3436             s++;
3437             continue;
3438         } /* end if (backslash) */
3439 #ifdef EBCDIC
3440         else
3441             literal_endpoint++;
3442 #endif
3443
3444     default_action:
3445         /* If we started with encoded form, or already know we want it,
3446            then encode the next character */
3447         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3448             STRLEN len  = 1;
3449
3450
3451             /* One might think that it is wasted effort in the case of the
3452              * source being utf8 (this_utf8 == TRUE) to take the next character
3453              * in the source, convert it to an unsigned value, and then convert
3454              * it back again.  But the source has not been validated here.  The
3455              * routine that does the conversion checks for errors like
3456              * malformed utf8 */
3457
3458             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3459             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3460             if (!has_utf8) {
3461                 SvCUR_set(sv, d - SvPVX_const(sv));
3462                 SvPOK_on(sv);
3463                 *d = '\0';
3464                 /* See Note on sizing above.  */
3465                 sv_utf8_upgrade_flags_grow(sv,
3466                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3467                                         need + (STRLEN)(send - s) + 1);
3468                 d = SvPVX(sv) + SvCUR(sv);
3469                 has_utf8 = TRUE;
3470             } else if (need > len) {
3471                 /* encoded value larger than old, may need extra space (NOTE:
3472                  * SvCUR() is not set correctly here).   See Note on sizing
3473                  * above.  */
3474                 const STRLEN off = d - SvPVX_const(sv);
3475                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3476             }
3477             s += len;
3478
3479             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3480 #ifdef EBCDIC
3481             if (uv > 255 && !dorange)
3482                 native_range = FALSE;
3483 #endif
3484         }
3485         else {
3486             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3487         }
3488     } /* while loop to process each character */
3489
3490     /* terminate the string and set up the sv */
3491     *d = '\0';
3492     SvCUR_set(sv, d - SvPVX_const(sv));
3493     if (SvCUR(sv) >= SvLEN(sv))
3494         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3495
3496     SvPOK_on(sv);
3497     if (PL_encoding && !has_utf8) {
3498         sv_recode_to_utf8(sv, PL_encoding);
3499         if (SvUTF8(sv))
3500             has_utf8 = TRUE;
3501     }
3502     if (has_utf8) {
3503         SvUTF8_on(sv);
3504         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3505             PL_sublex_info.sub_op->op_private |=
3506                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3507         }
3508     }
3509
3510     /* shrink the sv if we allocated more than we used */
3511     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3512         SvPV_shrink_to_cur(sv);
3513     }
3514
3515     /* return the substring (via pl_yylval) only if we parsed anything */
3516     if (s > PL_bufptr) {
3517         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3518             const char *const key = PL_lex_inpat ? "qr" : "q";
3519             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3520             const char *type;
3521             STRLEN typelen;
3522
3523             if (PL_lex_inwhat == OP_TRANS) {
3524                 type = "tr";
3525                 typelen = 2;
3526             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3527                 type = "s";
3528                 typelen = 1;
3529             } else  {
3530                 type = "qq";
3531                 typelen = 2;
3532             }
3533
3534             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3535                                 type, typelen);
3536         }
3537         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3538     } else
3539         SvREFCNT_dec(sv);
3540     return s;
3541 }
3542
3543 /* S_intuit_more
3544  * Returns TRUE if there's more to the expression (e.g., a subscript),
3545  * FALSE otherwise.
3546  *
3547  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3548  *
3549  * ->[ and ->{ return TRUE
3550  * { and [ outside a pattern are always subscripts, so return TRUE
3551  * if we're outside a pattern and it's not { or [, then return FALSE
3552  * if we're in a pattern and the first char is a {
3553  *   {4,5} (any digits around the comma) returns FALSE
3554  * if we're in a pattern and the first char is a [
3555  *   [] returns FALSE
3556  *   [SOMETHING] has a funky algorithm to decide whether it's a
3557  *      character class or not.  It has to deal with things like
3558  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3559  * anything else returns TRUE
3560  */
3561
3562 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3563
3564 STATIC int
3565 S_intuit_more(pTHX_ register char *s)
3566 {
3567     dVAR;
3568
3569     PERL_ARGS_ASSERT_INTUIT_MORE;
3570
3571     if (PL_lex_brackets)
3572         return TRUE;
3573     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3574         return TRUE;
3575     if (*s != '{' && *s != '[')
3576         return FALSE;
3577     if (!PL_lex_inpat)
3578         return TRUE;
3579
3580     /* In a pattern, so maybe we have {n,m}. */
3581     if (*s == '{') {
3582         if (regcurly(s)) {
3583             return FALSE;
3584         }
3585         return TRUE;
3586     }
3587
3588     /* On the other hand, maybe we have a character class */
3589
3590     s++;
3591     if (*s == ']' || *s == '^')
3592         return FALSE;
3593     else {
3594         /* this is terrifying, and it works */
3595         int weight = 2;         /* let's weigh the evidence */
3596         char seen[256];
3597         unsigned char un_char = 255, last_un_char;
3598         const char * const send = strchr(s,']');
3599         char tmpbuf[sizeof PL_tokenbuf * 4];
3600
3601         if (!send)              /* has to be an expression */
3602             return TRUE;
3603
3604         Zero(seen,256,char);
3605         if (*s == '$')
3606             weight -= 3;
3607         else if (isDIGIT(*s)) {
3608             if (s[1] != ']') {
3609                 if (isDIGIT(s[1]) && s[2] == ']')
3610                     weight -= 10;
3611             }
3612             else
3613                 weight -= 100;
3614         }
3615         for (; s < send; s++) {
3616             last_un_char = un_char;
3617             un_char = (unsigned char)*s;
3618             switch (*s) {
3619             case '@':
3620             case '&':
3621             case '$':
3622                 weight -= seen[un_char] * 10;
3623                 if (isALNUM_lazy_if(s+1,UTF)) {
3624                     int len;
3625                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3626                     len = (int)strlen(tmpbuf);
3627                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3628                         weight -= 100;
3629                     else
3630                         weight -= 10;
3631                 }
3632                 else if (*s == '$' && s[1] &&
3633                   strchr("[#!%*<>()-=",s[1])) {
3634                     if (/*{*/ strchr("])} =",s[2]))
3635                         weight -= 10;
3636                     else
3637                         weight -= 1;
3638                 }
3639                 break;
3640             case '\\':
3641                 un_char = 254;
3642                 if (s[1]) {
3643                     if (strchr("wds]",s[1]))
3644                         weight += 100;
3645                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3646                         weight += 1;
3647                     else if (strchr("rnftbxcav",s[1]))
3648                         weight += 40;
3649                     else if (isDIGIT(s[1])) {
3650                         weight += 40;
3651                         while (s[1] && isDIGIT(s[1]))
3652                             s++;
3653                     }
3654                 }
3655                 else
3656                     weight += 100;
3657                 break;
3658             case '-':
3659                 if (s[1] == '\\')
3660                     weight += 50;
3661                 if (strchr("aA01! ",last_un_char))
3662                     weight += 30;
3663                 if (strchr("zZ79~",s[1]))
3664                     weight += 30;
3665                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3666                     weight -= 5;        /* cope with negative subscript */
3667                 break;
3668             default:
3669                 if (!isALNUM(last_un_char)
3670                     && !(last_un_char == '$' || last_un_char == '@'
3671                          || last_un_char == '&')
3672                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3673                     char *d = tmpbuf;
3674                     while (isALPHA(*s))
3675                         *d++ = *s++;
3676                     *d = '\0';
3677                     if (keyword(tmpbuf, d - tmpbuf, 0))
3678                         weight -= 150;
3679                 }
3680                 if (un_char == last_un_char + 1)
3681                     weight += 5;
3682                 weight -= seen[un_char];
3683                 break;
3684             }
3685             seen[un_char]++;
3686         }
3687         if (weight >= 0)        /* probably a character class */
3688             return FALSE;
3689     }
3690
3691     return TRUE;
3692 }
3693
3694 /*
3695  * S_intuit_method
3696  *
3697  * Does all the checking to disambiguate
3698  *   foo bar
3699  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3700  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3701  *
3702  * First argument is the stuff after the first token, e.g. "bar".
3703  *
3704  * Not a method if bar is a filehandle.
3705  * Not a method if foo is a subroutine prototyped to take a filehandle.
3706  * Not a method if it's really "Foo $bar"
3707  * Method if it's "foo $bar"
3708  * Not a method if it's really "print foo $bar"
3709  * Method if it's really "foo package::" (interpreted as package->foo)
3710  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3711  * Not a method if bar is a filehandle or package, but is quoted with
3712  *   =>
3713  */
3714
3715 STATIC int
3716 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3717 {
3718     dVAR;
3719     char *s = start + (*start == '$');
3720     char tmpbuf[sizeof PL_tokenbuf];
3721     STRLEN len;
3722     GV* indirgv;
3723 #ifdef PERL_MAD
3724     int soff;
3725 #endif
3726
3727     PERL_ARGS_ASSERT_INTUIT_METHOD;
3728
3729     if (gv) {
3730         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3731             return 0;
3732         if (cv) {
3733             if (SvPOK(cv)) {
3734                 const char *proto = SvPVX_const(cv);
3735                 if (proto) {
3736                     if (*proto == ';')
3737                         proto++;
3738                     if (*proto == '*')
3739                         return 0;
3740                 }
3741             }
3742         } else
3743             gv = NULL;
3744     }
3745     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3746     /* start is the beginning of the possible filehandle/object,
3747      * and s is the end of it
3748      * tmpbuf is a copy of it
3749      */
3750
3751     if (*start == '$') {
3752         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3753                 isUPPER(*PL_tokenbuf))
3754             return 0;
3755 #ifdef PERL_MAD
3756         len = start - SvPVX(PL_linestr);
3757 #endif
3758         s = PEEKSPACE(s);
3759 #ifdef PERL_MAD
3760         start = SvPVX(PL_linestr) + len;
3761 #endif
3762         PL_bufptr = start;
3763         PL_expect = XREF;
3764         return *s == '(' ? FUNCMETH : METHOD;
3765     }
3766     if (!keyword(tmpbuf, len, 0)) {
3767         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3768             len -= 2;
3769             tmpbuf[len] = '\0';
3770 #ifdef PERL_MAD
3771             soff = s - SvPVX(PL_linestr);
3772 #endif
3773             goto bare_package;
3774         }
3775         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3776         if (indirgv && GvCVu(indirgv))
3777             return 0;
3778         /* filehandle or package name makes it a method */
3779         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3780 #ifdef PERL_MAD
3781             soff = s - SvPVX(PL_linestr);
3782 #endif
3783             s = PEEKSPACE(s);
3784             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3785                 return 0;       /* no assumptions -- "=>" quotes bareword */
3786       bare_package:
3787             start_force(PL_curforce);
3788             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3789                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3790             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3791             if (PL_madskills)
3792                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3793             PL_expect = XTERM;
3794             force_next(WORD);
3795             PL_bufptr = s;
3796 #ifdef PERL_MAD
3797             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3798 #endif
3799             return *s == '(' ? FUNCMETH : METHOD;
3800         }
3801     }
3802     return 0;
3803 }
3804
3805 /* Encoded script support. filter_add() effectively inserts a
3806  * 'pre-processing' function into the current source input stream.
3807  * Note that the filter function only applies to the current source file
3808  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3809  *
3810  * The datasv parameter (which may be NULL) can be used to pass
3811  * private data to this instance of the filter. The filter function
3812  * can recover the SV using the FILTER_DATA macro and use it to
3813  * store private buffers and state information.
3814  *
3815  * The supplied datasv parameter is upgraded to a PVIO type
3816  * and the IoDIRP/IoANY field is used to store the function pointer,
3817  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3818  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3819  * private use must be set using malloc'd pointers.
3820  */
3821
3822 SV *
3823 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3824 {
3825     dVAR;
3826     if (!funcp)
3827         return NULL;
3828
3829     if (!PL_parser)
3830         return NULL;
3831
3832     if (!PL_rsfp_filters)
3833         PL_rsfp_filters = newAV();
3834     if (!datasv)
3835         datasv = newSV(0);
3836     SvUPGRADE(datasv, SVt_PVIO);
3837     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3838     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3839     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3840                           FPTR2DPTR(void *, IoANY(datasv)),
3841                           SvPV_nolen(datasv)));
3842     av_unshift(PL_rsfp_filters, 1);
3843     av_store(PL_rsfp_filters, 0, datasv) ;
3844     return(datasv);
3845 }
3846
3847
3848 /* Delete most recently added instance of this filter function. */
3849 void
3850 Perl_filter_del(pTHX_ filter_t funcp)
3851 {
3852     dVAR;
3853     SV *datasv;
3854
3855     PERL_ARGS_ASSERT_FILTER_DEL;
3856
3857 #ifdef DEBUGGING
3858     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3859                           FPTR2DPTR(void*, funcp)));
3860 #endif
3861     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3862         return;
3863     /* if filter is on top of stack (usual case) just pop it off */
3864     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3865     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3866         sv_free(av_pop(PL_rsfp_filters));
3867
3868         return;
3869     }
3870     /* we need to search for the correct entry and clear it     */
3871     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3872 }
3873
3874
3875 /* Invoke the idxth filter function for the current rsfp.        */
3876 /* maxlen 0 = read one text line */
3877 I32
3878 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3879 {
3880     dVAR;
3881     filter_t funcp;
3882     SV *datasv = NULL;
3883     /* This API is bad. It should have been using unsigned int for maxlen.
3884        Not sure if we want to change the API, but if not we should sanity
3885        check the value here.  */
3886     const unsigned int correct_length
3887         = maxlen < 0 ?
3888 #ifdef PERL_MICRO
3889         0x7FFFFFFF
3890 #else
3891         INT_MAX
3892 #endif
3893         : maxlen;
3894
3895     PERL_ARGS_ASSERT_FILTER_READ;
3896
3897     if (!PL_parser || !PL_rsfp_filters)
3898         return -1;
3899     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3900         /* Provide a default input filter to make life easy.    */
3901         /* Note that we append to the line. This is handy.      */
3902         DEBUG_P(PerlIO_printf(Perl_debug_log,
3903                               "filter_read %d: from rsfp\n", idx));
3904         if (correct_length) {
3905             /* Want a block */
3906             int len ;
3907             const int old_len = SvCUR(buf_sv);
3908
3909             /* ensure buf_sv is large enough */
3910             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3911             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3912                                    correct_length)) <= 0) {
3913                 if (PerlIO_error(PL_rsfp))
3914                     return -1;          /* error */
3915                 else
3916                     return 0 ;          /* end of file */
3917             }
3918             SvCUR_set(buf_sv, old_len + len) ;
3919             SvPVX(buf_sv)[old_len + len] = '\0';
3920         } else {
3921             /* Want a line */
3922             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3923                 if (PerlIO_error(PL_rsfp))
3924                     return -1;          /* error */
3925                 else
3926                     return 0 ;          /* end of file */
3927             }
3928         }
3929         return SvCUR(buf_sv);
3930     }
3931     /* Skip this filter slot if filter has been deleted */
3932     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3933         DEBUG_P(PerlIO_printf(Perl_debug_log,
3934                               "filter_read %d: skipped (filter deleted)\n",
3935                               idx));
3936         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3937     }
3938     /* Get function pointer hidden within datasv        */
3939     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3940     DEBUG_P(PerlIO_printf(Perl_debug_log,
3941                           "filter_read %d: via function %p (%s)\n",
3942                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3943     /* Call function. The function is expected to       */
3944     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3945     /* Return: <0:error, =0:eof, >0:not eof             */
3946     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3947 }
3948
3949 STATIC char *
3950 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3951 {
3952     dVAR;
3953
3954     PERL_ARGS_ASSERT_FILTER_GETS;
3955
3956 #ifdef PERL_CR_FILTER
3957     if (!PL_rsfp_filters) {
3958         filter_add(S_cr_textfilter,NULL);
3959     }
3960 #endif
3961     if (PL_rsfp_filters) {
3962         if (!append)
3963             SvCUR_set(sv, 0);   /* start with empty line        */
3964         if (FILTER_READ(0, sv, 0) > 0)
3965             return ( SvPVX(sv) ) ;
3966         else
3967             return NULL ;
3968     }
3969     else
3970         return (sv_gets(sv, PL_rsfp, append));
3971 }
3972
3973 STATIC HV *
3974 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3975 {
3976     dVAR;
3977     GV *gv;
3978
3979     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3980
3981     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3982         return PL_curstash;
3983
3984     if (len > 2 &&
3985         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3986         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3987     {
3988         return GvHV(gv);                        /* Foo:: */
3989     }
3990
3991     /* use constant CLASS => 'MyClass' */
3992     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3993     if (gv && GvCV(gv)) {
3994         SV * const sv = cv_const_sv(GvCV(gv));
3995         if (sv)
3996             pkgname = SvPV_const(sv, len);
3997     }
3998
3999     return gv_stashpvn(pkgname, len, 0);
4000 }
4001
4002 /*
4003  * S_readpipe_override
4004  * Check whether readpipe() is overridden, and generates the appropriate
4005  * optree, provided sublex_start() is called afterwards.
4006  */
4007 STATIC void
4008 S_readpipe_override(pTHX)
4009 {
4010     GV **gvp;
4011     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4012     pl_yylval.ival = OP_BACKTICK;
4013     if ((gv_readpipe
4014                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4015             ||
4016             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4017              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4018              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4019     {
4020         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4021             op_append_elem(OP_LIST,
4022                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4023                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4024     }
4025 }
4026
4027 #ifdef PERL_MAD
4028  /*
4029  * Perl_madlex
4030  * The intent of this yylex wrapper is to minimize the changes to the
4031  * tokener when we aren't interested in collecting madprops.  It remains
4032  * to be seen how successful this strategy will be...
4033  */
4034
4035 int
4036 Perl_madlex(pTHX)
4037 {
4038     int optype;
4039     char *s = PL_bufptr;
4040
4041     /* make sure PL_thiswhite is initialized */
4042     PL_thiswhite = 0;
4043     PL_thismad = 0;
4044
4045     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4046     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4047         return S_pending_ident(aTHX);
4048
4049     /* previous token ate up our whitespace? */
4050     if (!PL_lasttoke && PL_nextwhite) {
4051         PL_thiswhite = PL_nextwhite;
4052         PL_nextwhite = 0;
4053     }
4054
4055     /* isolate the token, and figure out where it is without whitespace */
4056     PL_realtokenstart = -1;
4057     PL_thistoken = 0;
4058     optype = yylex();
4059     s = PL_bufptr;
4060     assert(PL_curforce < 0);
4061
4062     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4063         if (!PL_thistoken) {
4064             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4065                 PL_thistoken = newSVpvs("");
4066             else {
4067                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4068                 PL_thistoken = newSVpvn(tstart, s - tstart);
4069             }
4070         }
4071         if (PL_thismad) /* install head */
4072             CURMAD('X', PL_thistoken);
4073     }
4074
4075     /* last whitespace of a sublex? */
4076     if (optype == ')' && PL_endwhite) {
4077         CURMAD('X', PL_endwhite);
4078     }
4079
4080     if (!PL_thismad) {
4081
4082         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4083         if (!PL_thiswhite && !PL_endwhite && !optype) {
4084             sv_free(PL_thistoken);
4085             PL_thistoken = 0;
4086             return 0;
4087         }
4088
4089         /* put off final whitespace till peg */
4090         if (optype == ';' && !PL_rsfp) {
4091             PL_nextwhite = PL_thiswhite;
4092             PL_thiswhite = 0;
4093         }
4094         else if (PL_thisopen) {
4095             CURMAD('q', PL_thisopen);
4096             if (PL_thistoken)
4097                 sv_free(PL_thistoken);
4098             PL_thistoken = 0;
4099         }
4100         else {
4101             /* Store actual token text as madprop X */
4102             CURMAD('X', PL_thistoken);
4103         }
4104
4105         if (PL_thiswhite) {
4106             /* add preceding whitespace as madprop _ */
4107             CURMAD('_', PL_thiswhite);
4108         }
4109
4110         if (PL_thisstuff) {
4111             /* add quoted material as madprop = */
4112             CURMAD('=', PL_thisstuff);
4113         }
4114
4115         if (PL_thisclose) {
4116             /* add terminating quote as madprop Q */
4117             CURMAD('Q', PL_thisclose);
4118         }
4119     }
4120
4121     /* special processing based on optype */
4122
4123     switch (optype) {
4124
4125     /* opval doesn't need a TOKEN since it can already store mp */
4126     case WORD:
4127     case METHOD:
4128     case FUNCMETH:
4129     case THING:
4130     case PMFUNC:
4131     case PRIVATEREF:
4132     case FUNC0SUB:
4133     case UNIOPSUB:
4134     case LSTOPSUB:
4135         if (pl_yylval.opval)
4136             append_madprops(PL_thismad, pl_yylval.opval, 0);
4137         PL_thismad = 0;
4138         return optype;
4139
4140     /* fake EOF */
4141     case 0:
4142         optype = PEG;
4143         if (PL_endwhite) {
4144             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4145             PL_endwhite = 0;
4146         }
4147         break;
4148
4149     case ']':
4150     case '}':
4151         if (PL_faketokens)
4152             break;
4153         /* remember any fake bracket that lexer is about to discard */
4154         if (PL_lex_brackets == 1 &&
4155             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4156         {
4157             s = PL_bufptr;
4158             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4159                 s++;
4160             if (*s == '}') {
4161                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4162                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4163                 PL_thiswhite = 0;
4164                 PL_bufptr = s - 1;
4165                 break;  /* don't bother looking for trailing comment */
4166             }
4167             else
4168                 s = PL_bufptr;
4169         }
4170         if (optype == ']')
4171             break;
4172         /* FALLTHROUGH */
4173
4174     /* attach a trailing comment to its statement instead of next token */
4175     case ';':
4176         if (PL_faketokens)
4177             break;
4178         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4179             s = PL_bufptr;
4180             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4181                 s++;
4182             if (*s == '\n' || *s == '#') {
4183                 while (s < PL_bufend && *s != '\n')
4184                     s++;
4185                 if (s < PL_bufend)
4186                     s++;
4187                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4188                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4189                 PL_thiswhite = 0;
4190                 PL_bufptr = s;
4191             }
4192         }
4193         break;
4194
4195     /* pval */
4196     case LABEL:
4197         break;
4198
4199     /* ival */
4200     default:
4201         break;
4202
4203     }
4204
4205     /* Create new token struct.  Note: opvals return early above. */
4206     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4207     PL_thismad = 0;
4208     return optype;
4209 }
4210 #endif
4211
4212 STATIC char *
4213 S_tokenize_use(pTHX_ int is_use, char *s) {
4214     dVAR;
4215
4216     PERL_ARGS_ASSERT_TOKENIZE_USE;
4217
4218     if (PL_expect != XSTATE)
4219         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4220                     is_use ? "use" : "no"));
4221     s = SKIPSPACE1(s);
4222     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4223         s = force_version(s, TRUE);
4224         if (*s == ';' || *s == '}'
4225                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4226             start_force(PL_curforce);
4227             NEXTVAL_NEXTTOKE.opval = NULL;
4228             force_next(WORD);
4229         }
4230         else if (*s == 'v') {
4231             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4232             s = force_version(s, FALSE);
4233         }
4234     }
4235     else {
4236         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4237         s = force_version(s, FALSE);
4238     }
4239     pl_yylval.ival = is_use;
4240     return s;
4241 }
4242 #ifdef DEBUGGING
4243     static const char* const exp_name[] =
4244         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4245           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4246         };
4247 #endif
4248
4249 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4250 STATIC bool
4251 S_word_takes_any_delimeter(char *p, STRLEN len)
4252 {
4253     return (len == 1 && strchr("msyq", p[0])) ||
4254            (len == 2 && (
4255             (p[0] == 't' && p[1] == 'r') ||
4256             (p[0] == 'q' && strchr("qwxr", p[1]))));
4257 }
4258
4259 /*
4260   yylex
4261
4262   Works out what to call the token just pulled out of the input
4263   stream.  The yacc parser takes care of taking the ops we return and
4264   stitching them into a tree.
4265
4266   Returns:
4267     PRIVATEREF
4268
4269   Structure:
4270       if read an identifier
4271           if we're in a my declaration
4272               croak if they tried to say my($foo::bar)
4273               build the ops for a my() declaration
4274           if it's an access to a my() variable
4275               are we in a sort block?
4276                   croak if my($a); $a <=> $b
4277               build ops for access to a my() variable
4278           if in a dq string, and they've said @foo and we can't find @foo
4279               croak
4280           build ops for a bareword
4281       if we already built the token before, use it.
4282 */
4283
4284
4285 #ifdef __SC__
4286 #pragma segment Perl_yylex
4287 #endif
4288 int
4289 Perl_yylex(pTHX)
4290 {
4291     dVAR;
4292     register char *s = PL_bufptr;
4293     register char *d;
4294     STRLEN len;
4295     bool bof = FALSE;
4296     U32 fake_eof = 0;
4297
4298     /* orig_keyword, gvp, and gv are initialized here because
4299      * jump to the label just_a_word_zero can bypass their
4300      * initialization later. */
4301     I32 orig_keyword = 0;
4302     GV *gv = NULL;
4303     GV **gvp = NULL;
4304
4305     DEBUG_T( {
4306         SV* tmp = newSVpvs("");
4307         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4308             (IV)CopLINE(PL_curcop),
4309             lex_state_names[PL_lex_state],
4310             exp_name[PL_expect],
4311             pv_display(tmp, s, strlen(s), 0, 60));
4312         SvREFCNT_dec(tmp);
4313     } );
4314     /* check if there's an identifier for us to look at */
4315     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4316         return REPORT(S_pending_ident(aTHX));
4317
4318     /* no identifier pending identification */
4319
4320     switch (PL_lex_state) {
4321 #ifdef COMMENTARY
4322     case LEX_NORMAL:            /* Some compilers will produce faster */
4323     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4324         break;
4325 #endif
4326
4327     /* when we've already built the next token, just pull it out of the queue */
4328     case LEX_KNOWNEXT:
4329 #ifdef PERL_MAD
4330         PL_lasttoke--;
4331         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4332         if (PL_madskills) {
4333             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4334             PL_nexttoke[PL_lasttoke].next_mad = 0;
4335             if (PL_thismad && PL_thismad->mad_key == '_') {
4336                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4337                 PL_thismad->mad_val = 0;
4338                 mad_free(PL_thismad);
4339                 PL_thismad = 0;
4340             }
4341         }
4342         if (!PL_lasttoke) {
4343             PL_lex_state = PL_lex_defer;
4344             PL_expect = PL_lex_expect;
4345             PL_lex_defer = LEX_NORMAL;
4346             if (!PL_nexttoke[PL_lasttoke].next_type)
4347                 return yylex();
4348         }
4349 #else
4350         PL_nexttoke--;
4351         pl_yylval = PL_nextval[PL_nexttoke];
4352         if (!PL_nexttoke) {
4353             PL_lex_state = PL_lex_defer;
4354             PL_expect = PL_lex_expect;
4355             PL_lex_defer = LEX_NORMAL;
4356         }
4357 #endif
4358         {
4359             I32 next_type;
4360 #ifdef PERL_MAD
4361             next_type = PL_nexttoke[PL_lasttoke].next_type;
4362 #else
4363             next_type = PL_nexttype[PL_nexttoke];
4364 #endif
4365             if (next_type & (7<<24)) {
4366                 if (next_type & (1<<24)) {
4367                     if (PL_lex_brackets > 100)
4368                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4369                     PL_lex_brackstack[PL_lex_brackets++] =
4370                         (char) ((next_type >> 16) & 0xff);
4371                 }
4372                 if (next_type & (2<<24))
4373                     PL_lex_allbrackets++;
4374                 if (next_type & (4<<24))
4375                     PL_lex_allbrackets--;
4376                 next_type &= 0xffff;
4377             }
4378 #ifdef PERL_MAD
4379             /* FIXME - can these be merged?  */
4380             return next_type;
4381 #else
4382             return REPORT(next_type);
4383 #endif
4384         }
4385
4386     /* interpolated case modifiers like \L \U, including \Q and \E.
4387        when we get here, PL_bufptr is at the \
4388     */
4389     case LEX_INTERPCASEMOD:
4390 #ifdef DEBUGGING
4391         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4392             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4393 #endif
4394         /* handle \E or end of string */
4395         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4396             /* if at a \E */
4397             if (PL_lex_casemods) {
4398                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4399                 PL_lex_casestack[PL_lex_casemods] = '\0';
4400
4401                 if (PL_bufptr != PL_bufend
4402                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4403                     PL_bufptr += 2;
4404                     PL_lex_state = LEX_INTERPCONCAT;
4405 #ifdef PERL_MAD
4406                     if (PL_madskills)
4407                         PL_thistoken = newSVpvs("\\E");
4408 #endif
4409                 }
4410                 PL_lex_allbrackets--;
4411                 return REPORT(')');
4412             }
4413 #ifdef PERL_MAD
4414             while (PL_bufptr != PL_bufend &&
4415               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4416                 if (!PL_thiswhite)
4417                     PL_thiswhite = newSVpvs("");
4418                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4419                 PL_bufptr += 2;
4420             }
4421 #else
4422             if (PL_bufptr != PL_bufend)
4423                 PL_bufptr += 2;
4424 #endif
4425             PL_lex_state = LEX_INTERPCONCAT;
4426             return yylex();
4427         }
4428         else {
4429             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4430               "### Saw case modifier\n"); });
4431             s = PL_bufptr + 1;
4432             if (s[1] == '\\' && s[2] == 'E') {
4433 #ifdef PERL_MAD
4434                 if (!PL_thiswhite)
4435                     PL_thiswhite = newSVpvs("");
4436                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4437 #endif
4438                 PL_bufptr = s + 3;
4439                 PL_lex_state = LEX_INTERPCONCAT;
4440                 return yylex();
4441             }
4442             else {
4443                 I32 tmp;
4444                 if (!PL_madskills) /* when just compiling don't need correct */
4445                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4446                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4447                 if ((*s == 'L' || *s == 'U') &&
4448                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4449                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4450                     PL_lex_allbrackets--;
4451                     return REPORT(')');
4452                 }
4453                 if (PL_lex_casemods > 10)
4454                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4455                 PL_lex_casestack[PL_lex_casemods++] = *s;
4456                 PL_lex_casestack[PL_lex_casemods] = '\0';
4457                 PL_lex_state = LEX_INTERPCONCAT;
4458                 start_force(PL_curforce);
4459                 NEXTVAL_NEXTTOKE.ival = 0;
4460                 force_next((2<<24)|'(');
4461                 start_force(PL_curforce);
4462                 if (*s == 'l')
4463                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4464                 else if (*s == 'u')
4465                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4466                 else if (*s == 'L')
4467                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4468                 else if (*s == 'U')
4469                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4470                 else if (*s == 'Q')
4471                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4472                 else
4473                     Perl_croak(aTHX_ "panic: yylex");
4474                 if (PL_madskills) {
4475                     SV* const tmpsv = newSVpvs("\\ ");
4476                     /* replace the space with the character we want to escape
4477                      */
4478                     SvPVX(tmpsv)[1] = *s;
4479                     curmad('_', tmpsv);
4480                 }
4481                 PL_bufptr = s + 1;
4482             }
4483             force_next(FUNC);
4484             if (PL_lex_starts) {
4485                 s = PL_bufptr;
4486                 PL_lex_starts = 0;
4487 #ifdef PERL_MAD
4488                 if (PL_madskills) {
4489                     if (PL_thistoken)
4490                         sv_free(PL_thistoken);
4491                     PL_thistoken = newSVpvs("");
4492                 }
4493 #endif
4494                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4496                     OPERATOR(',');
4497                 else
4498                     Aop(OP_CONCAT);
4499             }
4500             else
4501                 return yylex();
4502         }
4503
4504     case LEX_INTERPPUSH:
4505         return REPORT(sublex_push());
4506
4507     case LEX_INTERPSTART:
4508         if (PL_bufptr == PL_bufend)
4509             return REPORT(sublex_done());
4510         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4511               "### Interpolated variable\n"); });
4512         PL_expect = XTERM;
4513         PL_lex_dojoin = (*PL_bufptr == '@');
4514         PL_lex_state = LEX_INTERPNORMAL;
4515         if (PL_lex_dojoin) {
4516             start_force(PL_curforce);
4517             NEXTVAL_NEXTTOKE.ival = 0;
4518             force_next(',');
4519             start_force(PL_curforce);
4520             force_ident("\"", '$');
4521             start_force(PL_curforce);
4522             NEXTVAL_NEXTTOKE.ival = 0;
4523             force_next('$');
4524             start_force(PL_curforce);
4525             NEXTVAL_NEXTTOKE.ival = 0;
4526             force_next((2<<24)|'(');
4527             start_force(PL_curforce);
4528             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4529             force_next(FUNC);
4530         }
4531         if (PL_lex_starts++) {
4532             s = PL_bufptr;
4533 #ifdef PERL_MAD
4534             if (PL_madskills) {
4535                 if (PL_thistoken)
4536                     sv_free(PL_thistoken);
4537                 PL_thistoken = newSVpvs("");
4538             }
4539 #endif
4540             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4541             if (!PL_lex_casemods && PL_lex_inpat)
4542                 OPERATOR(',');
4543             else
4544                 Aop(OP_CONCAT);
4545         }
4546         return yylex();
4547
4548     case LEX_INTERPENDMAYBE:
4549         if (intuit_more(PL_bufptr)) {
4550             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4551             break;
4552         }
4553         /* FALL THROUGH */
4554
4555     case LEX_INTERPEND:
4556         if (PL_lex_dojoin) {
4557             PL_lex_dojoin = FALSE;
4558             PL_lex_state = LEX_INTERPCONCAT;
4559 #ifdef PERL_MAD
4560             if (PL_madskills) {
4561                 if (PL_thistoken)
4562                     sv_free(PL_thistoken);
4563                 PL_thistoken = newSVpvs("");
4564             }
4565 #endif
4566             PL_lex_allbrackets--;
4567             return REPORT(')');
4568         }
4569         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4570             && SvEVALED(PL_lex_repl))
4571         {
4572             if (PL_bufptr != PL_bufend)
4573                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4574             PL_lex_repl = NULL;
4575         }
4576         /* FALLTHROUGH */
4577     case LEX_INTERPCONCAT:
4578 #ifdef DEBUGGING
4579         if (PL_lex_brackets)
4580             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4581 #endif
4582         if (PL_bufptr == PL_bufend)
4583             return REPORT(sublex_done());
4584
4585         if (SvIVX(PL_linestr) == '\'') {
4586             SV *sv = newSVsv(PL_linestr);
4587             if (!PL_lex_inpat)
4588                 sv = tokeq(sv);
4589             else if ( PL_hints & HINT_NEW_RE )
4590                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4591             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4592             s = PL_bufend;
4593         }
4594         else {
4595             s = scan_const(PL_bufptr);
4596             if (*s == '\\')
4597                 PL_lex_state = LEX_INTERPCASEMOD;
4598             else
4599                 PL_lex_state = LEX_INTERPSTART;
4600         }
4601
4602         if (s != PL_bufptr) {
4603             start_force(PL_curforce);
4604             if (PL_madskills) {
4605                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4606             }
4607             NEXTVAL_NEXTTOKE = pl_yylval;
4608             PL_expect = XTERM;
4609             force_next(THING);
4610             if (PL_lex_starts++) {
4611 #ifdef PERL_MAD
4612                 if (PL_madskills) {
4613                     if (PL_thistoken)
4614                         sv_free(PL_thistoken);
4615                     PL_thistoken = newSVpvs("");
4616                 }
4617 #endif
4618                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4619                 if (!PL_lex_casemods && PL_lex_inpat)
4620                     OPERATOR(',');
4621                 else
4622                     Aop(OP_CONCAT);
4623             }
4624             else {
4625                 PL_bufptr = s;
4626                 return yylex();
4627             }
4628         }
4629
4630         return yylex();
4631     case LEX_FORMLINE:
4632         PL_lex_state = LEX_NORMAL;
4633         s = scan_formline(PL_bufptr);
4634         if (!PL_lex_formbrack)
4635             goto rightbracket;
4636         OPERATOR(';');
4637     }
4638
4639     s = PL_bufptr;
4640     PL_oldoldbufptr = PL_oldbufptr;
4641     PL_oldbufptr = s;
4642
4643   retry:
4644 #ifdef PERL_MAD
4645     if (PL_thistoken) {
4646         sv_free(PL_thistoken);
4647         PL_thistoken = 0;
4648     }
4649     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4650 #endif
4651     switch (*s) {
4652     default:
4653         if (isIDFIRST_lazy_if(s,UTF))
4654             goto keylookup;
4655         {
4656         unsigned char c = *s;
4657         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4658         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4659             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4660         } else {
4661             d = PL_linestart;
4662         }
4663         *s = '\0';
4664         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4665     }
4666     case 4:
4667     case 26:
4668         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4669     case 0:
4670 #ifdef PERL_MAD
4671         if (PL_madskills)
4672             PL_faketokens = 0;
4673 #endif
4674         if (!PL_rsfp) {
4675             PL_last_uni = 0;
4676             PL_last_lop = 0;
4677             if (PL_lex_brackets &&
4678                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4679                 yyerror((const char *)
4680                         (PL_lex_formbrack
4681                          ? "Format not terminated"
4682                          : "Missing right curly or square bracket"));
4683             }
4684             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4685                         "### Tokener got EOF\n");
4686             } );
4687             TOKEN(0);
4688         }
4689         if (s++ < PL_bufend)
4690             goto retry;                 /* ignore stray nulls */
4691         PL_last_uni = 0;
4692         PL_last_lop = 0;
4693         if (!PL_in_eval && !PL_preambled) {
4694             PL_preambled = TRUE;
4695 #ifdef PERL_MAD
4696             if (PL_madskills)
4697                 PL_faketokens = 1;
4698 #endif
4699             if (PL_perldb) {
4700                 /* Generate a string of Perl code to load the debugger.
4701                  * If PERL5DB is set, it will return the contents of that,
4702                  * otherwise a compile-time require of perl5db.pl.  */
4703
4704                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4705
4706                 if (pdb) {
4707                     sv_setpv(PL_linestr, pdb);
4708                     sv_catpvs(PL_linestr,";");
4709                 } else {
4710                     SETERRNO(0,SS_NORMAL);
4711                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4712                 }
4713             } else
4714                 sv_setpvs(PL_linestr,"");
4715             if (PL_preambleav) {
4716                 SV **svp = AvARRAY(PL_preambleav);
4717                 SV **const end = svp + AvFILLp(PL_preambleav);
4718                 while(svp <= end) {
4719                     sv_catsv(PL_linestr, *svp);
4720                     ++svp;
4721                     sv_catpvs(PL_linestr, ";");
4722                 }
4723                 sv_free(MUTABLE_SV(PL_preambleav));
4724                 PL_preambleav = NULL;
4725             }
4726             if (PL_minus_E)
4727                 sv_catpvs(PL_linestr,
4728                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4729             if (PL_minus_n || PL_minus_p) {
4730                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4731                 if (PL_minus_l)
4732                     sv_catpvs(PL_linestr,"chomp;");
4733                 if (PL_minus_a) {
4734                     if (PL_minus_F) {
4735                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4736                              || *PL_splitstr == '"')
4737                               && strchr(PL_splitstr + 1, *PL_splitstr))
4738                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4739                         else {
4740                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4741                                bytes can be used as quoting characters.  :-) */
4742                             const char *splits = PL_splitstr;
4743                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4744                             do {
4745                                 /* Need to \ \s  */
4746                                 if (*splits == '\\')
4747                                     sv_catpvn(PL_linestr, splits, 1);
4748                                 sv_catpvn(PL_linestr, splits, 1);
4749                             } while (*splits++);
4750                             /* This loop will embed the trailing NUL of
4751                                PL_linestr as the last thing it does before
4752                                terminating.  */
4753                             sv_catpvs(PL_linestr, ");");
4754                         }
4755                     }
4756                     else
4757                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4758                 }
4759             }
4760             sv_catpvs(PL_linestr, "\n");
4761             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4762             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4763             PL_last_lop = PL_last_uni = NULL;
4764             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4765                 update_debugger_info(PL_linestr, NULL, 0);
4766             goto retry;
4767         }
4768         do {
4769             fake_eof = 0;
4770             bof = PL_rsfp ? TRUE : FALSE;
4771             if (0) {
4772               fake_eof:
4773                 fake_eof = LEX_FAKE_EOF;
4774             }
4775             PL_bufptr = PL_bufend;
4776             CopLINE_inc(PL_curcop);
4777             if (!lex_next_chunk(fake_eof)) {
4778                 CopLINE_dec(PL_curcop);
4779                 s = PL_bufptr;
4780                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4781             }
4782             CopLINE_dec(PL_curcop);
4783 #ifdef PERL_MAD
4784             if (!PL_rsfp)
4785                 PL_realtokenstart = -1;
4786 #endif
4787             s = PL_bufptr;
4788             /* If it looks like the start of a BOM or raw UTF-16,
4789              * check if it in fact is. */
4790             if (bof && PL_rsfp &&
4791                      (*s == 0 ||
4792                       *(U8*)s == 0xEF ||
4793                       *(U8*)s >= 0xFE ||
4794                       s[1] == 0)) {
4795                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4796                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4797 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4798                 /* offset may include swallowed CR */
4799                 if (!bof)
4800                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4801 #endif
4802                 if (bof) {
4803                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4804                     s = swallow_bom((U8*)s);
4805                 }
4806             }
4807             if (PL_parser->in_pod) {
4808                 /* Incest with pod. */
4809 #ifdef PERL_MAD
4810                 if (PL_madskills)
4811                     sv_catsv(PL_thiswhite, PL_linestr);
4812 #endif
4813                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4814                     sv_setpvs(PL_linestr, "");
4815                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4816                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4817                     PL_last_lop = PL_last_uni = NULL;
4818                     PL_parser->in_pod = 0;
4819                 }
4820             }
4821             if (PL_rsfp)
4822                 incline(s);
4823         } while (PL_parser->in_pod);
4824         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4825         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4826         PL_last_lop = PL_last_uni = NULL;
4827         if (CopLINE(PL_curcop) == 1) {
4828             while (s < PL_bufend && isSPACE(*s))
4829                 s++;
4830             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4831                 s++;
4832 #ifdef PERL_MAD
4833             if (PL_madskills)
4834                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4835 #endif
4836             d = NULL;
4837             if (!PL_in_eval) {
4838                 if (*s == '#' && *(s+1) == '!')
4839                     d = s + 2;
4840 #ifdef ALTERNATE_SHEBANG
4841                 else {
4842                     static char const as[] = ALTERNATE_SHEBANG;
4843                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4844                         d = s + (sizeof(as) - 1);
4845                 }
4846 #endif /* ALTERNATE_SHEBANG */
4847             }
4848             if (d) {
4849                 char *ipath;
4850                 char *ipathend;
4851
4852                 while (isSPACE(*d))
4853                     d++;
4854                 ipath = d;
4855                 while (*d && !isSPACE(*d))
4856                     d++;
4857                 ipathend = d;
4858
4859 #ifdef ARG_ZERO_IS_SCRIPT
4860                 if (ipathend > ipath) {
4861                     /*
4862                      * HP-UX (at least) sets argv[0] to the script name,
4863                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4864                      * at least, set argv[0] to the basename of the Perl
4865                      * interpreter. So, having found "#!", we'll set it right.
4866                      */
4867                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4868                                                     SVt_PV)); /* $^X */
4869                     assert(SvPOK(x) || SvGMAGICAL(x));
4870                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4871                         sv_setpvn(x, ipath, ipathend - ipath);
4872                         SvSETMAGIC(x);
4873                     }
4874                     else {
4875                         STRLEN blen;
4876                         STRLEN llen;
4877                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4878                         const char * const lstart = SvPV_const(x,llen);
4879                         if (llen < blen) {
4880                             bstart += blen - llen;
4881                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4882                                 sv_setpvn(x, ipath, ipathend - ipath);
4883                                 SvSETMAGIC(x);
4884                             }
4885                         }
4886                     }
4887                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4888                 }
4889 #endif /* ARG_ZERO_IS_SCRIPT */
4890
4891                 /*
4892                  * Look for options.
4893                  */
4894                 d = instr(s,"perl -");
4895                 if (!d) {
4896                     d = instr(s,"perl");
4897 #if defined(DOSISH)
4898                     /* avoid getting into infinite loops when shebang
4899                      * line contains "Perl" rather than "perl" */
4900                     if (!d) {
4901                         for (d = ipathend-4; d >= ipath; --d) {
4902                             if ((*d == 'p' || *d == 'P')
4903                                 && !ibcmp(d, "perl", 4))
4904                             {
4905                                 break;
4906                             }
4907                         }
4908                         if (d < ipath)
4909                             d = NULL;
4910                     }
4911 #endif
4912                 }
4913 #ifdef ALTERNATE_SHEBANG
4914                 /*
4915                  * If the ALTERNATE_SHEBANG on this system starts with a
4916                  * character that can be part of a Perl expression, then if
4917                  * we see it but not "perl", we're probably looking at the
4918                  * start of Perl code, not a request to hand off to some
4919                  * other interpreter.  Similarly, if "perl" is there, but
4920                  * not in the first 'word' of the line, we assume the line
4921                  * contains the start of the Perl program.
4922                  */
4923                 if (d && *s != '#') {
4924                     const char *c = ipath;
4925                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4926                         c++;
4927                     if (c < d)
4928                         d = NULL;       /* "perl" not in first word; ignore */
4929                     else
4930                         *s = '#';       /* Don't try to parse shebang line */
4931                 }
4932 #endif /* ALTERNATE_SHEBANG */
4933                 if (!d &&
4934                     *s == '#' &&
4935                     ipathend > ipath &&
4936                     !PL_minus_c &&
4937                     !instr(s,"indir") &&
4938                     instr(PL_origargv[0],"perl"))
4939                 {
4940                     dVAR;
4941                     char **newargv;
4942
4943                     *ipathend = '\0';
4944                     s = ipathend + 1;
4945                     while (s < PL_bufend && isSPACE(*s))
4946                         s++;
4947                     if (s < PL_bufend) {
4948                         Newx(newargv,PL_origargc+3,char*);
4949                         newargv[1] = s;
4950                         while (s < PL_bufend && !isSPACE(*s))
4951                             s++;
4952                         *s = '\0';
4953                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4954                     }
4955                     else
4956                         newargv = PL_origargv;
4957                     newargv[0] = ipath;
4958                     PERL_FPU_PRE_EXEC
4959                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4960                     PERL_FPU_POST_EXEC
4961                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4962                 }
4963                 if (d) {
4964                     while (*d && !isSPACE(*d))
4965                         d++;
4966                     while (SPACE_OR_TAB(*d))
4967                         d++;
4968
4969                     if (*d++ == '-') {
4970                         const bool switches_done = PL_doswitches;
4971                         const U32 oldpdb = PL_perldb;
4972                         const bool oldn = PL_minus_n;
4973                         const bool oldp = PL_minus_p;
4974                         const char *d1 = d;
4975
4976                         do {
4977                             bool baduni = FALSE;
4978                             if (*d1 == 'C') {
4979                                 const char *d2 = d1 + 1;
4980                                 if (parse_unicode_opts((const char **)&d2)
4981                                     != PL_unicode)
4982                                     baduni = TRUE;
4983                             }
4984                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4985                                 const char * const m = d1;
4986                                 while (*d1 && !isSPACE(*d1))
4987                                     d1++;
4988                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4989                                       (int)(d1 - m), m);
4990                             }
4991                             d1 = moreswitches(d1);
4992                         } while (d1);
4993                         if (PL_doswitches && !switches_done) {
4994                             int argc = PL_origargc;
4995                             char **argv = PL_origargv;
4996                             do {
4997                                 argc--,argv++;
4998                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4999                             init_argv_symbols(argc,argv);
5000                         }
5001                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5002                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5003                               /* if we have already added "LINE: while (<>) {",
5004                                  we must not do it again */
5005                         {
5006                             sv_setpvs(PL_linestr, "");
5007                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5008                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5009                             PL_last_lop = PL_last_uni = NULL;
5010                             PL_preambled = FALSE;
5011                             if (PERLDB_LINE || PERLDB_SAVESRC)
5012                                 (void)gv_fetchfile(PL_origfilename);
5013                             goto retry;
5014                         }
5015                     }
5016                 }
5017             }
5018         }
5019         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5020             PL_bufptr = s;
5021             PL_lex_state = LEX_FORMLINE;
5022             return yylex();
5023         }
5024         goto retry;
5025     case '\r':
5026 #ifdef PERL_STRICT_CR
5027         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5028         Perl_croak(aTHX_
5029       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5030 #endif
5031     case ' ': case '\t': case '\f': case 013:
5032 #ifdef PERL_MAD
5033         PL_realtokenstart = -1;
5034         if (!PL_thiswhite)
5035             PL_thiswhite = newSVpvs("");
5036         sv_catpvn(PL_thiswhite, s, 1);
5037 #endif
5038         s++;
5039         goto retry;
5040     case '#':
5041     case '\n':
5042 #ifdef PERL_MAD
5043         PL_realtokenstart = -1;
5044         if (PL_madskills)
5045             PL_faketokens = 0;
5046 #endif
5047         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5048             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5049                 /* handle eval qq[#line 1 "foo"\n ...] */
5050                 CopLINE_dec(PL_curcop);
5051                 incline(s);
5052             }
5053             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5054                 s = SKIPSPACE0(s);
5055                 if (!PL_in_eval || PL_rsfp)
5056                     incline(s);
5057             }
5058             else {
5059                 d = s;
5060                 while (d < PL_bufend && *d != '\n')
5061                     d++;
5062                 if (d < PL_bufend)
5063                     d++;
5064                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5065                   Perl_croak(aTHX_ "panic: input overflow");
5066 #ifdef PERL_MAD
5067                 if (PL_madskills)
5068                     PL_thiswhite = newSVpvn(s, d - s);
5069 #endif
5070                 s = d;
5071                 incline(s);
5072             }
5073             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5074                 PL_bufptr = s;
5075                 PL_lex_state = LEX_FORMLINE;
5076                 return yylex();
5077             }
5078         }
5079         else {
5080 #ifdef PERL_MAD
5081             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5082                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5083                     PL_faketokens = 0;
5084                     s = SKIPSPACE0(s);
5085                     TOKEN(PEG); /* make sure any #! line is accessible */
5086                 }
5087                 s = SKIPSPACE0(s);
5088             }
5089             else {
5090 /*              if (PL_madskills && PL_lex_formbrack) { */
5091                     d = s;
5092                     while (d < PL_bufend && *d != '\n')
5093                         d++;
5094                     if (d < PL_bufend)
5095                         d++;
5096                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5097                       Perl_croak(aTHX_ "panic: input overflow");
5098                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5099                         if (!PL_thiswhite)
5100                             PL_thiswhite = newSVpvs("");
5101                         if (CopLINE(PL_curcop) == 1) {
5102                             sv_setpvs(PL_thiswhite, "");
5103                             PL_faketokens = 0;
5104                         }
5105                         sv_catpvn(PL_thiswhite, s, d - s);
5106                     }
5107                     s = d;
5108 /*              }
5109                 *s = '\0';
5110                 PL_bufend = s; */
5111             }
5112 #else
5113             *s = '\0';
5114             PL_bufend = s;
5115 #endif
5116         }
5117         goto retry;
5118     case '-':
5119         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5120             I32 ftst = 0;
5121             char tmp;
5122
5123             s++;
5124             PL_bufptr = s;
5125             tmp = *s++;
5126
5127             while (s < PL_bufend && SPACE_OR_TAB(*s))
5128                 s++;
5129
5130             if (strnEQ(s,"=>",2)) {
5131                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5132                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5133                 OPERATOR('-');          /* unary minus */
5134             }
5135             PL_last_uni = PL_oldbufptr;
5136             switch (tmp) {
5137             case 'r': ftst = OP_FTEREAD;        break;
5138             case 'w': ftst = OP_FTEWRITE;       break;
5139             case 'x': ftst = OP_FTEEXEC;        break;
5140             case 'o': ftst = OP_FTEOWNED;       break;
5141             case 'R': ftst = OP_FTRREAD;        break;
5142             case 'W': ftst = OP_FTRWRITE;       break;
5143             case 'X': ftst = OP_FTREXEC;        break;
5144             case 'O': ftst = OP_FTROWNED;       break;
5145             case 'e': ftst = OP_FTIS;           break;
5146             case 'z': ftst = OP_FTZERO;         break;
5147             case 's': ftst = OP_FTSIZE;         break;
5148             case 'f': ftst = OP_FTFILE;         break;
5149             case 'd': ftst = OP_FTDIR;          break;
5150             case 'l': ftst = OP_FTLINK;         break;
5151             case 'p': ftst = OP_FTPIPE;         break;
5152             case 'S': ftst = OP_FTSOCK;         break;
5153             case 'u': ftst = OP_FTSUID;         break;
5154             case 'g': ftst = OP_FTSGID;         break;
5155             case 'k': ftst = OP_FTSVTX;         break;
5156             case 'b': ftst = OP_FTBLK;          break;
5157             case 'c': ftst = OP_FTCHR;          break;
5158             case 't': ftst = OP_FTTTY;          break;
5159             case 'T': ftst = OP_FTTEXT;         break;
5160             case 'B': ftst = OP_FTBINARY;       break;
5161             case 'M': case 'A': case 'C':
5162                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5163                 switch (tmp) {
5164                 case 'M': ftst = OP_FTMTIME;    break;
5165                 case 'A': ftst = OP_FTATIME;    break;
5166                 case 'C': ftst = OP_FTCTIME;    break;
5167                 default:                        break;
5168                 }
5169                 break;
5170             default:
5171                 break;
5172             }
5173             if (ftst) {
5174                 PL_last_lop_op = (OPCODE)ftst;
5175                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5176                         "### Saw file test %c\n", (int)tmp);
5177                 } );
5178                 FTST(ftst);
5179             }
5180             else {
5181                 /* Assume it was a minus followed by a one-letter named
5182                  * subroutine call (or a -bareword), then. */
5183                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5184                         "### '-%c' looked like a file test but was not\n",
5185                         (int) tmp);
5186                 } );
5187                 s = --PL_bufptr;
5188             }
5189         }
5190         {
5191             const char tmp = *s++;
5192             if (*s == tmp) {
5193                 s++;
5194                 if (PL_expect == XOPERATOR)
5195                     TERM(POSTDEC);
5196                 else
5197                     OPERATOR(PREDEC);
5198             }
5199             else if (*s == '>') {
5200                 s++;
5201                 s = SKIPSPACE1(s);
5202                 if (isIDFIRST_lazy_if(s,UTF)) {
5203                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5204                     TOKEN(ARROW);
5205                 }
5206                 else if (*s == '$')
5207                     OPERATOR(ARROW);
5208                 else
5209                     TERM(ARROW);
5210             }
5211             if (PL_expect == XOPERATOR) {
5212                 if (*s == '=' && !PL_lex_allbrackets &&
5213                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5214                     s--;
5215                     TOKEN(0);
5216                 }
5217                 Aop(OP_SUBTRACT);
5218             }
5219             else {
5220                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5221                     check_uni();
5222                 OPERATOR('-');          /* unary minus */
5223             }
5224         }
5225
5226     case '+':
5227         {
5228             const char tmp = *s++;
5229             if (*s == tmp) {
5230                 s++;
5231                 if (PL_expect == XOPERATOR)
5232                     TERM(POSTINC);
5233                 else
5234                     OPERATOR(PREINC);
5235             }
5236             if (PL_expect == XOPERATOR) {
5237                 if (*s == '=' && !PL_lex_allbrackets &&
5238                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5239                     s--;
5240                     TOKEN(0);
5241                 }
5242                 Aop(OP_ADD);
5243             }
5244             else {
5245                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5246                     check_uni();
5247                 OPERATOR('+');
5248             }
5249         }
5250
5251     case '*':
5252         if (PL_expect != XOPERATOR) {
5253             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5254             PL_expect = XOPERATOR;
5255             force_ident(PL_tokenbuf, '*');
5256             if (!*PL_tokenbuf)
5257                 PREREF('*');
5258             TERM('*');
5259         }
5260         s++;
5261         if (*s == '*') {
5262             s++;
5263             if (*s == '=' && !PL_lex_allbrackets &&
5264                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5265                 s -= 2;
5266                 TOKEN(0);
5267             }
5268             PWop(OP_POW);
5269         }
5270         if (*s == '=' && !PL_lex_allbrackets &&
5271                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5272             s--;
5273             TOKEN(0);
5274         }
5275         Mop(OP_MULTIPLY);
5276
5277     case '%':
5278         if (PL_expect == XOPERATOR) {
5279             if (s[1] == '=' && !PL_lex_allbrackets &&
5280                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5281                 TOKEN(0);
5282             ++s;
5283             Mop(OP_MODULO);
5284         }
5285         PL_tokenbuf[0] = '%';
5286         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5287                 sizeof PL_tokenbuf - 1, FALSE);
5288         if (!PL_tokenbuf[1]) {
5289             PREREF('%');
5290         }
5291         PL_pending_ident = '%';
5292         TERM('%');
5293
5294     case '^':
5295         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5296                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5297             TOKEN(0);
5298         s++;
5299         BOop(OP_BIT_XOR);
5300     case '[':
5301         if (PL_lex_brackets > 100)
5302             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5303         PL_lex_brackstack[PL_lex_brackets++] = 0;
5304         PL_lex_allbrackets++;
5305         {
5306             const char tmp = *s++;
5307             OPERATOR(tmp);
5308         }
5309     case '~':
5310         if (s[1] == '~'
5311             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5312         {
5313             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5314                 TOKEN(0);
5315             s += 2;
5316             Eop(OP_SMARTMATCH);
5317         }
5318         s++;
5319         OPERATOR('~');
5320     case ',':
5321         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5322             TOKEN(0);
5323         s++;
5324         OPERATOR(',');
5325     case ':':
5326         if (s[1] == ':') {
5327             len = 0;
5328             goto just_a_word_zero_gv;
5329         }
5330         s++;
5331         switch (PL_expect) {
5332             OP *attrs;
5333 #ifdef PERL_MAD
5334             I32 stuffstart;
5335 #endif
5336         case XOPERATOR:
5337             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5338                 break;
5339             PL_bufptr = s;      /* update in case we back off */
5340             if (*s == '=') {
5341                 Perl_croak(aTHX_
5342                            "Use of := for an empty attribute list is not allowed");
5343             }
5344             goto grabattrs;
5345         case XATTRBLOCK:
5346             PL_expect = XBLOCK;
5347             goto grabattrs;
5348         case XATTRTERM:
5349             PL_expect = XTERMBLOCK;
5350          grabattrs:
5351 #ifdef PERL_MAD
5352             stuffstart = s - SvPVX(PL_linestr) - 1;
5353 #endif
5354             s = PEEKSPACE(s);
5355             attrs = NULL;
5356             while (isIDFIRST_lazy_if(s,UTF)) {
5357                 I32 tmp;
5358                 SV *sv;
5359                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5360                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5361                     if (tmp < 0) tmp = -tmp;
5362                     switch (tmp) {
5363                     case KEY_or:
5364                     case KEY_and:
5365                     case KEY_for:
5366                     case KEY_foreach:
5367                     case KEY_unless:
5368                     case KEY_if:
5369                     case KEY_while:
5370                     case KEY_until:
5371                         goto got_attrs;
5372                     default:
5373                         break;
5374                     }
5375                 }
5376                 sv = newSVpvn(s, len);
5377                 if (*d == '(') {
5378                     d = scan_str(d,TRUE,TRUE);
5379                     if (!d) {
5380                         /* MUST advance bufptr here to avoid bogus
5381                            "at end of line" context messages from yyerror().
5382                          */
5383                         PL_bufptr = s + len;
5384                         yyerror("Unterminated attribute parameter in attribute list");
5385                         if (attrs)
5386                             op_free(attrs);
5387                         sv_free(sv);
5388                         return REPORT(0);       /* EOF indicator */
5389                     }
5390                 }
5391                 if (PL_lex_stuff) {
5392                     sv_catsv(sv, PL_lex_stuff);
5393                     attrs = op_append_elem(OP_LIST, attrs,
5394                                         newSVOP(OP_CONST, 0, sv));
5395                     SvREFCNT_dec(PL_lex_stuff);
5396                     PL_lex_stuff = NULL;
5397                 }
5398                 else {
5399                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5400                         sv_free(sv);
5401                         if (PL_in_my == KEY_our) {
5402                             deprecate(":unique");
5403                         }
5404                         else
5405                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5406                     }
5407
5408                     /* NOTE: any CV attrs applied here need to be part of
5409                        the CVf_BUILTIN_ATTRS define in cv.h! */
5410                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5411                         sv_free(sv);
5412                         CvLVALUE_on(PL_compcv);
5413                     }
5414                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5415                         sv_free(sv);
5416                         deprecate(":locked");
5417                     }
5418                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5419                         sv_free(sv);
5420                         CvMETHOD_on(PL_compcv);
5421                     }
5422                     /* After we've set the flags, it could be argued that
5423                        we don't need to do the attributes.pm-based setting
5424                        process, and shouldn't bother appending recognized
5425                        flags.  To experiment with that, uncomment the
5426                        following "else".  (Note that's already been
5427                        uncommented.  That keeps the above-applied built-in
5428                        attributes from being intercepted (and possibly
5429                        rejected) by a package's attribute routines, but is
5430                        justified by the performance win for the common case
5431                        of applying only built-in attributes.) */
5432                     else
5433                         attrs = op_append_elem(OP_LIST, attrs,
5434                                             newSVOP(OP_CONST, 0,
5435                                                     sv));
5436                 }
5437                 s = PEEKSPACE(d);
5438                 if (*s == ':' && s[1] != ':')
5439                     s = PEEKSPACE(s+1);
5440                 else if (s == d)
5441                     break;      /* require real whitespace or :'s */
5442                 /* XXX losing whitespace on sequential attributes here */
5443             }
5444             {
5445                 const char tmp
5446                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5447                 if (*s != ';' && *s != '}' && *s != tmp
5448                     && (tmp != '=' || *s != ')')) {
5449                     const char q = ((*s == '\'') ? '"' : '\'');
5450                     /* If here for an expression, and parsed no attrs, back
5451                        off. */
5452                     if (tmp == '=' && !attrs) {
5453                         s = PL_bufptr;
5454                         break;
5455                     }
5456                     /* MUST advance bufptr here to avoid bogus "at end of line"
5457                        context messages from yyerror().
5458                     */
5459                     PL_bufptr = s;
5460                     yyerror( (const char *)
5461                              (*s
5462                               ? Perl_form(aTHX_ "Invalid separator character "
5463                                           "%c%c%c in attribute list", q, *s, q)
5464                               : "Unterminated attribute list" ) );
5465                     if (attrs)
5466                         op_free(attrs);
5467                     OPERATOR(':');
5468                 }
5469             }
5470         got_attrs:
5471             if (attrs) {
5472                 start_force(PL_curforce);
5473                 NEXTVAL_NEXTTOKE.opval = attrs;
5474                 CURMAD('_', PL_nextwhite);
5475                 force_next(THING);
5476             }
5477 #ifdef PERL_MAD
5478             if (PL_madskills) {
5479                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5480                                      (s - SvPVX(PL_linestr)) - stuffstart);
5481             }
5482 #endif
5483             TOKEN(COLONATTR);
5484         }
5485         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5486             s--;
5487             TOKEN(0);
5488         }
5489         PL_lex_allbrackets--;
5490         OPERATOR(':');
5491     case '(':
5492         s++;
5493         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5494             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5495         else
5496             PL_expect = XTERM;
5497         s = SKIPSPACE1(s);
5498         PL_lex_allbrackets++;
5499         TOKEN('(');
5500     case ';':
5501         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5502             TOKEN(0);
5503         CLINE;
5504         s++;
5505         OPERATOR(';');
5506     case ')':
5507         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5508             TOKEN(0);
5509         s++;
5510         PL_lex_allbrackets--;
5511         s = SKIPSPACE1(s);
5512         if (*s == '{')
5513             PREBLOCK(')');
5514         TERM(')');
5515     case ']':
5516         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5517             TOKEN(0);
5518         s++;
5519         if (PL_lex_brackets <= 0)
5520             yyerror("Unmatched right square bracket");
5521         else
5522             --PL_lex_brackets;
5523         PL_lex_allbrackets--;
5524         if (PL_lex_state == LEX_INTERPNORMAL) {
5525             if (PL_lex_brackets == 0) {
5526                 if (*s == '-' && s[1] == '>')
5527                     PL_lex_state = LEX_INTERPENDMAYBE;
5528                 else if (*s != '[' && *s != '{')
5529                     PL_lex_state = LEX_INTERPEND;
5530             }
5531         }
5532         TERM(']');
5533     case '{':
5534       leftbracket:
5535         s++;
5536         if (PL_lex_brackets > 100) {
5537             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5538         }
5539         switch (PL_expect) {
5540         case XTERM:
5541             if (PL_lex_formbrack) {
5542                 s--;
5543                 PRETERMBLOCK(DO);
5544             }
5545             if (PL_oldoldbufptr == PL_last_lop)
5546                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5547             else
5548                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5549             PL_lex_allbrackets++;
5550             OPERATOR(HASHBRACK);
5551         case XOPERATOR:
5552             while (s < PL_bufend && SPACE_OR_TAB(*s))
5553                 s++;
5554             d = s;
5555             PL_tokenbuf[0] = '\0';
5556             if (d < PL_bufend && *d == '-') {
5557                 PL_tokenbuf[0] = '-';
5558                 d++;
5559                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5560                     d++;
5561             }
5562             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5563                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5564                               FALSE, &len);
5565                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5566                     d++;
5567                 if (*d == '}') {
5568                     const char minus = (PL_tokenbuf[0] == '-');
5569                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5570                     if (minus)
5571                         force_next('-');
5572                 }
5573             }
5574             /* FALL THROUGH */
5575         case XATTRBLOCK:
5576         case XBLOCK:
5577             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5578             PL_lex_allbrackets++;
5579             PL_expect = XSTATE;
5580             break;
5581         case XATTRTERM:
5582         case XTERMBLOCK:
5583             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5584             PL_lex_allbrackets++;
5585             PL_expect = XSTATE;
5586             break;
5587         default: {
5588                 const char *t;
5589                 if (PL_oldoldbufptr == PL_last_lop)
5590                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5591                 else
5592                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5593                 PL_lex_allbrackets++;
5594                 s = SKIPSPACE1(s);
5595                 if (*s == '}') {
5596                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5597                         PL_expect = XTERM;
5598                         /* This hack is to get the ${} in the message. */
5599                         PL_bufptr = s+1;
5600                         yyerror("syntax error");
5601                         break;
5602                     }
5603                     OPERATOR(HASHBRACK);
5604                 }
5605                 /* This hack serves to disambiguate a pair of curlies
5606                  * as being a block or an anon hash.  Normally, expectation
5607                  * determines that, but in cases where we're not in a
5608                  * position to expect anything in particular (like inside
5609                  * eval"") we have to resolve the ambiguity.  This code
5610                  * covers the case where the first term in the curlies is a
5611                  * quoted string.  Most other cases need to be explicitly
5612                  * disambiguated by prepending a "+" before the opening
5613                  * curly in order to force resolution as an anon hash.
5614                  *
5615                  * XXX should probably propagate the outer expectation
5616                  * into eval"" to rely less on this hack, but that could
5617                  * potentially break current behavior of eval"".
5618                  * GSAR 97-07-21
5619                  */
5620                 t = s;
5621                 if (*s == '\'' || *s == '"' || *s == '`') {
5622                     /* common case: get past first string, handling escapes */
5623                     for (t++; t < PL_bufend && *t != *s;)
5624                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5625                             t++;
5626                     t++;
5627                 }
5628                 else if (*s == 'q') {
5629                     if (++t < PL_bufend
5630                         && (!isALNUM(*t)
5631                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5632                                 && !isALNUM(*t))))
5633                     {
5634                         /* skip q//-like construct */
5635                         const char *tmps;
5636                         char open, close, term;
5637                         I32 brackets = 1;
5638
5639                         while (t < PL_bufend && isSPACE(*t))
5640                             t++;
5641                         /* check for q => */
5642                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5643                             OPERATOR(HASHBRACK);
5644                         }
5645                         term = *t;
5646                         open = term;
5647                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5648                             term = tmps[5];
5649                         close = term;
5650                         if (open == close)
5651                             for (t++; t < PL_bufend; t++) {
5652                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5653                                     t++;
5654                                 else if (*t == open)
5655                                     break;
5656                             }
5657                         else {
5658                             for (t++; t < PL_bufend; t++) {
5659                                 if (*t == '\\' && t+1 < PL_bufend)
5660                                     t++;
5661                                 else if (*t == close && --brackets <= 0)
5662                                     break;
5663                                 else if (*t == open)
5664                                     brackets++;
5665                             }
5666                         }
5667                         t++;
5668                     }
5669                     else
5670                         /* skip plain q word */
5671                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5672                              t += UTF8SKIP(t);
5673                 }
5674                 else if (isALNUM_lazy_if(t,UTF)) {
5675                     t += UTF8SKIP(t);
5676                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5677                          t += UTF8SKIP(t);
5678                 }
5679                 while (t < PL_bufend && isSPACE(*t))
5680                     t++;
5681                 /* if comma follows first term, call it an anon hash */
5682                 /* XXX it could be a comma expression with loop modifiers */
5683                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5684                                    || (*t == '=' && t[1] == '>')))
5685                     OPERATOR(HASHBRACK);
5686                 if (PL_expect == XREF)
5687                     PL_expect = XTERM;
5688                 else {
5689                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5690                     PL_expect = XSTATE;
5691                 }
5692             }
5693             break;
5694         }
5695         pl_yylval.ival = CopLINE(PL_curcop);
5696         if (isSPACE(*s) || *s == '#')
5697             PL_copline = NOLINE;   /* invalidate current command line number */
5698         TOKEN('{');
5699     case '}':
5700         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5701             TOKEN(0);
5702       rightbracket:
5703         s++;
5704         if (PL_lex_brackets <= 0)
5705             yyerror("Unmatched right curly bracket");
5706         else
5707             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5708         PL_lex_allbrackets--;
5709         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5710             PL_lex_formbrack = 0;
5711         if (PL_lex_state == LEX_INTERPNORMAL) {
5712             if (PL_lex_brackets == 0) {
5713                 if (PL_expect & XFAKEBRACK) {
5714                     PL_expect &= XENUMMASK;
5715                     PL_lex_state = LEX_INTERPEND;
5716                     PL_bufptr = s;
5717 #if 0
5718                     if (PL_madskills) {
5719                         if (!PL_thiswhite)
5720                             PL_thiswhite = newSVpvs("");
5721                         sv_catpvs(PL_thiswhite,"}");
5722                     }
5723 #endif
5724                     return yylex();     /* ignore fake brackets */
5725                 }
5726                 if (*s == '-' && s[1] == '>')
5727                     PL_lex_state = LEX_INTERPENDMAYBE;
5728                 else if (*s != '[' && *s != '{')
5729                     PL_lex_state = LEX_INTERPEND;
5730             }
5731         }
5732         if (PL_expect & XFAKEBRACK) {
5733             PL_expect &= XENUMMASK;
5734             PL_bufptr = s;
5735             return yylex();             /* ignore fake brackets */
5736         }
5737         start_force(PL_curforce);
5738         if (PL_madskills) {
5739             curmad('X', newSVpvn(s-1,1));
5740             CURMAD('_', PL_thiswhite);
5741         }
5742         force_next('}');
5743 #ifdef PERL_MAD
5744         if (!PL_thistoken)
5745             PL_thistoken = newSVpvs("");
5746 #endif
5747         TOKEN(';');
5748     case '&':
5749         s++;
5750         if (*s++ == '&') {
5751             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5752                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5753                 s -= 2;
5754                 TOKEN(0);
5755             }
5756             AOPERATOR(ANDAND);
5757         }
5758         s--;
5759         if (PL_expect == XOPERATOR) {
5760             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5761                 && isIDFIRST_lazy_if(s,UTF))
5762             {
5763                 CopLINE_dec(PL_curcop);
5764                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5765                 CopLINE_inc(PL_curcop);
5766             }
5767             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5768                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5769                 s--;
5770                 TOKEN(0);
5771             }
5772             BAop(OP_BIT_AND);
5773         }
5774
5775         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5776         if (*PL_tokenbuf) {
5777             PL_expect = XOPERATOR;
5778             force_ident(PL_tokenbuf, '&');
5779         }
5780         else
5781             PREREF('&');
5782         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5783         TERM('&');
5784
5785     case '|':
5786         s++;
5787         if (*s++ == '|') {
5788             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5789                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5790                 s -= 2;
5791                 TOKEN(0);
5792             }
5793             AOPERATOR(OROR);
5794         }
5795         s--;
5796         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5797                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5798             s--;
5799             TOKEN(0);
5800         }
5801         BOop(OP_BIT_OR);
5802     case '=':
5803         s++;
5804         {
5805             const char tmp = *s++;
5806             if (tmp == '=') {
5807                 if (!PL_lex_allbrackets &&
5808                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5809                     s -= 2;
5810                     TOKEN(0);
5811                 }
5812                 Eop(OP_EQ);
5813             }
5814             if (tmp == '>') {
5815                 if (!PL_lex_allbrackets &&
5816                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5817                     s -= 2;
5818                     TOKEN(0);
5819                 }
5820                 OPERATOR(',');
5821             }
5822             if (tmp == '~')
5823                 PMop(OP_MATCH);
5824             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5825                 && strchr("+-*/%.^&|<",tmp))
5826                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5827                             "Reversed %c= operator",(int)tmp);
5828             s--;
5829             if (PL_expect == XSTATE && isALPHA(tmp) &&
5830                 (s == PL_linestart+1 || s[-2] == '\n') )
5831                 {
5832                     if (PL_in_eval && !PL_rsfp) {
5833                         d = PL_bufend;
5834                         while (s < d) {
5835                             if (*s++ == '\n') {
5836                                 incline(s);
5837                                 if (strnEQ(s,"=cut",4)) {
5838                                     s = strchr(s,'\n');
5839                                     if (s)
5840                                         s++;
5841                                     else
5842                                         s = d;
5843                                     incline(s);
5844                                     goto retry;
5845                                 }
5846                             }
5847                         }
5848                         goto retry;
5849                     }
5850 #ifdef PERL_MAD
5851                     if (PL_madskills) {
5852                         if (!PL_thiswhite)
5853                             PL_thiswhite = newSVpvs("");
5854                         sv_catpvn(PL_thiswhite, PL_linestart,
5855                                   PL_bufend - PL_linestart);
5856                     }
5857 #endif
5858                     s = PL_bufend;
5859                     PL_parser->in_pod = 1;
5860                     goto retry;
5861                 }
5862         }
5863         if (PL_lex_brackets < PL_lex_formbrack) {
5864             const char *t = s;
5865 #ifdef PERL_STRICT_CR
5866             while (SPACE_OR_TAB(*t))
5867 #else
5868             while (SPACE_OR_TAB(*t) || *t == '\r')
5869 #endif
5870                 t++;
5871             if (*t == '\n' || *t == '#') {
5872                 s--;
5873                 PL_expect = XBLOCK;
5874                 goto leftbracket;
5875             }
5876         }
5877         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5878             s--;
5879             TOKEN(0);
5880         }
5881         pl_yylval.ival = 0;
5882         OPERATOR(ASSIGNOP);
5883     case '!':
5884         s++;
5885         {
5886             const char tmp = *s++;
5887             if (tmp == '=') {
5888                 /* was this !=~ where !~ was meant?
5889                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5890
5891                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5892                     const char *t = s+1;
5893
5894                     while (t < PL_bufend && isSPACE(*t))
5895                         ++t;
5896
5897                     if (*t == '/' || *t == '?' ||
5898                         ((*t == 'm' || *t == 's' || *t == 'y')
5899                          && !isALNUM(t[1])) ||
5900                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5901                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5902                                     "!=~ should be !~");
5903                 }
5904                 if (!PL_lex_allbrackets &&
5905                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5906                     s -= 2;
5907                     TOKEN(0);
5908                 }
5909                 Eop(OP_NE);
5910             }
5911             if (tmp == '~')
5912                 PMop(OP_NOT);
5913         }
5914         s--;
5915         OPERATOR('!');
5916     case '<':
5917         if (PL_expect != XOPERATOR) {
5918             if (s[1] != '<' && !strchr(s,'>'))
5919                 check_uni();
5920             if (s[1] == '<')
5921                 s = scan_heredoc(s);
5922             else
5923                 s = scan_inputsymbol(s);
5924             TERM(sublex_start());
5925         }
5926         s++;
5927         {
5928             char tmp = *s++;
5929             if (tmp == '<') {
5930                 if (*s == '=' && !PL_lex_allbrackets &&
5931                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5932                     s -= 2;
5933                     TOKEN(0);
5934                 }
5935                 SHop(OP_LEFT_SHIFT);
5936             }
5937             if (tmp == '=') {
5938                 tmp = *s++;
5939                 if (tmp == '>') {
5940                     if (!PL_lex_allbrackets &&
5941                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5942                         s -= 3;
5943                         TOKEN(0);
5944                     }
5945                     Eop(OP_NCMP);
5946                 }
5947                 s--;
5948                 if (!PL_lex_allbrackets &&
5949                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5950                     s -= 2;
5951                     TOKEN(0);
5952                 }
5953                 Rop(OP_LE);
5954             }
5955         }
5956         s--;
5957         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5958             s--;
5959             TOKEN(0);
5960         }
5961         Rop(OP_LT);
5962     case '>':
5963         s++;
5964         {
5965             const char tmp = *s++;
5966             if (tmp == '>') {
5967                 if (*s == '=' && !PL_lex_allbrackets &&
5968                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5969                     s -= 2;
5970                     TOKEN(0);
5971                 }
5972                 SHop(OP_RIGHT_SHIFT);
5973             }
5974             else if (tmp == '=') {
5975                 if (!PL_lex_allbrackets &&
5976                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5977                     s -= 2;
5978                     TOKEN(0);
5979                 }
5980                 Rop(OP_GE);
5981             }
5982         }
5983         s--;
5984         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5985             s--;
5986             TOKEN(0);
5987         }
5988         Rop(OP_GT);
5989
5990     case '$':
5991         CLINE;
5992
5993         if (PL_expect == XOPERATOR) {
5994             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5995                 return deprecate_commaless_var_list();
5996             }
5997         }
5998
5999         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6000             PL_tokenbuf[0] = '@';
6001             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6002                            sizeof PL_tokenbuf - 1, FALSE);
6003             if (PL_expect == XOPERATOR)
6004                 no_op("Array length", s);
6005             if (!PL_tokenbuf[1])
6006                 PREREF(DOLSHARP);
6007             PL_expect = XOPERATOR;
6008             PL_pending_ident = '#';
6009             TOKEN(DOLSHARP);
6010         }
6011
6012         PL_tokenbuf[0] = '$';
6013         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6014                        sizeof PL_tokenbuf - 1, FALSE);
6015         if (PL_expect == XOPERATOR)
6016             no_op("Scalar", s);
6017         if (!PL_tokenbuf[1]) {
6018             if (s == PL_bufend)
6019                 yyerror("Final $ should be \\$ or $name");
6020             PREREF('$');
6021         }
6022
6023         /* This kludge not intended to be bulletproof. */
6024         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6025             pl_yylval.opval = newSVOP(OP_CONST, 0,
6026                                    newSViv(CopARYBASE_get(&PL_compiling)));
6027             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
6028             TERM(THING);
6029         }
6030
6031         d = s;
6032         {
6033             const char tmp = *s;
6034             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6035                 s = SKIPSPACE1(s);
6036
6037             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6038                 && intuit_more(s)) {
6039                 if (*s == '[') {
6040                     PL_tokenbuf[0] = '@';
6041                     if (ckWARN(WARN_SYNTAX)) {
6042                         char *t = s+1;
6043
6044                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6045                             t++;
6046                         if (*t++ == ',') {
6047                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6048                             while (t < PL_bufend && *t != ']')
6049                                 t++;
6050                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6051                                         "Multidimensional syntax %.*s not supported",
6052                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6053                         }
6054                     }
6055                 }
6056                 else if (*s == '{') {
6057                     char *t;
6058                     PL_tokenbuf[0] = '%';
6059                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6060                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6061                         {
6062                             char tmpbuf[sizeof PL_tokenbuf];
6063                             do {
6064                                 t++;
6065                             } while (isSPACE(*t));
6066                             if (isIDFIRST_lazy_if(t,UTF)) {
6067                                 STRLEN len;
6068                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6069                                               &len);
6070                                 while (isSPACE(*t))
6071                                     t++;
6072                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6073                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6074                                                 "You need to quote \"%s\"",
6075                                                 tmpbuf);
6076                             }
6077                         }
6078                 }
6079             }
6080
6081             PL_expect = XOPERATOR;
6082             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6083                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6084                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6085                     PL_expect = XOPERATOR;
6086                 else if (strchr("$@\"'`q", *s))
6087                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6088                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6089                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6090                 else if (isIDFIRST_lazy_if(s,UTF)) {
6091                     char tmpbuf[sizeof PL_tokenbuf];
6092                     int t2;
6093                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6094                     if ((t2 = keyword(tmpbuf, len, 0))) {
6095                         /* binary operators exclude handle interpretations */
6096                         switch (t2) {
6097                         case -KEY_x:
6098                         case -KEY_eq:
6099                         case -KEY_ne:
6100                         case -KEY_gt:
6101                         case -KEY_lt:
6102                         case -KEY_ge:
6103                         case -KEY_le:
6104                         case -KEY_cmp:
6105                             break;
6106                         default:
6107                             PL_expect = XTERM;  /* e.g. print $fh length() */
6108                             break;
6109                         }
6110                     }
6111                     else {
6112                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6113                     }
6114                 }
6115                 else if (isDIGIT(*s))
6116                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6117                 else if (*s == '.' && isDIGIT(s[1]))
6118                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6119                 else if ((*s == '?' || *s == '-' || *s == '+')
6120                          && !isSPACE(s[1]) && s[1] != '=')
6121                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6122                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6123                          && s[1] != '/')
6124                     PL_expect = XTERM;          /* e.g. print $fh /.../
6125                                                    XXX except DORDOR operator
6126                                                 */
6127                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6128                          && s[2] != '=')
6129                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6130             }
6131         }
6132         PL_pending_ident = '$';
6133         TOKEN('$');
6134
6135     case '@':
6136         if (PL_expect == XOPERATOR)
6137             no_op("Array", s);
6138         PL_tokenbuf[0] = '@';
6139         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6140         if (!PL_tokenbuf[1]) {
6141             PREREF('@');
6142         }
6143         if (PL_lex_state == LEX_NORMAL)
6144             s = SKIPSPACE1(s);
6145         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6146             if (*s == '{')
6147                 PL_tokenbuf[0] = '%';
6148
6149             /* Warn about @ where they meant $. */
6150             if (*s == '[' || *s == '{') {
6151                 if (ckWARN(WARN_SYNTAX)) {
6152                     const char *t = s + 1;
6153                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6154                         t++;
6155                     if (*t == '}' || *t == ']') {
6156                         t++;
6157                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6158                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6159                             "Scalar value %.*s better written as $%.*s",
6160                             (int)(t-PL_bufptr), PL_bufptr,
6161                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6162                     }
6163                 }
6164             }
6165         }
6166         PL_pending_ident = '@';
6167         TERM('@');
6168
6169      case '/':                  /* may be division, defined-or, or pattern */
6170         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6171             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6172                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6173                 TOKEN(0);
6174             s += 2;
6175             AOPERATOR(DORDOR);
6176         }
6177      case '?':                  /* may either be conditional or pattern */
6178         if (PL_expect == XOPERATOR) {
6179              char tmp = *s++;
6180              if(tmp == '?') {
6181                 if (!PL_lex_allbrackets &&
6182                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6183                     s--;
6184                     TOKEN(0);
6185                 }
6186                 PL_lex_allbrackets++;
6187                 OPERATOR('?');
6188              }
6189              else {
6190                  tmp = *s++;
6191                  if(tmp == '/') {
6192                      /* A // operator. */
6193                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6194                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6195                                             LEX_FAKEEOF_LOGIC)) {
6196                         s -= 2;
6197                         TOKEN(0);
6198                     }
6199                     AOPERATOR(DORDOR);
6200                  }
6201                  else {
6202                      s--;
6203                      if (*s == '=' && !PL_lex_allbrackets &&
6204                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6205                          s--;
6206                          TOKEN(0);
6207                      }
6208                      Mop(OP_DIVIDE);
6209                  }
6210              }
6211          }
6212          else {
6213              /* Disable warning on "study /blah/" */
6214              if (PL_oldoldbufptr == PL_last_uni
6215               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6216                   || memNE(PL_last_uni, "study", 5)
6217                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6218               ))
6219                  check_uni();
6220              if (*s == '?')
6221                  deprecate("?PATTERN? without explicit operator");
6222              s = scan_pat(s,OP_MATCH);
6223              TERM(sublex_start());
6224          }
6225
6226     case '.':
6227         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6228 #ifdef PERL_STRICT_CR
6229             && s[1] == '\n'
6230 #else
6231             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6232 #endif
6233             && (s == PL_linestart || s[-1] == '\n') )
6234         {
6235             PL_lex_formbrack = 0;
6236             PL_expect = XSTATE;
6237             goto rightbracket;
6238         }
6239         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6240             s += 3;
6241             OPERATOR(YADAYADA);
6242         }
6243         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6244             char tmp = *s++;
6245             if (*s == tmp) {
6246                 if (!PL_lex_allbrackets &&
6247                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6248                     s--;
6249                     TOKEN(0);
6250                 }
6251                 s++;
6252                 if (*s == tmp) {
6253                     s++;
6254                     pl_yylval.ival = OPf_SPECIAL;
6255                 }
6256                 else
6257                     pl_yylval.ival = 0;
6258                 OPERATOR(DOTDOT);
6259             }
6260             if (*s == '=' && !PL_lex_allbrackets &&
6261                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6262                 s--;
6263                 TOKEN(0);
6264             }
6265             Aop(OP_CONCAT);
6266         }
6267         /* FALL THROUGH */
6268     case '0': case '1': case '2': case '3': case '4':
6269     case '5': case '6': case '7': case '8': case '9':
6270         s = scan_num(s, &pl_yylval);
6271         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6272         if (PL_expect == XOPERATOR)
6273             no_op("Number",s);
6274         TERM(THING);
6275
6276     case '\'':
6277         s = scan_str(s,!!PL_madskills,FALSE);
6278         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6279         if (PL_expect == XOPERATOR) {
6280             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6281                 return deprecate_commaless_var_list();
6282             }
6283             else
6284                 no_op("String",s);
6285         }
6286         if (!s)
6287             missingterm(NULL);
6288         pl_yylval.ival = OP_CONST;
6289         TERM(sublex_start());
6290
6291     case '"':
6292         s = scan_str(s,!!PL_madskills,FALSE);
6293         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6294         if (PL_expect == XOPERATOR) {
6295             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6296                 return deprecate_commaless_var_list();
6297             }
6298             else
6299                 no_op("String",s);
6300         }
6301         if (!s)
6302             missingterm(NULL);
6303         pl_yylval.ival = OP_CONST;
6304         /* FIXME. I think that this can be const if char *d is replaced by
6305            more localised variables.  */
6306         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6307             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6308                 pl_yylval.ival = OP_STRINGIFY;
6309                 break;
6310             }
6311         }
6312         TERM(sublex_start());
6313
6314     case '`':
6315         s = scan_str(s,!!PL_madskills,FALSE);
6316         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6317         if (PL_expect == XOPERATOR)
6318             no_op("Backticks",s);
6319         if (!s)
6320             missingterm(NULL);
6321         readpipe_override();
6322         TERM(sublex_start());
6323
6324     case '\\':
6325         s++;
6326         if (PL_lex_inwhat && isDIGIT(*s))
6327             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6328                            *s, *s);
6329         if (PL_expect == XOPERATOR)
6330             no_op("Backslash",s);
6331         OPERATOR(REFGEN);
6332
6333     case 'v':
6334         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6335             char *start = s + 2;
6336             while (isDIGIT(*start) || *start == '_')
6337                 start++;
6338             if (*start == '.' && isDIGIT(start[1])) {
6339                 s = scan_num(s, &pl_yylval);
6340                 TERM(THING);
6341             }
6342             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6343             else if (!isALPHA(*start) && (PL_expect == XTERM
6344                         || PL_expect == XREF || PL_expect == XSTATE
6345                         || PL_expect == XTERMORDORDOR)) {
6346                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6347                 if (!gv) {
6348                     s = scan_num(s, &pl_yylval);
6349                     TERM(THING);
6350                 }
6351             }
6352         }
6353         goto keylookup;
6354     case 'x':
6355         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6356             s++;
6357             Mop(OP_REPEAT);
6358         }
6359         goto keylookup;
6360
6361     case '_':
6362     case 'a': case 'A':
6363     case 'b': case 'B':
6364     case 'c': case 'C':
6365     case 'd': case 'D':
6366     case 'e': case 'E':
6367     case 'f': case 'F':
6368     case 'g': case 'G':
6369     case 'h': case 'H':
6370     case 'i': case 'I':
6371     case 'j': case 'J':
6372     case 'k': case 'K':
6373     case 'l': case 'L':
6374     case 'm': case 'M':
6375     case 'n': case 'N':
6376     case 'o': case 'O':
6377     case 'p': case 'P':
6378     case 'q': case 'Q':
6379     case 'r': case 'R':
6380     case 's': case 'S':
6381     case 't': case 'T':
6382     case 'u': case 'U':
6383               case 'V':
6384     case 'w': case 'W':
6385               case 'X':
6386     case 'y': case 'Y':
6387     case 'z': case 'Z':
6388
6389       keylookup: {
6390         bool anydelim;
6391         I32 tmp;
6392
6393         orig_keyword = 0;
6394         gv = NULL;
6395         gvp = NULL;
6396
6397         PL_bufptr = s;
6398         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6399
6400         /* Some keywords can be followed by any delimiter, including ':' */
6401         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6402
6403         /* x::* is just a word, unless x is "CORE" */
6404         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6405             goto just_a_word;
6406
6407         d = s;
6408         while (d < PL_bufend && isSPACE(*d))
6409                 d++;    /* no comments skipped here, or s### is misparsed */
6410
6411         /* Is this a word before a => operator? */
6412         if (*d == '=' && d[1] == '>') {
6413             CLINE;
6414             pl_yylval.opval
6415                 = (OP*)newSVOP(OP_CONST, 0,
6416                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6417             pl_yylval.opval->op_private = OPpCONST_BARE;
6418             TERM(WORD);
6419         }
6420
6421         /* Check for plugged-in keyword */
6422         {
6423             OP *o;
6424             int result;
6425             char *saved_bufptr = PL_bufptr;
6426             PL_bufptr = s;
6427             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6428             s = PL_bufptr;
6429             if (result == KEYWORD_PLUGIN_DECLINE) {
6430                 /* not a plugged-in keyword */
6431                 PL_bufptr = saved_bufptr;
6432             } else if (result == KEYWORD_PLUGIN_STMT) {
6433                 pl_yylval.opval = o;
6434                 CLINE;
6435                 PL_expect = XSTATE;
6436                 return REPORT(PLUGSTMT);
6437             } else if (result == KEYWORD_PLUGIN_EXPR) {
6438                 pl_yylval.opval = o;
6439                 CLINE;
6440                 PL_expect = XOPERATOR;
6441                 return REPORT(PLUGEXPR);
6442             } else {
6443                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6444                                         PL_tokenbuf);
6445             }
6446         }
6447
6448         /* Check for built-in keyword */
6449         tmp = keyword(PL_tokenbuf, len, 0);
6450
6451         /* Is this a label? */
6452         if (!anydelim && PL_expect == XSTATE
6453               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6454             s = d + 1;
6455             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6456             CLINE;
6457             TOKEN(LABEL);
6458         }
6459
6460         if (tmp < 0) {                  /* second-class keyword? */
6461             GV *ogv = NULL;     /* override (winner) */
6462             GV *hgv = NULL;     /* hidden (loser) */
6463             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6464                 CV *cv;
6465                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6466                     (cv = GvCVu(gv)))
6467                 {
6468                     if (GvIMPORTED_CV(gv))
6469                         ogv = gv;
6470                     else if (! CvMETHOD(cv))
6471                         hgv = gv;
6472                 }
6473                 if (!ogv &&
6474                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6475                     (gv = *gvp) && isGV_with_GP(gv) &&
6476                     GvCVu(gv) && GvIMPORTED_CV(gv))
6477                 {
6478                     ogv = gv;
6479                 }
6480             }
6481             if (ogv) {
6482                 orig_keyword = tmp;
6483                 tmp = 0;                /* overridden by import or by GLOBAL */
6484             }
6485             else if (gv && !gvp
6486                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6487                      && GvCVu(gv))
6488             {
6489                 tmp = 0;                /* any sub overrides "weak" keyword */
6490             }
6491             else {                      /* no override */
6492                 tmp = -tmp;
6493                 if (tmp == KEY_dump) {
6494                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6495                                    "dump() better written as CORE::dump()");
6496                 }
6497                 gv = NULL;
6498                 gvp = 0;
6499                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6500                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6501                                    "Ambiguous call resolved as CORE::%s(), "
6502                                    "qualify as such or use &",
6503                                    GvENAME(hgv));
6504             }
6505         }
6506
6507       reserved_word:
6508         switch (tmp) {
6509
6510         default:                        /* not a keyword */
6511             /* Trade off - by using this evil construction we can pull the
6512                variable gv into the block labelled keylookup. If not, then
6513                we have to give it function scope so that the goto from the
6514                earlier ':' case doesn't bypass the initialisation.  */
6515             if (0) {
6516             just_a_word_zero_gv:
6517                 gv = NULL;
6518                 gvp = NULL;
6519                 orig_keyword = 0;
6520             }
6521           just_a_word: {
6522                 SV *sv;
6523                 int pkgname = 0;
6524                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6525                 OP *rv2cv_op;
6526                 CV *cv;
6527 #ifdef PERL_MAD
6528                 SV *nextPL_nextwhite = 0;
6529 #endif
6530
6531
6532                 /* Get the rest if it looks like a package qualifier */
6533
6534                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6535                     STRLEN morelen;
6536                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6537                                   TRUE, &morelen);
6538                     if (!morelen)
6539                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6540                                 *s == '\'' ? "'" : "::");
6541                     len += morelen;
6542                     pkgname = 1;
6543                 }
6544
6545                 if (PL_expect == XOPERATOR) {
6546                     if (PL_bufptr == PL_linestart) {
6547                         CopLINE_dec(PL_curcop);
6548                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6549                         CopLINE_inc(PL_curcop);
6550                     }
6551                     else
6552                         no_op("Bareword",s);
6553                 }
6554
6555                 /* Look for a subroutine with this name in current package,
6556                    unless name is "Foo::", in which case Foo is a bareword
6557                    (and a package name). */
6558
6559                 if (len > 2 && !PL_madskills &&
6560                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6561                 {
6562                     if (ckWARN(WARN_BAREWORD)
6563                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6564                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6565                             "Bareword \"%s\" refers to nonexistent package",
6566                              PL_tokenbuf);
6567                     len -= 2;
6568                     PL_tokenbuf[len] = '\0';
6569                     gv = NULL;
6570                     gvp = 0;
6571                 }
6572                 else {
6573                     if (!gv) {
6574                         /* Mustn't actually add anything to a symbol table.
6575                            But also don't want to "initialise" any placeholder
6576                            constants that might already be there into full
6577                            blown PVGVs with attached PVCV.  */
6578                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6579                                                GV_NOADD_NOINIT, SVt_PVCV);
6580                     }
6581                     len = 0;
6582                 }
6583
6584                 /* if we saw a global override before, get the right name */
6585
6586                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6587                     len ? len : strlen(PL_tokenbuf));
6588                 if (gvp) {
6589                     SV * const tmp_sv = sv;
6590                     sv = newSVpvs("CORE::GLOBAL::");
6591                     sv_catsv(sv, tmp_sv);
6592                     SvREFCNT_dec(tmp_sv);
6593                 }
6594
6595 #ifdef PERL_MAD
6596                 if (PL_madskills && !PL_thistoken) {
6597                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6598                     PL_thistoken = newSVpvn(start,s - start);
6599                     PL_realtokenstart = s - SvPVX(PL_linestr);
6600                 }
6601 #endif
6602
6603                 /* Presume this is going to be a bareword of some sort. */
6604                 CLINE;
6605                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6606                 pl_yylval.opval->op_private = OPpCONST_BARE;
6607
6608                 /* And if "Foo::", then that's what it certainly is. */
6609                 if (len)
6610                     goto safe_bareword;
6611
6612                 {
6613                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6614                     const_op->op_private = OPpCONST_BARE;
6615                     rv2cv_op = newCVREF(0, const_op);
6616                 }
6617                 cv = rv2cv_op_cv(rv2cv_op, 0);
6618
6619                 /* See if it's the indirect object for a list operator. */
6620
6621                 if (PL_oldoldbufptr &&
6622                     PL_oldoldbufptr < PL_bufptr &&
6623                     (PL_oldoldbufptr == PL_last_lop
6624                      || PL_oldoldbufptr == PL_last_uni) &&
6625                     /* NO SKIPSPACE BEFORE HERE! */
6626                     (PL_expect == XREF ||
6627                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6628                 {
6629                     bool immediate_paren = *s == '(';
6630
6631                     /* (Now we can afford to cross potential line boundary.) */
6632                     s = SKIPSPACE2(s,nextPL_nextwhite);
6633 #ifdef PERL_MAD
6634                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6635 #endif
6636
6637                     /* Two barewords in a row may indicate method call. */
6638
6639                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6640                         (tmp = intuit_method(s, gv, cv))) {
6641                         op_free(rv2cv_op);
6642                         if (tmp == METHOD && !PL_lex_allbrackets &&
6643                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6644                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6645                         return REPORT(tmp);
6646                     }
6647
6648                     /* If not a declared subroutine, it's an indirect object. */
6649                     /* (But it's an indir obj regardless for sort.) */
6650                     /* Also, if "_" follows a filetest operator, it's a bareword */
6651
6652                     if (
6653                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6654                          (!cv &&
6655                         (PL_last_lop_op != OP_MAPSTART &&
6656                          PL_last_lop_op != OP_GREPSTART))))
6657                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6658                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6659                        )
6660                     {
6661                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6662                         goto bareword;
6663                     }
6664                 }
6665
6666                 PL_expect = XOPERATOR;
6667 #ifdef PERL_MAD
6668                 if (isSPACE(*s))
6669                     s = SKIPSPACE2(s,nextPL_nextwhite);
6670                 PL_nextwhite = nextPL_nextwhite;
6671 #else
6672                 s = skipspace(s);
6673 #endif
6674
6675                 /* Is this a word before a => operator? */
6676                 if (*s == '=' && s[1] == '>' && !pkgname) {
6677                     op_free(rv2cv_op);
6678                     CLINE;
6679                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6680                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6681                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6682                     TERM(WORD);
6683                 }
6684
6685                 /* If followed by a paren, it's certainly a subroutine. */
6686                 if (*s == '(') {
6687                     CLINE;
6688                     if (cv) {
6689                         d = s + 1;
6690                         while (SPACE_OR_TAB(*d))
6691                             d++;
6692                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6693                             s = d + 1;
6694                             goto its_constant;
6695                         }
6696                     }
6697 #ifdef PERL_MAD
6698                     if (PL_madskills) {
6699                         PL_nextwhite = PL_thiswhite;
6700                         PL_thiswhite = 0;
6701                     }
6702                     start_force(PL_curforce);
6703 #endif
6704                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6705                     PL_expect = XOPERATOR;
6706 #ifdef PERL_MAD
6707                     if (PL_madskills) {
6708                         PL_nextwhite = nextPL_nextwhite;
6709                         curmad('X', PL_thistoken);
6710                         PL_thistoken = newSVpvs("");
6711                     }
6712 #endif
6713                     op_free(rv2cv_op);
6714                     force_next(WORD);
6715                     pl_yylval.ival = 0;
6716                     TOKEN('&');
6717                 }
6718
6719                 /* If followed by var or block, call it a method (unless sub) */
6720
6721                 if ((*s == '$' || *s == '{') && !cv) {
6722                     op_free(rv2cv_op);
6723                     PL_last_lop = PL_oldbufptr;
6724                     PL_last_lop_op = OP_METHOD;
6725                     if (!PL_lex_allbrackets &&
6726                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6727                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6728                     PREBLOCK(METHOD);
6729                 }
6730
6731                 /* If followed by a bareword, see if it looks like indir obj. */
6732
6733                 if (!orig_keyword
6734                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6735                         && (tmp = intuit_method(s, gv, cv))) {
6736                     op_free(rv2cv_op);
6737                     if (tmp == METHOD && !PL_lex_allbrackets &&
6738                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6739                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6740                     return REPORT(tmp);
6741                 }
6742
6743                 /* Not a method, so call it a subroutine (if defined) */
6744
6745                 if (cv) {
6746                     if (lastchar == '-')
6747                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6748                                          "Ambiguous use of -%s resolved as -&%s()",
6749                                          PL_tokenbuf, PL_tokenbuf);
6750                     /* Check for a constant sub */
6751                     if ((sv = cv_const_sv(cv))) {
6752                   its_constant:
6753                         op_free(rv2cv_op);
6754                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6755                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6756                         pl_yylval.opval->op_private = 0;
6757                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6758                         TOKEN(WORD);
6759                     }
6760
6761                     op_free(pl_yylval.opval);
6762                     pl_yylval.opval = rv2cv_op;
6763                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6764                     PL_last_lop = PL_oldbufptr;
6765                     PL_last_lop_op = OP_ENTERSUB;
6766                     /* Is there a prototype? */
6767                     if (
6768 #ifdef PERL_MAD
6769                         cv &&
6770 #endif
6771                         SvPOK(cv))
6772                     {
6773                         STRLEN protolen;
6774                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6775                         if (!protolen)
6776                             TERM(FUNC0SUB);
6777                         while (*proto == ';')
6778                             proto++;
6779                         if (
6780                             (
6781                                 (
6782                                     *proto == '$' || *proto == '_'
6783                                  || *proto == '*' || *proto == '+'
6784                                 )
6785                              && proto[1] == '\0'
6786                             )
6787                          || (
6788                              *proto == '\\' && proto[1] && proto[2] == '\0'
6789                             )
6790                         )
6791                             OPERATOR(UNIOPSUB);
6792                         if (*proto == '\\' && proto[1] == '[') {
6793                             const char *p = proto + 2;
6794                             while(*p && *p != ']')
6795                                 ++p;
6796                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6797                         }
6798                         if (*proto == '&' && *s == '{') {
6799                             if (PL_curstash)
6800                                 sv_setpvs(PL_subname, "__ANON__");
6801                             else
6802                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6803                             if (!PL_lex_allbrackets &&
6804                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6805                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6806                             PREBLOCK(LSTOPSUB);
6807                         }
6808                     }
6809 #ifdef PERL_MAD
6810                     {
6811                         if (PL_madskills) {
6812                             PL_nextwhite = PL_thiswhite;
6813                             PL_thiswhite = 0;
6814                         }
6815                         start_force(PL_curforce);
6816                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6817                         PL_expect = XTERM;
6818                         if (PL_madskills) {
6819                             PL_nextwhite = nextPL_nextwhite;
6820                             curmad('X', PL_thistoken);
6821                             PL_thistoken = newSVpvs("");
6822                         }
6823                         force_next(WORD);
6824                         if (!PL_lex_allbrackets &&
6825                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6826                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6827                         TOKEN(NOAMP);
6828                     }
6829                 }
6830
6831                 /* Guess harder when madskills require "best effort". */
6832                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6833                     int probable_sub = 0;
6834                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6835                         probable_sub = 1;
6836                     else if (isALPHA(*s)) {
6837                         char tmpbuf[1024];
6838                         STRLEN tmplen;
6839                         d = s;
6840                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6841                         if (!keyword(tmpbuf, tmplen, 0))
6842                             probable_sub = 1;
6843                         else {
6844                             while (d < PL_bufend && isSPACE(*d))
6845                                 d++;
6846                             if (*d == '=' && d[1] == '>')
6847                                 probable_sub = 1;
6848                         }
6849                     }
6850                     if (probable_sub) {
6851                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6852                         op_free(pl_yylval.opval);
6853                         pl_yylval.opval = rv2cv_op;
6854                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6855                         PL_last_lop = PL_oldbufptr;
6856                         PL_last_lop_op = OP_ENTERSUB;
6857                         PL_nextwhite = PL_thiswhite;
6858                         PL_thiswhite = 0;
6859                         start_force(PL_curforce);
6860                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6861                         PL_expect = XTERM;
6862                         PL_nextwhite = nextPL_nextwhite;
6863                         curmad('X', PL_thistoken);
6864                         PL_thistoken = newSVpvs("");
6865                         force_next(WORD);
6866                         if (!PL_lex_allbrackets &&
6867                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6868                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6869                         TOKEN(NOAMP);
6870                     }
6871 #else
6872                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6873                     PL_expect = XTERM;
6874                     force_next(WORD);
6875                     if (!PL_lex_allbrackets &&
6876                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6877                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6878                     TOKEN(NOAMP);
6879 #endif
6880                 }
6881
6882                 /* Call it a bare word */
6883
6884                 if (PL_hints & HINT_STRICT_SUBS)
6885                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6886                 else {
6887                 bareword:
6888                     /* after "print" and similar functions (corresponding to
6889                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6890                      * a filehandle should be subject to "strict subs".
6891                      * Likewise for the optional indirect-object argument to system
6892                      * or exec, which can't be a bareword */
6893                     if ((PL_last_lop_op == OP_PRINT
6894                             || PL_last_lop_op == OP_PRTF
6895                             || PL_last_lop_op == OP_SAY
6896                             || PL_last_lop_op == OP_SYSTEM
6897                             || PL_last_lop_op == OP_EXEC)
6898                             && (PL_hints & HINT_STRICT_SUBS))
6899                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6900                     if (lastchar != '-') {
6901                         if (ckWARN(WARN_RESERVED)) {
6902                             d = PL_tokenbuf;
6903                             while (isLOWER(*d))
6904                                 d++;
6905                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6906                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6907                                        PL_tokenbuf);
6908                         }
6909                     }
6910                 }
6911                 op_free(rv2cv_op);
6912
6913             safe_bareword:
6914                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6915                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6916                                      "Operator or semicolon missing before %c%s",
6917                                      lastchar, PL_tokenbuf);
6918                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6919                                      "Ambiguous use of %c resolved as operator %c",
6920                                      lastchar, lastchar);
6921                 }
6922                 TOKEN(WORD);
6923             }
6924
6925         case KEY___FILE__:
6926             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6927                                         newSVpv(CopFILE(PL_curcop),0));
6928             TERM(THING);
6929
6930         case KEY___LINE__:
6931             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6932                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6933             TERM(THING);
6934
6935         case KEY___PACKAGE__:
6936             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6937                                         (PL_curstash
6938                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6939                                          : &PL_sv_undef));
6940             TERM(THING);
6941
6942         case KEY___DATA__:
6943         case KEY___END__: {
6944             GV *gv;
6945             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6946                 const char *pname = "main";
6947                 if (PL_tokenbuf[2] == 'D')
6948                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6949                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6950                                 SVt_PVIO);
6951                 GvMULTI_on(gv);
6952                 if (!GvIO(gv))
6953                     GvIOp(gv) = newIO();
6954                 IoIFP(GvIOp(gv)) = PL_rsfp;
6955 #if defined(HAS_FCNTL) && defined(F_SETFD)
6956                 {
6957                     const int fd = PerlIO_fileno(PL_rsfp);
6958                     fcntl(fd,F_SETFD,fd >= 3);
6959                 }
6960 #endif
6961                 /* Mark this internal pseudo-handle as clean */
6962                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6963                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6964                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6965                 else
6966                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6967 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6968                 /* if the script was opened in binmode, we need to revert
6969                  * it to text mode for compatibility; but only iff it has CRs
6970                  * XXX this is a questionable hack at best. */
6971                 if (PL_bufend-PL_bufptr > 2
6972                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6973                 {
6974                     Off_t loc = 0;
6975                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6976                         loc = PerlIO_tell(PL_rsfp);
6977                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6978                     }
6979 #ifdef NETWARE
6980                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6981 #else
6982                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6983 #endif  /* NETWARE */
6984 #ifdef PERLIO_IS_STDIO /* really? */
6985 #  if defined(__BORLANDC__)
6986                         /* XXX see note in do_binmode() */
6987                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6988 #  endif
6989 #endif
6990                         if (loc > 0)
6991                             PerlIO_seek(PL_rsfp, loc, 0);
6992                     }
6993                 }
6994 #endif
6995 #ifdef PERLIO_LAYERS
6996                 if (!IN_BYTES) {
6997                     if (UTF)
6998                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6999                     else if (PL_encoding) {
7000                         SV *name;
7001                         dSP;
7002                         ENTER;
7003                         SAVETMPS;
7004                         PUSHMARK(sp);
7005                         EXTEND(SP, 1);
7006                         XPUSHs(PL_encoding);
7007                         PUTBACK;
7008                         call_method("name", G_SCALAR);
7009                         SPAGAIN;
7010                         name = POPs;
7011                         PUTBACK;
7012                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7013                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7014                                                       SVfARG(name)));
7015                         FREETMPS;
7016                         LEAVE;
7017                     }
7018                 }
7019 #endif
7020 #ifdef PERL_MAD
7021                 if (PL_madskills) {
7022                     if (PL_realtokenstart >= 0) {
7023                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7024                         if (!PL_endwhite)
7025                             PL_endwhite = newSVpvs("");
7026                         sv_catsv(PL_endwhite, PL_thiswhite);
7027                         PL_thiswhite = 0;
7028                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7029                         PL_realtokenstart = -1;
7030                     }
7031                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7032                            != NULL) ;
7033                 }
7034 #endif
7035                 PL_rsfp = NULL;
7036             }
7037             goto fake_eof;
7038         }
7039
7040         case KEY_AUTOLOAD:
7041         case KEY_DESTROY:
7042         case KEY_BEGIN:
7043         case KEY_UNITCHECK:
7044         case KEY_CHECK:
7045         case KEY_INIT:
7046         case KEY_END:
7047             if (PL_expect == XSTATE) {
7048                 s = PL_bufptr;
7049                 goto really_sub;
7050             }
7051             goto just_a_word;
7052
7053         case KEY_CORE:
7054             if (*s == ':' && s[1] == ':') {
7055                 s += 2;
7056                 d = s;
7057                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7058                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7059                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7060                 if (tmp < 0)
7061                     tmp = -tmp;
7062                 else if (tmp == KEY_require || tmp == KEY_do)
7063                     /* that's a way to remember we saw "CORE::" */
7064                     orig_keyword = tmp;
7065                 goto reserved_word;
7066             }
7067             goto just_a_word;
7068
7069         case KEY_abs:
7070             UNI(OP_ABS);
7071
7072         case KEY_alarm:
7073             UNI(OP_ALARM);
7074
7075         case KEY_accept:
7076             LOP(OP_ACCEPT,XTERM);
7077
7078         case KEY_and:
7079             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7080                 return REPORT(0);
7081             OPERATOR(ANDOP);
7082
7083         case KEY_atan2:
7084             LOP(OP_ATAN2,XTERM);
7085
7086         case KEY_bind:
7087             LOP(OP_BIND,XTERM);
7088
7089         case KEY_binmode:
7090             LOP(OP_BINMODE,XTERM);
7091
7092         case KEY_bless:
7093             LOP(OP_BLESS,XTERM);
7094
7095         case KEY_break:
7096             FUN0(OP_BREAK);
7097
7098         case KEY_chop:
7099             UNI(OP_CHOP);
7100
7101         case KEY_continue:
7102                     /* We have to disambiguate the two senses of
7103                       "continue". If the next token is a '{' then
7104                       treat it as the start of a continue block;
7105                       otherwise treat it as a control operator.
7106                      */
7107                     s = skipspace(s);
7108                     if (*s == '{')
7109             PREBLOCK(CONTINUE);
7110                     else
7111                         FUN0(OP_CONTINUE);
7112
7113         case KEY_chdir:
7114             /* may use HOME */
7115             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7116             UNI(OP_CHDIR);
7117
7118         case KEY_close:
7119             UNI(OP_CLOSE);
7120
7121         case KEY_closedir:
7122             UNI(OP_CLOSEDIR);
7123
7124         case KEY_cmp:
7125             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7126                 return REPORT(0);
7127             Eop(OP_SCMP);
7128
7129         case KEY_caller:
7130             UNI(OP_CALLER);
7131
7132         case KEY_crypt:
7133 #ifdef FCRYPT
7134             if (!PL_cryptseen) {
7135                 PL_cryptseen = TRUE;
7136                 init_des();
7137             }
7138 #endif
7139             LOP(OP_CRYPT,XTERM);
7140
7141         case KEY_chmod:
7142             LOP(OP_CHMOD,XTERM);
7143
7144         case KEY_chown:
7145             LOP(OP_CHOWN,XTERM);
7146
7147         case KEY_connect:
7148             LOP(OP_CONNECT,XTERM);
7149
7150         case KEY_chr:
7151             UNI(OP_CHR);
7152
7153         case KEY_cos:
7154             UNI(OP_COS);
7155
7156         case KEY_chroot:
7157             UNI(OP_CHROOT);
7158
7159         case KEY_default:
7160             PREBLOCK(DEFAULT);
7161
7162         case KEY_do:
7163             s = SKIPSPACE1(s);
7164             if (*s == '{')
7165                 PRETERMBLOCK(DO);
7166             if (*s != '\'')
7167                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7168             if (orig_keyword == KEY_do) {
7169                 orig_keyword = 0;
7170                 pl_yylval.ival = 1;
7171             }
7172             else
7173                 pl_yylval.ival = 0;
7174             OPERATOR(DO);
7175
7176         case KEY_die:
7177             PL_hints |= HINT_BLOCK_SCOPE;
7178             LOP(OP_DIE,XTERM);
7179
7180         case KEY_defined:
7181             UNI(OP_DEFINED);
7182
7183         case KEY_delete:
7184             UNI(OP_DELETE);
7185
7186         case KEY_dbmopen:
7187             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7188                               STR_WITH_LEN("NDBM_File::"),
7189                               STR_WITH_LEN("DB_File::"),
7190                               STR_WITH_LEN("GDBM_File::"),
7191                               STR_WITH_LEN("SDBM_File::"),
7192                               STR_WITH_LEN("ODBM_File::"),
7193                               NULL);
7194             LOP(OP_DBMOPEN,XTERM);
7195
7196         case KEY_dbmclose:
7197             UNI(OP_DBMCLOSE);
7198
7199         case KEY_dump:
7200             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7201             LOOPX(OP_DUMP);
7202
7203         case KEY_else:
7204             PREBLOCK(ELSE);
7205
7206         case KEY_elsif:
7207             pl_yylval.ival = CopLINE(PL_curcop);
7208             OPERATOR(ELSIF);
7209
7210         case KEY_eq:
7211             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7212                 return REPORT(0);
7213             Eop(OP_SEQ);
7214
7215         case KEY_exists:
7216             UNI(OP_EXISTS);
7217
7218         case KEY_exit:
7219             if (PL_madskills)
7220                 UNI(OP_INT);
7221             UNI(OP_EXIT);
7222
7223         case KEY_eval:
7224             s = SKIPSPACE1(s);
7225             if (*s == '{') { /* block eval */
7226                 PL_expect = XTERMBLOCK;
7227                 UNIBRACK(OP_ENTERTRY);
7228             }
7229             else { /* string eval */
7230                 PL_expect = XTERM;
7231                 UNIBRACK(OP_ENTEREVAL);
7232             }
7233
7234         case KEY_eof:
7235             UNI(OP_EOF);
7236
7237         case KEY_exp:
7238             UNI(OP_EXP);
7239
7240         case KEY_each:
7241             UNI(OP_EACH);
7242
7243         case KEY_exec:
7244             LOP(OP_EXEC,XREF);
7245
7246         case KEY_endhostent:
7247             FUN0(OP_EHOSTENT);
7248
7249         case KEY_endnetent:
7250             FUN0(OP_ENETENT);
7251
7252         case KEY_endservent:
7253             FUN0(OP_ESERVENT);
7254
7255         case KEY_endprotoent:
7256             FUN0(OP_EPROTOENT);
7257
7258         case KEY_endpwent:
7259             FUN0(OP_EPWENT);
7260
7261         case KEY_endgrent:
7262             FUN0(OP_EGRENT);
7263
7264         case KEY_for:
7265         case KEY_foreach:
7266             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7267                 return REPORT(0);
7268             pl_yylval.ival = CopLINE(PL_curcop);
7269             s = SKIPSPACE1(s);
7270             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7271                 char *p = s;
7272 #ifdef PERL_MAD
7273                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7274 #endif
7275
7276                 if ((PL_bufend - p) >= 3 &&
7277                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7278                     p += 2;
7279                 else if ((PL_bufend - p) >= 4 &&
7280                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7281                     p += 3;
7282                 p = PEEKSPACE(p);
7283                 if (isIDFIRST_lazy_if(p,UTF)) {
7284                     p = scan_ident(p, PL_bufend,
7285                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7286                     p = PEEKSPACE(p);
7287                 }
7288                 if (*p != '$')
7289                     Perl_croak(aTHX_ "Missing $ on loop variable");
7290 #ifdef PERL_MAD
7291                 s = SvPVX(PL_linestr) + soff;
7292 #endif
7293             }
7294             OPERATOR(FOR);
7295
7296         case KEY_formline:
7297             LOP(OP_FORMLINE,XTERM);
7298
7299         case KEY_fork:
7300             FUN0(OP_FORK);
7301
7302         case KEY_fcntl:
7303             LOP(OP_FCNTL,XTERM);
7304
7305         case KEY_fileno:
7306             UNI(OP_FILENO);
7307
7308         case KEY_flock:
7309             LOP(OP_FLOCK,XTERM);
7310
7311         case KEY_gt:
7312             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7313                 return REPORT(0);
7314             Rop(OP_SGT);
7315
7316         case KEY_ge:
7317             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7318                 return REPORT(0);
7319             Rop(OP_SGE);
7320
7321         case KEY_grep:
7322             LOP(OP_GREPSTART, XREF);
7323
7324         case KEY_goto:
7325             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7326             LOOPX(OP_GOTO);
7327
7328         case KEY_gmtime:
7329             UNI(OP_GMTIME);
7330
7331         case KEY_getc:
7332             UNIDOR(OP_GETC);
7333
7334         case KEY_getppid:
7335             FUN0(OP_GETPPID);
7336
7337         case KEY_getpgrp:
7338             UNI(OP_GETPGRP);
7339
7340         case KEY_getpriority:
7341             LOP(OP_GETPRIORITY,XTERM);
7342
7343         case KEY_getprotobyname:
7344             UNI(OP_GPBYNAME);
7345
7346         case KEY_getprotobynumber:
7347             LOP(OP_GPBYNUMBER,XTERM);
7348
7349         case KEY_getprotoent:
7350             FUN0(OP_GPROTOENT);
7351
7352         case KEY_getpwent:
7353             FUN0(OP_GPWENT);
7354
7355         case KEY_getpwnam:
7356             UNI(OP_GPWNAM);
7357
7358         case KEY_getpwuid:
7359             UNI(OP_GPWUID);
7360
7361         case KEY_getpeername:
7362             UNI(OP_GETPEERNAME);
7363
7364         case KEY_gethostbyname:
7365             UNI(OP_GHBYNAME);
7366
7367         case KEY_gethostbyaddr:
7368             LOP(OP_GHBYADDR,XTERM);
7369
7370         case KEY_gethostent:
7371             FUN0(OP_GHOSTENT);
7372
7373         case KEY_getnetbyname:
7374             UNI(OP_GNBYNAME);
7375
7376         case KEY_getnetbyaddr:
7377             LOP(OP_GNBYADDR,XTERM);
7378
7379         case KEY_getnetent:
7380             FUN0(OP_GNETENT);
7381
7382         case KEY_getservbyname:
7383             LOP(OP_GSBYNAME,XTERM);
7384
7385         case KEY_getservbyport:
7386             LOP(OP_GSBYPORT,XTERM);
7387
7388         case KEY_getservent:
7389             FUN0(OP_GSERVENT);
7390
7391         case KEY_getsockname:
7392             UNI(OP_GETSOCKNAME);
7393
7394         case KEY_getsockopt:
7395             LOP(OP_GSOCKOPT,XTERM);
7396
7397         case KEY_getgrent:
7398             FUN0(OP_GGRENT);
7399
7400         case KEY_getgrnam:
7401             UNI(OP_GGRNAM);
7402
7403         case KEY_getgrgid:
7404             UNI(OP_GGRGID);
7405
7406         case KEY_getlogin:
7407             FUN0(OP_GETLOGIN);
7408
7409         case KEY_given:
7410             pl_yylval.ival = CopLINE(PL_curcop);
7411             OPERATOR(GIVEN);
7412
7413         case KEY_glob:
7414             LOP(OP_GLOB,XTERM);
7415
7416         case KEY_hex:
7417             UNI(OP_HEX);
7418
7419         case KEY_if:
7420             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7421                 return REPORT(0);
7422             pl_yylval.ival = CopLINE(PL_curcop);
7423             OPERATOR(IF);
7424
7425         case KEY_index:
7426             LOP(OP_INDEX,XTERM);
7427
7428         case KEY_int:
7429             UNI(OP_INT);
7430
7431         case KEY_ioctl:
7432             LOP(OP_IOCTL,XTERM);
7433
7434         case KEY_join:
7435             LOP(OP_JOIN,XTERM);
7436
7437         case KEY_keys:
7438             UNI(OP_KEYS);
7439
7440         case KEY_kill:
7441             LOP(OP_KILL,XTERM);
7442
7443         case KEY_last:
7444             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7445             LOOPX(OP_LAST);
7446
7447         case KEY_lc:
7448             UNI(OP_LC);
7449
7450         case KEY_lcfirst:
7451             UNI(OP_LCFIRST);
7452
7453         case KEY_local:
7454             pl_yylval.ival = 0;
7455             OPERATOR(LOCAL);
7456
7457         case KEY_length:
7458             UNI(OP_LENGTH);
7459
7460         case KEY_lt:
7461             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7462                 return REPORT(0);
7463             Rop(OP_SLT);
7464
7465         case KEY_le:
7466             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7467                 return REPORT(0);
7468             Rop(OP_SLE);
7469
7470         case KEY_localtime:
7471             UNI(OP_LOCALTIME);
7472
7473         case KEY_log:
7474             UNI(OP_LOG);
7475
7476         case KEY_link:
7477             LOP(OP_LINK,XTERM);
7478
7479         case KEY_listen:
7480             LOP(OP_LISTEN,XTERM);
7481
7482         case KEY_lock:
7483             UNI(OP_LOCK);
7484
7485         case KEY_lstat:
7486             UNI(OP_LSTAT);
7487
7488         case KEY_m:
7489             s = scan_pat(s,OP_MATCH);
7490             TERM(sublex_start());
7491
7492         case KEY_map:
7493             LOP(OP_MAPSTART, XREF);
7494
7495         case KEY_mkdir:
7496             LOP(OP_MKDIR,XTERM);
7497
7498         case KEY_msgctl:
7499             LOP(OP_MSGCTL,XTERM);
7500
7501         case KEY_msgget:
7502             LOP(OP_MSGGET,XTERM);
7503
7504         case KEY_msgrcv:
7505             LOP(OP_MSGRCV,XTERM);
7506
7507         case KEY_msgsnd:
7508             LOP(OP_MSGSND,XTERM);
7509
7510         case KEY_our:
7511         case KEY_my:
7512         case KEY_state:
7513             PL_in_my = (U16)tmp;
7514             s = SKIPSPACE1(s);
7515             if (isIDFIRST_lazy_if(s,UTF)) {
7516 #ifdef PERL_MAD
7517                 char* start = s;
7518 #endif
7519                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7520                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7521                     goto really_sub;
7522                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7523                 if (!PL_in_my_stash) {
7524                     char tmpbuf[1024];
7525                     PL_bufptr = s;
7526                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7527                     yyerror(tmpbuf);
7528                 }
7529 #ifdef PERL_MAD
7530                 if (PL_madskills) {     /* just add type to declarator token */
7531                     sv_catsv(PL_thistoken, PL_nextwhite);
7532                     PL_nextwhite = 0;
7533                     sv_catpvn(PL_thistoken, start, s - start);
7534                 }
7535 #endif
7536             }
7537             pl_yylval.ival = 1;
7538             OPERATOR(MY);
7539
7540         case KEY_next:
7541             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7542             LOOPX(OP_NEXT);
7543
7544         case KEY_ne:
7545             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7546                 return REPORT(0);
7547             Eop(OP_SNE);
7548
7549         case KEY_no:
7550             s = tokenize_use(0, s);
7551             OPERATOR(USE);
7552
7553         case KEY_not:
7554             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7555                 FUN1(OP_NOT);
7556             else {
7557                 if (!PL_lex_allbrackets &&
7558                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7559                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7560                 OPERATOR(NOTOP);
7561             }
7562
7563         case KEY_open:
7564             s = SKIPSPACE1(s);
7565             if (isIDFIRST_lazy_if(s,UTF)) {
7566                 const char *t;
7567                 for (d = s; isALNUM_lazy_if(d,UTF);)
7568                     d++;
7569                 for (t=d; isSPACE(*t);)
7570                     t++;
7571                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7572                     /* [perl #16184] */
7573                     && !(t[0] == '=' && t[1] == '>')
7574                 ) {
7575                     int parms_len = (int)(d-s);
7576                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7577                            "Precedence problem: open %.*s should be open(%.*s)",
7578                             parms_len, s, parms_len, s);
7579                 }
7580             }
7581             LOP(OP_OPEN,XTERM);
7582
7583         case KEY_or:
7584             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7585                 return REPORT(0);
7586             pl_yylval.ival = OP_OR;
7587             OPERATOR(OROP);
7588
7589         case KEY_ord:
7590             UNI(OP_ORD);
7591
7592         case KEY_oct:
7593             UNI(OP_OCT);
7594
7595         case KEY_opendir:
7596             LOP(OP_OPEN_DIR,XTERM);
7597
7598         case KEY_print:
7599             checkcomma(s,PL_tokenbuf,"filehandle");
7600             LOP(OP_PRINT,XREF);
7601
7602         case KEY_printf:
7603             checkcomma(s,PL_tokenbuf,"filehandle");
7604             LOP(OP_PRTF,XREF);
7605
7606         case KEY_prototype:
7607             UNI(OP_PROTOTYPE);
7608
7609         case KEY_push:
7610             LOP(OP_PUSH,XTERM);
7611
7612         case KEY_pop:
7613             UNIDOR(OP_POP);
7614
7615         case KEY_pos:
7616             UNIDOR(OP_POS);
7617
7618         case KEY_pack:
7619             LOP(OP_PACK,XTERM);
7620
7621         case KEY_package:
7622             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7623             s = SKIPSPACE1(s);
7624             s = force_strict_version(s);
7625             PL_lex_expect = XBLOCK;
7626             OPERATOR(PACKAGE);
7627
7628         case KEY_pipe:
7629             LOP(OP_PIPE_OP,XTERM);
7630
7631         case KEY_q:
7632             s = scan_str(s,!!PL_madskills,FALSE);
7633             if (!s)
7634                 missingterm(NULL);
7635             pl_yylval.ival = OP_CONST;
7636             TERM(sublex_start());
7637
7638         case KEY_quotemeta:
7639             UNI(OP_QUOTEMETA);
7640
7641         case KEY_qw: {
7642             OP *words = NULL;
7643             s = scan_str(s,!!PL_madskills,FALSE);
7644             if (!s)
7645                 missingterm(NULL);
7646             PL_expect = XOPERATOR;
7647             if (SvCUR(PL_lex_stuff)) {
7648                 int warned_comma = !ckWARN(WARN_QW);
7649                 int warned_comment = warned_comma;
7650                 d = SvPV_force(PL_lex_stuff, len);
7651                 while (len) {
7652                     for (; isSPACE(*d) && len; --len, ++d)
7653                         /**/;
7654                     if (len) {
7655                         SV *sv;
7656                         const char *b = d;
7657                         if (!warned_comma || !warned_comment) {
7658                             for (; !isSPACE(*d) && len; --len, ++d) {
7659                                 if (!warned_comma && *d == ',') {
7660                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7661                                         "Possible attempt to separate words with commas");
7662                                     ++warned_comma;
7663                                 }
7664                                 else if (!warned_comment && *d == '#') {
7665                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7666                                         "Possible attempt to put comments in qw() list");
7667                                     ++warned_comment;
7668                                 }
7669                             }
7670                         }
7671                         else {
7672                             for (; !isSPACE(*d) && len; --len, ++d)
7673                                 /**/;
7674                         }
7675                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7676                         words = op_append_elem(OP_LIST, words,
7677                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7678                     }
7679                 }
7680             }
7681             if (!words)
7682                 words = newNULLLIST();
7683             if (PL_lex_stuff) {
7684                 SvREFCNT_dec(PL_lex_stuff);
7685                 PL_lex_stuff = NULL;
7686             }
7687             PL_expect = XOPERATOR;
7688             pl_yylval.opval = sawparens(words);
7689             TOKEN(QWLIST);
7690         }
7691
7692         case KEY_qq:
7693             s = scan_str(s,!!PL_madskills,FALSE);
7694             if (!s)
7695                 missingterm(NULL);
7696             pl_yylval.ival = OP_STRINGIFY;
7697             if (SvIVX(PL_lex_stuff) == '\'')
7698                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7699             TERM(sublex_start());
7700
7701         case KEY_qr:
7702             s = scan_pat(s,OP_QR);
7703             TERM(sublex_start());
7704
7705         case KEY_qx:
7706             s = scan_str(s,!!PL_madskills,FALSE);
7707             if (!s)
7708                 missingterm(NULL);
7709             readpipe_override();
7710             TERM(sublex_start());
7711
7712         case KEY_return:
7713             OLDLOP(OP_RETURN);
7714
7715         case KEY_require:
7716             s = SKIPSPACE1(s);
7717             if (isDIGIT(*s)) {
7718                 s = force_version(s, FALSE);
7719             }
7720             else if (*s != 'v' || !isDIGIT(s[1])
7721                     || (s = force_version(s, TRUE), *s == 'v'))
7722             {
7723                 *PL_tokenbuf = '\0';
7724                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7725                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7726                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7727                 else if (*s == '<')
7728                     yyerror("<> should be quotes");
7729             }
7730             if (orig_keyword == KEY_require) {
7731                 orig_keyword = 0;
7732                 pl_yylval.ival = 1;
7733             }
7734             else
7735                 pl_yylval.ival = 0;
7736             PL_expect = XTERM;
7737             PL_bufptr = s;
7738             PL_last_uni = PL_oldbufptr;
7739             PL_last_lop_op = OP_REQUIRE;
7740             s = skipspace(s);
7741             return REPORT( (int)REQUIRE );
7742
7743         case KEY_reset:
7744             UNI(OP_RESET);
7745
7746         case KEY_redo:
7747             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7748             LOOPX(OP_REDO);
7749
7750         case KEY_rename:
7751             LOP(OP_RENAME,XTERM);
7752
7753         case KEY_rand:
7754             UNI(OP_RAND);
7755
7756         case KEY_rmdir:
7757             UNI(OP_RMDIR);
7758
7759         case KEY_rindex:
7760             LOP(OP_RINDEX,XTERM);
7761
7762         case KEY_read:
7763             LOP(OP_READ,XTERM);
7764
7765         case KEY_readdir:
7766             UNI(OP_READDIR);
7767
7768         case KEY_readline:
7769             UNIDOR(OP_READLINE);
7770
7771         case KEY_readpipe:
7772             UNIDOR(OP_BACKTICK);
7773
7774         case KEY_rewinddir:
7775             UNI(OP_REWINDDIR);
7776
7777         case KEY_recv:
7778             LOP(OP_RECV,XTERM);
7779
7780         case KEY_reverse:
7781             LOP(OP_REVERSE,XTERM);
7782
7783         case KEY_readlink:
7784             UNIDOR(OP_READLINK);
7785
7786         case KEY_ref:
7787             UNI(OP_REF);
7788
7789         case KEY_s:
7790             s = scan_subst(s);
7791             if (pl_yylval.opval)
7792                 TERM(sublex_start());
7793             else
7794                 TOKEN(1);       /* force error */
7795
7796         case KEY_say:
7797             checkcomma(s,PL_tokenbuf,"filehandle");
7798             LOP(OP_SAY,XREF);
7799
7800         case KEY_chomp:
7801             UNI(OP_CHOMP);
7802
7803         case KEY_scalar:
7804             UNI(OP_SCALAR);
7805
7806         case KEY_select:
7807             LOP(OP_SELECT,XTERM);
7808
7809         case KEY_seek:
7810             LOP(OP_SEEK,XTERM);
7811
7812         case KEY_semctl:
7813             LOP(OP_SEMCTL,XTERM);
7814
7815         case KEY_semget:
7816             LOP(OP_SEMGET,XTERM);
7817
7818         case KEY_semop:
7819             LOP(OP_SEMOP,XTERM);
7820
7821         case KEY_send:
7822             LOP(OP_SEND,XTERM);
7823
7824         case KEY_setpgrp:
7825             LOP(OP_SETPGRP,XTERM);
7826
7827         case KEY_setpriority:
7828             LOP(OP_SETPRIORITY,XTERM);
7829
7830         case KEY_sethostent:
7831             UNI(OP_SHOSTENT);
7832
7833         case KEY_setnetent:
7834             UNI(OP_SNETENT);
7835
7836         case KEY_setservent:
7837             UNI(OP_SSERVENT);
7838
7839         case KEY_setprotoent:
7840             UNI(OP_SPROTOENT);
7841
7842         case KEY_setpwent:
7843             FUN0(OP_SPWENT);
7844
7845         case KEY_setgrent:
7846             FUN0(OP_SGRENT);
7847
7848         case KEY_seekdir:
7849             LOP(OP_SEEKDIR,XTERM);
7850
7851         case KEY_setsockopt:
7852             LOP(OP_SSOCKOPT,XTERM);
7853
7854         case KEY_shift:
7855             UNIDOR(OP_SHIFT);
7856
7857         case KEY_shmctl:
7858             LOP(OP_SHMCTL,XTERM);
7859
7860         case KEY_shmget:
7861             LOP(OP_SHMGET,XTERM);
7862
7863         case KEY_shmread:
7864             LOP(OP_SHMREAD,XTERM);
7865
7866         case KEY_shmwrite:
7867             LOP(OP_SHMWRITE,XTERM);
7868
7869         case KEY_shutdown:
7870             LOP(OP_SHUTDOWN,XTERM);
7871
7872         case KEY_sin:
7873             UNI(OP_SIN);
7874
7875         case KEY_sleep:
7876             UNI(OP_SLEEP);
7877
7878         case KEY_socket:
7879             LOP(OP_SOCKET,XTERM);
7880
7881         case KEY_socketpair:
7882             LOP(OP_SOCKPAIR,XTERM);
7883
7884         case KEY_sort:
7885             checkcomma(s,PL_tokenbuf,"subroutine name");
7886             s = SKIPSPACE1(s);
7887             if (*s == ';' || *s == ')')         /* probably a close */
7888                 Perl_croak(aTHX_ "sort is now a reserved word");
7889             PL_expect = XTERM;
7890             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7891             LOP(OP_SORT,XREF);
7892
7893         case KEY_split:
7894             LOP(OP_SPLIT,XTERM);
7895
7896         case KEY_sprintf:
7897             LOP(OP_SPRINTF,XTERM);
7898
7899         case KEY_splice:
7900             LOP(OP_SPLICE,XTERM);
7901
7902         case KEY_sqrt:
7903             UNI(OP_SQRT);
7904
7905         case KEY_srand:
7906             UNI(OP_SRAND);
7907
7908         case KEY_stat:
7909             UNI(OP_STAT);
7910
7911         case KEY_study:
7912             UNI(OP_STUDY);
7913
7914         case KEY_substr:
7915             LOP(OP_SUBSTR,XTERM);
7916
7917         case KEY_format:
7918         case KEY_sub:
7919           really_sub:
7920             {
7921                 char tmpbuf[sizeof PL_tokenbuf];
7922                 SSize_t tboffset = 0;
7923                 expectation attrful;
7924                 bool have_name, have_proto;
7925                 const int key = tmp;
7926
7927 #ifdef PERL_MAD
7928                 SV *tmpwhite = 0;
7929
7930                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7931                 SV *subtoken = newSVpvn(tstart, s - tstart);
7932                 PL_thistoken = 0;
7933
7934                 d = s;
7935                 s = SKIPSPACE2(s,tmpwhite);
7936 #else
7937                 s = skipspace(s);
7938 #endif
7939
7940                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7941                     (*s == ':' && s[1] == ':'))
7942                 {
7943 #ifdef PERL_MAD
7944                     SV *nametoke = NULL;
7945 #endif
7946
7947                     PL_expect = XBLOCK;
7948                     attrful = XATTRBLOCK;
7949                     /* remember buffer pos'n for later force_word */
7950                     tboffset = s - PL_oldbufptr;
7951                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7952 #ifdef PERL_MAD
7953                     if (PL_madskills)
7954                         nametoke = newSVpvn(s, d - s);
7955 #endif
7956                     if (memchr(tmpbuf, ':', len))
7957                         sv_setpvn(PL_subname, tmpbuf, len);
7958                     else {
7959                         sv_setsv(PL_subname,PL_curstname);
7960                         sv_catpvs(PL_subname,"::");
7961                         sv_catpvn(PL_subname,tmpbuf,len);
7962                     }
7963                     have_name = TRUE;
7964
7965 #ifdef PERL_MAD
7966
7967                     start_force(0);
7968                     CURMAD('X', nametoke);
7969                     CURMAD('_', tmpwhite);
7970                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7971                                       FALSE, TRUE, TRUE);
7972
7973                     s = SKIPSPACE2(d,tmpwhite);
7974 #else
7975                     s = skipspace(d);
7976 #endif
7977                 }
7978                 else {
7979                     if (key == KEY_my)
7980                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7981                     PL_expect = XTERMBLOCK;
7982                     attrful = XATTRTERM;
7983                     sv_setpvs(PL_subname,"?");
7984                     have_name = FALSE;
7985                 }
7986
7987                 if (key == KEY_format) {
7988                     if (*s == '=')
7989                         PL_lex_formbrack = PL_lex_brackets + 1;
7990 #ifdef PERL_MAD
7991                     PL_thistoken = subtoken;
7992                     s = d;
7993 #else
7994                     if (have_name)
7995                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7996                                           FALSE, TRUE, TRUE);
7997 #endif
7998                     OPERATOR(FORMAT);
7999                 }
8000
8001                 /* Look for a prototype */
8002                 if (*s == '(') {
8003                     char *p;
8004                     bool bad_proto = FALSE;
8005                     bool in_brackets = FALSE;
8006                     char greedy_proto = ' ';
8007                     bool proto_after_greedy_proto = FALSE;
8008                     bool must_be_last = FALSE;
8009                     bool underscore = FALSE;
8010                     bool seen_underscore = FALSE;
8011                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8012
8013                     s = scan_str(s,!!PL_madskills,FALSE);
8014                     if (!s)
8015                         Perl_croak(aTHX_ "Prototype not terminated");
8016                     /* strip spaces and check for bad characters */
8017                     d = SvPVX(PL_lex_stuff);
8018                     tmp = 0;
8019                     for (p = d; *p; ++p) {
8020                         if (!isSPACE(*p)) {
8021                             d[tmp++] = *p;
8022
8023                             if (warnillegalproto) {
8024                                 if (must_be_last)
8025                                     proto_after_greedy_proto = TRUE;
8026                                 if (!strchr("$@%*;[]&\\_+", *p)) {
8027                                     bad_proto = TRUE;
8028                                 }
8029                                 else {
8030                                     if ( underscore ) {
8031                                         if ( *p != ';' )
8032                                             bad_proto = TRUE;
8033                                         underscore = FALSE;
8034                                     }
8035                                     if ( *p == '[' ) {
8036                                         in_brackets = TRUE;
8037                                     }
8038                                     else if ( *p == ']' ) {
8039                                         in_brackets = FALSE;
8040                                     }
8041                                     else if ( (*p == '@' || *p == '%') &&
8042                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8043                                          !in_brackets ) {
8044                                         must_be_last = TRUE;
8045                                         greedy_proto = *p;
8046                                     }
8047                                     else if ( *p == '_' ) {
8048                                         underscore = seen_underscore = TRUE;
8049                                     }
8050                                 }
8051                             }
8052                         }
8053                     }
8054                     d[tmp] = '\0';
8055                     if (proto_after_greedy_proto)
8056                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8057                                     "Prototype after '%c' for %"SVf" : %s",
8058                                     greedy_proto, SVfARG(PL_subname), d);
8059                     if (bad_proto)
8060                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8061                                     "Illegal character %sin prototype for %"SVf" : %s",
8062                                     seen_underscore ? "after '_' " : "",
8063                                     SVfARG(PL_subname), d);
8064                     SvCUR_set(PL_lex_stuff, tmp);
8065                     have_proto = TRUE;
8066
8067 #ifdef PERL_MAD
8068                     start_force(0);
8069                     CURMAD('q', PL_thisopen);
8070                     CURMAD('_', tmpwhite);
8071                     CURMAD('=', PL_thisstuff);
8072                     CURMAD('Q', PL_thisclose);
8073                     NEXTVAL_NEXTTOKE.opval =
8074                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8075                     PL_lex_stuff = NULL;
8076                     force_next(THING);
8077
8078                     s = SKIPSPACE2(s,tmpwhite);
8079 #else
8080                     s = skipspace(s);
8081 #endif
8082                 }
8083                 else
8084                     have_proto = FALSE;
8085
8086                 if (*s == ':' && s[1] != ':')
8087                     PL_expect = attrful;
8088                 else if (*s != '{' && key == KEY_sub) {
8089                     if (!have_name)
8090                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8091                     else if (*s != ';' && *s != '}')
8092                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8093                 }
8094
8095 #ifdef PERL_MAD
8096                 start_force(0);
8097                 if (tmpwhite) {
8098                     if (PL_madskills)
8099                         curmad('^', newSVpvs(""));
8100                     CURMAD('_', tmpwhite);
8101                 }
8102                 force_next(0);
8103
8104                 PL_thistoken = subtoken;
8105 #else
8106                 if (have_proto) {
8107                     NEXTVAL_NEXTTOKE.opval =
8108                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8109                     PL_lex_stuff = NULL;
8110                     force_next(THING);
8111                 }
8112 #endif
8113                 if (!have_name) {
8114                     if (PL_curstash)
8115                         sv_setpvs(PL_subname, "__ANON__");
8116                     else
8117                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8118                     TOKEN(ANONSUB);
8119                 }
8120 #ifndef PERL_MAD
8121                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8122                                   FALSE, TRUE, TRUE);
8123 #endif
8124                 if (key == KEY_my)
8125                     TOKEN(MYSUB);
8126                 TOKEN(SUB);
8127             }
8128
8129         case KEY_system:
8130             LOP(OP_SYSTEM,XREF);
8131
8132         case KEY_symlink:
8133             LOP(OP_SYMLINK,XTERM);
8134
8135         case KEY_syscall:
8136             LOP(OP_SYSCALL,XTERM);
8137
8138         case KEY_sysopen:
8139             LOP(OP_SYSOPEN,XTERM);
8140
8141         case KEY_sysseek:
8142             LOP(OP_SYSSEEK,XTERM);
8143
8144         case KEY_sysread:
8145             LOP(OP_SYSREAD,XTERM);
8146
8147         case KEY_syswrite:
8148             LOP(OP_SYSWRITE,XTERM);
8149
8150         case KEY_tr:
8151             s = scan_trans(s);
8152             TERM(sublex_start());
8153
8154         case KEY_tell:
8155             UNI(OP_TELL);
8156
8157         case KEY_telldir:
8158             UNI(OP_TELLDIR);
8159
8160         case KEY_tie:
8161             LOP(OP_TIE,XTERM);
8162
8163         case KEY_tied:
8164             UNI(OP_TIED);
8165
8166         case KEY_time:
8167             FUN0(OP_TIME);
8168
8169         case KEY_times:
8170             FUN0(OP_TMS);
8171
8172         case KEY_truncate:
8173             LOP(OP_TRUNCATE,XTERM);
8174
8175         case KEY_uc:
8176             UNI(OP_UC);
8177
8178         case KEY_ucfirst:
8179             UNI(OP_UCFIRST);
8180
8181         case KEY_untie:
8182             UNI(OP_UNTIE);
8183
8184         case KEY_until:
8185             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186                 return REPORT(0);
8187             pl_yylval.ival = CopLINE(PL_curcop);
8188             OPERATOR(UNTIL);
8189
8190         case KEY_unless:
8191             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8192                 return REPORT(0);
8193             pl_yylval.ival = CopLINE(PL_curcop);
8194             OPERATOR(UNLESS);
8195
8196         case KEY_unlink:
8197             LOP(OP_UNLINK,XTERM);
8198
8199         case KEY_undef:
8200             UNIDOR(OP_UNDEF);
8201
8202         case KEY_unpack:
8203             LOP(OP_UNPACK,XTERM);
8204
8205         case KEY_utime:
8206             LOP(OP_UTIME,XTERM);
8207
8208         case KEY_umask:
8209             UNIDOR(OP_UMASK);
8210
8211         case KEY_unshift:
8212             LOP(OP_UNSHIFT,XTERM);
8213
8214         case KEY_use:
8215             s = tokenize_use(1, s);
8216             OPERATOR(USE);
8217
8218         case KEY_values:
8219             UNI(OP_VALUES);
8220
8221         case KEY_vec:
8222             LOP(OP_VEC,XTERM);
8223
8224         case KEY_when:
8225             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8226                 return REPORT(0);
8227             pl_yylval.ival = CopLINE(PL_curcop);
8228             OPERATOR(WHEN);
8229
8230         case KEY_while:
8231             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8232                 return REPORT(0);
8233             pl_yylval.ival = CopLINE(PL_curcop);
8234             OPERATOR(WHILE);
8235
8236         case KEY_warn:
8237             PL_hints |= HINT_BLOCK_SCOPE;
8238             LOP(OP_WARN,XTERM);
8239
8240         case KEY_wait:
8241             FUN0(OP_WAIT);
8242
8243         case KEY_waitpid:
8244             LOP(OP_WAITPID,XTERM);
8245
8246         case KEY_wantarray:
8247             FUN0(OP_WANTARRAY);
8248
8249         case KEY_write:
8250 #ifdef EBCDIC
8251         {
8252             char ctl_l[2];
8253             ctl_l[0] = toCTRL('L');
8254             ctl_l[1] = '\0';
8255             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8256         }
8257 #else
8258             /* Make sure $^L is defined */
8259             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8260 #endif
8261             UNI(OP_ENTERWRITE);
8262
8263         case KEY_x:
8264             if (PL_expect == XOPERATOR) {
8265                 if (*s == '=' && !PL_lex_allbrackets &&
8266                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8267                     return REPORT(0);
8268                 Mop(OP_REPEAT);
8269             }
8270             check_uni();
8271             goto just_a_word;
8272
8273         case KEY_xor:
8274             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8275                 return REPORT(0);
8276             pl_yylval.ival = OP_XOR;
8277             OPERATOR(OROP);
8278
8279         case KEY_y:
8280             s = scan_trans(s);
8281             TERM(sublex_start());
8282         }
8283     }}
8284 }
8285 #ifdef __SC__
8286 #pragma segment Main
8287 #endif
8288
8289 static int
8290 S_pending_ident(pTHX)
8291 {
8292     dVAR;
8293     register char *d;
8294     PADOFFSET tmp = 0;
8295     /* pit holds the identifier we read and pending_ident is reset */
8296     char pit = PL_pending_ident;
8297     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8298     /* All routes through this function want to know if there is a colon.  */
8299     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8300     PL_pending_ident = 0;
8301
8302     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8303     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8304           "### Pending identifier '%s'\n", PL_tokenbuf); });
8305
8306     /* if we're in a my(), we can't allow dynamics here.
8307        $foo'bar has already been turned into $foo::bar, so
8308        just check for colons.
8309
8310        if it's a legal name, the OP is a PADANY.
8311     */
8312     if (PL_in_my) {
8313         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8314             if (has_colon)
8315                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8316                                   "variable %s in \"our\"",
8317                                   PL_tokenbuf));
8318             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8319         }
8320         else {
8321             if (has_colon)
8322                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8323                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8324
8325             pl_yylval.opval = newOP(OP_PADANY, 0);
8326             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8327                                                         UTF ? SVf_UTF8 : 0);
8328             return PRIVATEREF;
8329         }
8330     }
8331
8332     /*
8333        build the ops for accesses to a my() variable.
8334
8335        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8336        then used in a comparison.  This catches most, but not
8337        all cases.  For instance, it catches
8338            sort { my($a); $a <=> $b }
8339        but not
8340            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8341        (although why you'd do that is anyone's guess).
8342     */
8343
8344     if (!has_colon) {
8345         if (!PL_in_my)
8346             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8347                                     UTF ? SVf_UTF8 : 0);
8348         if (tmp != NOT_IN_PAD) {
8349             /* might be an "our" variable" */
8350             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8351                 /* build ops for a bareword */
8352                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8353                 HEK * const stashname = HvNAME_HEK(stash);
8354                 SV *  const sym = newSVhek(stashname);
8355                 sv_catpvs(sym, "::");
8356                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8357                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8358                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8359                 gv_fetchsv(sym,
8360                     (PL_in_eval
8361                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8362                         : GV_ADDMULTI
8363                     ),
8364                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8365                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8366                      : SVt_PVHV));
8367                 return WORD;
8368             }
8369
8370             /* if it's a sort block and they're naming $a or $b */
8371             if (PL_last_lop_op == OP_SORT &&
8372                 PL_tokenbuf[0] == '$' &&
8373                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8374                 && !PL_tokenbuf[2])
8375             {
8376                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8377                      d < PL_bufend && *d != '\n';
8378                      d++)
8379                 {
8380                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8381                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8382                               PL_tokenbuf);
8383                     }
8384                 }
8385             }
8386
8387             pl_yylval.opval = newOP(OP_PADANY, 0);
8388             pl_yylval.opval->op_targ = tmp;
8389             return PRIVATEREF;
8390         }
8391     }
8392
8393     /*
8394        Whine if they've said @foo in a doublequoted string,
8395        and @foo isn't a variable we can find in the symbol
8396        table.
8397     */
8398     if (ckWARN(WARN_AMBIGUOUS) &&
8399         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8400         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8401                                          SVt_PVAV);
8402         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8403                 /* DO NOT warn for @- and @+ */
8404                 && !( PL_tokenbuf[2] == '\0' &&
8405                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8406            )
8407         {
8408             /* Downgraded from fatal to warning 20000522 mjd */
8409             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8410                         "Possible unintended interpolation of %s in string",
8411                         PL_tokenbuf);
8412         }
8413     }
8414
8415     /* build ops for a bareword */
8416     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8417                                                       tokenbuf_len - 1));
8418     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8419     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8420                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8421                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8422                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8423                       : SVt_PVHV));
8424     return WORD;
8425 }
8426
8427 STATIC void
8428 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8429 {
8430     dVAR;
8431
8432     PERL_ARGS_ASSERT_CHECKCOMMA;
8433
8434     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8435         if (ckWARN(WARN_SYNTAX)) {
8436             int level = 1;
8437             const char *w;
8438             for (w = s+2; *w && level; w++) {
8439                 if (*w == '(')
8440                     ++level;
8441                 else if (*w == ')')
8442                     --level;
8443             }
8444             while (isSPACE(*w))
8445                 ++w;
8446             /* the list of chars below is for end of statements or
8447              * block / parens, boolean operators (&&, ||, //) and branch
8448              * constructs (or, and, if, until, unless, while, err, for).
8449              * Not a very solid hack... */
8450             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8451                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8452                             "%s (...) interpreted as function",name);
8453         }
8454     }
8455     while (s < PL_bufend && isSPACE(*s))
8456         s++;
8457     if (*s == '(')
8458         s++;
8459     while (s < PL_bufend && isSPACE(*s))
8460         s++;
8461     if (isIDFIRST_lazy_if(s,UTF)) {
8462         const char * const w = s++;
8463         while (isALNUM_lazy_if(s,UTF))
8464             s++;
8465         while (s < PL_bufend && isSPACE(*s))
8466             s++;
8467         if (*s == ',') {
8468             GV* gv;
8469             if (keyword(w, s - w, 0))
8470                 return;
8471
8472             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8473             if (gv && GvCVu(gv))
8474                 return;
8475             Perl_croak(aTHX_ "No comma allowed after %s", what);
8476         }
8477     }
8478 }
8479
8480 /* Either returns sv, or mortalizes sv and returns a new SV*.
8481    Best used as sv=new_constant(..., sv, ...).
8482    If s, pv are NULL, calls subroutine with one argument,
8483    and type is used with error messages only. */
8484
8485 STATIC SV *
8486 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8487                SV *sv, SV *pv, const char *type, STRLEN typelen)
8488 {
8489     dVAR; dSP;
8490     HV * const table = GvHV(PL_hintgv);          /* ^H */
8491     SV *res;
8492     SV **cvp;
8493     SV *cv, *typesv;
8494     const char *why1 = "", *why2 = "", *why3 = "";
8495
8496     PERL_ARGS_ASSERT_NEW_CONSTANT;
8497
8498     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8499         SV *msg;
8500
8501         why2 = (const char *)
8502             (strEQ(key,"charnames")
8503              ? "(possibly a missing \"use charnames ...\")"
8504              : "");
8505         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8506                             (type ? type: "undef"), why2);
8507
8508         /* This is convoluted and evil ("goto considered harmful")
8509          * but I do not understand the intricacies of all the different
8510          * failure modes of %^H in here.  The goal here is to make
8511          * the most probable error message user-friendly. --jhi */
8512
8513         goto msgdone;
8514
8515     report:
8516         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8517                             (type ? type: "undef"), why1, why2, why3);
8518     msgdone:
8519         yyerror(SvPVX_const(msg));
8520         SvREFCNT_dec(msg);
8521         return sv;
8522     }
8523
8524     /* charnames doesn't work well if there have been errors found */
8525     if (PL_error_count > 0 && strEQ(key,"charnames"))
8526         return &PL_sv_undef;
8527
8528     cvp = hv_fetch(table, key, keylen, FALSE);
8529     if (!cvp || !SvOK(*cvp)) {
8530         why1 = "$^H{";
8531         why2 = key;
8532         why3 = "} is not defined";
8533         goto report;
8534     }
8535     sv_2mortal(sv);                     /* Parent created it permanently */
8536     cv = *cvp;
8537     if (!pv && s)
8538         pv = newSVpvn_flags(s, len, SVs_TEMP);
8539     if (type && pv)
8540         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8541     else
8542         typesv = &PL_sv_undef;
8543
8544     PUSHSTACKi(PERLSI_OVERLOAD);
8545     ENTER ;
8546     SAVETMPS;
8547
8548     PUSHMARK(SP) ;
8549     EXTEND(sp, 3);
8550     if (pv)
8551         PUSHs(pv);
8552     PUSHs(sv);
8553     if (pv)
8554         PUSHs(typesv);
8555     PUTBACK;
8556     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8557
8558     SPAGAIN ;
8559
8560     /* Check the eval first */
8561     if (!PL_in_eval && SvTRUE(ERRSV)) {
8562         sv_catpvs(ERRSV, "Propagated");
8563         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8564         (void)POPs;
8565         res = SvREFCNT_inc_simple(sv);
8566     }
8567     else {
8568         res = POPs;
8569         SvREFCNT_inc_simple_void(res);
8570     }
8571
8572     PUTBACK ;
8573     FREETMPS ;
8574     LEAVE ;
8575     POPSTACK;
8576
8577     if (!SvOK(res)) {
8578         why1 = "Call to &{$^H{";
8579         why2 = key;
8580         why3 = "}} did not return a defined value";
8581         sv = res;
8582         goto report;
8583     }
8584
8585     return res;
8586 }
8587
8588 /* Returns a NUL terminated string, with the length of the string written to
8589    *slp
8590    */
8591 STATIC char *
8592 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8593 {
8594     dVAR;
8595     register char *d = dest;
8596     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8597
8598     PERL_ARGS_ASSERT_SCAN_WORD;
8599
8600     for (;;) {
8601         if (d >= e)
8602             Perl_croak(aTHX_ ident_too_long);
8603         if (isALNUM(*s))        /* UTF handled below */
8604             *d++ = *s++;
8605         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8606             *d++ = ':';
8607             *d++ = ':';
8608             s++;
8609         }
8610         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8611             *d++ = *s++;
8612             *d++ = *s++;
8613         }
8614         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8615             char *t = s + UTF8SKIP(s);
8616             size_t len;
8617             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8618                 t += UTF8SKIP(t);
8619             len = t - s;
8620             if (d + len > e)
8621                 Perl_croak(aTHX_ ident_too_long);
8622             Copy(s, d, len, char);
8623             d += len;
8624             s = t;
8625         }
8626         else {
8627             *d = '\0';
8628             *slp = d - dest;
8629             return s;
8630         }
8631     }
8632 }
8633
8634 STATIC char *
8635 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8636 {
8637     dVAR;
8638     char *bracket = NULL;
8639     char funny = *s++;
8640     register char *d = dest;
8641     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8642
8643     PERL_ARGS_ASSERT_SCAN_IDENT;
8644
8645     if (isSPACE(*s))
8646         s = PEEKSPACE(s);
8647     if (isDIGIT(*s)) {
8648         while (isDIGIT(*s)) {
8649             if (d >= e)
8650                 Perl_croak(aTHX_ ident_too_long);
8651             *d++ = *s++;
8652         }
8653     }
8654     else {
8655         for (;;) {
8656             if (d >= e)
8657                 Perl_croak(aTHX_ ident_too_long);
8658             if (isALNUM(*s))    /* UTF handled below */
8659                 *d++ = *s++;
8660             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8661                 *d++ = ':';
8662                 *d++ = ':';
8663                 s++;
8664             }
8665             else if (*s == ':' && s[1] == ':') {
8666                 *d++ = *s++;
8667                 *d++ = *s++;
8668             }
8669             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8670                 char *t = s + UTF8SKIP(s);
8671                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8672                     t += UTF8SKIP(t);
8673                 if (d + (t - s) > e)
8674                     Perl_croak(aTHX_ ident_too_long);
8675                 Copy(s, d, t - s, char);
8676                 d += t - s;
8677                 s = t;
8678             }
8679             else
8680                 break;
8681         }
8682     }
8683     *d = '\0';
8684     d = dest;
8685     if (*d) {
8686         if (PL_lex_state != LEX_NORMAL)
8687             PL_lex_state = LEX_INTERPENDMAYBE;
8688         return s;
8689     }
8690     if (*s == '$' && s[1] &&
8691         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8692     {
8693         return s;
8694     }
8695     if (*s == '{') {
8696         bracket = s;
8697         s++;
8698     }
8699     else if (ck_uni)
8700         check_uni();
8701     if (s < send)
8702         *d = *s++;
8703     d[1] = '\0';
8704     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8705         *d = toCTRL(*s);
8706         s++;
8707     }
8708     if (bracket) {
8709         if (isSPACE(s[-1])) {
8710             while (s < send) {
8711                 const char ch = *s++;
8712                 if (!SPACE_OR_TAB(ch)) {
8713                     *d = ch;
8714                     break;
8715                 }
8716             }
8717         }
8718         if (isIDFIRST_lazy_if(d,UTF)) {
8719             d++;
8720             if (UTF) {
8721                 char *end = s;
8722                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8723                     end += UTF8SKIP(end);
8724                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8725                         end += UTF8SKIP(end);
8726                 }
8727                 Copy(s, d, end - s, char);
8728                 d += end - s;
8729                 s = end;
8730             }
8731             else {
8732                 while ((isALNUM(*s) || *s == ':') && d < e)
8733                     *d++ = *s++;
8734                 if (d >= e)
8735                     Perl_croak(aTHX_ ident_too_long);
8736             }
8737             *d = '\0';
8738             while (s < send && SPACE_OR_TAB(*s))
8739                 s++;
8740             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8741                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8742                     const char * const brack =
8743                         (const char *)
8744                         ((*s == '[') ? "[...]" : "{...}");
8745    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8746                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8747                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8748                         funny, dest, brack, funny, dest, brack);
8749                 }
8750                 bracket++;
8751                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8752                 PL_lex_allbrackets++;
8753                 return s;
8754             }
8755         }
8756         /* Handle extended ${^Foo} variables
8757          * 1999-02-27 mjd-perl-patch@plover.com */
8758         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8759                  && isALNUM(*s))
8760         {
8761             d++;
8762             while (isALNUM(*s) && d < e) {
8763                 *d++ = *s++;
8764             }
8765             if (d >= e)
8766                 Perl_croak(aTHX_ ident_too_long);
8767             *d = '\0';
8768         }
8769         if (*s == '}') {
8770             s++;
8771             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8772                 PL_lex_state = LEX_INTERPEND;
8773                 PL_expect = XREF;
8774             }
8775             if (PL_lex_state == LEX_NORMAL) {
8776                 if (ckWARN(WARN_AMBIGUOUS) &&
8777                     (keyword(dest, d - dest, 0)
8778                      || get_cvn_flags(dest, d - dest, 0)))
8779                 {
8780                     if (funny == '#')
8781                         funny = '@';
8782                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8783                         "Ambiguous use of %c{%s} resolved to %c%s",
8784                         funny, dest, funny, dest);
8785                 }
8786             }
8787         }
8788         else {
8789             s = bracket;                /* let the parser handle it */
8790             *dest = '\0';
8791         }
8792     }
8793     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8794         PL_lex_state = LEX_INTERPEND;
8795     return s;
8796 }
8797
8798 static bool
8799 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8800
8801     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8802      * the parse starting at 's', based on the subset that are valid in this
8803      * context input to this routine in 'valid_flags'. Advances s.  Returns
8804      * TRUE if the input was a valid flag, so the next char may be as well;
8805      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8806      * current regex.  This routine will set it to any charset modifier found.
8807      * The caller shouldn't change it.  This way, another charset modifier
8808      * encountered in the parse can be detected as an error, as we have decided
8809      * allow only one */
8810
8811     const char c = **s;
8812
8813     if (! strchr(valid_flags, c)) {
8814         if (isALNUM(c)) {
8815             goto deprecate;
8816         }
8817         return FALSE;
8818     }
8819
8820     switch (c) {
8821
8822         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8823         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8824         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8825         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8826         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8827         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8828         case LOCALE_PAT_MOD:
8829
8830             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8831              * can't be regex modifiers.
8832              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8833              * much as easily done.  s///lei, for example, has to mean regex
8834              * modifiers if it's not an error (as does any word character
8835              * following the 'e').  Otherwise, we resolve to the backwards-
8836              * compatible, but less likely 's/// le ...', i.e. as meaning
8837              * less-than-or-equal.  The reason it's not likely is that s//
8838              * returns a number for code in the field (/r returns a string, but
8839              * that wasn't added until the 5.13 series), and so '<=' should be
8840              * used for comparing, not 'le'. */
8841             if (*((*s) + 1) == 't') {
8842                 goto deprecate;
8843             }
8844             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8845
8846                 /* 'e' is valid only for substitutes, s///e.  If it is not
8847                  * valid in the current context, then 'm//le' must mean the
8848                  * comparison operator, so use the regular deprecation message.
8849                  */
8850                 if (! strchr(valid_flags, 'e')) {
8851                     goto deprecate;
8852                 }
8853                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8854                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8855                 return FALSE;
8856             }
8857             if (*charset) {
8858                 goto multiple_charsets;
8859             }
8860             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8861             *charset = c;
8862             break;
8863         case UNICODE_PAT_MOD:
8864             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8865              * 'n' means they can't be regex modifiers */
8866             if (*((*s) + 1) == 'n') {
8867                 goto deprecate;
8868             }
8869             if (*charset) {
8870                 goto multiple_charsets;
8871             }
8872             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8873             *charset = c;
8874             break;
8875         case ASCII_RESTRICT_PAT_MOD:
8876             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8877              * can't be regex modifiers */
8878             if (*((*s) + 1) == 'n') {
8879                 goto deprecate;
8880             }
8881
8882             if (! *charset) {
8883                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8884             }
8885             else {
8886
8887                 /* Error if previous modifier wasn't an 'a', but if it was, see
8888                  * if, and accept, a second occurrence (only) */
8889                 if (*charset != 'a'
8890                     || get_regex_charset(*pmfl)
8891                         != REGEX_ASCII_RESTRICTED_CHARSET)
8892                 {
8893                         goto multiple_charsets;
8894                 }
8895                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8896             }
8897             *charset = c;
8898             break;
8899         case DEPENDS_PAT_MOD:
8900             if (*charset) {
8901                 goto multiple_charsets;
8902             }
8903             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8904             *charset = c;
8905             break;
8906     }
8907
8908     (*s)++;
8909     return TRUE;
8910
8911     deprecate:
8912         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8913             "Having no space between pattern and following word is deprecated");
8914         return FALSE;
8915
8916     multiple_charsets:
8917         if (*charset != c) {
8918             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8919         }
8920         else if (c == 'a') {
8921             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8922         }
8923         else {
8924             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8925         }
8926
8927         /* Pretend that it worked, so will continue processing before dieing */
8928         (*s)++;
8929         return TRUE;
8930 }
8931
8932 STATIC char *
8933 S_scan_pat(pTHX_ char *start, I32 type)
8934 {
8935     dVAR;
8936     PMOP *pm;
8937     char *s = scan_str(start,!!PL_madskills,FALSE);
8938     const char * const valid_flags =
8939         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8940     char charset = '\0';    /* character set modifier */
8941 #ifdef PERL_MAD
8942     char *modstart;
8943 #endif
8944
8945     PERL_ARGS_ASSERT_SCAN_PAT;
8946
8947     if (!s) {
8948         const char * const delimiter = skipspace(start);
8949         Perl_croak(aTHX_
8950                    (const char *)
8951                    (*delimiter == '?'
8952                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8953                     : "Search pattern not terminated" ));
8954     }
8955
8956     pm = (PMOP*)newPMOP(type, 0);
8957     if (PL_multi_open == '?') {
8958         /* This is the only point in the code that sets PMf_ONCE:  */
8959         pm->op_pmflags |= PMf_ONCE;
8960
8961         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8962            allows us to restrict the list needed by reset to just the ??
8963            matches.  */
8964         assert(type != OP_TRANS);
8965         if (PL_curstash) {
8966             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8967             U32 elements;
8968             if (!mg) {
8969                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8970                                  0);
8971             }
8972             elements = mg->mg_len / sizeof(PMOP**);
8973             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8974             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8975             mg->mg_len = elements * sizeof(PMOP**);
8976             PmopSTASH_set(pm,PL_curstash);
8977         }
8978     }
8979 #ifdef PERL_MAD
8980     modstart = s;
8981 #endif
8982     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8983 #ifdef PERL_MAD
8984     if (PL_madskills && modstart != s) {
8985         SV* tmptoken = newSVpvn(modstart, s - modstart);
8986         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8987     }
8988 #endif
8989     /* issue a warning if /c is specified,but /g is not */
8990     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8991     {
8992         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8993                        "Use of /c modifier is meaningless without /g" );
8994     }
8995
8996     PL_lex_op = (OP*)pm;
8997     pl_yylval.ival = OP_MATCH;
8998     return s;
8999 }
9000
9001 STATIC char *
9002 S_scan_subst(pTHX_ char *start)
9003 {
9004     dVAR;
9005     char *s;
9006     register PMOP *pm;
9007     I32 first_start;
9008     I32 es = 0;
9009     char charset = '\0';    /* character set modifier */
9010 #ifdef PERL_MAD
9011     char *modstart;
9012 #endif
9013
9014     PERL_ARGS_ASSERT_SCAN_SUBST;
9015
9016     pl_yylval.ival = OP_NULL;
9017
9018     s = scan_str(start,!!PL_madskills,FALSE);
9019
9020     if (!s)
9021         Perl_croak(aTHX_ "Substitution pattern not terminated");
9022
9023     if (s[-1] == PL_multi_open)
9024         s--;
9025 #ifdef PERL_MAD
9026     if (PL_madskills) {
9027         CURMAD('q', PL_thisopen);
9028         CURMAD('_', PL_thiswhite);
9029         CURMAD('E', PL_thisstuff);
9030         CURMAD('Q', PL_thisclose);
9031         PL_realtokenstart = s - SvPVX(PL_linestr);
9032     }
9033 #endif
9034
9035     first_start = PL_multi_start;
9036     s = scan_str(s,!!PL_madskills,FALSE);
9037     if (!s) {
9038         if (PL_lex_stuff) {
9039             SvREFCNT_dec(PL_lex_stuff);
9040             PL_lex_stuff = NULL;
9041         }
9042         Perl_croak(aTHX_ "Substitution replacement not terminated");
9043     }
9044     PL_multi_start = first_start;       /* so whole substitution is taken together */
9045
9046     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9047
9048 #ifdef PERL_MAD
9049     if (PL_madskills) {
9050         CURMAD('z', PL_thisopen);
9051         CURMAD('R', PL_thisstuff);
9052         CURMAD('Z', PL_thisclose);
9053     }
9054     modstart = s;
9055 #endif
9056
9057     while (*s) {
9058         if (*s == EXEC_PAT_MOD) {
9059             s++;
9060             es++;
9061         }
9062         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9063         {
9064             break;
9065         }
9066     }
9067
9068 #ifdef PERL_MAD
9069     if (PL_madskills) {
9070         if (modstart != s)
9071             curmad('m', newSVpvn(modstart, s - modstart));
9072         append_madprops(PL_thismad, (OP*)pm, 0);
9073         PL_thismad = 0;
9074     }
9075 #endif
9076     if ((pm->op_pmflags & PMf_CONTINUE)) {
9077         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9078     }
9079
9080     if (es) {
9081         SV * const repl = newSVpvs("");
9082
9083         PL_sublex_info.super_bufptr = s;
9084         PL_sublex_info.super_bufend = PL_bufend;
9085         PL_multi_end = 0;
9086         pm->op_pmflags |= PMf_EVAL;
9087         while (es-- > 0) {
9088             if (es)
9089                 sv_catpvs(repl, "eval ");
9090             else
9091                 sv_catpvs(repl, "do ");
9092         }
9093         sv_catpvs(repl, "{");
9094         sv_catsv(repl, PL_lex_repl);
9095         if (strchr(SvPVX(PL_lex_repl), '#'))
9096             sv_catpvs(repl, "\n");
9097         sv_catpvs(repl, "}");
9098         SvEVALED_on(repl);
9099         SvREFCNT_dec(PL_lex_repl);
9100         PL_lex_repl = repl;
9101     }
9102
9103     PL_lex_op = (OP*)pm;
9104     pl_yylval.ival = OP_SUBST;
9105     return s;
9106 }
9107
9108 STATIC char *
9109 S_scan_trans(pTHX_ char *start)
9110 {
9111     dVAR;
9112     register char* s;
9113     OP *o;
9114     short *tbl;
9115     U8 squash;
9116     U8 del;
9117     U8 complement;
9118     bool nondestruct = 0;
9119 #ifdef PERL_MAD
9120     char *modstart;
9121 #endif
9122
9123     PERL_ARGS_ASSERT_SCAN_TRANS;
9124
9125     pl_yylval.ival = OP_NULL;
9126
9127     s = scan_str(start,!!PL_madskills,FALSE);
9128     if (!s)
9129         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9130
9131     if (s[-1] == PL_multi_open)
9132         s--;
9133 #ifdef PERL_MAD
9134     if (PL_madskills) {
9135         CURMAD('q', PL_thisopen);
9136         CURMAD('_', PL_thiswhite);
9137         CURMAD('E', PL_thisstuff);
9138         CURMAD('Q', PL_thisclose);
9139         PL_realtokenstart = s - SvPVX(PL_linestr);
9140     }
9141 #endif
9142
9143     s = scan_str(s,!!PL_madskills,FALSE);
9144     if (!s) {
9145         if (PL_lex_stuff) {
9146             SvREFCNT_dec(PL_lex_stuff);
9147             PL_lex_stuff = NULL;
9148         }
9149         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9150     }
9151     if (PL_madskills) {
9152         CURMAD('z', PL_thisopen);
9153         CURMAD('R', PL_thisstuff);
9154         CURMAD('Z', PL_thisclose);
9155     }
9156
9157     complement = del = squash = 0;
9158 #ifdef PERL_MAD
9159     modstart = s;
9160 #endif
9161     while (1) {
9162         switch (*s) {
9163         case 'c':
9164             complement = OPpTRANS_COMPLEMENT;
9165             break;
9166         case 'd':
9167             del = OPpTRANS_DELETE;
9168             break;
9169         case 's':
9170             squash = OPpTRANS_SQUASH;
9171             break;
9172         case 'r':
9173             nondestruct = 1;
9174             break;
9175         default:
9176             goto no_more;
9177         }
9178         s++;
9179     }
9180   no_more:
9181
9182     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9183     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9184     o->op_private &= ~OPpTRANS_ALL;
9185     o->op_private |= del|squash|complement|
9186       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9187       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9188
9189     PL_lex_op = o;
9190     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9191
9192 #ifdef PERL_MAD
9193     if (PL_madskills) {
9194         if (modstart != s)
9195             curmad('m', newSVpvn(modstart, s - modstart));
9196         append_madprops(PL_thismad, o, 0);
9197         PL_thismad = 0;
9198     }
9199 #endif
9200
9201     return s;
9202 }
9203
9204 STATIC char *
9205 S_scan_heredoc(pTHX_ register char *s)
9206 {
9207     dVAR;
9208     SV *herewas;
9209     I32 op_type = OP_SCALAR;
9210     I32 len;
9211     SV *tmpstr;
9212     char term;
9213     const char *found_newline;
9214     register char *d;
9215     register char *e;
9216     char *peek;
9217     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9218 #ifdef PERL_MAD
9219     I32 stuffstart = s - SvPVX(PL_linestr);
9220     char *tstart;
9221
9222     PL_realtokenstart = -1;
9223 #endif
9224
9225     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9226
9227     s += 2;
9228     d = PL_tokenbuf;
9229     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9230     if (!outer)
9231         *d++ = '\n';
9232     peek = s;
9233     while (SPACE_OR_TAB(*peek))
9234         peek++;
9235     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9236         s = peek;
9237         term = *s++;
9238         s = delimcpy(d, e, s, PL_bufend, term, &len);
9239         d += len;
9240         if (s < PL_bufend)
9241             s++;
9242     }
9243     else {
9244         if (*s == '\\')
9245             s++, term = '\'';
9246         else
9247             term = '"';
9248         if (!isALNUM_lazy_if(s,UTF))
9249             deprecate("bare << to mean <<\"\"");
9250         for (; isALNUM_lazy_if(s,UTF); s++) {
9251             if (d < e)
9252                 *d++ = *s;
9253         }
9254     }
9255     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9256         Perl_croak(aTHX_ "Delimiter for here document is too long");
9257     *d++ = '\n';
9258     *d = '\0';
9259     len = d - PL_tokenbuf;
9260
9261 #ifdef PERL_MAD
9262     if (PL_madskills) {
9263         tstart = PL_tokenbuf + !outer;
9264         PL_thisclose = newSVpvn(tstart, len - !outer);
9265         tstart = SvPVX(PL_linestr) + stuffstart;
9266         PL_thisopen = newSVpvn(tstart, s - tstart);
9267         stuffstart = s - SvPVX(PL_linestr);
9268     }
9269 #endif
9270 #ifndef PERL_STRICT_CR
9271     d = strchr(s, '\r');
9272     if (d) {
9273         char * const olds = s;
9274         s = d;
9275         while (s < PL_bufend) {
9276             if (*s == '\r') {
9277                 *d++ = '\n';
9278                 if (*++s == '\n')
9279                     s++;
9280             }
9281             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9282                 *d++ = *s++;
9283                 s++;
9284             }
9285             else
9286                 *d++ = *s++;
9287         }
9288         *d = '\0';
9289         PL_bufend = d;
9290         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9291         s = olds;
9292     }
9293 #endif
9294 #ifdef PERL_MAD
9295     found_newline = 0;
9296 #endif
9297     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9298         herewas = newSVpvn(s,PL_bufend-s);
9299     }
9300     else {
9301 #ifdef PERL_MAD
9302         herewas = newSVpvn(s-1,found_newline-s+1);
9303 #else
9304         s--;
9305         herewas = newSVpvn(s,found_newline-s);
9306 #endif
9307     }
9308 #ifdef PERL_MAD
9309     if (PL_madskills) {
9310         tstart = SvPVX(PL_linestr) + stuffstart;
9311         if (PL_thisstuff)
9312             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9313         else
9314             PL_thisstuff = newSVpvn(tstart, s - tstart);
9315     }
9316 #endif
9317     s += SvCUR(herewas);
9318
9319 #ifdef PERL_MAD
9320     stuffstart = s - SvPVX(PL_linestr);
9321
9322     if (found_newline)
9323         s--;
9324 #endif
9325
9326     tmpstr = newSV_type(SVt_PVIV);
9327     SvGROW(tmpstr, 80);
9328     if (term == '\'') {
9329         op_type = OP_CONST;
9330         SvIV_set(tmpstr, -1);
9331     }
9332     else if (term == '`') {
9333         op_type = OP_BACKTICK;
9334         SvIV_set(tmpstr, '\\');
9335     }
9336
9337     CLINE;
9338     PL_multi_start = CopLINE(PL_curcop);
9339     PL_multi_open = PL_multi_close = '<';
9340     term = *PL_tokenbuf;
9341     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9342         char * const bufptr = PL_sublex_info.super_bufptr;
9343         char * const bufend = PL_sublex_info.super_bufend;
9344         char * const olds = s - SvCUR(herewas);
9345         s = strchr(bufptr, '\n');
9346         if (!s)
9347             s = bufend;
9348         d = s;
9349         while (s < bufend &&
9350           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9351             if (*s++ == '\n')
9352                 CopLINE_inc(PL_curcop);
9353         }
9354         if (s >= bufend) {
9355             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9356             missingterm(PL_tokenbuf);
9357         }
9358         sv_setpvn(herewas,bufptr,d-bufptr+1);
9359         sv_setpvn(tmpstr,d+1,s-d);
9360         s += len - 1;
9361         sv_catpvn(herewas,s,bufend-s);
9362         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9363
9364         s = olds;
9365         goto retval;
9366     }
9367     else if (!outer) {
9368         d = s;
9369         while (s < PL_bufend &&
9370           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9371             if (*s++ == '\n')
9372                 CopLINE_inc(PL_curcop);
9373         }
9374         if (s >= PL_bufend) {
9375             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9376             missingterm(PL_tokenbuf);
9377         }
9378         sv_setpvn(tmpstr,d+1,s-d);
9379 #ifdef PERL_MAD
9380         if (PL_madskills) {
9381             if (PL_thisstuff)
9382                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9383             else
9384                 PL_thisstuff = newSVpvn(d + 1, s - d);
9385             stuffstart = s - SvPVX(PL_linestr);
9386         }
9387 #endif
9388         s += len - 1;
9389         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9390
9391         sv_catpvn(herewas,s,PL_bufend-s);
9392         sv_setsv(PL_linestr,herewas);
9393         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9394         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9395         PL_last_lop = PL_last_uni = NULL;
9396     }
9397     else
9398         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9399     while (s >= PL_bufend) {    /* multiple line string? */
9400 #ifdef PERL_MAD
9401         if (PL_madskills) {
9402             tstart = SvPVX(PL_linestr) + stuffstart;
9403             if (PL_thisstuff)
9404                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9405             else
9406                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9407         }
9408 #endif
9409         PL_bufptr = s;
9410         CopLINE_inc(PL_curcop);
9411         if (!outer || !lex_next_chunk(0)) {
9412             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9413             missingterm(PL_tokenbuf);
9414         }
9415         CopLINE_dec(PL_curcop);
9416         s = PL_bufptr;
9417 #ifdef PERL_MAD
9418         stuffstart = s - SvPVX(PL_linestr);
9419 #endif
9420         CopLINE_inc(PL_curcop);
9421         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9422         PL_last_lop = PL_last_uni = NULL;
9423 #ifndef PERL_STRICT_CR
9424         if (PL_bufend - PL_linestart >= 2) {
9425             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9426                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9427             {
9428                 PL_bufend[-2] = '\n';
9429                 PL_bufend--;
9430                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9431             }
9432             else if (PL_bufend[-1] == '\r')
9433                 PL_bufend[-1] = '\n';
9434         }
9435         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9436             PL_bufend[-1] = '\n';
9437 #endif
9438         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9439             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9440             *(SvPVX(PL_linestr) + off ) = ' ';
9441             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9442             sv_catsv(PL_linestr,herewas);
9443             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9444             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9445         }
9446         else {
9447             s = PL_bufend;
9448             sv_catsv(tmpstr,PL_linestr);
9449         }
9450     }
9451     s++;
9452 retval:
9453     PL_multi_end = CopLINE(PL_curcop);
9454     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9455         SvPV_shrink_to_cur(tmpstr);
9456     }
9457     SvREFCNT_dec(herewas);
9458     if (!IN_BYTES) {
9459         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9460             SvUTF8_on(tmpstr);
9461         else if (PL_encoding)
9462             sv_recode_to_utf8(tmpstr, PL_encoding);
9463     }
9464     PL_lex_stuff = tmpstr;
9465     pl_yylval.ival = op_type;
9466     return s;
9467 }
9468
9469 /* scan_inputsymbol
9470    takes: current position in input buffer
9471    returns: new position in input buffer
9472    side-effects: pl_yylval and lex_op are set.
9473
9474    This code handles:
9475
9476    <>           read from ARGV
9477    <FH>         read from filehandle
9478    <pkg::FH>    read from package qualified filehandle
9479    <pkg'FH>     read from package qualified filehandle
9480    <$fh>        read from filehandle in $fh
9481    <*.h>        filename glob
9482
9483 */
9484
9485 STATIC char *
9486 S_scan_inputsymbol(pTHX_ char *start)
9487 {
9488     dVAR;
9489     register char *s = start;           /* current position in buffer */
9490     char *end;
9491     I32 len;
9492     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9493     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9494
9495     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9496
9497     end = strchr(s, '\n');
9498     if (!end)
9499         end = PL_bufend;
9500     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9501
9502     /* die if we didn't have space for the contents of the <>,
9503        or if it didn't end, or if we see a newline
9504     */
9505
9506     if (len >= (I32)sizeof PL_tokenbuf)
9507         Perl_croak(aTHX_ "Excessively long <> operator");
9508     if (s >= end)
9509         Perl_croak(aTHX_ "Unterminated <> operator");
9510
9511     s++;
9512
9513     /* check for <$fh>
9514        Remember, only scalar variables are interpreted as filehandles by
9515        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9516        treated as a glob() call.
9517        This code makes use of the fact that except for the $ at the front,
9518        a scalar variable and a filehandle look the same.
9519     */
9520     if (*d == '$' && d[1]) d++;
9521
9522     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9523     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9524         d++;
9525
9526     /* If we've tried to read what we allow filehandles to look like, and
9527        there's still text left, then it must be a glob() and not a getline.
9528        Use scan_str to pull out the stuff between the <> and treat it
9529        as nothing more than a string.
9530     */
9531
9532     if (d - PL_tokenbuf != len) {
9533         pl_yylval.ival = OP_GLOB;
9534         s = scan_str(start,!!PL_madskills,FALSE);
9535         if (!s)
9536            Perl_croak(aTHX_ "Glob not terminated");
9537         return s;
9538     }
9539     else {
9540         bool readline_overriden = FALSE;
9541         GV *gv_readline;
9542         GV **gvp;
9543         /* we're in a filehandle read situation */
9544         d = PL_tokenbuf;
9545
9546         /* turn <> into <ARGV> */
9547         if (!len)
9548             Copy("ARGV",d,5,char);
9549
9550         /* Check whether readline() is overriden */
9551         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9552         if ((gv_readline
9553                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9554                 ||
9555                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9556                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9557                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9558             readline_overriden = TRUE;
9559
9560         /* if <$fh>, create the ops to turn the variable into a
9561            filehandle
9562         */
9563         if (*d == '$') {
9564             /* try to find it in the pad for this block, otherwise find
9565                add symbol table ops
9566             */
9567             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9568             if (tmp != NOT_IN_PAD) {
9569                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9570                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9571                     HEK * const stashname = HvNAME_HEK(stash);
9572                     SV * const sym = sv_2mortal(newSVhek(stashname));
9573                     sv_catpvs(sym, "::");
9574                     sv_catpv(sym, d+1);
9575                     d = SvPVX(sym);
9576                     goto intro_sym;
9577                 }
9578                 else {
9579                     OP * const o = newOP(OP_PADSV, 0);
9580                     o->op_targ = tmp;
9581                     PL_lex_op = readline_overriden
9582                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9583                                 op_append_elem(OP_LIST, o,
9584                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9585                         : (OP*)newUNOP(OP_READLINE, 0, o);
9586                 }
9587             }
9588             else {
9589                 GV *gv;
9590                 ++d;
9591 intro_sym:
9592                 gv = gv_fetchpv(d,
9593                                 (PL_in_eval
9594                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9595                                  : GV_ADDMULTI),
9596                                 SVt_PV);
9597                 PL_lex_op = readline_overriden
9598                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9599                             op_append_elem(OP_LIST,
9600                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9601                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9602                     : (OP*)newUNOP(OP_READLINE, 0,
9603                             newUNOP(OP_RV2SV, 0,
9604                                 newGVOP(OP_GV, 0, gv)));
9605             }
9606             if (!readline_overriden)
9607                 PL_lex_op->op_flags |= OPf_SPECIAL;
9608             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9609             pl_yylval.ival = OP_NULL;
9610         }
9611
9612         /* If it's none of the above, it must be a literal filehandle
9613            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9614         else {
9615             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9616             PL_lex_op = readline_overriden
9617                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9618                         op_append_elem(OP_LIST,
9619                             newGVOP(OP_GV, 0, gv),
9620                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9621                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9622             pl_yylval.ival = OP_NULL;
9623         }
9624     }
9625
9626     return s;
9627 }
9628
9629
9630 /* scan_str
9631    takes: start position in buffer
9632           keep_quoted preserve \ on the embedded delimiter(s)
9633           keep_delims preserve the delimiters around the string
9634    returns: position to continue reading from buffer
9635    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9636         updates the read buffer.
9637
9638    This subroutine pulls a string out of the input.  It is called for:
9639         q               single quotes           q(literal text)
9640         '               single quotes           'literal text'
9641         qq              double quotes           qq(interpolate $here please)
9642         "               double quotes           "interpolate $here please"
9643         qx              backticks               qx(/bin/ls -l)
9644         `               backticks               `/bin/ls -l`
9645         qw              quote words             @EXPORT_OK = qw( func() $spam )
9646         m//             regexp match            m/this/
9647         s///            regexp substitute       s/this/that/
9648         tr///           string transliterate    tr/this/that/
9649         y///            string transliterate    y/this/that/
9650         ($*@)           sub prototypes          sub foo ($)
9651         (stuff)         sub attr parameters     sub foo : attr(stuff)
9652         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9653
9654    In most of these cases (all but <>, patterns and transliterate)
9655    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9656    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9657    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9658    calls scan_str().
9659
9660    It skips whitespace before the string starts, and treats the first
9661    character as the delimiter.  If the delimiter is one of ([{< then
9662    the corresponding "close" character )]}> is used as the closing
9663    delimiter.  It allows quoting of delimiters, and if the string has
9664    balanced delimiters ([{<>}]) it allows nesting.
9665
9666    On success, the SV with the resulting string is put into lex_stuff or,
9667    if that is already non-NULL, into lex_repl. The second case occurs only
9668    when parsing the RHS of the special constructs s/// and tr/// (y///).
9669    For convenience, the terminating delimiter character is stuffed into
9670    SvIVX of the SV.
9671 */
9672
9673 STATIC char *
9674 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9675 {
9676     dVAR;
9677     SV *sv;                             /* scalar value: string */
9678     const char *tmps;                   /* temp string, used for delimiter matching */
9679     register char *s = start;           /* current position in the buffer */
9680     register char term;                 /* terminating character */
9681     register char *to;                  /* current position in the sv's data */
9682     I32 brackets = 1;                   /* bracket nesting level */
9683     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9684     I32 termcode;                       /* terminating char. code */
9685     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9686     STRLEN termlen;                     /* length of terminating string */
9687     int last_off = 0;                   /* last position for nesting bracket */
9688 #ifdef PERL_MAD
9689     int stuffstart;
9690     char *tstart;
9691 #endif
9692
9693     PERL_ARGS_ASSERT_SCAN_STR;
9694
9695     /* skip space before the delimiter */
9696     if (isSPACE(*s)) {
9697         s = PEEKSPACE(s);
9698     }
9699
9700 #ifdef PERL_MAD
9701     if (PL_realtokenstart >= 0) {
9702         stuffstart = PL_realtokenstart;
9703         PL_realtokenstart = -1;
9704     }
9705     else
9706         stuffstart = start - SvPVX(PL_linestr);
9707 #endif
9708     /* mark where we are, in case we need to report errors */
9709     CLINE;
9710
9711     /* after skipping whitespace, the next character is the terminator */
9712     term = *s;
9713     if (!UTF) {
9714         termcode = termstr[0] = term;
9715         termlen = 1;
9716     }
9717     else {
9718         termcode = utf8_to_uvchr((U8*)s, &termlen);
9719         Copy(s, termstr, termlen, U8);
9720         if (!UTF8_IS_INVARIANT(term))
9721             has_utf8 = TRUE;
9722     }
9723
9724     /* mark where we are */
9725     PL_multi_start = CopLINE(PL_curcop);
9726     PL_multi_open = term;
9727
9728     /* find corresponding closing delimiter */
9729     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9730         termcode = termstr[0] = term = tmps[5];
9731
9732     PL_multi_close = term;
9733
9734     /* create a new SV to hold the contents.  79 is the SV's initial length.
9735        What a random number. */
9736     sv = newSV_type(SVt_PVIV);
9737     SvGROW(sv, 80);
9738     SvIV_set(sv, termcode);
9739     (void)SvPOK_only(sv);               /* validate pointer */
9740
9741     /* move past delimiter and try to read a complete string */
9742     if (keep_delims)
9743         sv_catpvn(sv, s, termlen);
9744     s += termlen;
9745 #ifdef PERL_MAD
9746     tstart = SvPVX(PL_linestr) + stuffstart;
9747     if (!PL_thisopen && !keep_delims) {
9748         PL_thisopen = newSVpvn(tstart, s - tstart);
9749         stuffstart = s - SvPVX(PL_linestr);
9750     }
9751 #endif
9752     for (;;) {
9753         if (PL_encoding && !UTF) {
9754             bool cont = TRUE;
9755
9756             while (cont) {
9757                 int offset = s - SvPVX_const(PL_linestr);
9758                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9759                                            &offset, (char*)termstr, termlen);
9760                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9761                 char * const svlast = SvEND(sv) - 1;
9762
9763                 for (; s < ns; s++) {
9764                     if (*s == '\n' && !PL_rsfp)
9765                         CopLINE_inc(PL_curcop);
9766                 }
9767                 if (!found)
9768                     goto read_more_line;
9769                 else {
9770                     /* handle quoted delimiters */
9771                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9772                         const char *t;
9773                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9774                             t--;
9775                         if ((svlast-1 - t) % 2) {
9776                             if (!keep_quoted) {
9777                                 *(svlast-1) = term;
9778                                 *svlast = '\0';
9779                                 SvCUR_set(sv, SvCUR(sv) - 1);
9780                             }
9781                             continue;
9782                         }
9783                     }
9784                     if (PL_multi_open == PL_multi_close) {
9785                         cont = FALSE;
9786                     }
9787                     else {
9788                         const char *t;
9789                         char *w;
9790                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9791                             /* At here, all closes are "was quoted" one,
9792                                so we don't check PL_multi_close. */
9793                             if (*t == '\\') {
9794                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9795                                     t++;
9796                                 else
9797                                     *w++ = *t++;
9798                             }
9799                             else if (*t == PL_multi_open)
9800                                 brackets++;
9801
9802                             *w = *t;
9803                         }
9804                         if (w < t) {
9805                             *w++ = term;
9806                             *w = '\0';
9807                             SvCUR_set(sv, w - SvPVX_const(sv));
9808                         }
9809                         last_off = w - SvPVX(sv);
9810                         if (--brackets <= 0)
9811                             cont = FALSE;
9812                     }
9813                 }
9814             }
9815             if (!keep_delims) {
9816                 SvCUR_set(sv, SvCUR(sv) - 1);
9817                 *SvEND(sv) = '\0';
9818             }
9819             break;
9820         }
9821
9822         /* extend sv if need be */
9823         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9824         /* set 'to' to the next character in the sv's string */
9825         to = SvPVX(sv)+SvCUR(sv);
9826
9827         /* if open delimiter is the close delimiter read unbridle */
9828         if (PL_multi_open == PL_multi_close) {
9829             for (; s < PL_bufend; s++,to++) {
9830                 /* embedded newlines increment the current line number */
9831                 if (*s == '\n' && !PL_rsfp)
9832                     CopLINE_inc(PL_curcop);
9833                 /* handle quoted delimiters */
9834                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9835                     if (!keep_quoted && s[1] == term)
9836                         s++;
9837                 /* any other quotes are simply copied straight through */
9838                     else
9839                         *to++ = *s++;
9840                 }
9841                 /* terminate when run out of buffer (the for() condition), or
9842                    have found the terminator */
9843                 else if (*s == term) {
9844                     if (termlen == 1)
9845                         break;
9846                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9847                         break;
9848                 }
9849                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9850                     has_utf8 = TRUE;
9851                 *to = *s;
9852             }
9853         }
9854
9855         /* if the terminator isn't the same as the start character (e.g.,
9856            matched brackets), we have to allow more in the quoting, and
9857            be prepared for nested brackets.
9858         */
9859         else {
9860             /* read until we run out of string, or we find the terminator */
9861             for (; s < PL_bufend; s++,to++) {
9862                 /* embedded newlines increment the line count */
9863                 if (*s == '\n' && !PL_rsfp)
9864                     CopLINE_inc(PL_curcop);
9865                 /* backslashes can escape the open or closing characters */
9866                 if (*s == '\\' && s+1 < PL_bufend) {
9867                     if (!keep_quoted &&
9868                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9869                         s++;
9870                     else
9871                         *to++ = *s++;
9872                 }
9873                 /* allow nested opens and closes */
9874                 else if (*s == PL_multi_close && --brackets <= 0)
9875                     break;
9876                 else if (*s == PL_multi_open)
9877                     brackets++;
9878                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9879                     has_utf8 = TRUE;
9880                 *to = *s;
9881             }
9882         }
9883         /* terminate the copied string and update the sv's end-of-string */
9884         *to = '\0';
9885         SvCUR_set(sv, to - SvPVX_const(sv));
9886
9887         /*
9888          * this next chunk reads more into the buffer if we're not done yet
9889          */
9890
9891         if (s < PL_bufend)
9892             break;              /* handle case where we are done yet :-) */
9893
9894 #ifndef PERL_STRICT_CR
9895         if (to - SvPVX_const(sv) >= 2) {
9896             if ((to[-2] == '\r' && to[-1] == '\n') ||
9897                 (to[-2] == '\n' && to[-1] == '\r'))
9898             {
9899                 to[-2] = '\n';
9900                 to--;
9901                 SvCUR_set(sv, to - SvPVX_const(sv));
9902             }
9903             else if (to[-1] == '\r')
9904                 to[-1] = '\n';
9905         }
9906         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9907             to[-1] = '\n';
9908 #endif
9909
9910      read_more_line:
9911         /* if we're out of file, or a read fails, bail and reset the current
9912            line marker so we can report where the unterminated string began
9913         */
9914 #ifdef PERL_MAD
9915         if (PL_madskills) {
9916             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9917             if (PL_thisstuff)
9918                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9919             else
9920                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9921         }
9922 #endif
9923         CopLINE_inc(PL_curcop);
9924         PL_bufptr = PL_bufend;
9925         if (!lex_next_chunk(0)) {
9926             sv_free(sv);
9927             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9928             return NULL;
9929         }
9930         s = PL_bufptr;
9931 #ifdef PERL_MAD
9932         stuffstart = 0;
9933 #endif
9934     }
9935
9936     /* at this point, we have successfully read the delimited string */
9937
9938     if (!PL_encoding || UTF) {
9939 #ifdef PERL_MAD
9940         if (PL_madskills) {
9941             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9942             const int len = s - tstart;
9943             if (PL_thisstuff)
9944                 sv_catpvn(PL_thisstuff, tstart, len);
9945             else
9946                 PL_thisstuff = newSVpvn(tstart, len);
9947             if (!PL_thisclose && !keep_delims)
9948                 PL_thisclose = newSVpvn(s,termlen);
9949         }
9950 #endif
9951
9952         if (keep_delims)
9953             sv_catpvn(sv, s, termlen);
9954         s += termlen;
9955     }
9956 #ifdef PERL_MAD
9957     else {
9958         if (PL_madskills) {
9959             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9960             const int len = s - tstart - termlen;
9961             if (PL_thisstuff)
9962                 sv_catpvn(PL_thisstuff, tstart, len);
9963             else
9964                 PL_thisstuff = newSVpvn(tstart, len);
9965             if (!PL_thisclose && !keep_delims)
9966                 PL_thisclose = newSVpvn(s - termlen,termlen);
9967         }
9968     }
9969 #endif
9970     if (has_utf8 || PL_encoding)
9971         SvUTF8_on(sv);
9972
9973     PL_multi_end = CopLINE(PL_curcop);
9974
9975     /* if we allocated too much space, give some back */
9976     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9977         SvLEN_set(sv, SvCUR(sv) + 1);
9978         SvPV_renew(sv, SvLEN(sv));
9979     }
9980
9981     /* decide whether this is the first or second quoted string we've read
9982        for this op
9983     */
9984
9985     if (PL_lex_stuff)
9986         PL_lex_repl = sv;
9987     else
9988         PL_lex_stuff = sv;
9989     return s;
9990 }
9991
9992 /*
9993   scan_num
9994   takes: pointer to position in buffer
9995   returns: pointer to new position in buffer
9996   side-effects: builds ops for the constant in pl_yylval.op
9997
9998   Read a number in any of the formats that Perl accepts:
9999
10000   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10001   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10002   0b[01](_?[01])*
10003   0[0-7](_?[0-7])*
10004   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10005
10006   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10007   thing it reads.
10008
10009   If it reads a number without a decimal point or an exponent, it will
10010   try converting the number to an integer and see if it can do so
10011   without loss of precision.
10012 */
10013
10014 char *
10015 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10016 {
10017     dVAR;
10018     register const char *s = start;     /* current position in buffer */
10019     register char *d;                   /* destination in temp buffer */
10020     register char *e;                   /* end of temp buffer */
10021     NV nv;                              /* number read, as a double */
10022     SV *sv = NULL;                      /* place to put the converted number */
10023     bool floatit;                       /* boolean: int or float? */
10024     const char *lastub = NULL;          /* position of last underbar */
10025     static char const number_too_long[] = "Number too long";
10026
10027     PERL_ARGS_ASSERT_SCAN_NUM;
10028
10029     /* We use the first character to decide what type of number this is */
10030
10031     switch (*s) {
10032     default:
10033       Perl_croak(aTHX_ "panic: scan_num");
10034
10035     /* if it starts with a 0, it could be an octal number, a decimal in
10036        0.13 disguise, or a hexadecimal number, or a binary number. */
10037     case '0':
10038         {
10039           /* variables:
10040              u          holds the "number so far"
10041              shift      the power of 2 of the base
10042                         (hex == 4, octal == 3, binary == 1)
10043              overflowed was the number more than we can hold?
10044
10045              Shift is used when we add a digit.  It also serves as an "are
10046              we in octal/hex/binary?" indicator to disallow hex characters
10047              when in octal mode.
10048            */
10049             NV n = 0.0;
10050             UV u = 0;
10051             I32 shift;
10052             bool overflowed = FALSE;
10053             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10054             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10055             static const char* const bases[5] =
10056               { "", "binary", "", "octal", "hexadecimal" };
10057             static const char* const Bases[5] =
10058               { "", "Binary", "", "Octal", "Hexadecimal" };
10059             static const char* const maxima[5] =
10060               { "",
10061                 "0b11111111111111111111111111111111",
10062                 "",
10063                 "037777777777",
10064                 "0xffffffff" };
10065             const char *base, *Base, *max;
10066
10067             /* check for hex */
10068             if (s[1] == 'x' || s[1] == 'X') {
10069                 shift = 4;
10070                 s += 2;
10071                 just_zero = FALSE;
10072             } else if (s[1] == 'b' || s[1] == 'B') {
10073                 shift = 1;
10074                 s += 2;
10075                 just_zero = FALSE;
10076             }
10077             /* check for a decimal in disguise */
10078             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10079                 goto decimal;
10080             /* so it must be octal */
10081             else {
10082                 shift = 3;
10083                 s++;
10084             }
10085
10086             if (*s == '_') {
10087                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10088                                "Misplaced _ in number");
10089                lastub = s++;
10090             }
10091
10092             base = bases[shift];
10093             Base = Bases[shift];
10094             max  = maxima[shift];
10095
10096             /* read the rest of the number */
10097             for (;;) {
10098                 /* x is used in the overflow test,
10099                    b is the digit we're adding on. */
10100                 UV x, b;
10101
10102                 switch (*s) {
10103
10104                 /* if we don't mention it, we're done */
10105                 default:
10106                     goto out;
10107
10108                 /* _ are ignored -- but warned about if consecutive */
10109                 case '_':
10110                     if (lastub && s == lastub + 1)
10111                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10112                                        "Misplaced _ in number");
10113                     lastub = s++;
10114                     break;
10115
10116                 /* 8 and 9 are not octal */
10117                 case '8': case '9':
10118                     if (shift == 3)
10119                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10120                     /* FALL THROUGH */
10121
10122                 /* octal digits */
10123                 case '2': case '3': case '4':
10124                 case '5': case '6': case '7':
10125                     if (shift == 1)
10126                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10127                     /* FALL THROUGH */
10128
10129                 case '0': case '1':
10130                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10131                     goto digit;
10132
10133                 /* hex digits */
10134                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10135                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10136                     /* make sure they said 0x */
10137                     if (shift != 4)
10138                         goto out;
10139                     b = (*s++ & 7) + 9;
10140
10141                     /* Prepare to put the digit we have onto the end
10142                        of the number so far.  We check for overflows.
10143                     */
10144
10145                   digit:
10146                     just_zero = FALSE;
10147                     if (!overflowed) {
10148                         x = u << shift; /* make room for the digit */
10149
10150                         if ((x >> shift) != u
10151                             && !(PL_hints & HINT_NEW_BINARY)) {
10152                             overflowed = TRUE;
10153                             n = (NV) u;
10154                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10155                                              "Integer overflow in %s number",
10156                                              base);
10157                         } else
10158                             u = x | b;          /* add the digit to the end */
10159                     }
10160                     if (overflowed) {
10161                         n *= nvshift[shift];
10162                         /* If an NV has not enough bits in its
10163                          * mantissa to represent an UV this summing of
10164                          * small low-order numbers is a waste of time
10165                          * (because the NV cannot preserve the
10166                          * low-order bits anyway): we could just
10167                          * remember when did we overflow and in the
10168                          * end just multiply n by the right
10169                          * amount. */
10170                         n += (NV) b;
10171                     }
10172                     break;
10173                 }
10174             }
10175
10176           /* if we get here, we had success: make a scalar value from
10177              the number.
10178           */
10179           out:
10180
10181             /* final misplaced underbar check */
10182             if (s[-1] == '_') {
10183                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10184             }
10185
10186             if (overflowed) {
10187                 if (n > 4294967295.0)
10188                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10189                                    "%s number > %s non-portable",
10190                                    Base, max);
10191                 sv = newSVnv(n);
10192             }
10193             else {
10194 #if UVSIZE > 4
10195                 if (u > 0xffffffff)
10196                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10197                                    "%s number > %s non-portable",
10198                                    Base, max);
10199 #endif
10200                 sv = newSVuv(u);
10201             }
10202             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10203                 sv = new_constant(start, s - start, "integer",
10204                                   sv, NULL, NULL, 0);
10205             else if (PL_hints & HINT_NEW_BINARY)
10206                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10207         }
10208         break;
10209
10210     /*
10211       handle decimal numbers.
10212       we're also sent here when we read a 0 as the first digit
10213     */
10214     case '1': case '2': case '3': case '4': case '5':
10215     case '6': case '7': case '8': case '9': case '.':
10216       decimal:
10217         d = PL_tokenbuf;
10218         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10219         floatit = FALSE;
10220
10221         /* read next group of digits and _ and copy into d */
10222         while (isDIGIT(*s) || *s == '_') {
10223             /* skip underscores, checking for misplaced ones
10224                if -w is on
10225             */
10226             if (*s == '_') {
10227                 if (lastub && s == lastub + 1)
10228                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10229                                    "Misplaced _ in number");
10230                 lastub = s++;
10231             }
10232             else {
10233                 /* check for end of fixed-length buffer */
10234                 if (d >= e)
10235                     Perl_croak(aTHX_ number_too_long);
10236                 /* if we're ok, copy the character */
10237                 *d++ = *s++;
10238             }
10239         }
10240
10241         /* final misplaced underbar check */
10242         if (lastub && s == lastub + 1) {
10243             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10244         }
10245
10246         /* read a decimal portion if there is one.  avoid
10247            3..5 being interpreted as the number 3. followed
10248            by .5
10249         */
10250         if (*s == '.' && s[1] != '.') {
10251             floatit = TRUE;
10252             *d++ = *s++;
10253
10254             if (*s == '_') {
10255                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10256                                "Misplaced _ in number");
10257                 lastub = s;
10258             }
10259
10260             /* copy, ignoring underbars, until we run out of digits.
10261             */
10262             for (; isDIGIT(*s) || *s == '_'; s++) {
10263                 /* fixed length buffer check */
10264                 if (d >= e)
10265                     Perl_croak(aTHX_ number_too_long);
10266                 if (*s == '_') {
10267                    if (lastub && s == lastub + 1)
10268                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10269                                       "Misplaced _ in number");
10270                    lastub = s;
10271                 }
10272                 else
10273                     *d++ = *s;
10274             }
10275             /* fractional part ending in underbar? */
10276             if (s[-1] == '_') {
10277                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10278                                "Misplaced _ in number");
10279             }
10280             if (*s == '.' && isDIGIT(s[1])) {
10281                 /* oops, it's really a v-string, but without the "v" */
10282                 s = start;
10283                 goto vstring;
10284             }
10285         }
10286
10287         /* read exponent part, if present */
10288         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10289             floatit = TRUE;
10290             s++;
10291
10292             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10293             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10294
10295             /* stray preinitial _ */
10296             if (*s == '_') {
10297                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10298                                "Misplaced _ in number");
10299                 lastub = s++;
10300             }
10301
10302             /* allow positive or negative exponent */
10303             if (*s == '+' || *s == '-')
10304                 *d++ = *s++;
10305
10306             /* stray initial _ */
10307             if (*s == '_') {
10308                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10309                                "Misplaced _ in number");
10310                 lastub = s++;
10311             }
10312
10313             /* read digits of exponent */
10314             while (isDIGIT(*s) || *s == '_') {
10315                 if (isDIGIT(*s)) {
10316                     if (d >= e)
10317                         Perl_croak(aTHX_ number_too_long);
10318                     *d++ = *s++;
10319                 }
10320                 else {
10321                    if (((lastub && s == lastub + 1) ||
10322                         (!isDIGIT(s[1]) && s[1] != '_')))
10323                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10324                                       "Misplaced _ in number");
10325                    lastub = s++;
10326                 }
10327             }
10328         }
10329
10330
10331         /*
10332            We try to do an integer conversion first if no characters
10333            indicating "float" have been found.
10334          */
10335
10336         if (!floatit) {
10337             UV uv;
10338             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10339
10340             if (flags == IS_NUMBER_IN_UV) {
10341               if (uv <= IV_MAX)
10342                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10343               else
10344                 sv = newSVuv(uv);
10345             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10346               if (uv <= (UV) IV_MIN)
10347                 sv = newSViv(-(IV)uv);
10348               else
10349                 floatit = TRUE;
10350             } else
10351               floatit = TRUE;
10352         }
10353         if (floatit) {
10354             /* terminate the string */
10355             *d = '\0';
10356             nv = Atof(PL_tokenbuf);
10357             sv = newSVnv(nv);
10358         }
10359
10360         if ( floatit
10361              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10362             const char *const key = floatit ? "float" : "integer";
10363             const STRLEN keylen = floatit ? 5 : 7;
10364             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10365                                 key, keylen, sv, NULL, NULL, 0);
10366         }
10367         break;
10368
10369     /* if it starts with a v, it could be a v-string */
10370     case 'v':
10371 vstring:
10372                 sv = newSV(5); /* preallocate storage space */
10373                 s = scan_vstring(s, PL_bufend, sv);
10374         break;
10375     }
10376
10377     /* make the op for the constant and return */
10378
10379     if (sv)
10380         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10381     else
10382         lvalp->opval = NULL;
10383
10384     return (char *)s;
10385 }
10386
10387 STATIC char *
10388 S_scan_formline(pTHX_ register char *s)
10389 {
10390     dVAR;
10391     register char *eol;
10392     register char *t;
10393     SV * const stuff = newSVpvs("");
10394     bool needargs = FALSE;
10395     bool eofmt = FALSE;
10396 #ifdef PERL_MAD
10397     char *tokenstart = s;
10398     SV* savewhite = NULL;
10399
10400     if (PL_madskills) {
10401         savewhite = PL_thiswhite;
10402         PL_thiswhite = 0;
10403     }
10404 #endif
10405
10406     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10407
10408     while (!needargs) {
10409         if (*s == '.') {
10410             t = s+1;
10411 #ifdef PERL_STRICT_CR
10412             while (SPACE_OR_TAB(*t))
10413                 t++;
10414 #else
10415             while (SPACE_OR_TAB(*t) || *t == '\r')
10416                 t++;
10417 #endif
10418             if (*t == '\n' || t == PL_bufend) {
10419                 eofmt = TRUE;
10420                 break;
10421             }
10422         }
10423         if (PL_in_eval && !PL_rsfp) {
10424             eol = (char *) memchr(s,'\n',PL_bufend-s);
10425             if (!eol++)
10426                 eol = PL_bufend;
10427         }
10428         else
10429             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10430         if (*s != '#') {
10431             for (t = s; t < eol; t++) {
10432                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10433                     needargs = FALSE;
10434                     goto enough;        /* ~~ must be first line in formline */
10435                 }
10436                 if (*t == '@' || *t == '^')
10437                     needargs = TRUE;
10438             }
10439             if (eol > s) {
10440                 sv_catpvn(stuff, s, eol-s);
10441 #ifndef PERL_STRICT_CR
10442                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10443                     char *end = SvPVX(stuff) + SvCUR(stuff);
10444                     end[-2] = '\n';
10445                     end[-1] = '\0';
10446                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10447                 }
10448 #endif
10449             }
10450             else
10451               break;
10452         }
10453         s = (char*)eol;
10454         if (PL_rsfp) {
10455             bool got_some;
10456 #ifdef PERL_MAD
10457             if (PL_madskills) {
10458                 if (PL_thistoken)
10459                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10460                 else
10461                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10462             }
10463 #endif
10464             PL_bufptr = PL_bufend;
10465             CopLINE_inc(PL_curcop);
10466             got_some = lex_next_chunk(0);
10467             CopLINE_dec(PL_curcop);
10468             s = PL_bufptr;
10469 #ifdef PERL_MAD
10470             tokenstart = PL_bufptr;
10471 #endif
10472             if (!got_some)
10473                 break;
10474         }
10475         incline(s);
10476     }
10477   enough:
10478     if (SvCUR(stuff)) {
10479         PL_expect = XTERM;
10480         if (needargs) {
10481             PL_lex_state = LEX_NORMAL;
10482             start_force(PL_curforce);
10483             NEXTVAL_NEXTTOKE.ival = 0;
10484             force_next(',');
10485         }
10486         else
10487             PL_lex_state = LEX_FORMLINE;
10488         if (!IN_BYTES) {
10489             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10490                 SvUTF8_on(stuff);
10491             else if (PL_encoding)
10492                 sv_recode_to_utf8(stuff, PL_encoding);
10493         }
10494         start_force(PL_curforce);
10495         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10496         force_next(THING);
10497         start_force(PL_curforce);
10498         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10499         force_next(LSTOP);
10500     }
10501     else {
10502         SvREFCNT_dec(stuff);
10503         if (eofmt)
10504             PL_lex_formbrack = 0;
10505         PL_bufptr = s;
10506     }
10507 #ifdef PERL_MAD
10508     if (PL_madskills) {
10509         if (PL_thistoken)
10510             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10511         else
10512             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10513         PL_thiswhite = savewhite;
10514     }
10515 #endif
10516     return s;
10517 }
10518
10519 I32
10520 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10521 {
10522     dVAR;
10523     const I32 oldsavestack_ix = PL_savestack_ix;
10524     CV* const outsidecv = PL_compcv;
10525
10526     if (PL_compcv) {
10527         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10528     }
10529     SAVEI32(PL_subline);
10530     save_item(PL_subname);
10531     SAVESPTR(PL_compcv);
10532
10533     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10534     CvFLAGS(PL_compcv) |= flags;
10535
10536     PL_subline = CopLINE(PL_curcop);
10537     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10538     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10539     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10540
10541     return oldsavestack_ix;
10542 }
10543
10544 #ifdef __SC__
10545 #pragma segment Perl_yylex
10546 #endif
10547 static int
10548 S_yywarn(pTHX_ const char *const s)
10549 {
10550     dVAR;
10551
10552     PERL_ARGS_ASSERT_YYWARN;
10553
10554     PL_in_eval |= EVAL_WARNONLY;
10555     yyerror(s);
10556     PL_in_eval &= ~EVAL_WARNONLY;
10557     return 0;
10558 }
10559
10560 int
10561 Perl_yyerror(pTHX_ const char *const s)
10562 {
10563     dVAR;
10564     const char *where = NULL;
10565     const char *context = NULL;
10566     int contlen = -1;
10567     SV *msg;
10568     int yychar  = PL_parser->yychar;
10569
10570     PERL_ARGS_ASSERT_YYERROR;
10571
10572     if (!yychar || (yychar == ';' && !PL_rsfp))
10573         where = "at EOF";
10574     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10575       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10576       PL_oldbufptr != PL_bufptr) {
10577         /*
10578                 Only for NetWare:
10579                 The code below is removed for NetWare because it abends/crashes on NetWare
10580                 when the script has error such as not having the closing quotes like:
10581                     if ($var eq "value)
10582                 Checking of white spaces is anyway done in NetWare code.
10583         */
10584 #ifndef NETWARE
10585         while (isSPACE(*PL_oldoldbufptr))
10586             PL_oldoldbufptr++;
10587 #endif
10588         context = PL_oldoldbufptr;
10589         contlen = PL_bufptr - PL_oldoldbufptr;
10590     }
10591     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10592       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10593         /*
10594                 Only for NetWare:
10595                 The code below is removed for NetWare because it abends/crashes on NetWare
10596                 when the script has error such as not having the closing quotes like:
10597                     if ($var eq "value)
10598                 Checking of white spaces is anyway done in NetWare code.
10599         */
10600 #ifndef NETWARE
10601         while (isSPACE(*PL_oldbufptr))
10602             PL_oldbufptr++;
10603 #endif
10604         context = PL_oldbufptr;
10605         contlen = PL_bufptr - PL_oldbufptr;
10606     }
10607     else if (yychar > 255)
10608         where = "next token ???";
10609     else if (yychar == -2) { /* YYEMPTY */
10610         if (PL_lex_state == LEX_NORMAL ||
10611            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10612             where = "at end of line";
10613         else if (PL_lex_inpat)
10614             where = "within pattern";
10615         else
10616             where = "within string";
10617     }
10618     else {
10619         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10620         if (yychar < 32)
10621             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10622         else if (isPRINT_LC(yychar)) {
10623             const char string = yychar;
10624             sv_catpvn(where_sv, &string, 1);
10625         }
10626         else
10627             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10628         where = SvPVX_const(where_sv);
10629     }
10630     msg = sv_2mortal(newSVpv(s, 0));
10631     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10632         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10633     if (context)
10634         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10635     else
10636         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10637     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10638         Perl_sv_catpvf(aTHX_ msg,
10639         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10640                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10641         PL_multi_end = 0;
10642     }
10643     if (PL_in_eval & EVAL_WARNONLY) {
10644         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10645     }
10646     else
10647         qerror(msg);
10648     if (PL_error_count >= 10) {
10649         if (PL_in_eval && SvCUR(ERRSV))
10650             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10651                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10652         else
10653             Perl_croak(aTHX_ "%s has too many errors.\n",
10654             OutCopFILE(PL_curcop));
10655     }
10656     PL_in_my = 0;
10657     PL_in_my_stash = NULL;
10658     return 0;
10659 }
10660 #ifdef __SC__
10661 #pragma segment Main
10662 #endif
10663
10664 STATIC char*
10665 S_swallow_bom(pTHX_ U8 *s)
10666 {
10667     dVAR;
10668     const STRLEN slen = SvCUR(PL_linestr);
10669
10670     PERL_ARGS_ASSERT_SWALLOW_BOM;
10671
10672     switch (s[0]) {
10673     case 0xFF:
10674         if (s[1] == 0xFE) {
10675             /* UTF-16 little-endian? (or UTF-32LE?) */
10676             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10677                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10678 #ifndef PERL_NO_UTF16_FILTER
10679             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10680             s += 2;
10681             if (PL_bufend > (char*)s) {
10682                 s = add_utf16_textfilter(s, TRUE);
10683             }
10684 #else
10685             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10686 #endif
10687         }
10688         break;
10689     case 0xFE:
10690         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10691 #ifndef PERL_NO_UTF16_FILTER
10692             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10693             s += 2;
10694             if (PL_bufend > (char *)s) {
10695                 s = add_utf16_textfilter(s, FALSE);
10696             }
10697 #else
10698             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10699 #endif
10700         }
10701         break;
10702     case 0xEF:
10703         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10704             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10705             s += 3;                      /* UTF-8 */
10706         }
10707         break;
10708     case 0:
10709         if (slen > 3) {
10710              if (s[1] == 0) {
10711                   if (s[2] == 0xFE && s[3] == 0xFF) {
10712                        /* UTF-32 big-endian */
10713                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10714                   }
10715              }
10716              else if (s[2] == 0 && s[3] != 0) {
10717                   /* Leading bytes
10718                    * 00 xx 00 xx
10719                    * are a good indicator of UTF-16BE. */
10720 #ifndef PERL_NO_UTF16_FILTER
10721                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10722                   s = add_utf16_textfilter(s, FALSE);
10723 #else
10724                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10725 #endif
10726              }
10727         }
10728 #ifdef EBCDIC
10729     case 0xDD:
10730         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10731             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10732             s += 4;                      /* UTF-8 */
10733         }
10734         break;
10735 #endif
10736
10737     default:
10738          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10739                   /* Leading bytes
10740                    * xx 00 xx 00
10741                    * are a good indicator of UTF-16LE. */
10742 #ifndef PERL_NO_UTF16_FILTER
10743               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10744               s = add_utf16_textfilter(s, TRUE);
10745 #else
10746               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10747 #endif
10748          }
10749     }
10750     return (char*)s;
10751 }
10752
10753
10754 #ifndef PERL_NO_UTF16_FILTER
10755 static I32
10756 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10757 {
10758     dVAR;
10759     SV *const filter = FILTER_DATA(idx);
10760     /* We re-use this each time round, throwing the contents away before we
10761        return.  */
10762     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10763     SV *const utf8_buffer = filter;
10764     IV status = IoPAGE(filter);
10765     const bool reverse = cBOOL(IoLINES(filter));
10766     I32 retval;
10767
10768     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10769
10770     /* As we're automatically added, at the lowest level, and hence only called
10771        from this file, we can be sure that we're not called in block mode. Hence
10772        don't bother writing code to deal with block mode.  */
10773     if (maxlen) {
10774         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10775     }
10776     if (status < 0) {
10777         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10778     }
10779     DEBUG_P(PerlIO_printf(Perl_debug_log,
10780                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10781                           FPTR2DPTR(void *, S_utf16_textfilter),
10782                           reverse ? 'l' : 'b', idx, maxlen, status,
10783                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10784
10785     while (1) {
10786         STRLEN chars;
10787         STRLEN have;
10788         I32 newlen;
10789         U8 *end;
10790         /* First, look in our buffer of existing UTF-8 data:  */
10791         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10792
10793         if (nl) {
10794             ++nl;
10795         } else if (status == 0) {
10796             /* EOF */
10797             IoPAGE(filter) = 0;
10798             nl = SvEND(utf8_buffer);
10799         }
10800         if (nl) {
10801             STRLEN got = nl - SvPVX(utf8_buffer);
10802             /* Did we have anything to append?  */
10803             retval = got != 0;
10804             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10805             /* Everything else in this code works just fine if SVp_POK isn't
10806                set.  This, however, needs it, and we need it to work, else
10807                we loop infinitely because the buffer is never consumed.  */
10808             sv_chop(utf8_buffer, nl);
10809             break;
10810         }
10811
10812         /* OK, not a complete line there, so need to read some more UTF-16.
10813            Read an extra octect if the buffer currently has an odd number. */
10814         while (1) {
10815             if (status <= 0)
10816                 break;
10817             if (SvCUR(utf16_buffer) >= 2) {
10818                 /* Location of the high octet of the last complete code point.
10819                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10820                    *coupled* with all the benefits of partial reads and
10821                    endianness.  */
10822                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10823                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10824
10825                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10826                     break;
10827                 }
10828
10829                 /* We have the first half of a surrogate. Read more.  */
10830                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10831             }
10832
10833             status = FILTER_READ(idx + 1, utf16_buffer,
10834                                  160 + (SvCUR(utf16_buffer) & 1));
10835             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10836             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10837             if (status < 0) {
10838                 /* Error */
10839                 IoPAGE(filter) = status;
10840                 return status;
10841             }
10842         }
10843
10844         chars = SvCUR(utf16_buffer) >> 1;
10845         have = SvCUR(utf8_buffer);
10846         SvGROW(utf8_buffer, have + chars * 3 + 1);
10847
10848         if (reverse) {
10849             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10850                                          (U8*)SvPVX_const(utf8_buffer) + have,
10851                                          chars * 2, &newlen);
10852         } else {
10853             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10854                                 (U8*)SvPVX_const(utf8_buffer) + have,
10855                                 chars * 2, &newlen);
10856         }
10857         SvCUR_set(utf8_buffer, have + newlen);
10858         *end = '\0';
10859
10860         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10861            it's private to us, and utf16_to_utf8{,reversed} take a
10862            (pointer,length) pair, rather than a NUL-terminated string.  */
10863         if(SvCUR(utf16_buffer) & 1) {
10864             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10865             SvCUR_set(utf16_buffer, 1);
10866         } else {
10867             SvCUR_set(utf16_buffer, 0);
10868         }
10869     }
10870     DEBUG_P(PerlIO_printf(Perl_debug_log,
10871                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10872                           status,
10873                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10874     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10875     return retval;
10876 }
10877
10878 static U8 *
10879 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10880 {
10881     SV *filter = filter_add(S_utf16_textfilter, NULL);
10882
10883     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10884
10885     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10886     sv_setpvs(filter, "");
10887     IoLINES(filter) = reversed;
10888     IoPAGE(filter) = 1; /* Not EOF */
10889
10890     /* Sadly, we have to return a valid pointer, come what may, so we have to
10891        ignore any error return from this.  */
10892     SvCUR_set(PL_linestr, 0);
10893     if (FILTER_READ(0, PL_linestr, 0)) {
10894         SvUTF8_on(PL_linestr);
10895     } else {
10896         SvUTF8_on(PL_linestr);
10897     }
10898     PL_bufend = SvEND(PL_linestr);
10899     return (U8*)SvPVX(PL_linestr);
10900 }
10901 #endif
10902
10903 /*
10904 Returns a pointer to the next character after the parsed
10905 vstring, as well as updating the passed in sv.
10906
10907 Function must be called like
10908
10909         sv = newSV(5);
10910         s = scan_vstring(s,e,sv);
10911
10912 where s and e are the start and end of the string.
10913 The sv should already be large enough to store the vstring
10914 passed in, for performance reasons.
10915
10916 */
10917
10918 char *
10919 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10920 {
10921     dVAR;
10922     const char *pos = s;
10923     const char *start = s;
10924
10925     PERL_ARGS_ASSERT_SCAN_VSTRING;
10926
10927     if (*pos == 'v') pos++;  /* get past 'v' */
10928     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10929         pos++;
10930     if ( *pos != '.') {
10931         /* this may not be a v-string if followed by => */
10932         const char *next = pos;
10933         while (next < e && isSPACE(*next))
10934             ++next;
10935         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10936             /* return string not v-string */
10937             sv_setpvn(sv,(char *)s,pos-s);
10938             return (char *)pos;
10939         }
10940     }
10941
10942     if (!isALPHA(*pos)) {
10943         U8 tmpbuf[UTF8_MAXBYTES+1];
10944
10945         if (*s == 'v')
10946             s++;  /* get past 'v' */
10947
10948         sv_setpvs(sv, "");
10949
10950         for (;;) {
10951             /* this is atoi() that tolerates underscores */
10952             U8 *tmpend;
10953             UV rev = 0;
10954             const char *end = pos;
10955             UV mult = 1;
10956             while (--end >= s) {
10957                 if (*end != '_') {
10958                     const UV orev = rev;
10959                     rev += (*end - '0') * mult;
10960                     mult *= 10;
10961                     if (orev > rev)
10962                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10963                                          "Integer overflow in decimal number");
10964                 }
10965             }
10966 #ifdef EBCDIC
10967             if (rev > 0x7FFFFFFF)
10968                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10969 #endif
10970             /* Append native character for the rev point */
10971             tmpend = uvchr_to_utf8(tmpbuf, rev);
10972             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10973             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10974                  SvUTF8_on(sv);
10975             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10976                  s = ++pos;
10977             else {
10978                  s = pos;
10979                  break;
10980             }
10981             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10982                  pos++;
10983         }
10984         SvPOK_on(sv);
10985         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10986         SvRMAGICAL_on(sv);
10987     }
10988     return (char *)s;
10989 }
10990
10991 int
10992 Perl_keyword_plugin_standard(pTHX_
10993         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10994 {
10995     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10996     PERL_UNUSED_CONTEXT;
10997     PERL_UNUSED_ARG(keyword_ptr);
10998     PERL_UNUSED_ARG(keyword_len);
10999     PERL_UNUSED_ARG(op_ptr);
11000     return KEYWORD_PLUGIN_DECLINE;
11001 }
11002
11003 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11004 static void
11005 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11006 {
11007     SAVEI32(PL_lex_brackets);
11008     if (PL_lex_brackets > 100)
11009         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11010     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11011     SAVEI32(PL_lex_allbrackets);
11012     PL_lex_allbrackets = 0;
11013     SAVEI8(PL_lex_fakeeof);
11014     PL_lex_fakeeof = (U8)fakeeof;
11015     if(yyparse(gramtype) && !PL_parser->error_count)
11016         qerror(Perl_mess(aTHX_ "Parse error"));
11017 }
11018
11019 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11020 static OP *
11021 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11022 {
11023     OP *o;
11024     ENTER;
11025     SAVEVPTR(PL_eval_root);
11026     PL_eval_root = NULL;
11027     parse_recdescent(gramtype, fakeeof);
11028     o = PL_eval_root;
11029     LEAVE;
11030     return o;
11031 }
11032
11033 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11034 static OP *
11035 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11036 {
11037     OP *exprop;
11038     if (flags & ~PARSE_OPTIONAL)
11039         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11040     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11041     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11042         if (!PL_parser->error_count)
11043             qerror(Perl_mess(aTHX_ "Parse error"));
11044         exprop = newOP(OP_NULL, 0);
11045     }
11046     return exprop;
11047 }
11048
11049 /*
11050 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11051
11052 Parse a Perl arithmetic expression.  This may contain operators of precedence
11053 down to the bit shift operators.  The expression must be followed (and thus
11054 terminated) either by a comparison or lower-precedence operator or by
11055 something that would normally terminate an expression such as semicolon.
11056 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11057 otherwise it is mandatory.  It is up to the caller to ensure that the
11058 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11059 the source of the code to be parsed and the lexical context for the
11060 expression.
11061
11062 The op tree representing the expression is returned.  If an optional
11063 expression is absent, a null pointer is returned, otherwise the pointer
11064 will be non-null.
11065
11066 If an error occurs in parsing or compilation, in most cases a valid op
11067 tree is returned anyway.  The error is reflected in the parser state,
11068 normally resulting in a single exception at the top level of parsing
11069 which covers all the compilation errors that occurred.  Some compilation
11070 errors, however, will throw an exception immediately.
11071
11072 =cut
11073 */
11074
11075 OP *
11076 Perl_parse_arithexpr(pTHX_ U32 flags)
11077 {
11078     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11079 }
11080
11081 /*
11082 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11083
11084 Parse a Perl term expression.  This may contain operators of precedence
11085 down to the assignment operators.  The expression must be followed (and thus
11086 terminated) either by a comma or lower-precedence operator or by
11087 something that would normally terminate an expression such as semicolon.
11088 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11089 otherwise it is mandatory.  It is up to the caller to ensure that the
11090 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11091 the source of the code to be parsed and the lexical context for the
11092 expression.
11093
11094 The op tree representing the expression is returned.  If an optional
11095 expression is absent, a null pointer is returned, otherwise the pointer
11096 will be non-null.
11097
11098 If an error occurs in parsing or compilation, in most cases a valid op
11099 tree is returned anyway.  The error is reflected in the parser state,
11100 normally resulting in a single exception at the top level of parsing
11101 which covers all the compilation errors that occurred.  Some compilation
11102 errors, however, will throw an exception immediately.
11103
11104 =cut
11105 */
11106
11107 OP *
11108 Perl_parse_termexpr(pTHX_ U32 flags)
11109 {
11110     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11111 }
11112
11113 /*
11114 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11115
11116 Parse a Perl list expression.  This may contain operators of precedence
11117 down to the comma operator.  The expression must be followed (and thus
11118 terminated) either by a low-precedence logic operator such as C<or> or by
11119 something that would normally terminate an expression such as semicolon.
11120 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11121 otherwise it is mandatory.  It is up to the caller to ensure that the
11122 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11123 the source of the code to be parsed and the lexical context for the
11124 expression.
11125
11126 The op tree representing the expression is returned.  If an optional
11127 expression is absent, a null pointer is returned, otherwise the pointer
11128 will be non-null.
11129
11130 If an error occurs in parsing or compilation, in most cases a valid op
11131 tree is returned anyway.  The error is reflected in the parser state,
11132 normally resulting in a single exception at the top level of parsing
11133 which covers all the compilation errors that occurred.  Some compilation
11134 errors, however, will throw an exception immediately.
11135
11136 =cut
11137 */
11138
11139 OP *
11140 Perl_parse_listexpr(pTHX_ U32 flags)
11141 {
11142     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11143 }
11144
11145 /*
11146 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11147
11148 Parse a single complete Perl expression.  This allows the full
11149 expression grammar, including the lowest-precedence operators such
11150 as C<or>.  The expression must be followed (and thus terminated) by a
11151 token that an expression would normally be terminated by: end-of-file,
11152 closing bracketing punctuation, semicolon, or one of the keywords that
11153 signals a postfix expression-statement modifier.  If I<flags> includes
11154 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11155 mandatory.  It is up to the caller to ensure that the dynamic parser
11156 state (L</PL_parser> et al) is correctly set to reflect the source of
11157 the code to be parsed and the lexical context for the expression.
11158
11159 The op tree representing the expression is returned.  If an optional
11160 expression is absent, a null pointer is returned, otherwise the pointer
11161 will be non-null.
11162
11163 If an error occurs in parsing or compilation, in most cases a valid op
11164 tree is returned anyway.  The error is reflected in the parser state,
11165 normally resulting in a single exception at the top level of parsing
11166 which covers all the compilation errors that occurred.  Some compilation
11167 errors, however, will throw an exception immediately.
11168
11169 =cut
11170 */
11171
11172 OP *
11173 Perl_parse_fullexpr(pTHX_ U32 flags)
11174 {
11175     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11176 }
11177
11178 /*
11179 =for apidoc Amx|OP *|parse_block|U32 flags
11180
11181 Parse a single complete Perl code block.  This consists of an opening
11182 brace, a sequence of statements, and a closing brace.  The block
11183 constitutes a lexical scope, so C<my> variables and various compile-time
11184 effects can be contained within it.  It is up to the caller to ensure
11185 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11186 reflect the source of the code to be parsed and the lexical context for
11187 the statement.
11188
11189 The op tree representing the code block is returned.  This is always a
11190 real op, never a null pointer.  It will normally be a C<lineseq> list,
11191 including C<nextstate> or equivalent ops.  No ops to construct any kind
11192 of runtime scope are included by virtue of it being a block.
11193
11194 If an error occurs in parsing or compilation, in most cases a valid op
11195 tree (most likely null) is returned anyway.  The error is reflected in
11196 the parser state, normally resulting in a single exception at the top
11197 level of parsing which covers all the compilation errors that occurred.
11198 Some compilation errors, however, will throw an exception immediately.
11199
11200 The I<flags> parameter is reserved for future use, and must always
11201 be zero.
11202
11203 =cut
11204 */
11205
11206 OP *
11207 Perl_parse_block(pTHX_ U32 flags)
11208 {
11209     if (flags)
11210         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11211     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11212 }
11213
11214 /*
11215 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11216
11217 Parse a single unadorned Perl statement.  This may be a normal imperative
11218 statement or a declaration that has compile-time effect.  It does not
11219 include any label or other affixture.  It is up to the caller to ensure
11220 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11221 reflect the source of the code to be parsed and the lexical context for
11222 the statement.
11223
11224 The op tree representing the statement is returned.  This may be a
11225 null pointer if the statement is null, for example if it was actually
11226 a subroutine definition (which has compile-time side effects).  If not
11227 null, it will be ops directly implementing the statement, suitable to
11228 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11229 equivalent op (except for those embedded in a scope contained entirely
11230 within the statement).
11231
11232 If an error occurs in parsing or compilation, in most cases a valid op
11233 tree (most likely null) is returned anyway.  The error is reflected in
11234 the parser state, normally resulting in a single exception at the top
11235 level of parsing which covers all the compilation errors that occurred.
11236 Some compilation errors, however, will throw an exception immediately.
11237
11238 The I<flags> parameter is reserved for future use, and must always
11239 be zero.
11240
11241 =cut
11242 */
11243
11244 OP *
11245 Perl_parse_barestmt(pTHX_ U32 flags)
11246 {
11247     if (flags)
11248         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11249     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11250 }
11251
11252 /*
11253 =for apidoc Amx|SV *|parse_label|U32 flags
11254
11255 Parse a single label, possibly optional, of the type that may prefix a
11256 Perl statement.  It is up to the caller to ensure that the dynamic parser
11257 state (L</PL_parser> et al) is correctly set to reflect the source of
11258 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11259 label is optional, otherwise it is mandatory.
11260
11261 The name of the label is returned in the form of a fresh scalar.  If an
11262 optional label is absent, a null pointer is returned.
11263
11264 If an error occurs in parsing, which can only occur if the label is
11265 mandatory, a valid label is returned anyway.  The error is reflected in
11266 the parser state, normally resulting in a single exception at the top
11267 level of parsing which covers all the compilation errors that occurred.
11268
11269 =cut
11270 */
11271
11272 SV *
11273 Perl_parse_label(pTHX_ U32 flags)
11274 {
11275     if (flags & ~PARSE_OPTIONAL)
11276         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11277     if (PL_lex_state == LEX_KNOWNEXT) {
11278         PL_parser->yychar = yylex();
11279         if (PL_parser->yychar == LABEL) {
11280             char *lpv = pl_yylval.pval;
11281             STRLEN llen = strlen(lpv);
11282             SV *lsv;
11283             PL_parser->yychar = YYEMPTY;
11284             lsv = newSV_type(SVt_PV);
11285             SvPV_set(lsv, lpv);
11286             SvCUR_set(lsv, llen);
11287             SvLEN_set(lsv, llen+1);
11288             SvPOK_on(lsv);
11289             return lsv;
11290         } else {
11291             yyunlex();
11292             goto no_label;
11293         }
11294     } else {
11295         char *s, *t;
11296         U8 c;
11297         STRLEN wlen, bufptr_pos;
11298         lex_read_space(0);
11299         t = s = PL_bufptr;
11300         c = (U8)*s;
11301         if (!isIDFIRST_A(c))
11302             goto no_label;
11303         do {
11304             c = (U8)*++t;
11305         } while(isWORDCHAR_A(c));
11306         wlen = t - s;
11307         if (word_takes_any_delimeter(s, wlen))
11308             goto no_label;
11309         bufptr_pos = s - SvPVX(PL_linestr);
11310         PL_bufptr = t;
11311         lex_read_space(LEX_KEEP_PREVIOUS);
11312         t = PL_bufptr;
11313         s = SvPVX(PL_linestr) + bufptr_pos;
11314         if (t[0] == ':' && t[1] != ':') {
11315             PL_oldoldbufptr = PL_oldbufptr;
11316             PL_oldbufptr = s;
11317             PL_bufptr = t+1;
11318             return newSVpvn(s, wlen);
11319         } else {
11320             PL_bufptr = s;
11321             no_label:
11322             if (flags & PARSE_OPTIONAL) {
11323                 return NULL;
11324             } else {
11325                 qerror(Perl_mess(aTHX_ "Parse error"));
11326                 return newSVpvs("x");
11327             }
11328         }
11329     }
11330 }
11331
11332 /*
11333 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11334
11335 Parse a single complete Perl statement.  This may be a normal imperative
11336 statement or a declaration that has compile-time effect, and may include
11337 optional labels.  It is up to the caller to ensure that the dynamic
11338 parser state (L</PL_parser> et al) is correctly set to reflect the source
11339 of the code to be parsed and the lexical context for the statement.
11340
11341 The op tree representing the statement is returned.  This may be a
11342 null pointer if the statement is null, for example if it was actually
11343 a subroutine definition (which has compile-time side effects).  If not
11344 null, it will be the result of a L</newSTATEOP> call, normally including
11345 a C<nextstate> or equivalent op.
11346
11347 If an error occurs in parsing or compilation, in most cases a valid op
11348 tree (most likely null) is returned anyway.  The error is reflected in
11349 the parser state, normally resulting in a single exception at the top
11350 level of parsing which covers all the compilation errors that occurred.
11351 Some compilation errors, however, will throw an exception immediately.
11352
11353 The I<flags> parameter is reserved for future use, and must always
11354 be zero.
11355
11356 =cut
11357 */
11358
11359 OP *
11360 Perl_parse_fullstmt(pTHX_ U32 flags)
11361 {
11362     if (flags)
11363         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11364     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11365 }
11366
11367 /*
11368 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11369
11370 Parse a sequence of zero or more Perl statements.  These may be normal
11371 imperative statements, including optional labels, or declarations
11372 that have compile-time effect, or any mixture thereof.  The statement
11373 sequence ends when a closing brace or end-of-file is encountered in a
11374 place where a new statement could have validly started.  It is up to
11375 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11376 is correctly set to reflect the source of the code to be parsed and the
11377 lexical context for the statements.
11378
11379 The op tree representing the statement sequence is returned.  This may
11380 be a null pointer if the statements were all null, for example if there
11381 were no statements or if there were only subroutine definitions (which
11382 have compile-time side effects).  If not null, it will be a C<lineseq>
11383 list, normally including C<nextstate> or equivalent ops.
11384
11385 If an error occurs in parsing or compilation, in most cases a valid op
11386 tree is returned anyway.  The error is reflected in the parser state,
11387 normally resulting in a single exception at the top level of parsing
11388 which covers all the compilation errors that occurred.  Some compilation
11389 errors, however, will throw an exception immediately.
11390
11391 The I<flags> parameter is reserved for future use, and must always
11392 be zero.
11393
11394 =cut
11395 */
11396
11397 OP *
11398 Perl_parse_stmtseq(pTHX_ U32 flags)
11399 {
11400     OP *stmtseqop;
11401     I32 c;
11402     if (flags)
11403         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11404     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11405     c = lex_peek_unichar(0);
11406     if (c != -1 && c != /*{*/'}')
11407         qerror(Perl_mess(aTHX_ "Parse error"));
11408     return stmtseqop;
11409 }
11410
11411 void
11412 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11413 {
11414     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11415     deprecate("qw(...) as parentheses");
11416     force_next((4<<24)|')');
11417     if (qwlist->op_type == OP_STUB) {
11418         op_free(qwlist);
11419     }
11420     else {
11421         start_force(PL_curforce);
11422         NEXTVAL_NEXTTOKE.opval = qwlist;
11423         force_next(THING);
11424     }
11425     force_next((2<<24)|'(');
11426 }
11427
11428 /*
11429  * Local variables:
11430  * c-indentation-style: bsd
11431  * c-basic-offset: 4
11432  * indent-tabs-mode: t
11433  * End:
11434  *
11435  * ex: set ts=8 sts=4 sw=4 noet:
11436  */