toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 #define force_ident_maybe_lex(p) \
 114         (PL_bufptr = s, S_force_ident_maybe_lex(aTHX_ p))
 115
 116 static const char ident_too_long[] = "Identifier too long";
 117
 118 #ifdef PERL_MAD
 119 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 120 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 121 #else
 122 #  define CURMAD(slot,sv)
 123 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 124 #endif
 125
 126 #define XENUMMASK  0x3f
 127 #define XFAKEEOF   0x40
 128 #define XFAKEBRACK 0x80
 129
 130 #ifdef USE_UTF8_SCRIPTS
 131 #   define UTF (!IN_BYTES)
 132 #else
 133 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 134 #endif
 135
 136 /* The maximum number of characters preceding the unrecognized one to display */
 137 #define UNRECOGNIZED_PRECEDE_COUNT 10
 138
 139 /* In variables named $^X, these are the legal values for X.
 140  * 1999-02-27 mjd-perl-patch@plover.com */
 141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 142
 143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 144
 145 /* LEX_* are values for PL_lex_state, the state of the lexer.
 146  * They are arranged oddly so that the guard on the switch statement
 147  * can get by with a single comparison (if the compiler is smart enough).
 148  *
 149  * These values refer to the various states within a sublex parse,
 150  * i.e. within a double quotish string
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 237  * Rop          : relational operator <= != gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
 242 #ifdef DEBUGGING /* Serve -DT. */
 243 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 244 #else
 245 #   define REPORT(retval) (retval)
 246 #endif
 247
 248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 256 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 257 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 258 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 259 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 260 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 261 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 262 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 263 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 264 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 265 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 266 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 267 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 268 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
 270 /* This bit of chicanery makes a unary function followed by
 271  * a parenthesis into a function with one argument, highest precedence.
 272  * The UNIDOR macro is for unary functions that can be followed by the //
 273  * operator (such as C<shift // 0>).
 274  */
 275 #define UNI3(f,x,have_x) { \
 276         pl_yylval.ival = f; \
 277         if (have_x) PL_expect = x; \
 278         PL_bufptr = s; \
 279         PL_last_uni = PL_oldbufptr; \
 280         PL_last_lop_op = f; \
 281         if (*s == '(') \
 282             return REPORT( (int)FUNC1 ); \
 283         s = PEEKSPACE(s); \
 284         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 285         }
 286 #define UNI(f)    UNI3(f,XTERM,1)
 287 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 288 #define UNIPROTO(f,optional) { \
 289         if (optional) PL_last_uni = PL_oldbufptr; \
 290         OPERATOR(f); \
 291         }
 292
 293 #define UNIBRACK(f) UNI3(f,0,0)
 294
 295 /* grandfather return to old style */
 296 #define OLDLOP(f) \
 297         do { \
 298             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 299                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 300             pl_yylval.ival = (f); \
 301             PL_expect = XTERM; \
 302             PL_bufptr = s; \
 303             return (int)LSTOP; \
 304         } while(0)
 305
 306 #define COPLINE_INC_WITH_HERELINES                  \
 307     STMT_START {                                     \
 308         CopLINE_inc(PL_curcop);                       \
 309         if (PL_parser->lex_shared->herelines)          \
 310             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
 311             PL_parser->lex_shared->herelines = 0;                    \
 312     } STMT_END
 313
 314
 315 #ifdef DEBUGGING
 316
 317 /* how to interpret the pl_yylval associated with the token */
 318 enum token_type {
 319     TOKENTYPE_NONE,
 320     TOKENTYPE_IVAL,
 321     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 322     TOKENTYPE_PVAL,
 323     TOKENTYPE_OPVAL
 324 };
 325
 326 static struct debug_tokens {
 327     const int token;
 328     enum token_type type;
 329     const char *name;
 330 } const debug_tokens[] =
 331 {
 332     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 333     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 334     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 335     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 336     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 337     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 338     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 339     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 340     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 341     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 342     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 343     { DO,               TOKENTYPE_NONE,         "DO" },
 344     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 345     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 346     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 347     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 348     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 349     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 350     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 351     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 352     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 353     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 354     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 355     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 356     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 357     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 358     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 359     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 360     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 361     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 362     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 363     { IF,               TOKENTYPE_IVAL,         "IF" },
 364     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 365     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 366     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 367     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 368     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 369     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 370     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 371     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 372     { MY,               TOKENTYPE_IVAL,         "MY" },
 373     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 374     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 375     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 376     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 377     { OROR,             TOKENTYPE_NONE,         "OROR" },
 378     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 379     { PEG,              TOKENTYPE_NONE,         "PEG" },
 380     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 381     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 382     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 383     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 384     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 385     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 386     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 387     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 388     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 389     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 390     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 391     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 392     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 393     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 394     { SUB,              TOKENTYPE_NONE,         "SUB" },
 395     { THING,            TOKENTYPE_OPVAL,        "THING" },
 396     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 397     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 398     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 399     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 400     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 401     { USE,              TOKENTYPE_IVAL,         "USE" },
 402     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 403     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 404     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 405     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 406     { 0,                TOKENTYPE_NONE,         NULL }
 407 };
 408
 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 410
 411 STATIC int
 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 413 {
 414     dVAR;
 415
 416     PERL_ARGS_ASSERT_TOKEREPORT;
 417
 418     if (DEBUG_T_TEST) {
 419         const char *name = NULL;
 420         enum token_type type = TOKENTYPE_NONE;
 421         const struct debug_tokens *p;
 422         SV* const report = newSVpvs("<== ");
 423
 424         for (p = debug_tokens; p->token; p++) {
 425             if (p->token == (int)rv) {
 426                 name = p->name;
 427                 type = p->type;
 428                 break;
 429             }
 430         }
 431         if (name)
 432             Perl_sv_catpv(aTHX_ report, name);
 433         else if ((char)rv > ' ' && (char)rv <= '~')
 434             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 435         else if (!rv)
 436             sv_catpvs(report, "EOF");
 437         else
 438             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 439         switch (type) {
 440         case TOKENTYPE_NONE:
 441             break;
 442         case TOKENTYPE_IVAL:
 443             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 444             break;
 445         case TOKENTYPE_OPNUM:
 446             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 447                                     PL_op_name[lvalp->ival]);
 448             break;
 449         case TOKENTYPE_PVAL:
 450             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 451             break;
 452         case TOKENTYPE_OPVAL:
 453             if (lvalp->opval) {
 454                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 455                                     PL_op_name[lvalp->opval->op_type]);
 456                 if (lvalp->opval->op_type == OP_CONST) {
 457                     Perl_sv_catpvf(aTHX_ report, " %s",
 458                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 459                 }
 460
 461             }
 462             else
 463                 sv_catpvs(report, "(opval=null)");
 464             break;
 465         }
 466         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 467     };
 468     return (int)rv;
 469 }
 470
 471
 472 /* print the buffer with suitable escapes */
 473
 474 STATIC void
 475 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 476 {
 477     SV* const tmp = newSVpvs("");
 478
 479     PERL_ARGS_ASSERT_PRINTBUF;
 480
 481     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 482     SvREFCNT_dec(tmp);
 483 }
 484
 485 #endif
 486
 487 static int
 488 S_deprecate_commaless_var_list(pTHX) {
 489     PL_expect = XTERM;
 490     deprecate("comma-less variable list");
 491     return REPORT(','); /* grandfather non-comma-format format */
 492 }
 493
 494 /*
 495  * S_ao
 496  *
 497  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 498  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 499  */
 500
 501 STATIC int
 502 S_ao(pTHX_ int toketype)
 503 {
 504     dVAR;
 505     if (*PL_bufptr == '=') {
 506         PL_bufptr++;
 507         if (toketype == ANDAND)
 508             pl_yylval.ival = OP_ANDASSIGN;
 509         else if (toketype == OROR)
 510             pl_yylval.ival = OP_ORASSIGN;
 511         else if (toketype == DORDOR)
 512             pl_yylval.ival = OP_DORASSIGN;
 513         toketype = ASSIGNOP;
 514     }
 515     return toketype;
 516 }
 517
 518 /*
 519  * S_no_op
 520  * When Perl expects an operator and finds something else, no_op
 521  * prints the warning.  It always prints "<something> found where
 522  * operator expected.  It prints "Missing semicolon on previous line?"
 523  * if the surprise occurs at the start of the line.  "do you need to
 524  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 525  * where the compiler doesn't know if foo is a method call or a function.
 526  * It prints "Missing operator before end of line" if there's nothing
 527  * after the missing operator, or "... before <...>" if there is something
 528  * after the missing operator.
 529  */
 530
 531 STATIC void
 532 S_no_op(pTHX_ const char *const what, char *s)
 533 {
 534     dVAR;
 535     char * const oldbp = PL_bufptr;
 536     const bool is_first = (PL_oldbufptr == PL_linestart);
 537
 538     PERL_ARGS_ASSERT_NO_OP;
 539
 540     if (!s)
 541         s = oldbp;
 542     else
 543         PL_bufptr = s;
 544     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 545     if (ckWARN_d(WARN_SYNTAX)) {
 546         if (is_first)
 547             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 548                     "\t(Missing semicolon on previous line?)\n");
 549         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 550             const char *t;
 551             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 552                                                             t += UTF ? UTF8SKIP(t) : 1)
 553                 NOOP;
 554             if (t < PL_bufptr && isSPACE(*t))
 555                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 556                         "\t(Do you need to predeclare %"SVf"?)\n",
 557                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 558                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 559         }
 560         else {
 561             assert(s >= oldbp);
 562             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 563                     "\t(Missing operator before %"SVf"?)\n",
 564                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 565                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 566         }
 567     }
 568     PL_bufptr = oldbp;
 569 }
 570
 571 /*
 572  * S_missingterm
 573  * Complain about missing quote/regexp/heredoc terminator.
 574  * If it's called with NULL then it cauterizes the line buffer.
 575  * If we're in a delimited string and the delimiter is a control
 576  * character, it's reformatted into a two-char sequence like ^C.
 577  * This is fatal.
 578  */
 579
 580 STATIC void
 581 S_missingterm(pTHX_ char *s)
 582 {
 583     dVAR;
 584     char tmpbuf[3];
 585     char q;
 586     if (s) {
 587         char * const nl = strrchr(s,'\n');
 588         if (nl)
 589             *nl = '\0';
 590     }
 591     else if (isCNTRL(PL_multi_close)) {
 592         *tmpbuf = '^';
 593         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 594         tmpbuf[2] = '\0';
 595         s = tmpbuf;
 596     }
 597     else {
 598         *tmpbuf = (char)PL_multi_close;
 599         tmpbuf[1] = '\0';
 600         s = tmpbuf;
 601     }
 602     q = strchr(s,'"') ? '\'' : '"';
 603     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 604 }
 605
 606 #include "feature.h"
 607
 608 /*
 609  * Check whether the named feature is enabled.
 610  */
 611 bool
 612 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 613 {
 614     dVAR;
 615     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 616
 617     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 618
 619     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 620
 621     if (namelen > MAX_FEATURE_LEN)
 622         return FALSE;
 623     memcpy(&he_name[8], name, namelen);
 624
 625     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 626                                      REFCOUNTED_HE_EXISTS));
 627 }
 628
 629 /*
 630  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 631  * utf16-to-utf8-reversed.
 632  */
 633
 634 #ifdef PERL_CR_FILTER
 635 static void
 636 strip_return(SV *sv)
 637 {
 638     const char *s = SvPVX_const(sv);
 639     const char * const e = s + SvCUR(sv);
 640
 641     PERL_ARGS_ASSERT_STRIP_RETURN;
 642
 643     /* outer loop optimized to do nothing if there are no CR-LFs */
 644     while (s < e) {
 645         if (*s++ == '\r' && *s == '\n') {
 646             /* hit a CR-LF, need to copy the rest */
 647             char *d = s - 1;
 648             *d++ = *s++;
 649             while (s < e) {
 650                 if (*s == '\r' && s[1] == '\n')
 651                     s++;
 652                 *d++ = *s++;
 653             }
 654             SvCUR(sv) -= s - d;
 655             return;
 656         }
 657     }
 658 }
 659
 660 STATIC I32
 661 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 662 {
 663     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 664     if (count > 0 && !maxlen)
 665         strip_return(sv);
 666     return count;
 667 }
 668 #endif
 669
 670 /*
 671 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 672
 673 Creates and initialises a new lexer/parser state object, supplying
 674 a context in which to lex and parse from a new source of Perl code.
 675 A pointer to the new state object is placed in L</PL_parser>.  An entry
 676 is made on the save stack so that upon unwinding the new state object
 677 will be destroyed and the former value of L</PL_parser> will be restored.
 678 Nothing else need be done to clean up the parsing context.
 679
 680 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 681 non-null, provides a string (in SV form) containing code to be parsed.
 682 A copy of the string is made, so subsequent modification of I<line>
 683 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 684 from which code will be read to be parsed.  If both are non-null, the
 685 code in I<line> comes first and must consist of complete lines of input,
 686 and I<rsfp> supplies the remainder of the source.
 687
 688 The I<flags> parameter is reserved for future use.  Currently it is only
 689 used by perl internally, so extensions should always pass zero.
 690
 691 =cut
 692 */
 693
 694 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 695    can share filters with the current parser.
 696    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 697    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 698    destruction. This is used to handle the case of defaulting to reading the
 699    script from the standard input because no filename was given on the command
 700    line (without getting confused by situation where STDIN has been closed, so
 701    the script handle is opened on fd 0)  */
 702
 703 void
 704 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 705 {
 706     dVAR;
 707     const char *s = NULL;
 708     yy_parser *parser, *oparser;
 709     if (flags && flags & ~LEX_START_FLAGS)
 710         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 711
 712     /* create and initialise a parser */
 713
 714     Newxz(parser, 1, yy_parser);
 715     parser->old_parser = oparser = PL_parser;
 716     PL_parser = parser;
 717
 718     parser->stack = NULL;
 719     parser->ps = NULL;
 720     parser->stack_size = 0;
 721
 722     /* on scope exit, free this parser and restore any outer one */
 723     SAVEPARSER(parser);
 724     parser->saved_curcop = PL_curcop;
 725
 726     /* initialise lexer state */
 727
 728 #ifdef PERL_MAD
 729     parser->curforce = -1;
 730 #else
 731     parser->nexttoke = 0;
 732 #endif
 733     parser->error_count = oparser ? oparser->error_count : 0;
 734     parser->copline = NOLINE;
 735     parser->lex_state = LEX_NORMAL;
 736     parser->expect = XSTATE;
 737     parser->rsfp = rsfp;
 738     parser->rsfp_filters =
 739       !(flags & LEX_START_SAME_FILTER) || !oparser
 740         ? NULL
 741         : MUTABLE_AV(SvREFCNT_inc(
 742             oparser->rsfp_filters
 743              ? oparser->rsfp_filters
 744              : (oparser->rsfp_filters = newAV())
 745           ));
 746
 747     Newx(parser->lex_brackstack, 120, char);
 748     Newx(parser->lex_casestack, 12, char);
 749     *parser->lex_casestack = '\0';
 750     Newxz(parser->lex_shared, 1, LEXSHARED);
 751
 752     if (line) {
 753         STRLEN len;
 754         s = SvPV_const(line, len);
 755         parser->linestr = flags & LEX_START_COPIED
 756                             ? SvREFCNT_inc_simple_NN(line)
 757                             : newSVpvn_flags(s, len, SvUTF8(line));
 758         sv_catpvs(parser->linestr, "\n;");
 759     } else {
 760         parser->linestr = newSVpvs("\n;");
 761     }
 762     parser->oldoldbufptr =
 763         parser->oldbufptr =
 764         parser->bufptr =
 765         parser->linestart = SvPVX(parser->linestr);
 766     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 767     parser->last_lop = parser->last_uni = NULL;
 768     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 769                                  |LEX_DONT_CLOSE_RSFP);
 770
 771     parser->in_pod = parser->filtered = 0;
 772 }
 773
 774
 775 /* delete a parser object */
 776
 777 void
 778 Perl_parser_free(pTHX_  const yy_parser *parser)
 779 {
 780     PERL_ARGS_ASSERT_PARSER_FREE;
 781
 782     PL_curcop = parser->saved_curcop;
 783     SvREFCNT_dec(parser->linestr);
 784
 785     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 786         PerlIO_clearerr(parser->rsfp);
 787     else if (parser->rsfp && (!parser->old_parser ||
 788                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 789         PerlIO_close(parser->rsfp);
 790     SvREFCNT_dec(parser->rsfp_filters);
 791
 792     Safefree(parser->lex_brackstack);
 793     Safefree(parser->lex_casestack);
 794     Safefree(parser->lex_shared);
 795     PL_parser = parser->old_parser;
 796     Safefree(parser);
 797 }
 798
 799
 800 /*
 801 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 802
 803 Buffer scalar containing the chunk currently under consideration of the
 804 text currently being lexed.  This is always a plain string scalar (for
 805 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 806 normal scalar means; instead refer to the buffer directly by the pointer
 807 variables described below.
 808
 809 The lexer maintains various C<char*> pointers to things in the
 810 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 811 reallocated, all of these pointers must be updated.  Don't attempt to
 812 do this manually, but rather use L</lex_grow_linestr> if you need to
 813 reallocate the buffer.
 814
 815 The content of the text chunk in the buffer is commonly exactly one
 816 complete line of input, up to and including a newline terminator,
 817 but there are situations where it is otherwise.  The octets of the
 818 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 819 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 820 flag on this scalar, which may disagree with it.
 821
 822 For direct examination of the buffer, the variable
 823 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 824 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 825 of these pointers is usually preferable to examination of the scalar
 826 through normal scalar means.
 827
 828 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 829
 830 Direct pointer to the end of the chunk of text currently being lexed, the
 831 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 832 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 833 always located at the end of the buffer, and does not count as part of
 834 the buffer's contents.
 835
 836 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 837
 838 Points to the current position of lexing inside the lexer buffer.
 839 Characters around this point may be freely examined, within
 840 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 841 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 842 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 843
 844 Lexing code (whether in the Perl core or not) moves this pointer past
 845 the characters that it consumes.  It is also expected to perform some
 846 bookkeeping whenever a newline character is consumed.  This movement
 847 can be more conveniently performed by the function L</lex_read_to>,
 848 which handles newlines appropriately.
 849
 850 Interpretation of the buffer's octets can be abstracted out by
 851 using the slightly higher-level functions L</lex_peek_unichar> and
 852 L</lex_read_unichar>.
 853
 854 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 855
 856 Points to the start of the current line inside the lexer buffer.
 857 This is useful for indicating at which column an error occurred, and
 858 not much else.  This must be updated by any lexing code that consumes
 859 a newline; the function L</lex_read_to> handles this detail.
 860
 861 =cut
 862 */
 863
 864 /*
 865 =for apidoc Amx|bool|lex_bufutf8
 866
 867 Indicates whether the octets in the lexer buffer
 868 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 869 of Unicode characters.  If not, they should be interpreted as Latin-1
 870 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 871
 872 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 873 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 874 encoding.
 875
 876 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 877 is significant, but not the whole story regarding the input character
 878 encoding.  Normally, when a file is being read, the scalar contains octets
 879 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 880 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 881 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 882 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 883 is in effect.  This logic may change in the future; use this function
 884 instead of implementing the logic yourself.
 885
 886 =cut
 887 */
 888
 889 bool
 890 Perl_lex_bufutf8(pTHX)
 891 {
 892     return UTF;
 893 }
 894
 895 /*
 896 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 897
 898 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 899 at least I<len> octets (including terminating NUL).  Returns a
 900 pointer to the reallocated buffer.  This is necessary before making
 901 any direct modification of the buffer that would increase its length.
 902 L</lex_stuff_pvn> provides a more convenient way to insert text into
 903 the buffer.
 904
 905 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 906 this function updates all of the lexer's variables that point directly
 907 into the buffer.
 908
 909 =cut
 910 */
 911
 912 char *
 913 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 914 {
 915     SV *linestr;
 916     char *buf;
 917     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 918     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 919     linestr = PL_parser->linestr;
 920     buf = SvPVX(linestr);
 921     if (len <= SvLEN(linestr))
 922         return buf;
 923     bufend_pos = PL_parser->bufend - buf;
 924     bufptr_pos = PL_parser->bufptr - buf;
 925     oldbufptr_pos = PL_parser->oldbufptr - buf;
 926     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 927     linestart_pos = PL_parser->linestart - buf;
 928     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 929     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 930     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 931                             PL_parser->lex_shared->re_eval_start - buf : 0;
 932
 933     buf = sv_grow(linestr, len);
 934
 935     PL_parser->bufend = buf + bufend_pos;
 936     PL_parser->bufptr = buf + bufptr_pos;
 937     PL_parser->oldbufptr = buf + oldbufptr_pos;
 938     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 939     PL_parser->linestart = buf + linestart_pos;
 940     if (PL_parser->last_uni)
 941         PL_parser->last_uni = buf + last_uni_pos;
 942     if (PL_parser->last_lop)
 943         PL_parser->last_lop = buf + last_lop_pos;
 944     if (PL_parser->lex_shared->re_eval_start)
 945         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 946     return buf;
 947 }
 948
 949 /*
 950 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 951
 952 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 953 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 954 reallocating the buffer if necessary.  This means that lexing code that
 955 runs later will see the characters as if they had appeared in the input.
 956 It is not recommended to do this as part of normal parsing, and most
 957 uses of this facility run the risk of the inserted characters being
 958 interpreted in an unintended manner.
 959
 960 The string to be inserted is represented by I<len> octets starting
 961 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 962 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 963 The characters are recoded for the lexer buffer, according to how the
 964 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 965 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 966 function is more convenient.
 967
 968 =cut
 969 */
 970
 971 void
 972 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 973 {
 974     dVAR;
 975     char *bufptr;
 976     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 977     if (flags & ~(LEX_STUFF_UTF8))
 978         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 979     if (UTF) {
 980         if (flags & LEX_STUFF_UTF8) {
 981             goto plain_copy;
 982         } else {
 983             STRLEN highhalf = 0;
 984             const char *p, *e = pv+len;
 985             for (p = pv; p != e; p++)
 986                 highhalf += !!(((U8)*p) & 0x80);
 987             if (!highhalf)
 988                 goto plain_copy;
 989             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 990             bufptr = PL_parser->bufptr;
 991             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 992             SvCUR_set(PL_parser->linestr,
 993                 SvCUR(PL_parser->linestr) + len+highhalf);
 994             PL_parser->bufend += len+highhalf;
 995             for (p = pv; p != e; p++) {
 996                 U8 c = (U8)*p;
 997                 if (c & 0x80) {
 998                     *bufptr++ = (char)(0xc0 | (c >> 6));
 999                     *bufptr++ = (char)(0x80 | (c & 0x3f));
1000                 } else {
1001                     *bufptr++ = (char)c;
1002                 }
1003             }
1004         }
1005     } else {
1006         if (flags & LEX_STUFF_UTF8) {
1007             STRLEN highhalf = 0;
1008             const char *p, *e = pv+len;
1009             for (p = pv; p != e; p++) {
1010                 U8 c = (U8)*p;
1011                 if (c >= 0xc4) {
1012                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1013                                 "non-Latin-1 character into Latin-1 input");
1014                 } else if (c >= 0xc2 && p+1 != e &&
1015                             (((U8)p[1]) & 0xc0) == 0x80) {
1016                     p++;
1017                     highhalf++;
1018                 } else if (c >= 0x80) {
1019                     /* malformed UTF-8 */
1020                     ENTER;
1021                     SAVESPTR(PL_warnhook);
1022                     PL_warnhook = PERL_WARNHOOK_FATAL;
1023                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1024                     LEAVE;
1025                 }
1026             }
1027             if (!highhalf)
1028                 goto plain_copy;
1029             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1030             bufptr = PL_parser->bufptr;
1031             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1032             SvCUR_set(PL_parser->linestr,
1033                 SvCUR(PL_parser->linestr) + len-highhalf);
1034             PL_parser->bufend += len-highhalf;
1035             for (p = pv; p != e; p++) {
1036                 U8 c = (U8)*p;
1037                 if (c & 0x80) {
1038                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1039                     p++;
1040                 } else {
1041                     *bufptr++ = (char)c;
1042                 }
1043             }
1044         } else {
1045             plain_copy:
1046             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1047             bufptr = PL_parser->bufptr;
1048             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1049             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1050             PL_parser->bufend += len;
1051             Copy(pv, bufptr, len, char);
1052         }
1053     }
1054 }
1055
1056 /*
1057 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1058
1059 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1060 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1061 reallocating the buffer if necessary.  This means that lexing code that
1062 runs later will see the characters as if they had appeared in the input.
1063 It is not recommended to do this as part of normal parsing, and most
1064 uses of this facility run the risk of the inserted characters being
1065 interpreted in an unintended manner.
1066
1067 The string to be inserted is represented by octets starting at I<pv>
1068 and continuing to the first nul.  These octets are interpreted as either
1069 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1070 in I<flags>.  The characters are recoded for the lexer buffer, according
1071 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1072 If it is not convenient to nul-terminate a string to be inserted, the
1073 L</lex_stuff_pvn> function is more appropriate.
1074
1075 =cut
1076 */
1077
1078 void
1079 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1080 {
1081     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1082     lex_stuff_pvn(pv, strlen(pv), flags);
1083 }
1084
1085 /*
1086 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1087
1088 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1089 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1090 reallocating the buffer if necessary.  This means that lexing code that
1091 runs later will see the characters as if they had appeared in the input.
1092 It is not recommended to do this as part of normal parsing, and most
1093 uses of this facility run the risk of the inserted characters being
1094 interpreted in an unintended manner.
1095
1096 The string to be inserted is the string value of I<sv>.  The characters
1097 are recoded for the lexer buffer, according to how the buffer is currently
1098 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1099 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1100 need to construct a scalar.
1101
1102 =cut
1103 */
1104
1105 void
1106 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1107 {
1108     char *pv;
1109     STRLEN len;
1110     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1111     if (flags)
1112         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1113     pv = SvPV(sv, len);
1114     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1115 }
1116
1117 /*
1118 =for apidoc Amx|void|lex_unstuff|char *ptr
1119
1120 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1121 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1122 This hides the discarded text from any lexing code that runs later,
1123 as if the text had never appeared.
1124
1125 This is not the normal way to consume lexed text.  For that, use
1126 L</lex_read_to>.
1127
1128 =cut
1129 */
1130
1131 void
1132 Perl_lex_unstuff(pTHX_ char *ptr)
1133 {
1134     char *buf, *bufend;
1135     STRLEN unstuff_len;
1136     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1137     buf = PL_parser->bufptr;
1138     if (ptr < buf)
1139         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1140     if (ptr == buf)
1141         return;
1142     bufend = PL_parser->bufend;
1143     if (ptr > bufend)
1144         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1145     unstuff_len = ptr - buf;
1146     Move(ptr, buf, bufend+1-ptr, char);
1147     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1148     PL_parser->bufend = bufend - unstuff_len;
1149 }
1150
1151 /*
1152 =for apidoc Amx|void|lex_read_to|char *ptr
1153
1154 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1155 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1156 performing the correct bookkeeping whenever a newline character is passed.
1157 This is the normal way to consume lexed text.
1158
1159 Interpretation of the buffer's octets can be abstracted out by
1160 using the slightly higher-level functions L</lex_peek_unichar> and
1161 L</lex_read_unichar>.
1162
1163 =cut
1164 */
1165
1166 void
1167 Perl_lex_read_to(pTHX_ char *ptr)
1168 {
1169     char *s;
1170     PERL_ARGS_ASSERT_LEX_READ_TO;
1171     s = PL_parser->bufptr;
1172     if (ptr < s || ptr > PL_parser->bufend)
1173         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1174     for (; s != ptr; s++)
1175         if (*s == '\n') {
1176             COPLINE_INC_WITH_HERELINES;
1177             PL_parser->linestart = s+1;
1178         }
1179     PL_parser->bufptr = ptr;
1180 }
1181
1182 /*
1183 =for apidoc Amx|void|lex_discard_to|char *ptr
1184
1185 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1186 up to I<ptr>.  The remaining content of the buffer will be moved, and
1187 all pointers into the buffer updated appropriately.  I<ptr> must not
1188 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1189 it is not permitted to discard text that has yet to be lexed.
1190
1191 Normally it is not necessarily to do this directly, because it suffices to
1192 use the implicit discarding behaviour of L</lex_next_chunk> and things
1193 based on it.  However, if a token stretches across multiple lines,
1194 and the lexing code has kept multiple lines of text in the buffer for
1195 that purpose, then after completion of the token it would be wise to
1196 explicitly discard the now-unneeded earlier lines, to avoid future
1197 multi-line tokens growing the buffer without bound.
1198
1199 =cut
1200 */
1201
1202 void
1203 Perl_lex_discard_to(pTHX_ char *ptr)
1204 {
1205     char *buf;
1206     STRLEN discard_len;
1207     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1208     buf = SvPVX(PL_parser->linestr);
1209     if (ptr < buf)
1210         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211     if (ptr == buf)
1212         return;
1213     if (ptr > PL_parser->bufptr)
1214         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1215     discard_len = ptr - buf;
1216     if (PL_parser->oldbufptr < ptr)
1217         PL_parser->oldbufptr = ptr;
1218     if (PL_parser->oldoldbufptr < ptr)
1219         PL_parser->oldoldbufptr = ptr;
1220     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1221         PL_parser->last_uni = NULL;
1222     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1223         PL_parser->last_lop = NULL;
1224     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1225     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1226     PL_parser->bufend -= discard_len;
1227     PL_parser->bufptr -= discard_len;
1228     PL_parser->oldbufptr -= discard_len;
1229     PL_parser->oldoldbufptr -= discard_len;
1230     if (PL_parser->last_uni)
1231         PL_parser->last_uni -= discard_len;
1232     if (PL_parser->last_lop)
1233         PL_parser->last_lop -= discard_len;
1234 }
1235
1236 /*
1237 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1238
1239 Reads in the next chunk of text to be lexed, appending it to
1240 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1241 looked to the end of the current chunk and wants to know more.  It is
1242 usual, but not necessary, for lexing to have consumed the entirety of
1243 the current chunk at this time.
1244
1245 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1246 chunk (i.e., the current chunk has been entirely consumed), normally the
1247 current chunk will be discarded at the same time that the new chunk is
1248 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1249 will not be discarded.  If the current chunk has not been entirely
1250 consumed, then it will not be discarded regardless of the flag.
1251
1252 Returns true if some new text was added to the buffer, or false if the
1253 buffer has reached the end of the input text.
1254
1255 =cut
1256 */
1257
1258 #define LEX_FAKE_EOF 0x80000000
1259 #define LEX_NO_TERM  0x40000000
1260
1261 bool
1262 Perl_lex_next_chunk(pTHX_ U32 flags)
1263 {
1264     SV *linestr;
1265     char *buf;
1266     STRLEN old_bufend_pos, new_bufend_pos;
1267     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1268     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1269     bool got_some_for_debugger = 0;
1270     bool got_some;
1271     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1272         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1273     linestr = PL_parser->linestr;
1274     buf = SvPVX(linestr);
1275     if (!(flags & LEX_KEEP_PREVIOUS) &&
1276             PL_parser->bufptr == PL_parser->bufend) {
1277         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1278         linestart_pos = 0;
1279         if (PL_parser->last_uni != PL_parser->bufend)
1280             PL_parser->last_uni = NULL;
1281         if (PL_parser->last_lop != PL_parser->bufend)
1282             PL_parser->last_lop = NULL;
1283         last_uni_pos = last_lop_pos = 0;
1284         *buf = 0;
1285         SvCUR(linestr) = 0;
1286     } else {
1287         old_bufend_pos = PL_parser->bufend - buf;
1288         bufptr_pos = PL_parser->bufptr - buf;
1289         oldbufptr_pos = PL_parser->oldbufptr - buf;
1290         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1291         linestart_pos = PL_parser->linestart - buf;
1292         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1293         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1294     }
1295     if (flags & LEX_FAKE_EOF) {
1296         goto eof;
1297     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1298         got_some = 0;
1299     } else if (filter_gets(linestr, old_bufend_pos)) {
1300         got_some = 1;
1301         got_some_for_debugger = 1;
1302     } else if (flags & LEX_NO_TERM) {
1303         got_some = 0;
1304     } else {
1305         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1306             sv_setpvs(linestr, "");
1307         eof:
1308         /* End of real input.  Close filehandle (unless it was STDIN),
1309          * then add implicit termination.
1310          */
1311         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1312             PerlIO_clearerr(PL_parser->rsfp);
1313         else if (PL_parser->rsfp)
1314             (void)PerlIO_close(PL_parser->rsfp);
1315         PL_parser->rsfp = NULL;
1316         PL_parser->in_pod = PL_parser->filtered = 0;
1317 #ifdef PERL_MAD
1318         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1319             PL_faketokens = 1;
1320 #endif
1321         if (!PL_in_eval && PL_minus_p) {
1322             sv_catpvs(linestr,
1323                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1324             PL_minus_n = PL_minus_p = 0;
1325         } else if (!PL_in_eval && PL_minus_n) {
1326             sv_catpvs(linestr, /*{*/";}");
1327             PL_minus_n = 0;
1328         } else
1329             sv_catpvs(linestr, ";");
1330         got_some = 1;
1331     }
1332     buf = SvPVX(linestr);
1333     new_bufend_pos = SvCUR(linestr);
1334     PL_parser->bufend = buf + new_bufend_pos;
1335     PL_parser->bufptr = buf + bufptr_pos;
1336     PL_parser->oldbufptr = buf + oldbufptr_pos;
1337     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1338     PL_parser->linestart = buf + linestart_pos;
1339     if (PL_parser->last_uni)
1340         PL_parser->last_uni = buf + last_uni_pos;
1341     if (PL_parser->last_lop)
1342         PL_parser->last_lop = buf + last_lop_pos;
1343     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1344             PL_curstash != PL_debstash) {
1345         /* debugger active and we're not compiling the debugger code,
1346          * so store the line into the debugger's array of lines
1347          */
1348         update_debugger_info(NULL, buf+old_bufend_pos,
1349             new_bufend_pos-old_bufend_pos);
1350     }
1351     return got_some;
1352 }
1353
1354 /*
1355 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1356
1357 Looks ahead one (Unicode) character in the text currently being lexed.
1358 Returns the codepoint (unsigned integer value) of the next character,
1359 or -1 if lexing has reached the end of the input text.  To consume the
1360 peeked character, use L</lex_read_unichar>.
1361
1362 If the next character is in (or extends into) the next chunk of input
1363 text, the next chunk will be read in.  Normally the current chunk will be
1364 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1365 then the current chunk will not be discarded.
1366
1367 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1368 is encountered, an exception is generated.
1369
1370 =cut
1371 */
1372
1373 I32
1374 Perl_lex_peek_unichar(pTHX_ U32 flags)
1375 {
1376     dVAR;
1377     char *s, *bufend;
1378     if (flags & ~(LEX_KEEP_PREVIOUS))
1379         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1380     s = PL_parser->bufptr;
1381     bufend = PL_parser->bufend;
1382     if (UTF) {
1383         U8 head;
1384         I32 unichar;
1385         STRLEN len, retlen;
1386         if (s == bufend) {
1387             if (!lex_next_chunk(flags))
1388                 return -1;
1389             s = PL_parser->bufptr;
1390             bufend = PL_parser->bufend;
1391         }
1392         head = (U8)*s;
1393         if (!(head & 0x80))
1394             return head;
1395         if (head & 0x40) {
1396             len = PL_utf8skip[head];
1397             while ((STRLEN)(bufend-s) < len) {
1398                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1399                     break;
1400                 s = PL_parser->bufptr;
1401                 bufend = PL_parser->bufend;
1402             }
1403         }
1404         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1405         if (retlen == (STRLEN)-1) {
1406             /* malformed UTF-8 */
1407             ENTER;
1408             SAVESPTR(PL_warnhook);
1409             PL_warnhook = PERL_WARNHOOK_FATAL;
1410             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1411             LEAVE;
1412         }
1413         return unichar;
1414     } else {
1415         if (s == bufend) {
1416             if (!lex_next_chunk(flags))
1417                 return -1;
1418             s = PL_parser->bufptr;
1419         }
1420         return (U8)*s;
1421     }
1422 }
1423
1424 /*
1425 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1426
1427 Reads the next (Unicode) character in the text currently being lexed.
1428 Returns the codepoint (unsigned integer value) of the character read,
1429 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1430 if lexing has reached the end of the input text.  To non-destructively
1431 examine the next character, use L</lex_peek_unichar> instead.
1432
1433 If the next character is in (or extends into) the next chunk of input
1434 text, the next chunk will be read in.  Normally the current chunk will be
1435 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1436 then the current chunk will not be discarded.
1437
1438 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1439 is encountered, an exception is generated.
1440
1441 =cut
1442 */
1443
1444 I32
1445 Perl_lex_read_unichar(pTHX_ U32 flags)
1446 {
1447     I32 c;
1448     if (flags & ~(LEX_KEEP_PREVIOUS))
1449         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1450     c = lex_peek_unichar(flags);
1451     if (c != -1) {
1452         if (c == '\n')
1453             COPLINE_INC_WITH_HERELINES;
1454         if (UTF)
1455             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1456         else
1457             ++(PL_parser->bufptr);
1458     }
1459     return c;
1460 }
1461
1462 /*
1463 =for apidoc Amx|void|lex_read_space|U32 flags
1464
1465 Reads optional spaces, in Perl style, in the text currently being
1466 lexed.  The spaces may include ordinary whitespace characters and
1467 Perl-style comments.  C<#line> directives are processed if encountered.
1468 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1469 at a non-space character (or the end of the input text).
1470
1471 If spaces extend into the next chunk of input text, the next chunk will
1472 be read in.  Normally the current chunk will be discarded at the same
1473 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1474 chunk will not be discarded.
1475
1476 =cut
1477 */
1478
1479 #define LEX_NO_NEXT_CHUNK 0x80000000
1480
1481 void
1482 Perl_lex_read_space(pTHX_ U32 flags)
1483 {
1484     char *s, *bufend;
1485     bool need_incline = 0;
1486     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1487         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1488 #ifdef PERL_MAD
1489     if (PL_skipwhite) {
1490         sv_free(PL_skipwhite);
1491         PL_skipwhite = NULL;
1492     }
1493     if (PL_madskills)
1494         PL_skipwhite = newSVpvs("");
1495 #endif /* PERL_MAD */
1496     s = PL_parser->bufptr;
1497     bufend = PL_parser->bufend;
1498     while (1) {
1499         char c = *s;
1500         if (c == '#') {
1501             do {
1502                 c = *++s;
1503             } while (!(c == '\n' || (c == 0 && s == bufend)));
1504         } else if (c == '\n') {
1505             s++;
1506             PL_parser->linestart = s;
1507             if (s == bufend)
1508                 need_incline = 1;
1509             else
1510                 incline(s);
1511         } else if (isSPACE(c)) {
1512             s++;
1513         } else if (c == 0 && s == bufend) {
1514             bool got_more;
1515 #ifdef PERL_MAD
1516             if (PL_madskills)
1517                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1518 #endif /* PERL_MAD */
1519             if (flags & LEX_NO_NEXT_CHUNK)
1520                 break;
1521             PL_parser->bufptr = s;
1522             COPLINE_INC_WITH_HERELINES;
1523             got_more = lex_next_chunk(flags);
1524             CopLINE_dec(PL_curcop);
1525             s = PL_parser->bufptr;
1526             bufend = PL_parser->bufend;
1527             if (!got_more)
1528                 break;
1529             if (need_incline && PL_parser->rsfp) {
1530                 incline(s);
1531                 need_incline = 0;
1532             }
1533         } else {
1534             break;
1535         }
1536     }
1537 #ifdef PERL_MAD
1538     if (PL_madskills)
1539         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1540 #endif /* PERL_MAD */
1541     PL_parser->bufptr = s;
1542 }
1543
1544 /*
1545  * S_incline
1546  * This subroutine has nothing to do with tilting, whether at windmills
1547  * or pinball tables.  Its name is short for "increment line".  It
1548  * increments the current line number in CopLINE(PL_curcop) and checks
1549  * to see whether the line starts with a comment of the form
1550  *    # line 500 "foo.pm"
1551  * If so, it sets the current line number and file to the values in the comment.
1552  */
1553
1554 STATIC void
1555 S_incline(pTHX_ const char *s)
1556 {
1557     dVAR;
1558     const char *t;
1559     const char *n;
1560     const char *e;
1561     line_t line_num;
1562
1563     PERL_ARGS_ASSERT_INCLINE;
1564
1565     COPLINE_INC_WITH_HERELINES;
1566     if (*s++ != '#')
1567         return;
1568     while (SPACE_OR_TAB(*s))
1569         s++;
1570     if (strnEQ(s, "line", 4))
1571         s += 4;
1572     else
1573         return;
1574     if (SPACE_OR_TAB(*s))
1575         s++;
1576     else
1577         return;
1578     while (SPACE_OR_TAB(*s))
1579         s++;
1580     if (!isDIGIT(*s))
1581         return;
1582
1583     n = s;
1584     while (isDIGIT(*s))
1585         s++;
1586     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1587         return;
1588     while (SPACE_OR_TAB(*s))
1589         s++;
1590     if (*s == '"' && (t = strchr(s+1, '"'))) {
1591         s++;
1592         e = t + 1;
1593     }
1594     else {
1595         t = s;
1596         while (!isSPACE(*t))
1597             t++;
1598         e = t;
1599     }
1600     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1601         e++;
1602     if (*e != '\n' && *e != '\0')
1603         return;         /* false alarm */
1604
1605     line_num = atoi(n)-1;
1606
1607     if (t - s > 0) {
1608         const STRLEN len = t - s;
1609         SV *const temp_sv = CopFILESV(PL_curcop);
1610         const char *cf;
1611         STRLEN tmplen;
1612
1613         if (temp_sv) {
1614             cf = SvPVX(temp_sv);
1615             tmplen = SvCUR(temp_sv);
1616         } else {
1617             cf = NULL;
1618             tmplen = 0;
1619         }
1620
1621         if (!PL_rsfp && !PL_parser->filtered) {
1622             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1623              * to *{"::_<newfilename"} */
1624             /* However, the long form of evals is only turned on by the
1625                debugger - usually they're "(eval %lu)" */
1626             char smallbuf[128];
1627             char *tmpbuf;
1628             GV **gvp;
1629             STRLEN tmplen2 = len;
1630             if (tmplen + 2 <= sizeof smallbuf)
1631                 tmpbuf = smallbuf;
1632             else
1633                 Newx(tmpbuf, tmplen + 2, char);
1634             tmpbuf[0] = '_';
1635             tmpbuf[1] = '<';
1636             memcpy(tmpbuf + 2, cf, tmplen);
1637             tmplen += 2;
1638             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1639             if (gvp) {
1640                 char *tmpbuf2;
1641                 GV *gv2;
1642
1643                 if (tmplen2 + 2 <= sizeof smallbuf)
1644                     tmpbuf2 = smallbuf;
1645                 else
1646                     Newx(tmpbuf2, tmplen2 + 2, char);
1647
1648                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1649                     /* Either they malloc'd it, or we malloc'd it,
1650                        so no prefix is present in ours.  */
1651                     tmpbuf2[0] = '_';
1652                     tmpbuf2[1] = '<';
1653                 }
1654
1655                 memcpy(tmpbuf2 + 2, s, tmplen2);
1656                 tmplen2 += 2;
1657
1658                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1659                 if (!isGV(gv2)) {
1660                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1661                     /* adjust ${"::_<newfilename"} to store the new file name */
1662                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1663                     /* The line number may differ. If that is the case,
1664                        alias the saved lines that are in the array.
1665                        Otherwise alias the whole array. */
1666                     if (CopLINE(PL_curcop) == line_num) {
1667                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1668                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1669                     }
1670                     else if (GvAV(*gvp)) {
1671                         AV * const av = GvAV(*gvp);
1672                         const I32 start = CopLINE(PL_curcop)+1;
1673                         I32 items = AvFILLp(av) - start;
1674                         if (items > 0) {
1675                             AV * const av2 = GvAVn(gv2);
1676                             SV **svp = AvARRAY(av) + start;
1677                             I32 l = (I32)line_num+1;
1678                             while (items--)
1679                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1680                         }
1681                     }
1682                 }
1683
1684                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1685             }
1686             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1687         }
1688         CopFILE_free(PL_curcop);
1689         CopFILE_setn(PL_curcop, s, len);
1690     }
1691     CopLINE_set(PL_curcop, line_num);
1692 }
1693
1694 #ifdef PERL_MAD
1695 /* skip space before PL_thistoken */
1696
1697 STATIC char *
1698 S_skipspace0(pTHX_ register char *s)
1699 {
1700     PERL_ARGS_ASSERT_SKIPSPACE0;
1701
1702     s = skipspace(s);
1703     if (!PL_madskills)
1704         return s;
1705     if (PL_skipwhite) {
1706         if (!PL_thiswhite)
1707             PL_thiswhite = newSVpvs("");
1708         sv_catsv(PL_thiswhite, PL_skipwhite);
1709         sv_free(PL_skipwhite);
1710         PL_skipwhite = 0;
1711     }
1712     PL_realtokenstart = s - SvPVX(PL_linestr);
1713     return s;
1714 }
1715
1716 /* skip space after PL_thistoken */
1717
1718 STATIC char *
1719 S_skipspace1(pTHX_ register char *s)
1720 {
1721     const char *start = s;
1722     I32 startoff = start - SvPVX(PL_linestr);
1723
1724     PERL_ARGS_ASSERT_SKIPSPACE1;
1725
1726     s = skipspace(s);
1727     if (!PL_madskills)
1728         return s;
1729     start = SvPVX(PL_linestr) + startoff;
1730     if (!PL_thistoken && PL_realtokenstart >= 0) {
1731         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1732         PL_thistoken = newSVpvn(tstart, start - tstart);
1733     }
1734     PL_realtokenstart = -1;
1735     if (PL_skipwhite) {
1736         if (!PL_nextwhite)
1737             PL_nextwhite = newSVpvs("");
1738         sv_catsv(PL_nextwhite, PL_skipwhite);
1739         sv_free(PL_skipwhite);
1740         PL_skipwhite = 0;
1741     }
1742     return s;
1743 }
1744
1745 STATIC char *
1746 S_skipspace2(pTHX_ register char *s, SV **svp)
1747 {
1748     char *start;
1749     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1750     const I32 startoff = s - SvPVX(PL_linestr);
1751
1752     PERL_ARGS_ASSERT_SKIPSPACE2;
1753
1754     s = skipspace(s);
1755     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1756     if (!PL_madskills || !svp)
1757         return s;
1758     start = SvPVX(PL_linestr) + startoff;
1759     if (!PL_thistoken && PL_realtokenstart >= 0) {
1760         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1761         PL_thistoken = newSVpvn(tstart, start - tstart);
1762         PL_realtokenstart = -1;
1763     }
1764     if (PL_skipwhite) {
1765         if (!*svp)
1766             *svp = newSVpvs("");
1767         sv_setsv(*svp, PL_skipwhite);
1768         sv_free(PL_skipwhite);
1769         PL_skipwhite = 0;
1770     }
1771
1772     return s;
1773 }
1774 #endif
1775
1776 STATIC void
1777 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1778 {
1779     AV *av = CopFILEAVx(PL_curcop);
1780     if (av) {
1781         SV * const sv = newSV_type(SVt_PVMG);
1782         if (orig_sv)
1783             sv_setsv(sv, orig_sv);
1784         else
1785             sv_setpvn(sv, buf, len);
1786         (void)SvIOK_on(sv);
1787         SvIV_set(sv, 0);
1788         av_store(av, (I32)CopLINE(PL_curcop), sv);
1789     }
1790 }
1791
1792 /*
1793  * S_skipspace
1794  * Called to gobble the appropriate amount and type of whitespace.
1795  * Skips comments as well.
1796  */
1797
1798 STATIC char *
1799 S_skipspace(pTHX_ register char *s)
1800 {
1801 #ifdef PERL_MAD
1802     char *start = s;
1803 #endif /* PERL_MAD */
1804     PERL_ARGS_ASSERT_SKIPSPACE;
1805 #ifdef PERL_MAD
1806     if (PL_skipwhite) {
1807         sv_free(PL_skipwhite);
1808         PL_skipwhite = NULL;
1809     }
1810 #endif /* PERL_MAD */
1811     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1812         while (s < PL_bufend && SPACE_OR_TAB(*s))
1813             s++;
1814     } else {
1815         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1816         PL_bufptr = s;
1817         lex_read_space(LEX_KEEP_PREVIOUS |
1818                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1819                     LEX_NO_NEXT_CHUNK : 0));
1820         s = PL_bufptr;
1821         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1822         if (PL_linestart > PL_bufptr)
1823             PL_bufptr = PL_linestart;
1824         return s;
1825     }
1826 #ifdef PERL_MAD
1827     if (PL_madskills)
1828         PL_skipwhite = newSVpvn(start, s-start);
1829 #endif /* PERL_MAD */
1830     return s;
1831 }
1832
1833 /*
1834  * S_check_uni
1835  * Check the unary operators to ensure there's no ambiguity in how they're
1836  * used.  An ambiguous piece of code would be:
1837  *     rand + 5
1838  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1839  * the +5 is its argument.
1840  */
1841
1842 STATIC void
1843 S_check_uni(pTHX)
1844 {
1845     dVAR;
1846     const char *s;
1847     const char *t;
1848
1849     if (PL_oldoldbufptr != PL_last_uni)
1850         return;
1851     while (isSPACE(*PL_last_uni))
1852         PL_last_uni++;
1853     s = PL_last_uni;
1854     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1855         s++;
1856     if ((t = strchr(s, '(')) && t < PL_bufptr)
1857         return;
1858
1859     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1860                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1861                      (int)(s - PL_last_uni), PL_last_uni);
1862 }
1863
1864 /*
1865  * LOP : macro to build a list operator.  Its behaviour has been replaced
1866  * with a subroutine, S_lop() for which LOP is just another name.
1867  */
1868
1869 #define LOP(f,x) return lop(f,x,s)
1870
1871 /*
1872  * S_lop
1873  * Build a list operator (or something that might be one).  The rules:
1874  *  - if we have a next token, then it's a list operator [why?]
1875  *  - if the next thing is an opening paren, then it's a function
1876  *  - else it's a list operator
1877  */
1878
1879 STATIC I32
1880 S_lop(pTHX_ I32 f, int x, char *s)
1881 {
1882     dVAR;
1883
1884     PERL_ARGS_ASSERT_LOP;
1885
1886     pl_yylval.ival = f;
1887     CLINE;
1888     PL_expect = x;
1889     PL_bufptr = s;
1890     PL_last_lop = PL_oldbufptr;
1891     PL_last_lop_op = (OPCODE)f;
1892 #ifdef PERL_MAD
1893     if (PL_lasttoke)
1894         goto lstop;
1895 #else
1896     if (PL_nexttoke)
1897         goto lstop;
1898 #endif
1899     if (*s == '(')
1900         return REPORT(FUNC);
1901     s = PEEKSPACE(s);
1902     if (*s == '(')
1903         return REPORT(FUNC);
1904     else {
1905         lstop:
1906         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1907             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1908         return REPORT(LSTOP);
1909     }
1910 }
1911
1912 #ifdef PERL_MAD
1913  /*
1914  * S_start_force
1915  * Sets up for an eventual force_next().  start_force(0) basically does
1916  * an unshift, while start_force(-1) does a push.  yylex removes items
1917  * on the "pop" end.
1918  */
1919
1920 STATIC void
1921 S_start_force(pTHX_ int where)
1922 {
1923     int i;
1924
1925     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1926         where = PL_lasttoke;
1927     assert(PL_curforce < 0 || PL_curforce == where);
1928     if (PL_curforce != where) {
1929         for (i = PL_lasttoke; i > where; --i) {
1930             PL_nexttoke[i] = PL_nexttoke[i-1];
1931         }
1932         PL_lasttoke++;
1933     }
1934     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1935         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1936     PL_curforce = where;
1937     if (PL_nextwhite) {
1938         if (PL_madskills)
1939             curmad('^', newSVpvs(""));
1940         CURMAD('_', PL_nextwhite);
1941     }
1942 }
1943
1944 STATIC void
1945 S_curmad(pTHX_ char slot, SV *sv)
1946 {
1947     MADPROP **where;
1948
1949     if (!sv)
1950         return;
1951     if (PL_curforce < 0)
1952         where = &PL_thismad;
1953     else
1954         where = &PL_nexttoke[PL_curforce].next_mad;
1955
1956     if (PL_faketokens)
1957         sv_setpvs(sv, "");
1958     else {
1959         if (!IN_BYTES) {
1960             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1961                 SvUTF8_on(sv);
1962             else if (PL_encoding) {
1963                 sv_recode_to_utf8(sv, PL_encoding);
1964             }
1965         }
1966     }
1967
1968     /* keep a slot open for the head of the list? */
1969     if (slot != '_' && *where && (*where)->mad_key == '^') {
1970         (*where)->mad_key = slot;
1971         sv_free(MUTABLE_SV(((*where)->mad_val)));
1972         (*where)->mad_val = (void*)sv;
1973     }
1974     else
1975         addmad(newMADsv(slot, sv), where, 0);
1976 }
1977 #else
1978 #  define start_force(where)    NOOP
1979 #  define curmad(slot, sv)      NOOP
1980 #endif
1981
1982 /*
1983  * S_force_next
1984  * When the lexer realizes it knows the next token (for instance,
1985  * it is reordering tokens for the parser) then it can call S_force_next
1986  * to know what token to return the next time the lexer is called.  Caller
1987  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1988  * and possibly PL_expect to ensure the lexer handles the token correctly.
1989  */
1990
1991 STATIC void
1992 S_force_next(pTHX_ I32 type)
1993 {
1994     dVAR;
1995 #ifdef DEBUGGING
1996     if (DEBUG_T_TEST) {
1997         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1998         tokereport(type, &NEXTVAL_NEXTTOKE);
1999     }
2000 #endif
2001     /* Don’t let opslab_force_free snatch it */
2002     if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2003         assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2004         NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2005     }
2006 #ifdef PERL_MAD
2007     if (PL_curforce < 0)
2008         start_force(PL_lasttoke);
2009     PL_nexttoke[PL_curforce].next_type = type;
2010     if (PL_lex_state != LEX_KNOWNEXT)
2011         PL_lex_defer = PL_lex_state;
2012     PL_lex_state = LEX_KNOWNEXT;
2013     PL_lex_expect = PL_expect;
2014     PL_curforce = -1;
2015 #else
2016     PL_nexttype[PL_nexttoke] = type;
2017     PL_nexttoke++;
2018     if (PL_lex_state != LEX_KNOWNEXT) {
2019         PL_lex_defer = PL_lex_state;
2020         PL_lex_expect = PL_expect;
2021         PL_lex_state = LEX_KNOWNEXT;
2022     }
2023 #endif
2024 }
2025
2026 void
2027 Perl_yyunlex(pTHX)
2028 {
2029     int yyc = PL_parser->yychar;
2030     if (yyc != YYEMPTY) {
2031         if (yyc) {
2032             start_force(-1);
2033             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2034             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2035                 PL_lex_allbrackets--;
2036                 PL_lex_brackets--;
2037                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2038             } else if (yyc == '('/*)*/) {
2039                 PL_lex_allbrackets--;
2040                 yyc |= (2<<24);
2041             }
2042             force_next(yyc);
2043         }
2044         PL_parser->yychar = YYEMPTY;
2045     }
2046 }
2047
2048 STATIC SV *
2049 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2050 {
2051     dVAR;
2052     SV * const sv = newSVpvn_utf8(start, len,
2053                                   !IN_BYTES
2054                                   && UTF
2055                                   && !is_ascii_string((const U8*)start, len)
2056                                   && is_utf8_string((const U8*)start, len));
2057     return sv;
2058 }
2059
2060 /*
2061  * S_force_word
2062  * When the lexer knows the next thing is a word (for instance, it has
2063  * just seen -> and it knows that the next char is a word char, then
2064  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2065  * lookahead.
2066  *
2067  * Arguments:
2068  *   char *start : buffer position (must be within PL_linestr)
2069  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2070  *   int check_keyword : if true, Perl checks to make sure the word isn't
2071  *       a keyword (do this if the word is a label, e.g. goto FOO)
2072  *   int allow_pack : if true, : characters will also be allowed (require,
2073  *       use, etc. do this)
2074  *   int allow_initial_tick : used by the "sub" lexer only.
2075  */
2076
2077 STATIC char *
2078 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2079 {
2080     dVAR;
2081     char *s;
2082     STRLEN len;
2083
2084     PERL_ARGS_ASSERT_FORCE_WORD;
2085
2086     start = SKIPSPACE1(start);
2087     s = start;
2088     if (isIDFIRST_lazy_if(s,UTF) ||
2089         (allow_pack && *s == ':') ||
2090         (allow_initial_tick && *s == '\'') )
2091     {
2092         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2093         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2094             return start;
2095         start_force(PL_curforce);
2096         if (PL_madskills)
2097             curmad('X', newSVpvn(start,s-start));
2098         if (token == METHOD) {
2099             s = SKIPSPACE1(s);
2100             if (*s == '(')
2101                 PL_expect = XTERM;
2102             else {
2103                 PL_expect = XOPERATOR;
2104             }
2105         }
2106         if (PL_madskills)
2107             curmad('g', newSVpvs( "forced" ));
2108         NEXTVAL_NEXTTOKE.opval
2109             = (OP*)newSVOP(OP_CONST,0,
2110                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2111         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2112         force_next(token);
2113     }
2114     return s;
2115 }
2116
2117 /*
2118  * S_force_ident
2119  * Called when the lexer wants $foo *foo &foo etc, but the program
2120  * text only contains the "foo" portion.  The first argument is a pointer
2121  * to the "foo", and the second argument is the type symbol to prefix.
2122  * Forces the next token to be a "WORD".
2123  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2124  */
2125
2126 STATIC void
2127 S_force_ident(pTHX_ register const char *s, int kind)
2128 {
2129     dVAR;
2130
2131     PERL_ARGS_ASSERT_FORCE_IDENT;
2132
2133     if (*s) {
2134         const STRLEN len = strlen(s);
2135         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2136                                                                 UTF ? SVf_UTF8 : 0));
2137         start_force(PL_curforce);
2138         NEXTVAL_NEXTTOKE.opval = o;
2139         force_next(WORD);
2140         if (kind) {
2141             o->op_private = OPpCONST_ENTERED;
2142             /* XXX see note in pp_entereval() for why we forgo typo
2143                warnings if the symbol must be introduced in an eval.
2144                GSAR 96-10-12 */
2145             gv_fetchpvn_flags(s, len,
2146                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2147                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2148                               kind == '$' ? SVt_PV :
2149                               kind == '@' ? SVt_PVAV :
2150                               kind == '%' ? SVt_PVHV :
2151                               SVt_PVGV
2152                               );
2153         }
2154     }
2155 }
2156
2157 NV
2158 Perl_str_to_version(pTHX_ SV *sv)
2159 {
2160     NV retval = 0.0;
2161     NV nshift = 1.0;
2162     STRLEN len;
2163     const char *start = SvPV_const(sv,len);
2164     const char * const end = start + len;
2165     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2166
2167     PERL_ARGS_ASSERT_STR_TO_VERSION;
2168
2169     while (start < end) {
2170         STRLEN skip;
2171         UV n;
2172         if (utf)
2173             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2174         else {
2175             n = *(U8*)start;
2176             skip = 1;
2177         }
2178         retval += ((NV)n)/nshift;
2179         start += skip;
2180         nshift *= 1000;
2181     }
2182     return retval;
2183 }
2184
2185 /*
2186  * S_force_version
2187  * Forces the next token to be a version number.
2188  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2189  * and if "guessing" is TRUE, then no new token is created (and the caller
2190  * must use an alternative parsing method).
2191  */
2192
2193 STATIC char *
2194 S_force_version(pTHX_ char *s, int guessing)
2195 {
2196     dVAR;
2197     OP *version = NULL;
2198     char *d;
2199 #ifdef PERL_MAD
2200     I32 startoff = s - SvPVX(PL_linestr);
2201 #endif
2202
2203     PERL_ARGS_ASSERT_FORCE_VERSION;
2204
2205     s = SKIPSPACE1(s);
2206
2207     d = s;
2208     if (*d == 'v')
2209         d++;
2210     if (isDIGIT(*d)) {
2211         while (isDIGIT(*d) || *d == '_' || *d == '.')
2212             d++;
2213 #ifdef PERL_MAD
2214         if (PL_madskills) {
2215             start_force(PL_curforce);
2216             curmad('X', newSVpvn(s,d-s));
2217         }
2218 #endif
2219         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2220             SV *ver;
2221 #ifdef USE_LOCALE_NUMERIC
2222             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2223             setlocale(LC_NUMERIC, "C");
2224 #endif
2225             s = scan_num(s, &pl_yylval);
2226 #ifdef USE_LOCALE_NUMERIC
2227             setlocale(LC_NUMERIC, loc);
2228             Safefree(loc);
2229 #endif
2230             version = pl_yylval.opval;
2231             ver = cSVOPx(version)->op_sv;
2232             if (SvPOK(ver) && !SvNIOK(ver)) {
2233                 SvUPGRADE(ver, SVt_PVNV);
2234                 SvNV_set(ver, str_to_version(ver));
2235                 SvNOK_on(ver);          /* hint that it is a version */
2236             }
2237         }
2238         else if (guessing) {
2239 #ifdef PERL_MAD
2240             if (PL_madskills) {
2241                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2242                 PL_nextwhite = 0;
2243                 s = SvPVX(PL_linestr) + startoff;
2244             }
2245 #endif
2246             return s;
2247         }
2248     }
2249
2250 #ifdef PERL_MAD
2251     if (PL_madskills && !version) {
2252         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2253         PL_nextwhite = 0;
2254         s = SvPVX(PL_linestr) + startoff;
2255     }
2256 #endif
2257     /* NOTE: The parser sees the package name and the VERSION swapped */
2258     start_force(PL_curforce);
2259     NEXTVAL_NEXTTOKE.opval = version;
2260     force_next(WORD);
2261
2262     return s;
2263 }
2264
2265 /*
2266  * S_force_strict_version
2267  * Forces the next token to be a version number using strict syntax rules.
2268  */
2269
2270 STATIC char *
2271 S_force_strict_version(pTHX_ char *s)
2272 {
2273     dVAR;
2274     OP *version = NULL;
2275 #ifdef PERL_MAD
2276     I32 startoff = s - SvPVX(PL_linestr);
2277 #endif
2278     const char *errstr = NULL;
2279
2280     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2281
2282     while (isSPACE(*s)) /* leading whitespace */
2283         s++;
2284
2285     if (is_STRICT_VERSION(s,&errstr)) {
2286         SV *ver = newSV(0);
2287         s = (char *)scan_version(s, ver, 0);
2288         version = newSVOP(OP_CONST, 0, ver);
2289     }
2290     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2291             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2292     {
2293         PL_bufptr = s;
2294         if (errstr)
2295             yyerror(errstr); /* version required */
2296         return s;
2297     }
2298
2299 #ifdef PERL_MAD
2300     if (PL_madskills && !version) {
2301         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2302         PL_nextwhite = 0;
2303         s = SvPVX(PL_linestr) + startoff;
2304     }
2305 #endif
2306     /* NOTE: The parser sees the package name and the VERSION swapped */
2307     start_force(PL_curforce);
2308     NEXTVAL_NEXTTOKE.opval = version;
2309     force_next(WORD);
2310
2311     return s;
2312 }
2313
2314 /*
2315  * S_tokeq
2316  * Tokenize a quoted string passed in as an SV.  It finds the next
2317  * chunk, up to end of string or a backslash.  It may make a new
2318  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2319  * turns \\ into \.
2320  */
2321
2322 STATIC SV *
2323 S_tokeq(pTHX_ SV *sv)
2324 {
2325     dVAR;
2326     char *s;
2327     char *send;
2328     char *d;
2329     STRLEN len = 0;
2330     SV *pv = sv;
2331
2332     PERL_ARGS_ASSERT_TOKEQ;
2333
2334     if (!SvLEN(sv))
2335         goto finish;
2336
2337     s = SvPV_force(sv, len);
2338     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2339         goto finish;
2340     send = s + len;
2341     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2342     while (s < send && !(*s == '\\' && s[1] == '\\'))
2343         s++;
2344     if (s == send)
2345         goto finish;
2346     d = s;
2347     if ( PL_hints & HINT_NEW_STRING ) {
2348         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2349     }
2350     while (s < send) {
2351         if (*s == '\\') {
2352             if (s + 1 < send && (s[1] == '\\'))
2353                 s++;            /* all that, just for this */
2354         }
2355         *d++ = *s++;
2356     }
2357     *d = '\0';
2358     SvCUR_set(sv, d - SvPVX_const(sv));
2359   finish:
2360     if ( PL_hints & HINT_NEW_STRING )
2361        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2362     return sv;
2363 }
2364
2365 /*
2366  * Now come three functions related to double-quote context,
2367  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2368  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2369  * interact with PL_lex_state, and create fake ( ... ) argument lists
2370  * to handle functions and concatenation.
2371  * For example,
2372  *   "foo\lbar"
2373  * is tokenised as
2374  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2375  */
2376
2377 /*
2378  * S_sublex_start
2379  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2380  *
2381  * Pattern matching will set PL_lex_op to the pattern-matching op to
2382  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2383  *
2384  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2385  *
2386  * Everything else becomes a FUNC.
2387  *
2388  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2389  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2390  * call to S_sublex_push().
2391  */
2392
2393 STATIC I32
2394 S_sublex_start(pTHX)
2395 {
2396     dVAR;
2397     const I32 op_type = pl_yylval.ival;
2398
2399     if (op_type == OP_NULL) {
2400         pl_yylval.opval = PL_lex_op;
2401         PL_lex_op = NULL;
2402         return THING;
2403     }
2404     if (op_type == OP_CONST || op_type == OP_READLINE) {
2405         SV *sv = tokeq(PL_lex_stuff);
2406
2407         if (SvTYPE(sv) == SVt_PVIV) {
2408             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2409             STRLEN len;
2410             const char * const p = SvPV_const(sv, len);
2411             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2412             SvREFCNT_dec(sv);
2413             sv = nsv;
2414         }
2415         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2416         PL_lex_stuff = NULL;
2417         /* Allow <FH> // "foo" */
2418         if (op_type == OP_READLINE)
2419             PL_expect = XTERMORDORDOR;
2420         return THING;
2421     }
2422     else if (op_type == OP_BACKTICK && PL_lex_op) {
2423         /* readpipe() vas overriden */
2424         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2425         pl_yylval.opval = PL_lex_op;
2426         PL_lex_op = NULL;
2427         PL_lex_stuff = NULL;
2428         return THING;
2429     }
2430
2431     PL_sublex_info.super_state = PL_lex_state;
2432     PL_sublex_info.sub_inwhat = (U16)op_type;
2433     PL_sublex_info.sub_op = PL_lex_op;
2434     PL_lex_state = LEX_INTERPPUSH;
2435
2436     PL_expect = XTERM;
2437     if (PL_lex_op) {
2438         pl_yylval.opval = PL_lex_op;
2439         PL_lex_op = NULL;
2440         return PMFUNC;
2441     }
2442     else
2443         return FUNC;
2444 }
2445
2446 /*
2447  * S_sublex_push
2448  * Create a new scope to save the lexing state.  The scope will be
2449  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2450  * to the uc, lc, etc. found before.
2451  * Sets PL_lex_state to LEX_INTERPCONCAT.
2452  */
2453
2454 STATIC I32
2455 S_sublex_push(pTHX)
2456 {
2457     dVAR;
2458     LEXSHARED *shared;
2459     ENTER;
2460
2461     PL_lex_state = PL_sublex_info.super_state;
2462     SAVEBOOL(PL_lex_dojoin);
2463     SAVEI32(PL_lex_brackets);
2464     SAVEI32(PL_lex_allbrackets);
2465     SAVEI32(PL_lex_formbrack);
2466     SAVEI8(PL_lex_fakeeof);
2467     SAVEI32(PL_lex_casemods);
2468     SAVEI32(PL_lex_starts);
2469     SAVEI8(PL_lex_state);
2470     SAVESPTR(PL_lex_repl);
2471     SAVEVPTR(PL_lex_inpat);
2472     SAVEI16(PL_lex_inwhat);
2473     SAVECOPLINE(PL_curcop);
2474     SAVEPPTR(PL_bufptr);
2475     SAVEPPTR(PL_bufend);
2476     SAVEPPTR(PL_oldbufptr);
2477     SAVEPPTR(PL_oldoldbufptr);
2478     SAVEPPTR(PL_last_lop);
2479     SAVEPPTR(PL_last_uni);
2480     SAVEPPTR(PL_linestart);
2481     SAVESPTR(PL_linestr);
2482     SAVEGENERICPV(PL_lex_brackstack);
2483     SAVEGENERICPV(PL_lex_casestack);
2484     SAVEGENERICPV(PL_parser->lex_shared);
2485
2486     /* The here-doc parser needs to be able to peek into outer lexing
2487        scopes to find the body of the here-doc.  So we put PL_linestr and
2488        PL_bufptr into lex_shared, to ‘share’ those values.
2489      */
2490     PL_parser->lex_shared->ls_linestr = PL_linestr;
2491     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2492
2493     PL_linestr = PL_lex_stuff;
2494     PL_lex_repl = PL_sublex_info.repl;
2495     PL_lex_stuff = NULL;
2496     PL_sublex_info.repl = NULL;
2497
2498     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2499         = SvPVX(PL_linestr);
2500     PL_bufend += SvCUR(PL_linestr);
2501     PL_last_lop = PL_last_uni = NULL;
2502     SAVEFREESV(PL_linestr);
2503     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2504
2505     PL_lex_dojoin = FALSE;
2506     PL_lex_brackets = PL_lex_formbrack = 0;
2507     PL_lex_allbrackets = 0;
2508     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2509     Newx(PL_lex_brackstack, 120, char);
2510     Newx(PL_lex_casestack, 12, char);
2511     PL_lex_casemods = 0;
2512     *PL_lex_casestack = '\0';
2513     PL_lex_starts = 0;
2514     PL_lex_state = LEX_INTERPCONCAT;
2515     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2516
2517     Newxz(shared, 1, LEXSHARED);
2518     shared->ls_prev = PL_parser->lex_shared;
2519     PL_parser->lex_shared = shared;
2520
2521     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2522     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2523     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2524         PL_lex_inpat = PL_sublex_info.sub_op;
2525     else
2526         PL_lex_inpat = NULL;
2527
2528     return '(';
2529 }
2530
2531 /*
2532  * S_sublex_done
2533  * Restores lexer state after a S_sublex_push.
2534  */
2535
2536 STATIC I32
2537 S_sublex_done(pTHX)
2538 {
2539     dVAR;
2540     if (!PL_lex_starts++) {
2541         SV * const sv = newSVpvs("");
2542         if (SvUTF8(PL_linestr))
2543             SvUTF8_on(sv);
2544         PL_expect = XOPERATOR;
2545         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2546         return THING;
2547     }
2548
2549     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2550         PL_lex_state = LEX_INTERPCASEMOD;
2551         return yylex();
2552     }
2553
2554     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2555     assert(PL_lex_inwhat != OP_TRANSR);
2556     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2557         PL_linestr = PL_lex_repl;
2558         PL_lex_inpat = 0;
2559         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2560         PL_bufend += SvCUR(PL_linestr);
2561         PL_last_lop = PL_last_uni = NULL;
2562         PL_lex_dojoin = FALSE;
2563         PL_lex_brackets = 0;
2564         PL_lex_allbrackets = 0;
2565         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2566         PL_lex_casemods = 0;
2567         *PL_lex_casestack = '\0';
2568         PL_lex_starts = 0;
2569         if (SvEVALED(PL_lex_repl)) {
2570             PL_lex_state = LEX_INTERPNORMAL;
2571             PL_lex_starts++;
2572             /*  we don't clear PL_lex_repl here, so that we can check later
2573                 whether this is an evalled subst; that means we rely on the
2574                 logic to ensure sublex_done() is called again only via the
2575                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2576         }
2577         else {
2578             PL_lex_state = LEX_INTERPCONCAT;
2579             PL_lex_repl = NULL;
2580         }
2581         return ',';
2582     }
2583     else {
2584 #ifdef PERL_MAD
2585         if (PL_madskills) {
2586             if (PL_thiswhite) {
2587                 if (!PL_endwhite)
2588                     PL_endwhite = newSVpvs("");
2589                 sv_catsv(PL_endwhite, PL_thiswhite);
2590                 PL_thiswhite = 0;
2591             }
2592             if (PL_thistoken)
2593                 sv_setpvs(PL_thistoken,"");
2594             else
2595                 PL_realtokenstart = -1;
2596         }
2597 #endif
2598         LEAVE;
2599         PL_bufend = SvPVX(PL_linestr);
2600         PL_bufend += SvCUR(PL_linestr);
2601         PL_expect = XOPERATOR;
2602         PL_sublex_info.sub_inwhat = 0;
2603         return ')';
2604     }
2605 }
2606
2607 /*
2608   scan_const
2609
2610   Extracts the next constant part of a pattern, double-quoted string,
2611   or transliteration.  This is terrifying code.
2612
2613   For example, in parsing the double-quoted string "ab\x63$d", it would
2614   stop at the '$' and return an OP_CONST containing 'abc'.
2615
2616   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2617   processing a pattern (PL_lex_inpat is true), a transliteration
2618   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2619
2620   Returns a pointer to the character scanned up to. If this is
2621   advanced from the start pointer supplied (i.e. if anything was
2622   successfully parsed), will leave an OP_CONST for the substring scanned
2623   in pl_yylval. Caller must intuit reason for not parsing further
2624   by looking at the next characters herself.
2625
2626   In patterns:
2627     expand:
2628       \N{ABC}  => \N{U+41.42.43}
2629
2630     pass through:
2631         all other \-char, including \N and \N{ apart from \N{ABC}
2632
2633     stops on:
2634         @ and $ where it appears to be a var, but not for $ as tail anchor
2635         \l \L \u \U \Q \E
2636         (?{  or  (??{
2637
2638
2639   In transliterations:
2640     characters are VERY literal, except for - not at the start or end
2641     of the string, which indicates a range. If the range is in bytes,
2642     scan_const expands the range to the full set of intermediate
2643     characters. If the range is in utf8, the hyphen is replaced with
2644     a certain range mark which will be handled by pmtrans() in op.c.
2645
2646   In double-quoted strings:
2647     backslashes:
2648       double-quoted style: \r and \n
2649       constants: \x31, etc.
2650       deprecated backrefs: \1 (in substitution replacements)
2651       case and quoting: \U \Q \E
2652     stops on @ and $
2653
2654   scan_const does *not* construct ops to handle interpolated strings.
2655   It stops processing as soon as it finds an embedded $ or @ variable
2656   and leaves it to the caller to work out what's going on.
2657
2658   embedded arrays (whether in pattern or not) could be:
2659       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2660
2661   $ in double-quoted strings must be the symbol of an embedded scalar.
2662
2663   $ in pattern could be $foo or could be tail anchor.  Assumption:
2664   it's a tail anchor if $ is the last thing in the string, or if it's
2665   followed by one of "()| \r\n\t"
2666
2667   \1 (backreferences) are turned into $1 in substitutions
2668
2669   The structure of the code is
2670       while (there's a character to process) {
2671           handle transliteration ranges
2672           skip regexp comments /(?#comment)/ and codes /(?{code})/
2673           skip #-initiated comments in //x patterns
2674           check for embedded arrays
2675           check for embedded scalars
2676           if (backslash) {
2677               deprecate \1 in substitution replacements
2678               handle string-changing backslashes \l \U \Q \E, etc.
2679               switch (what was escaped) {
2680                   handle \- in a transliteration (becomes a literal -)
2681                   if a pattern and not \N{, go treat as regular character
2682                   handle \132 (octal characters)
2683                   handle \x15 and \x{1234} (hex characters)
2684                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2685                   handle \cV (control characters)
2686                   handle printf-style backslashes (\f, \r, \n, etc)
2687               } (end switch)
2688               continue
2689           } (end if backslash)
2690           handle regular character
2691     } (end while character to read)
2692
2693 */
2694
2695 STATIC char *
2696 S_scan_const(pTHX_ char *start)
2697 {
2698     dVAR;
2699     char *send = PL_bufend;             /* end of the constant */
2700     SV *sv = newSV(send - start);               /* sv for the constant.  See
2701                                                    note below on sizing. */
2702     char *s = start;                    /* start of the constant */
2703     char *d = SvPVX(sv);                /* destination for copies */
2704     bool dorange = FALSE;                       /* are we in a translit range? */
2705     bool didrange = FALSE;                      /* did we just finish a range? */
2706     bool in_charclass = FALSE;                  /* within /[...]/ */
2707     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2708     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2709                                                    to be UTF8?  But, this can
2710                                                    show as true when the source
2711                                                    isn't utf8, as for example
2712                                                    when it is entirely composed
2713                                                    of hex constants */
2714
2715     /* Note on sizing:  The scanned constant is placed into sv, which is
2716      * initialized by newSV() assuming one byte of output for every byte of
2717      * input.  This routine expects newSV() to allocate an extra byte for a
2718      * trailing NUL, which this routine will append if it gets to the end of
2719      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2720      * CAPITAL LETTER A}), or more output than input if the constant ends up
2721      * recoded to utf8, but each time a construct is found that might increase
2722      * the needed size, SvGROW() is called.  Its size parameter each time is
2723      * based on the best guess estimate at the time, namely the length used so
2724      * far, plus the length the current construct will occupy, plus room for
2725      * the trailing NUL, plus one byte for every input byte still unscanned */
2726
2727     UV uv;
2728 #ifdef EBCDIC
2729     UV literal_endpoint = 0;
2730     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2731 #endif
2732
2733     PERL_ARGS_ASSERT_SCAN_CONST;
2734
2735     assert(PL_lex_inwhat != OP_TRANSR);
2736     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2737         /* If we are doing a trans and we know we want UTF8 set expectation */
2738         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2739         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2740     }
2741
2742
2743     while (s < send || dorange) {
2744
2745         /* get transliterations out of the way (they're most literal) */
2746         if (PL_lex_inwhat == OP_TRANS) {
2747             /* expand a range A-Z to the full set of characters.  AIE! */
2748             if (dorange) {
2749                 I32 i;                          /* current expanded character */
2750                 I32 min;                        /* first character in range */
2751                 I32 max;                        /* last character in range */
2752
2753 #ifdef EBCDIC
2754                 UV uvmax = 0;
2755 #endif
2756
2757                 if (has_utf8
2758 #ifdef EBCDIC
2759                     && !native_range
2760 #endif
2761                     ) {
2762                     char * const c = (char*)utf8_hop((U8*)d, -1);
2763                     char *e = d++;
2764                     while (e-- > c)
2765                         *(e + 1) = *e;
2766                     *c = (char)UTF_TO_NATIVE(0xff);
2767                     /* mark the range as done, and continue */
2768                     dorange = FALSE;
2769                     didrange = TRUE;
2770                     continue;
2771                 }
2772
2773                 i = d - SvPVX_const(sv);                /* remember current offset */
2774 #ifdef EBCDIC
2775                 SvGROW(sv,
2776                        SvLEN(sv) + (has_utf8 ?
2777                                     (512 - UTF_CONTINUATION_MARK +
2778                                      UNISKIP(0x100))
2779                                     : 256));
2780                 /* How many two-byte within 0..255: 128 in UTF-8,
2781                  * 96 in UTF-8-mod. */
2782 #else
2783                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2784 #endif
2785                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2786 #ifdef EBCDIC
2787                 if (has_utf8) {
2788                     int j;
2789                     for (j = 0; j <= 1; j++) {
2790                         char * const c = (char*)utf8_hop((U8*)d, -1);
2791                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2792                         if (j)
2793                             min = (U8)uv;
2794                         else if (uv < 256)
2795                             max = (U8)uv;
2796                         else {
2797                             max = (U8)0xff; /* only to \xff */
2798                             uvmax = uv; /* \x{100} to uvmax */
2799                         }
2800                         d = c; /* eat endpoint chars */
2801                      }
2802                 }
2803                else {
2804 #endif
2805                    d -= 2;              /* eat the first char and the - */
2806                    min = (U8)*d;        /* first char in range */
2807                    max = (U8)d[1];      /* last char in range  */
2808 #ifdef EBCDIC
2809                }
2810 #endif
2811
2812                 if (min > max) {
2813                     SvREFCNT_dec(sv);
2814                     Perl_croak(aTHX_
2815                                "Invalid range \"%c-%c\" in transliteration operator",
2816                                (char)min, (char)max);
2817                 }
2818
2819 #ifdef EBCDIC
2820                 if (literal_endpoint == 2 &&
2821                     ((isLOWER(min) && isLOWER(max)) ||
2822                      (isUPPER(min) && isUPPER(max)))) {
2823                     if (isLOWER(min)) {
2824                         for (i = min; i <= max; i++)
2825                             if (isLOWER(i))
2826                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2827                     } else {
2828                         for (i = min; i <= max; i++)
2829                             if (isUPPER(i))
2830                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2831                     }
2832                 }
2833                 else
2834 #endif
2835                     for (i = min; i <= max; i++)
2836 #ifdef EBCDIC
2837                         if (has_utf8) {
2838                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2839                             if (UNI_IS_INVARIANT(ch))
2840                                 *d++ = (U8)i;
2841                             else {
2842                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2843                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2844                             }
2845                         }
2846                         else
2847 #endif
2848                             *d++ = (char)i;
2849
2850 #ifdef EBCDIC
2851                 if (uvmax) {
2852                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2853                     if (uvmax > 0x101)
2854                         *d++ = (char)UTF_TO_NATIVE(0xff);
2855                     if (uvmax > 0x100)
2856                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2857                 }
2858 #endif
2859
2860                 /* mark the range as done, and continue */
2861                 dorange = FALSE;
2862                 didrange = TRUE;
2863 #ifdef EBCDIC
2864                 literal_endpoint = 0;
2865 #endif
2866                 continue;
2867             }
2868
2869             /* range begins (ignore - as first or last char) */
2870             else if (*s == '-' && s+1 < send  && s != start) {
2871                 if (didrange) {
2872                     SvREFCNT_dec(sv);
2873                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2874                 }
2875                 if (has_utf8
2876 #ifdef EBCDIC
2877                     && !native_range
2878 #endif
2879                     ) {
2880                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2881                     s++;
2882                     continue;
2883                 }
2884                 dorange = TRUE;
2885                 s++;
2886             }
2887             else {
2888                 didrange = FALSE;
2889 #ifdef EBCDIC
2890                 literal_endpoint = 0;
2891                 native_range = TRUE;
2892 #endif
2893             }
2894         }
2895
2896         /* if we get here, we're not doing a transliteration */
2897
2898         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2899             char *s1 = s-1;
2900             int esc = 0;
2901             while (s1 >= start && *s1-- == '\\')
2902                 esc = !esc;
2903             if (!esc)
2904                 in_charclass = TRUE;
2905         }
2906
2907         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2908             char *s1 = s-1;
2909             int esc = 0;
2910             while (s1 >= start && *s1-- == '\\')
2911                 esc = !esc;
2912             if (!esc)
2913                 in_charclass = FALSE;
2914         }
2915
2916         /* skip for regexp comments /(?#comment)/, except for the last
2917          * char, which will be done separately.
2918          * Stop on (?{..}) and friends */
2919
2920         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2921             if (s[2] == '#') {
2922                 while (s+1 < send && *s != ')')
2923                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2924             }
2925             else if (!PL_lex_casemods && !in_charclass &&
2926                      (    s[2] == '{' /* This should match regcomp.c */
2927                       || (s[2] == '?' && s[3] == '{')))
2928             {
2929                 break;
2930             }
2931         }
2932
2933         /* likewise skip #-initiated comments in //x patterns */
2934         else if (*s == '#' && PL_lex_inpat &&
2935           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2936             while (s+1 < send && *s != '\n')
2937                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2938         }
2939
2940         /* no further processing of single-quoted regex */
2941         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2942             goto default_action;
2943
2944         /* check for embedded arrays
2945            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2946            */
2947         else if (*s == '@' && s[1]) {
2948             if (isALNUM_lazy_if(s+1,UTF))
2949                 break;
2950             if (strchr(":'{$", s[1]))
2951                 break;
2952             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2953                 break; /* in regexp, neither @+ nor @- are interpolated */
2954         }
2955
2956         /* check for embedded scalars.  only stop if we're sure it's a
2957            variable.
2958         */
2959         else if (*s == '$') {
2960             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2961                 break;
2962             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2963                 if (s[1] == '\\') {
2964                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2965                                    "Possible unintended interpolation of $\\ in regex");
2966                 }
2967                 break;          /* in regexp, $ might be tail anchor */
2968             }
2969         }
2970
2971         /* End of else if chain - OP_TRANS rejoin rest */
2972
2973         /* backslashes */
2974         if (*s == '\\' && s+1 < send) {
2975             char* e;    /* Can be used for ending '}', etc. */
2976
2977             s++;
2978
2979             /* warn on \1 - \9 in substitution replacements, but note that \11
2980              * is an octal; and \19 is \1 followed by '9' */
2981             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2982                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2983             {
2984                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2985                 *--s = '$';
2986                 break;
2987             }
2988
2989             /* string-change backslash escapes */
2990             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
2991                 --s;
2992                 break;
2993             }
2994             /* In a pattern, process \N, but skip any other backslash escapes.
2995              * This is because we don't want to translate an escape sequence
2996              * into a meta symbol and have the regex compiler use the meta
2997              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2998              * in spite of this, we do have to process \N here while the proper
2999              * charnames handler is in scope.  See bugs #56444 and #62056.
3000              * There is a complication because \N in a pattern may also stand
3001              * for 'match a non-nl', and not mean a charname, in which case its
3002              * processing should be deferred to the regex compiler.  To be a
3003              * charname it must be followed immediately by a '{', and not look
3004              * like \N followed by a curly quantifier, i.e., not something like
3005              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3006              * quantifier */
3007             else if (PL_lex_inpat
3008                     && (*s != 'N'
3009                         || s[1] != '{'
3010                         || regcurly(s + 1)))
3011             {
3012                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3013                 goto default_action;
3014             }
3015
3016             switch (*s) {
3017
3018             /* quoted - in transliterations */
3019             case '-':
3020                 if (PL_lex_inwhat == OP_TRANS) {
3021                     *d++ = *s++;
3022                     continue;
3023                 }
3024                 /* FALL THROUGH */
3025             default:
3026                 {
3027                     if ((isALNUMC(*s)))
3028                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3029                                        "Unrecognized escape \\%c passed through",
3030                                        *s);
3031                     /* default action is to copy the quoted character */
3032                     goto default_action;
3033                 }
3034
3035             /* eg. \132 indicates the octal constant 0132 */
3036             case '0': case '1': case '2': case '3':
3037             case '4': case '5': case '6': case '7':
3038                 {
3039                     I32 flags = 0;
3040                     STRLEN len = 3;
3041                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3042                     s += len;
3043                 }
3044                 goto NUM_ESCAPE_INSERT;
3045
3046             /* eg. \o{24} indicates the octal constant \024 */
3047             case 'o':
3048                 {
3049                     STRLEN len;
3050                     const char* error;
3051
3052                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3053                     s += len;
3054                     if (! valid) {
3055                         yyerror(error);
3056                         continue;
3057                     }
3058                     goto NUM_ESCAPE_INSERT;
3059                 }
3060
3061             /* eg. \x24 indicates the hex constant 0x24 */
3062             case 'x':
3063                 {
3064                     STRLEN len;
3065                     const char* error;
3066
3067                     bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3068                     s += len;
3069                     if (! valid) {
3070                         yyerror(error);
3071                         continue;
3072                     }
3073                 }
3074
3075               NUM_ESCAPE_INSERT:
3076                 /* Insert oct or hex escaped character.  There will always be
3077                  * enough room in sv since such escapes will be longer than any
3078                  * UTF-8 sequence they can end up as, except if they force us
3079                  * to recode the rest of the string into utf8 */
3080
3081                 /* Here uv is the ordinal of the next character being added in
3082                  * unicode (converted from native). */
3083                 if (!UNI_IS_INVARIANT(uv)) {
3084                     if (!has_utf8 && uv > 255) {
3085                         /* Might need to recode whatever we have accumulated so
3086                          * far if it contains any chars variant in utf8 or
3087                          * utf-ebcdic. */
3088
3089                         SvCUR_set(sv, d - SvPVX_const(sv));
3090                         SvPOK_on(sv);
3091                         *d = '\0';
3092                         /* See Note on sizing above.  */
3093                         sv_utf8_upgrade_flags_grow(sv,
3094                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3095                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3096                         d = SvPVX(sv) + SvCUR(sv);
3097                         has_utf8 = TRUE;
3098                     }
3099
3100                     if (has_utf8) {
3101                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3102                         if (PL_lex_inwhat == OP_TRANS &&
3103                             PL_sublex_info.sub_op) {
3104                             PL_sublex_info.sub_op->op_private |=
3105                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3106                                              : OPpTRANS_TO_UTF);
3107                         }
3108 #ifdef EBCDIC
3109                         if (uv > 255 && !dorange)
3110                             native_range = FALSE;
3111 #endif
3112                     }
3113                     else {
3114                         *d++ = (char)uv;
3115                     }
3116                 }
3117                 else {
3118                     *d++ = (char) uv;
3119                 }
3120                 continue;
3121
3122             case 'N':
3123                 /* In a non-pattern \N must be a named character, like \N{LATIN
3124                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3125                  * mean to match a non-newline.  For non-patterns, named
3126                  * characters are converted to their string equivalents. In
3127                  * patterns, named characters are not converted to their
3128                  * ultimate forms for the same reasons that other escapes
3129                  * aren't.  Instead, they are converted to the \N{U+...} form
3130                  * to get the value from the charnames that is in effect right
3131                  * now, while preserving the fact that it was a named character
3132                  * so that the regex compiler knows this */
3133
3134                 /* This section of code doesn't generally use the
3135                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3136                  * a close examination of this macro and determined it is a
3137                  * no-op except on utfebcdic variant characters.  Every
3138                  * character generated by this that would normally need to be
3139                  * enclosed by this macro is invariant, so the macro is not
3140                  * needed, and would complicate use of copy().  XXX There are
3141                  * other parts of this file where the macro is used
3142                  * inconsistently, but are saved by it being a no-op */
3143
3144                 /* The structure of this section of code (besides checking for
3145                  * errors and upgrading to utf8) is:
3146                  *  Further disambiguate between the two meanings of \N, and if
3147                  *      not a charname, go process it elsewhere
3148                  *  If of form \N{U+...}, pass it through if a pattern;
3149                  *      otherwise convert to utf8
3150                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3151                  *  pattern; otherwise convert to utf8 */
3152
3153                 /* Here, s points to the 'N'; the test below is guaranteed to
3154                  * succeed if we are being called on a pattern as we already
3155                  * know from a test above that the next character is a '{'.
3156                  * On a non-pattern \N must mean 'named sequence, which
3157                  * requires braces */
3158                 s++;
3159                 if (*s != '{') {
3160                     yyerror("Missing braces on \\N{}");
3161                     continue;
3162                 }
3163                 s++;
3164
3165                 /* If there is no matching '}', it is an error. */
3166                 if (! (e = strchr(s, '}'))) {
3167                     if (! PL_lex_inpat) {
3168                         yyerror("Missing right brace on \\N{}");
3169                     } else {
3170                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3171                     }
3172                     continue;
3173                 }
3174
3175                 /* Here it looks like a named character */
3176
3177                 if (PL_lex_inpat) {
3178
3179                     /* XXX This block is temporary code.  \N{} implies that the
3180                      * pattern is to have Unicode semantics, and therefore
3181                      * currently has to be encoded in utf8.  By putting it in
3182                      * utf8 now, we save a whole pass in the regular expression
3183                      * compiler.  Once that code is changed so Unicode
3184                      * semantics doesn't necessarily have to be in utf8, this
3185                      * block should be removed.  However, the code that parses
3186                      * the output of this would have to be changed to not
3187                      * necessarily expect utf8 */
3188                     if (!has_utf8) {
3189                         SvCUR_set(sv, d - SvPVX_const(sv));
3190                         SvPOK_on(sv);
3191                         *d = '\0';
3192                         /* See Note on sizing above.  */
3193                         sv_utf8_upgrade_flags_grow(sv,
3194                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3195                                         /* 5 = '\N{' + cur char + NUL */
3196                                         (STRLEN)(send - s) + 5);
3197                         d = SvPVX(sv) + SvCUR(sv);
3198                         has_utf8 = TRUE;
3199                     }
3200                 }
3201
3202                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3203                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3204                                 | PERL_SCAN_DISALLOW_PREFIX;
3205                     STRLEN len;
3206
3207                     /* For \N{U+...}, the '...' is a unicode value even on
3208                      * EBCDIC machines */
3209                     s += 2;         /* Skip to next char after the 'U+' */
3210                     len = e - s;
3211                     uv = grok_hex(s, &len, &flags, NULL);
3212                     if (len == 0 || len != (STRLEN)(e - s)) {
3213                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3214                         s = e + 1;
3215                         continue;
3216                     }
3217
3218                     if (PL_lex_inpat) {
3219
3220                         /* On non-EBCDIC platforms, pass through to the regex
3221                          * compiler unchanged.  The reason we evaluated the
3222                          * number above is to make sure there wasn't a syntax
3223                          * error.  But on EBCDIC we convert to native so
3224                          * downstream code can continue to assume it's native
3225                          */
3226                         s -= 5;     /* Include the '\N{U+' */
3227 #ifdef EBCDIC
3228                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3229                                                                and the \0 */
3230                                     "\\N{U+%X}",
3231                                     (unsigned int) UNI_TO_NATIVE(uv));
3232 #else
3233                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3234                         d += e - s + 1;
3235 #endif
3236                     }
3237                     else {  /* Not a pattern: convert the hex to string */
3238
3239                          /* If destination is not in utf8, unconditionally
3240                           * recode it to be so.  This is because \N{} implies
3241                           * Unicode semantics, and scalars have to be in utf8
3242                           * to guarantee those semantics */
3243                         if (! has_utf8) {
3244                             SvCUR_set(sv, d - SvPVX_const(sv));
3245                             SvPOK_on(sv);
3246                             *d = '\0';
3247                             /* See Note on sizing above.  */
3248                             sv_utf8_upgrade_flags_grow(
3249                                         sv,
3250                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3251                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3252                             d = SvPVX(sv) + SvCUR(sv);
3253                             has_utf8 = TRUE;
3254                         }
3255
3256                         /* Add the string to the output */
3257                         if (UNI_IS_INVARIANT(uv)) {
3258                             *d++ = (char) uv;
3259                         }
3260                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3261                     }
3262                 }
3263                 else { /* Here is \N{NAME} but not \N{U+...}. */
3264
3265                     SV *res;            /* result from charnames */
3266                     const char *str;    /* the string in 'res' */
3267                     STRLEN len;         /* its length */
3268
3269                     /* Get the value for NAME */
3270                     res = newSVpvn(s, e - s);
3271                     res = new_constant( NULL, 0, "charnames",
3272                                         /* includes all of: \N{...} */
3273                                         res, NULL, s - 3, e - s + 4 );
3274
3275                     /* Most likely res will be in utf8 already since the
3276                      * standard charnames uses pack U, but a custom translator
3277                      * can leave it otherwise, so make sure.  XXX This can be
3278                      * revisited to not have charnames use utf8 for characters
3279                      * that don't need it when regexes don't have to be in utf8
3280                      * for Unicode semantics.  If doing so, remember EBCDIC */
3281                     sv_utf8_upgrade(res);
3282                     str = SvPV_const(res, len);
3283
3284                     /* Don't accept malformed input */
3285                     if (! is_utf8_string((U8 *) str, len)) {
3286                         yyerror("Malformed UTF-8 returned by \\N");
3287                     }
3288                     else if (PL_lex_inpat) {
3289
3290                         if (! len) { /* The name resolved to an empty string */
3291                             Copy("\\N{}", d, 4, char);
3292                             d += 4;
3293                         }
3294                         else {
3295                             /* In order to not lose information for the regex
3296                             * compiler, pass the result in the specially made
3297                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3298                             * the code points in hex of each character
3299                             * returned by charnames */
3300
3301                             const char *str_end = str + len;
3302                             STRLEN char_length;     /* cur char's byte length */
3303                             STRLEN output_length;   /* and the number of bytes
3304                                                        after this is translated
3305                                                        into hex digits */
3306                             const STRLEN off = d - SvPVX_const(sv);
3307
3308                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3309                              * max('U+', '.'); and 1 for NUL */
3310                             char hex_string[2 * UTF8_MAXBYTES + 5];
3311
3312                             /* Get the first character of the result. */
3313                             U32 uv = utf8n_to_uvuni((U8 *) str,
3314                                                     len,
3315                                                     &char_length,
3316                                                     UTF8_ALLOW_ANYUV);
3317
3318                             /* The call to is_utf8_string() above hopefully
3319                              * guarantees that there won't be an error.  But
3320                              * it's easy here to make sure.  The function just
3321                              * above warns and returns 0 if invalid utf8, but
3322                              * it can also return 0 if the input is validly a
3323                              * NUL. Disambiguate */
3324                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3325                                 uv = UNICODE_REPLACEMENT;
3326                             }
3327
3328                             /* Convert first code point to hex, including the
3329                              * boiler plate before it.  For all these, we
3330                              * convert to native format so that downstream code
3331                              * can continue to assume the input is native */
3332                             output_length =
3333                                 my_snprintf(hex_string, sizeof(hex_string),
3334                                             "\\N{U+%X",
3335                                             (unsigned int) UNI_TO_NATIVE(uv));
3336
3337                             /* Make sure there is enough space to hold it */
3338                             d = off + SvGROW(sv, off
3339                                                  + output_length
3340                                                  + (STRLEN)(send - e)
3341                                                  + 2);  /* '}' + NUL */
3342                             /* And output it */
3343                             Copy(hex_string, d, output_length, char);
3344                             d += output_length;
3345
3346                             /* For each subsequent character, append dot and
3347                              * its ordinal in hex */
3348                             while ((str += char_length) < str_end) {
3349                                 const STRLEN off = d - SvPVX_const(sv);
3350                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3351                                                         str_end - str,
3352                                                         &char_length,
3353                                                         UTF8_ALLOW_ANYUV);
3354                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3355                                     uv = UNICODE_REPLACEMENT;
3356                                 }
3357
3358                                 output_length =
3359                                     my_snprintf(hex_string, sizeof(hex_string),
3360                                             ".%X",
3361                                             (unsigned int) UNI_TO_NATIVE(uv));
3362
3363                                 d = off + SvGROW(sv, off
3364                                                      + output_length
3365                                                      + (STRLEN)(send - e)
3366                                                      + 2);      /* '}' +  NUL */
3367                                 Copy(hex_string, d, output_length, char);
3368                                 d += output_length;
3369                             }
3370
3371                             *d++ = '}'; /* Done.  Add the trailing brace */
3372                         }
3373                     }
3374                     else { /* Here, not in a pattern.  Convert the name to a
3375                             * string. */
3376
3377                          /* If destination is not in utf8, unconditionally
3378                           * recode it to be so.  This is because \N{} implies
3379                           * Unicode semantics, and scalars have to be in utf8
3380                           * to guarantee those semantics */
3381                         if (! has_utf8) {
3382                             SvCUR_set(sv, d - SvPVX_const(sv));
3383                             SvPOK_on(sv);
3384                             *d = '\0';
3385                             /* See Note on sizing above.  */
3386                             sv_utf8_upgrade_flags_grow(sv,
3387                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3388                                                 len + (STRLEN)(send - s) + 1);
3389                             d = SvPVX(sv) + SvCUR(sv);
3390                             has_utf8 = TRUE;
3391                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3392
3393                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3394                              * set correctly here). */
3395                             const STRLEN off = d - SvPVX_const(sv);
3396                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3397                         }
3398                         Copy(str, d, len, char);
3399                         d += len;
3400                     }
3401                     SvREFCNT_dec(res);
3402
3403                     /* Deprecate non-approved name syntax */
3404                     if (ckWARN_d(WARN_DEPRECATED)) {
3405                         bool problematic = FALSE;
3406                         char* i = s;
3407
3408                         /* For non-ut8 input, look to see that the first
3409                          * character is an alpha, then loop through the rest
3410                          * checking that each is a continuation */
3411                         if (! this_utf8) {
3412                             if (! isALPHAU(*i)) problematic = TRUE;
3413                             else for (i = s + 1; i < e; i++) {
3414                                 if (isCHARNAME_CONT(*i)) continue;
3415                                 problematic = TRUE;
3416                                 break;
3417                             }
3418                         }
3419                         else {
3420                             /* Similarly for utf8.  For invariants can check
3421                              * directly.  We accept anything above the latin1
3422                              * range because it is immaterial to Perl if it is
3423                              * correct or not, and is expensive to check.  But
3424                              * it is fairly easy in the latin1 range to convert
3425                              * the variants into a single character and check
3426                              * those */
3427                             if (UTF8_IS_INVARIANT(*i)) {
3428                                 if (! isALPHAU(*i)) problematic = TRUE;
3429                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3430                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3431                                                                             *(i+1)))))
3432                                 {
3433                                     problematic = TRUE;
3434                                 }
3435                             }
3436                             if (! problematic) for (i = s + UTF8SKIP(s);
3437                                                     i < e;
3438                                                     i+= UTF8SKIP(i))
3439                             {
3440                                 if (UTF8_IS_INVARIANT(*i)) {
3441                                     if (isCHARNAME_CONT(*i)) continue;
3442                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3443                                     continue;
3444                                 } else if (isCHARNAME_CONT(
3445                                             UNI_TO_NATIVE(
3446                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3447                                 {
3448                                     continue;
3449                                 }
3450                                 problematic = TRUE;
3451                                 break;
3452                             }
3453                         }
3454                         if (problematic) {
3455                             /* The e-i passed to the final %.*s makes sure that
3456                              * should the trailing NUL be missing that this
3457                              * print won't run off the end of the string */
3458                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3459                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3460                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3461                         }
3462                     }
3463                 } /* End \N{NAME} */
3464 #ifdef EBCDIC
3465                 if (!dorange)
3466                     native_range = FALSE; /* \N{} is defined to be Unicode */
3467 #endif
3468                 s = e + 1;  /* Point to just after the '}' */
3469                 continue;
3470
3471             /* \c is a control character */
3472             case 'c':
3473                 s++;
3474                 if (s < send) {
3475                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3476                 }
3477                 else {
3478                     yyerror("Missing control char name in \\c");
3479                 }
3480                 continue;
3481
3482             /* printf-style backslashes, formfeeds, newlines, etc */
3483             case 'b':
3484                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3485                 break;
3486             case 'n':
3487                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3488                 break;
3489             case 'r':
3490                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3491                 break;
3492             case 'f':
3493                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3494                 break;
3495             case 't':
3496                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3497                 break;
3498             case 'e':
3499                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3500                 break;
3501             case 'a':
3502                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3503                 break;
3504             } /* end switch */
3505
3506             s++;
3507             continue;
3508         } /* end if (backslash) */
3509 #ifdef EBCDIC
3510         else
3511             literal_endpoint++;
3512 #endif
3513
3514     default_action:
3515         /* If we started with encoded form, or already know we want it,
3516            then encode the next character */
3517         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3518             STRLEN len  = 1;
3519
3520
3521             /* One might think that it is wasted effort in the case of the
3522              * source being utf8 (this_utf8 == TRUE) to take the next character
3523              * in the source, convert it to an unsigned value, and then convert
3524              * it back again.  But the source has not been validated here.  The
3525              * routine that does the conversion checks for errors like
3526              * malformed utf8 */
3527
3528             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3529             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3530             if (!has_utf8) {
3531                 SvCUR_set(sv, d - SvPVX_const(sv));
3532                 SvPOK_on(sv);
3533                 *d = '\0';
3534                 /* See Note on sizing above.  */
3535                 sv_utf8_upgrade_flags_grow(sv,
3536                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3537                                         need + (STRLEN)(send - s) + 1);
3538                 d = SvPVX(sv) + SvCUR(sv);
3539                 has_utf8 = TRUE;
3540             } else if (need > len) {
3541                 /* encoded value larger than old, may need extra space (NOTE:
3542                  * SvCUR() is not set correctly here).   See Note on sizing
3543                  * above.  */
3544                 const STRLEN off = d - SvPVX_const(sv);
3545                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3546             }
3547             s += len;
3548
3549             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3550 #ifdef EBCDIC
3551             if (uv > 255 && !dorange)
3552                 native_range = FALSE;
3553 #endif
3554         }
3555         else {
3556             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3557         }
3558     } /* while loop to process each character */
3559
3560     /* terminate the string and set up the sv */
3561     *d = '\0';
3562     SvCUR_set(sv, d - SvPVX_const(sv));
3563     if (SvCUR(sv) >= SvLEN(sv))
3564         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3565                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3566
3567     SvPOK_on(sv);
3568     if (PL_encoding && !has_utf8) {
3569         sv_recode_to_utf8(sv, PL_encoding);
3570         if (SvUTF8(sv))
3571             has_utf8 = TRUE;
3572     }
3573     if (has_utf8) {
3574         SvUTF8_on(sv);
3575         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3576             PL_sublex_info.sub_op->op_private |=
3577                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3578         }
3579     }
3580
3581     /* shrink the sv if we allocated more than we used */
3582     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3583         SvPV_shrink_to_cur(sv);
3584     }
3585
3586     /* return the substring (via pl_yylval) only if we parsed anything */
3587     if (s > PL_bufptr) {
3588         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3589             const char *const key = PL_lex_inpat ? "qr" : "q";
3590             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3591             const char *type;
3592             STRLEN typelen;
3593
3594             if (PL_lex_inwhat == OP_TRANS) {
3595                 type = "tr";
3596                 typelen = 2;
3597             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3598                 type = "s";
3599                 typelen = 1;
3600             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3601                 type = "q";
3602                 typelen = 1;
3603             } else  {
3604                 type = "qq";
3605                 typelen = 2;
3606             }
3607
3608             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3609                                 type, typelen);
3610         }
3611         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3612     } else
3613         SvREFCNT_dec(sv);
3614     return s;
3615 }
3616
3617 /* S_intuit_more
3618  * Returns TRUE if there's more to the expression (e.g., a subscript),
3619  * FALSE otherwise.
3620  *
3621  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3622  *
3623  * ->[ and ->{ return TRUE
3624  * { and [ outside a pattern are always subscripts, so return TRUE
3625  * if we're outside a pattern and it's not { or [, then return FALSE
3626  * if we're in a pattern and the first char is a {
3627  *   {4,5} (any digits around the comma) returns FALSE
3628  * if we're in a pattern and the first char is a [
3629  *   [] returns FALSE
3630  *   [SOMETHING] has a funky algorithm to decide whether it's a
3631  *      character class or not.  It has to deal with things like
3632  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3633  * anything else returns TRUE
3634  */
3635
3636 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3637
3638 STATIC int
3639 S_intuit_more(pTHX_ register char *s)
3640 {
3641     dVAR;
3642
3643     PERL_ARGS_ASSERT_INTUIT_MORE;
3644
3645     if (PL_lex_brackets)
3646         return TRUE;
3647     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3648         return TRUE;
3649     if (*s != '{' && *s != '[')
3650         return FALSE;
3651     if (!PL_lex_inpat)
3652         return TRUE;
3653
3654     /* In a pattern, so maybe we have {n,m}. */
3655     if (*s == '{') {
3656         if (regcurly(s)) {
3657             return FALSE;
3658         }
3659         return TRUE;
3660     }
3661
3662     /* On the other hand, maybe we have a character class */
3663
3664     s++;
3665     if (*s == ']' || *s == '^')
3666         return FALSE;
3667     else {
3668         /* this is terrifying, and it works */
3669         int weight = 2;         /* let's weigh the evidence */
3670         char seen[256];
3671         unsigned char un_char = 255, last_un_char;
3672         const char * const send = strchr(s,']');
3673         char tmpbuf[sizeof PL_tokenbuf * 4];
3674
3675         if (!send)              /* has to be an expression */
3676             return TRUE;
3677
3678         Zero(seen,256,char);
3679         if (*s == '$')
3680             weight -= 3;
3681         else if (isDIGIT(*s)) {
3682             if (s[1] != ']') {
3683                 if (isDIGIT(s[1]) && s[2] == ']')
3684                     weight -= 10;
3685             }
3686             else
3687                 weight -= 100;
3688         }
3689         for (; s < send; s++) {
3690             last_un_char = un_char;
3691             un_char = (unsigned char)*s;
3692             switch (*s) {
3693             case '@':
3694             case '&':
3695             case '$':
3696                 weight -= seen[un_char] * 10;
3697                 if (isALNUM_lazy_if(s+1,UTF)) {
3698                     int len;
3699                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3700                     len = (int)strlen(tmpbuf);
3701                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3702                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3703                         weight -= 100;
3704                     else
3705                         weight -= 10;
3706                 }
3707                 else if (*s == '$' && s[1] &&
3708                   strchr("[#!%*<>()-=",s[1])) {
3709                     if (/*{*/ strchr("])} =",s[2]))
3710                         weight -= 10;
3711                     else
3712                         weight -= 1;
3713                 }
3714                 break;
3715             case '\\':
3716                 un_char = 254;
3717                 if (s[1]) {
3718                     if (strchr("wds]",s[1]))
3719                         weight += 100;
3720                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3721                         weight += 1;
3722                     else if (strchr("rnftbxcav",s[1]))
3723                         weight += 40;
3724                     else if (isDIGIT(s[1])) {
3725                         weight += 40;
3726                         while (s[1] && isDIGIT(s[1]))
3727                             s++;
3728                     }
3729                 }
3730                 else
3731                     weight += 100;
3732                 break;
3733             case '-':
3734                 if (s[1] == '\\')
3735                     weight += 50;
3736                 if (strchr("aA01! ",last_un_char))
3737                     weight += 30;
3738                 if (strchr("zZ79~",s[1]))
3739                     weight += 30;
3740                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3741                     weight -= 5;        /* cope with negative subscript */
3742                 break;
3743             default:
3744                 if (!isALNUM(last_un_char)
3745                     && !(last_un_char == '$' || last_un_char == '@'
3746                          || last_un_char == '&')
3747                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3748                     char *d = tmpbuf;
3749                     while (isALPHA(*s))
3750                         *d++ = *s++;
3751                     *d = '\0';
3752                     if (keyword(tmpbuf, d - tmpbuf, 0))
3753                         weight -= 150;
3754                 }
3755                 if (un_char == last_un_char + 1)
3756                     weight += 5;
3757                 weight -= seen[un_char];
3758                 break;
3759             }
3760             seen[un_char]++;
3761         }
3762         if (weight >= 0)        /* probably a character class */
3763             return FALSE;
3764     }
3765
3766     return TRUE;
3767 }
3768
3769 /*
3770  * S_intuit_method
3771  *
3772  * Does all the checking to disambiguate
3773  *   foo bar
3774  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3775  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3776  *
3777  * First argument is the stuff after the first token, e.g. "bar".
3778  *
3779  * Not a method if foo is a filehandle.
3780  * Not a method if foo is a subroutine prototyped to take a filehandle.
3781  * Not a method if it's really "Foo $bar"
3782  * Method if it's "foo $bar"
3783  * Not a method if it's really "print foo $bar"
3784  * Method if it's really "foo package::" (interpreted as package->foo)
3785  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3786  * Not a method if bar is a filehandle or package, but is quoted with
3787  *   =>
3788  */
3789
3790 STATIC int
3791 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3792 {
3793     dVAR;
3794     char *s = start + (*start == '$');
3795     char tmpbuf[sizeof PL_tokenbuf];
3796     STRLEN len;
3797     GV* indirgv;
3798 #ifdef PERL_MAD
3799     int soff;
3800 #endif
3801
3802     PERL_ARGS_ASSERT_INTUIT_METHOD;
3803
3804     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3805             return 0;
3806     if (cv && SvPOK(cv)) {
3807                 const char *proto = CvPROTO(cv);
3808                 if (proto) {
3809                     if (*proto == ';')
3810                         proto++;
3811                     if (*proto == '*')
3812                         return 0;
3813                 }
3814     }
3815     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3816     /* start is the beginning of the possible filehandle/object,
3817      * and s is the end of it
3818      * tmpbuf is a copy of it
3819      */
3820
3821     if (*start == '$') {
3822         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3823                 isUPPER(*PL_tokenbuf))
3824             return 0;
3825 #ifdef PERL_MAD
3826         len = start - SvPVX(PL_linestr);
3827 #endif
3828         s = PEEKSPACE(s);
3829 #ifdef PERL_MAD
3830         start = SvPVX(PL_linestr) + len;
3831 #endif
3832         PL_bufptr = start;
3833         PL_expect = XREF;
3834         return *s == '(' ? FUNCMETH : METHOD;
3835     }
3836     if (!keyword(tmpbuf, len, 0)) {
3837         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3838             len -= 2;
3839             tmpbuf[len] = '\0';
3840 #ifdef PERL_MAD
3841             soff = s - SvPVX(PL_linestr);
3842 #endif
3843             goto bare_package;
3844         }
3845         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3846         if (indirgv && GvCVu(indirgv))
3847             return 0;
3848         /* filehandle or package name makes it a method */
3849         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3850 #ifdef PERL_MAD
3851             soff = s - SvPVX(PL_linestr);
3852 #endif
3853             s = PEEKSPACE(s);
3854             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3855                 return 0;       /* no assumptions -- "=>" quotes bareword */
3856       bare_package:
3857             start_force(PL_curforce);
3858             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3859                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3860             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3861             if (PL_madskills)
3862                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3863                                                             ( UTF ? SVf_UTF8 : 0 )));
3864             PL_expect = XTERM;
3865             force_next(WORD);
3866             PL_bufptr = s;
3867 #ifdef PERL_MAD
3868             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3869 #endif
3870             return *s == '(' ? FUNCMETH : METHOD;
3871         }
3872     }
3873     return 0;
3874 }
3875
3876 /* Encoded script support. filter_add() effectively inserts a
3877  * 'pre-processing' function into the current source input stream.
3878  * Note that the filter function only applies to the current source file
3879  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3880  *
3881  * The datasv parameter (which may be NULL) can be used to pass
3882  * private data to this instance of the filter. The filter function
3883  * can recover the SV using the FILTER_DATA macro and use it to
3884  * store private buffers and state information.
3885  *
3886  * The supplied datasv parameter is upgraded to a PVIO type
3887  * and the IoDIRP/IoANY field is used to store the function pointer,
3888  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3889  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3890  * private use must be set using malloc'd pointers.
3891  */
3892
3893 SV *
3894 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3895 {
3896     dVAR;
3897     if (!funcp)
3898         return NULL;
3899
3900     if (!PL_parser)
3901         return NULL;
3902
3903     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3904         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3905
3906     if (!PL_rsfp_filters)
3907         PL_rsfp_filters = newAV();
3908     if (!datasv)
3909         datasv = newSV(0);
3910     SvUPGRADE(datasv, SVt_PVIO);
3911     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3912     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3913     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3914                           FPTR2DPTR(void *, IoANY(datasv)),
3915                           SvPV_nolen(datasv)));
3916     av_unshift(PL_rsfp_filters, 1);
3917     av_store(PL_rsfp_filters, 0, datasv) ;
3918     if (
3919         !PL_parser->filtered
3920      && PL_parser->lex_flags & LEX_EVALBYTES
3921      && PL_bufptr < PL_bufend
3922     ) {
3923         const char *s = PL_bufptr;
3924         while (s < PL_bufend) {
3925             if (*s == '\n') {
3926                 SV *linestr = PL_parser->linestr;
3927                 char *buf = SvPVX(linestr);
3928                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3929                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3930                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3931                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3932                 STRLEN const last_uni_pos =
3933                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3934                 STRLEN const last_lop_pos =
3935                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3936                 av_push(PL_rsfp_filters, linestr);
3937                 PL_parser->linestr =
3938                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3939                 buf = SvPVX(PL_parser->linestr);
3940                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3941                 PL_parser->bufptr = buf + bufptr_pos;
3942                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3943                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3944                 PL_parser->linestart = buf + linestart_pos;
3945                 if (PL_parser->last_uni)
3946                     PL_parser->last_uni = buf + last_uni_pos;
3947                 if (PL_parser->last_lop)
3948                     PL_parser->last_lop = buf + last_lop_pos;
3949                 SvLEN(linestr) = SvCUR(linestr);
3950                 SvCUR(linestr) = s-SvPVX(linestr);
3951                 PL_parser->filtered = 1;
3952                 break;
3953             }
3954             s++;
3955         }
3956     }
3957     return(datasv);
3958 }
3959
3960
3961 /* Delete most recently added instance of this filter function. */
3962 void
3963 Perl_filter_del(pTHX_ filter_t funcp)
3964 {
3965     dVAR;
3966     SV *datasv;
3967
3968     PERL_ARGS_ASSERT_FILTER_DEL;
3969
3970 #ifdef DEBUGGING
3971     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3972                           FPTR2DPTR(void*, funcp)));
3973 #endif
3974     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3975         return;
3976     /* if filter is on top of stack (usual case) just pop it off */
3977     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3978     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3979         sv_free(av_pop(PL_rsfp_filters));
3980
3981         return;
3982     }
3983     /* we need to search for the correct entry and clear it     */
3984     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3985 }
3986
3987
3988 /* Invoke the idxth filter function for the current rsfp.        */
3989 /* maxlen 0 = read one text line */
3990 I32
3991 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3992 {
3993     dVAR;
3994     filter_t funcp;
3995     SV *datasv = NULL;
3996     /* This API is bad. It should have been using unsigned int for maxlen.
3997        Not sure if we want to change the API, but if not we should sanity
3998        check the value here.  */
3999     unsigned int correct_length
4000         = maxlen < 0 ?
4001 #ifdef PERL_MICRO
4002         0x7FFFFFFF
4003 #else
4004         INT_MAX
4005 #endif
4006         : maxlen;
4007
4008     PERL_ARGS_ASSERT_FILTER_READ;
4009
4010     if (!PL_parser || !PL_rsfp_filters)
4011         return -1;
4012     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4013         /* Provide a default input filter to make life easy.    */
4014         /* Note that we append to the line. This is handy.      */
4015         DEBUG_P(PerlIO_printf(Perl_debug_log,
4016                               "filter_read %d: from rsfp\n", idx));
4017         if (correct_length) {
4018             /* Want a block */
4019             int len ;
4020             const int old_len = SvCUR(buf_sv);
4021
4022             /* ensure buf_sv is large enough */
4023             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4024             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4025                                    correct_length)) <= 0) {
4026                 if (PerlIO_error(PL_rsfp))
4027                     return -1;          /* error */
4028                 else
4029                     return 0 ;          /* end of file */
4030             }
4031             SvCUR_set(buf_sv, old_len + len) ;
4032             SvPVX(buf_sv)[old_len + len] = '\0';
4033         } else {
4034             /* Want a line */
4035             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4036                 if (PerlIO_error(PL_rsfp))
4037                     return -1;          /* error */
4038                 else
4039                     return 0 ;          /* end of file */
4040             }
4041         }
4042         return SvCUR(buf_sv);
4043     }
4044     /* Skip this filter slot if filter has been deleted */
4045     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4046         DEBUG_P(PerlIO_printf(Perl_debug_log,
4047                               "filter_read %d: skipped (filter deleted)\n",
4048                               idx));
4049         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4050     }
4051     if (SvTYPE(datasv) != SVt_PVIO) {
4052         if (correct_length) {
4053             /* Want a block */
4054             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4055             if (!remainder) return 0; /* eof */
4056             if (correct_length > remainder) correct_length = remainder;
4057             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4058             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4059         } else {
4060             /* Want a line */
4061             const char *s = SvEND(datasv);
4062             const char *send = SvPVX(datasv) + SvLEN(datasv);
4063             while (s < send) {
4064                 if (*s == '\n') {
4065                     s++;
4066                     break;
4067                 }
4068                 s++;
4069             }
4070             if (s == send) return 0; /* eof */
4071             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4072             SvCUR_set(datasv, s-SvPVX(datasv));
4073         }
4074         return SvCUR(buf_sv);
4075     }
4076     /* Get function pointer hidden within datasv        */
4077     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4078     DEBUG_P(PerlIO_printf(Perl_debug_log,
4079                           "filter_read %d: via function %p (%s)\n",
4080                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4081     /* Call function. The function is expected to       */
4082     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4083     /* Return: <0:error, =0:eof, >0:not eof             */
4084     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4085 }
4086
4087 STATIC char *
4088 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4089 {
4090     dVAR;
4091
4092     PERL_ARGS_ASSERT_FILTER_GETS;
4093
4094 #ifdef PERL_CR_FILTER
4095     if (!PL_rsfp_filters) {
4096         filter_add(S_cr_textfilter,NULL);
4097     }
4098 #endif
4099     if (PL_rsfp_filters) {
4100         if (!append)
4101             SvCUR_set(sv, 0);   /* start with empty line        */
4102         if (FILTER_READ(0, sv, 0) > 0)
4103             return ( SvPVX(sv) ) ;
4104         else
4105             return NULL ;
4106     }
4107     else
4108         return (sv_gets(sv, PL_rsfp, append));
4109 }
4110
4111 STATIC HV *
4112 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4113 {
4114     dVAR;
4115     GV *gv;
4116
4117     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4118
4119     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4120         return PL_curstash;
4121
4122     if (len > 2 &&
4123         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4124         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4125     {
4126         return GvHV(gv);                        /* Foo:: */
4127     }
4128
4129     /* use constant CLASS => 'MyClass' */
4130     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4131     if (gv && GvCV(gv)) {
4132         SV * const sv = cv_const_sv(GvCV(gv));
4133         if (sv)
4134             pkgname = SvPV_const(sv, len);
4135     }
4136
4137     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4138 }
4139
4140 /*
4141  * S_readpipe_override
4142  * Check whether readpipe() is overridden, and generates the appropriate
4143  * optree, provided sublex_start() is called afterwards.
4144  */
4145 STATIC void
4146 S_readpipe_override(pTHX)
4147 {
4148     GV **gvp;
4149     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4150     pl_yylval.ival = OP_BACKTICK;
4151     if ((gv_readpipe
4152                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4153             ||
4154             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4155              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4156              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4157     {
4158         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4159             op_append_elem(OP_LIST,
4160                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4161                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4162     }
4163 }
4164
4165 #ifdef PERL_MAD
4166  /*
4167  * Perl_madlex
4168  * The intent of this yylex wrapper is to minimize the changes to the
4169  * tokener when we aren't interested in collecting madprops.  It remains
4170  * to be seen how successful this strategy will be...
4171  */
4172
4173 int
4174 Perl_madlex(pTHX)
4175 {
4176     int optype;
4177     char *s = PL_bufptr;
4178
4179     /* make sure PL_thiswhite is initialized */
4180     PL_thiswhite = 0;
4181     PL_thismad = 0;
4182
4183     /* previous token ate up our whitespace? */
4184     if (!PL_lasttoke && PL_nextwhite) {
4185         PL_thiswhite = PL_nextwhite;
4186         PL_nextwhite = 0;
4187     }
4188
4189     /* isolate the token, and figure out where it is without whitespace */
4190     PL_realtokenstart = -1;
4191     PL_thistoken = 0;
4192     optype = yylex();
4193     s = PL_bufptr;
4194     assert(PL_curforce < 0);
4195
4196     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4197         if (!PL_thistoken) {
4198             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4199                 PL_thistoken = newSVpvs("");
4200             else {
4201                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4202                 PL_thistoken = newSVpvn(tstart, s - tstart);
4203             }
4204         }
4205         if (PL_thismad) /* install head */
4206             CURMAD('X', PL_thistoken);
4207     }
4208
4209     /* last whitespace of a sublex? */
4210     if (optype == ')' && PL_endwhite) {
4211         CURMAD('X', PL_endwhite);
4212     }
4213
4214     if (!PL_thismad) {
4215
4216         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4217         if (!PL_thiswhite && !PL_endwhite && !optype) {
4218             sv_free(PL_thistoken);
4219             PL_thistoken = 0;
4220             return 0;
4221         }
4222
4223         /* put off final whitespace till peg */
4224         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4225             PL_nextwhite = PL_thiswhite;
4226             PL_thiswhite = 0;
4227         }
4228         else if (PL_thisopen) {
4229             CURMAD('q', PL_thisopen);
4230             if (PL_thistoken)
4231                 sv_free(PL_thistoken);
4232             PL_thistoken = 0;
4233         }
4234         else {
4235             /* Store actual token text as madprop X */
4236             CURMAD('X', PL_thistoken);
4237         }
4238
4239         if (PL_thiswhite) {
4240             /* add preceding whitespace as madprop _ */
4241             CURMAD('_', PL_thiswhite);
4242         }
4243
4244         if (PL_thisstuff) {
4245             /* add quoted material as madprop = */
4246             CURMAD('=', PL_thisstuff);
4247         }
4248
4249         if (PL_thisclose) {
4250             /* add terminating quote as madprop Q */
4251             CURMAD('Q', PL_thisclose);
4252         }
4253     }
4254
4255     /* special processing based on optype */
4256
4257     switch (optype) {
4258
4259     /* opval doesn't need a TOKEN since it can already store mp */
4260     case WORD:
4261     case METHOD:
4262     case FUNCMETH:
4263     case THING:
4264     case PMFUNC:
4265     case PRIVATEREF:
4266     case FUNC0SUB:
4267     case UNIOPSUB:
4268     case LSTOPSUB:
4269     case LABEL:
4270         if (pl_yylval.opval)
4271             append_madprops(PL_thismad, pl_yylval.opval, 0);
4272         PL_thismad = 0;
4273         return optype;
4274
4275     /* fake EOF */
4276     case 0:
4277         optype = PEG;
4278         if (PL_endwhite) {
4279             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4280             PL_endwhite = 0;
4281         }
4282         break;
4283
4284     case ']':
4285     case '}':
4286         if (PL_faketokens)
4287             break;
4288         /* remember any fake bracket that lexer is about to discard */
4289         if (PL_lex_brackets == 1 &&
4290             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4291         {
4292             s = PL_bufptr;
4293             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4294                 s++;
4295             if (*s == '}') {
4296                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4297                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4298                 PL_thiswhite = 0;
4299                 PL_bufptr = s - 1;
4300                 break;  /* don't bother looking for trailing comment */
4301             }
4302             else
4303                 s = PL_bufptr;
4304         }
4305         if (optype == ']')
4306             break;
4307         /* FALLTHROUGH */
4308
4309     /* attach a trailing comment to its statement instead of next token */
4310     case ';':
4311         if (PL_faketokens)
4312             break;
4313         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4314             s = PL_bufptr;
4315             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4316                 s++;
4317             if (*s == '\n' || *s == '#') {
4318                 while (s < PL_bufend && *s != '\n')
4319                     s++;
4320                 if (s < PL_bufend)
4321                     s++;
4322                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4323                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4324                 PL_thiswhite = 0;
4325                 PL_bufptr = s;
4326             }
4327         }
4328         break;
4329
4330     /* ival */
4331     default:
4332         break;
4333
4334     }
4335
4336     /* Create new token struct.  Note: opvals return early above. */
4337     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4338     PL_thismad = 0;
4339     return optype;
4340 }
4341 #endif
4342
4343 STATIC char *
4344 S_tokenize_use(pTHX_ int is_use, char *s) {
4345     dVAR;
4346
4347     PERL_ARGS_ASSERT_TOKENIZE_USE;
4348
4349     if (PL_expect != XSTATE)
4350         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4351                     is_use ? "use" : "no"));
4352     PL_expect = XTERM;
4353     s = SKIPSPACE1(s);
4354     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4355         s = force_version(s, TRUE);
4356         if (*s == ';' || *s == '}'
4357                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4358             start_force(PL_curforce);
4359             NEXTVAL_NEXTTOKE.opval = NULL;
4360             force_next(WORD);
4361         }
4362         else if (*s == 'v') {
4363             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4364             s = force_version(s, FALSE);
4365         }
4366     }
4367     else {
4368         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4369         s = force_version(s, FALSE);
4370     }
4371     pl_yylval.ival = is_use;
4372     return s;
4373 }
4374 #ifdef DEBUGGING
4375     static const char* const exp_name[] =
4376         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4377           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4378         };
4379 #endif
4380
4381 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4382 STATIC bool
4383 S_word_takes_any_delimeter(char *p, STRLEN len)
4384 {
4385     return (len == 1 && strchr("msyq", p[0])) ||
4386            (len == 2 && (
4387             (p[0] == 't' && p[1] == 'r') ||
4388             (p[0] == 'q' && strchr("qwxr", p[1]))));
4389 }
4390
4391 /*
4392   yylex
4393
4394   Works out what to call the token just pulled out of the input
4395   stream.  The yacc parser takes care of taking the ops we return and
4396   stitching them into a tree.
4397
4398   Returns:
4399     PRIVATEREF
4400
4401   Structure:
4402       if read an identifier
4403           if we're in a my declaration
4404               croak if they tried to say my($foo::bar)
4405               build the ops for a my() declaration
4406           if it's an access to a my() variable
4407               are we in a sort block?
4408                   croak if my($a); $a <=> $b
4409               build ops for access to a my() variable
4410           if in a dq string, and they've said @foo and we can't find @foo
4411               croak
4412           build ops for a bareword
4413       if we already built the token before, use it.
4414 */
4415
4416
4417 #ifdef __SC__
4418 #pragma segment Perl_yylex
4419 #endif
4420 int
4421 Perl_yylex(pTHX)
4422 {
4423     dVAR;
4424     char *s = PL_bufptr;
4425     char *d;
4426     STRLEN len;
4427     bool bof = FALSE;
4428     U8 formbrack = 0;
4429     U32 fake_eof = 0;
4430
4431     /* orig_keyword, gvp, and gv are initialized here because
4432      * jump to the label just_a_word_zero can bypass their
4433      * initialization later. */
4434     I32 orig_keyword = 0;
4435     GV *gv = NULL;
4436     GV **gvp = NULL;
4437
4438     DEBUG_T( {
4439         SV* tmp = newSVpvs("");
4440         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4441             (IV)CopLINE(PL_curcop),
4442             lex_state_names[PL_lex_state],
4443             exp_name[PL_expect],
4444             pv_display(tmp, s, strlen(s), 0, 60));
4445         SvREFCNT_dec(tmp);
4446     } );
4447
4448     switch (PL_lex_state) {
4449 #ifdef COMMENTARY
4450     case LEX_NORMAL:            /* Some compilers will produce faster */
4451     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4452         break;
4453 #endif
4454
4455     /* when we've already built the next token, just pull it out of the queue */
4456     case LEX_KNOWNEXT:
4457 #ifdef PERL_MAD
4458         PL_lasttoke--;
4459         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4460         if (PL_madskills) {
4461             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4462             PL_nexttoke[PL_lasttoke].next_mad = 0;
4463             if (PL_thismad && PL_thismad->mad_key == '_') {
4464                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4465                 PL_thismad->mad_val = 0;
4466                 mad_free(PL_thismad);
4467                 PL_thismad = 0;
4468             }
4469         }
4470         if (!PL_lasttoke) {
4471             PL_lex_state = PL_lex_defer;
4472             PL_expect = PL_lex_expect;
4473             PL_lex_defer = LEX_NORMAL;
4474             if (!PL_nexttoke[PL_lasttoke].next_type)
4475                 return yylex();
4476         }
4477 #else
4478         PL_nexttoke--;
4479         pl_yylval = PL_nextval[PL_nexttoke];
4480         if (!PL_nexttoke) {
4481             PL_lex_state = PL_lex_defer;
4482             PL_expect = PL_lex_expect;
4483             PL_lex_defer = LEX_NORMAL;
4484         }
4485 #endif
4486         {
4487             I32 next_type;
4488 #ifdef PERL_MAD
4489             next_type = PL_nexttoke[PL_lasttoke].next_type;
4490 #else
4491             next_type = PL_nexttype[PL_nexttoke];
4492 #endif
4493             if (next_type & (7<<24)) {
4494                 if (next_type & (1<<24)) {
4495                     if (PL_lex_brackets > 100)
4496                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4497                     PL_lex_brackstack[PL_lex_brackets++] =
4498                         (char) ((next_type >> 16) & 0xff);
4499                 }
4500                 if (next_type & (2<<24))
4501                     PL_lex_allbrackets++;
4502                 if (next_type & (4<<24))
4503                     PL_lex_allbrackets--;
4504                 next_type &= 0xffff;
4505             }
4506             if (S_is_opval_token(next_type) && pl_yylval.opval)
4507                 pl_yylval.opval->op_savefree = 0; /* release */
4508             return REPORT(next_type);
4509         }
4510
4511     /* interpolated case modifiers like \L \U, including \Q and \E.
4512        when we get here, PL_bufptr is at the \
4513     */
4514     case LEX_INTERPCASEMOD:
4515 #ifdef DEBUGGING
4516         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4517             Perl_croak(aTHX_
4518                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4519                        PL_bufptr, PL_bufend, *PL_bufptr);
4520 #endif
4521         /* handle \E or end of string */
4522         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4523             /* if at a \E */
4524             if (PL_lex_casemods) {
4525                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4526                 PL_lex_casestack[PL_lex_casemods] = '\0';
4527
4528                 if (PL_bufptr != PL_bufend
4529                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4530                         || oldmod == 'F')) {
4531                     PL_bufptr += 2;
4532                     PL_lex_state = LEX_INTERPCONCAT;
4533 #ifdef PERL_MAD
4534                     if (PL_madskills)
4535                         PL_thistoken = newSVpvs("\\E");
4536 #endif
4537                 }
4538                 PL_lex_allbrackets--;
4539                 return REPORT(')');
4540             }
4541             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4542                /* Got an unpaired \E */
4543                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4544                         "Useless use of \\E");
4545             }
4546 #ifdef PERL_MAD
4547             while (PL_bufptr != PL_bufend &&
4548               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4549                 if (!PL_thiswhite)
4550                     PL_thiswhite = newSVpvs("");
4551                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4552                 PL_bufptr += 2;
4553             }
4554 #else
4555             if (PL_bufptr != PL_bufend)
4556                 PL_bufptr += 2;
4557 #endif
4558             PL_lex_state = LEX_INTERPCONCAT;
4559             return yylex();
4560         }
4561         else {
4562             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4563               "### Saw case modifier\n"); });
4564             s = PL_bufptr + 1;
4565             if (s[1] == '\\' && s[2] == 'E') {
4566 #ifdef PERL_MAD
4567                 if (!PL_thiswhite)
4568                     PL_thiswhite = newSVpvs("");
4569                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4570 #endif
4571                 PL_bufptr = s + 3;
4572                 PL_lex_state = LEX_INTERPCONCAT;
4573                 return yylex();
4574             }
4575             else {
4576                 I32 tmp;
4577                 if (!PL_madskills) /* when just compiling don't need correct */
4578                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4579                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4580                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4581                     (strchr(PL_lex_casestack, 'L')
4582                         || strchr(PL_lex_casestack, 'U')
4583                         || strchr(PL_lex_casestack, 'F'))) {
4584                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4585                     PL_lex_allbrackets--;
4586                     return REPORT(')');
4587                 }
4588                 if (PL_lex_casemods > 10)
4589                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4590                 PL_lex_casestack[PL_lex_casemods++] = *s;
4591                 PL_lex_casestack[PL_lex_casemods] = '\0';
4592                 PL_lex_state = LEX_INTERPCONCAT;
4593                 start_force(PL_curforce);
4594                 NEXTVAL_NEXTTOKE.ival = 0;
4595                 force_next((2<<24)|'(');
4596                 start_force(PL_curforce);
4597                 if (*s == 'l')
4598                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4599                 else if (*s == 'u')
4600                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4601                 else if (*s == 'L')
4602                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4603                 else if (*s == 'U')
4604                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4605                 else if (*s == 'Q')
4606                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4607                 else if (*s == 'F')
4608                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4609                 else
4610                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4611                 if (PL_madskills) {
4612                     SV* const tmpsv = newSVpvs("\\ ");
4613                     /* replace the space with the character we want to escape
4614                      */
4615                     SvPVX(tmpsv)[1] = *s;
4616                     curmad('_', tmpsv);
4617                 }
4618                 PL_bufptr = s + 1;
4619             }
4620             force_next(FUNC);
4621             if (PL_lex_starts) {
4622                 s = PL_bufptr;
4623                 PL_lex_starts = 0;
4624 #ifdef PERL_MAD
4625                 if (PL_madskills) {
4626                     if (PL_thistoken)
4627                         sv_free(PL_thistoken);
4628                     PL_thistoken = newSVpvs("");
4629                 }
4630 #endif
4631                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4632                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4633                     OPERATOR(',');
4634                 else
4635                     Aop(OP_CONCAT);
4636             }
4637             else
4638                 return yylex();
4639         }
4640
4641     case LEX_INTERPPUSH:
4642         return REPORT(sublex_push());
4643
4644     case LEX_INTERPSTART:
4645         if (PL_bufptr == PL_bufend)
4646             return REPORT(sublex_done());
4647         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4648               "### Interpolated variable\n"); });
4649         PL_expect = XTERM;
4650         PL_lex_dojoin = (*PL_bufptr == '@');
4651         PL_lex_state = LEX_INTERPNORMAL;
4652         if (PL_lex_dojoin) {
4653             start_force(PL_curforce);
4654             NEXTVAL_NEXTTOKE.ival = 0;
4655             force_next(',');
4656             start_force(PL_curforce);
4657             force_ident("\"", '$');
4658             start_force(PL_curforce);
4659             NEXTVAL_NEXTTOKE.ival = 0;
4660             force_next('$');
4661             start_force(PL_curforce);
4662             NEXTVAL_NEXTTOKE.ival = 0;
4663             force_next((2<<24)|'(');
4664             start_force(PL_curforce);
4665             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4666             force_next(FUNC);
4667         }
4668         /* Convert (?{...}) and friends to 'do {...}' */
4669         if (PL_lex_inpat && *PL_bufptr == '(') {
4670             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4671             PL_bufptr += 2;
4672             if (*PL_bufptr != '{')
4673                 PL_bufptr++;
4674             start_force(PL_curforce);
4675             /* XXX probably need a CURMAD(something) here */
4676             PL_expect = XTERMBLOCK;
4677             force_next(DO);
4678         }
4679
4680         if (PL_lex_starts++) {
4681             s = PL_bufptr;
4682 #ifdef PERL_MAD
4683             if (PL_madskills) {
4684                 if (PL_thistoken)
4685                     sv_free(PL_thistoken);
4686                 PL_thistoken = newSVpvs("");
4687             }
4688 #endif
4689             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4690             if (!PL_lex_casemods && PL_lex_inpat)
4691                 OPERATOR(',');
4692             else
4693                 Aop(OP_CONCAT);
4694         }
4695         return yylex();
4696
4697     case LEX_INTERPENDMAYBE:
4698         if (intuit_more(PL_bufptr)) {
4699             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4700             break;
4701         }
4702         /* FALL THROUGH */
4703
4704     case LEX_INTERPEND:
4705         if (PL_lex_dojoin) {
4706             PL_lex_dojoin = FALSE;
4707             PL_lex_state = LEX_INTERPCONCAT;
4708 #ifdef PERL_MAD
4709             if (PL_madskills) {
4710                 if (PL_thistoken)
4711                     sv_free(PL_thistoken);
4712                 PL_thistoken = newSVpvs("");
4713             }
4714 #endif
4715             PL_lex_allbrackets--;
4716             return REPORT(')');
4717         }
4718         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4719             && SvEVALED(PL_lex_repl))
4720         {
4721             if (PL_bufptr != PL_bufend)
4722                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4723             PL_lex_repl = NULL;
4724         }
4725         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4726            re_eval_str.  If the here-doc body’s length equals the previous
4727            value of re_eval_start, re_eval_start will now be null.  So
4728            check re_eval_str as well. */
4729         if (PL_parser->lex_shared->re_eval_start
4730          || PL_parser->lex_shared->re_eval_str) {
4731             SV *sv;
4732             if (*PL_bufptr != ')')
4733                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4734             PL_bufptr++;
4735             /* having compiled a (?{..}) expression, return the original
4736              * text too, as a const */
4737             if (PL_parser->lex_shared->re_eval_str) {
4738                 sv = PL_parser->lex_shared->re_eval_str;
4739                 PL_parser->lex_shared->re_eval_str = NULL;
4740                 SvCUR_set(sv,
4741                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4742                 SvPV_shrink_to_cur(sv);
4743             }
4744             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4745                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4746             start_force(PL_curforce);
4747             /* XXX probably need a CURMAD(something) here */
4748             NEXTVAL_NEXTTOKE.opval =
4749                     (OP*)newSVOP(OP_CONST, 0,
4750                                  sv);
4751             force_next(THING);
4752             PL_parser->lex_shared->re_eval_start = NULL;
4753             PL_expect = XTERM;
4754             return REPORT(',');
4755         }
4756
4757         /* FALLTHROUGH */
4758     case LEX_INTERPCONCAT:
4759 #ifdef DEBUGGING
4760         if (PL_lex_brackets)
4761             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4762                        (long) PL_lex_brackets);
4763 #endif
4764         if (PL_bufptr == PL_bufend)
4765             return REPORT(sublex_done());
4766
4767         /* m'foo' still needs to be parsed for possible (?{...}) */
4768         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4769             SV *sv = newSVsv(PL_linestr);
4770             sv = tokeq(sv);
4771             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4772             s = PL_bufend;
4773         }
4774         else {
4775             s = scan_const(PL_bufptr);
4776             if (*s == '\\')
4777                 PL_lex_state = LEX_INTERPCASEMOD;
4778             else
4779                 PL_lex_state = LEX_INTERPSTART;
4780         }
4781
4782         if (s != PL_bufptr) {
4783             start_force(PL_curforce);
4784             if (PL_madskills) {
4785                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4786             }
4787             NEXTVAL_NEXTTOKE = pl_yylval;
4788             PL_expect = XTERM;
4789             force_next(THING);
4790             if (PL_lex_starts++) {
4791 #ifdef PERL_MAD
4792                 if (PL_madskills) {
4793                     if (PL_thistoken)
4794                         sv_free(PL_thistoken);
4795                     PL_thistoken = newSVpvs("");
4796                 }
4797 #endif
4798                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4799                 if (!PL_lex_casemods && PL_lex_inpat)
4800                     OPERATOR(',');
4801                 else
4802                     Aop(OP_CONCAT);
4803             }
4804             else {
4805                 PL_bufptr = s;
4806                 return yylex();
4807             }
4808         }
4809
4810         return yylex();
4811     case LEX_FORMLINE:
4812         s = scan_formline(PL_bufptr);
4813         if (!PL_lex_formbrack)
4814         {
4815             formbrack = 1;
4816             goto rightbracket;
4817         }
4818         PL_bufptr = s;
4819         return yylex();
4820     }
4821
4822     s = PL_bufptr;
4823     PL_oldoldbufptr = PL_oldbufptr;
4824     PL_oldbufptr = s;
4825
4826   retry:
4827 #ifdef PERL_MAD
4828     if (PL_thistoken) {
4829         sv_free(PL_thistoken);
4830         PL_thistoken = 0;
4831     }
4832     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4833 #endif
4834     switch (*s) {
4835     default:
4836         if (isIDFIRST_lazy_if(s,UTF))
4837             goto keylookup;
4838         {
4839         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4840         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4841                                                     UTF8SKIP(s),
4842                                                     SVs_TEMP | SVf_UTF8),
4843                                             10, UNI_DISPLAY_ISPRINT))
4844                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4845         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4846         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4847             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4848         } else {
4849             d = PL_linestart;
4850         }
4851         *s = '\0';
4852         sv_setpv(dsv, d);
4853         if (UTF)
4854             SvUTF8_on(dsv);
4855         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4856     }
4857     case 4:
4858     case 26:
4859         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4860     case 0:
4861 #ifdef PERL_MAD
4862         if (PL_madskills)
4863             PL_faketokens = 0;
4864 #endif
4865         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4866             PL_last_uni = 0;
4867             PL_last_lop = 0;
4868             if (PL_lex_brackets &&
4869                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4870                 yyerror((const char *)
4871                         (PL_lex_formbrack
4872                          ? "Format not terminated"
4873                          : "Missing right curly or square bracket"));
4874             }
4875             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4876                         "### Tokener got EOF\n");
4877             } );
4878             TOKEN(0);
4879         }
4880         if (s++ < PL_bufend)
4881             goto retry;                 /* ignore stray nulls */
4882         PL_last_uni = 0;
4883         PL_last_lop = 0;
4884         if (!PL_in_eval && !PL_preambled) {
4885             PL_preambled = TRUE;
4886 #ifdef PERL_MAD
4887             if (PL_madskills)
4888                 PL_faketokens = 1;
4889 #endif
4890             if (PL_perldb) {
4891                 /* Generate a string of Perl code to load the debugger.
4892                  * If PERL5DB is set, it will return the contents of that,
4893                  * otherwise a compile-time require of perl5db.pl.  */
4894
4895                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4896
4897                 if (pdb) {
4898                     sv_setpv(PL_linestr, pdb);
4899                     sv_catpvs(PL_linestr,";");
4900                 } else {
4901                     SETERRNO(0,SS_NORMAL);
4902                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4903                 }
4904             } else
4905                 sv_setpvs(PL_linestr,"");
4906             if (PL_preambleav) {
4907                 SV **svp = AvARRAY(PL_preambleav);
4908                 SV **const end = svp + AvFILLp(PL_preambleav);
4909                 while(svp <= end) {
4910                     sv_catsv(PL_linestr, *svp);
4911                     ++svp;
4912                     sv_catpvs(PL_linestr, ";");
4913                 }
4914                 sv_free(MUTABLE_SV(PL_preambleav));
4915                 PL_preambleav = NULL;
4916             }
4917             if (PL_minus_E)
4918                 sv_catpvs(PL_linestr,
4919                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4920             if (PL_minus_n || PL_minus_p) {
4921                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4922                 if (PL_minus_l)
4923                     sv_catpvs(PL_linestr,"chomp;");
4924                 if (PL_minus_a) {
4925                     if (PL_minus_F) {
4926                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4927                              || *PL_splitstr == '"')
4928                               && strchr(PL_splitstr + 1, *PL_splitstr))
4929                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4930                         else {
4931                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4932                                bytes can be used as quoting characters.  :-) */
4933                             const char *splits = PL_splitstr;
4934                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4935                             do {
4936                                 /* Need to \ \s  */
4937                                 if (*splits == '\\')
4938                                     sv_catpvn(PL_linestr, splits, 1);
4939                                 sv_catpvn(PL_linestr, splits, 1);
4940                             } while (*splits++);
4941                             /* This loop will embed the trailing NUL of
4942                                PL_linestr as the last thing it does before
4943                                terminating.  */
4944                             sv_catpvs(PL_linestr, ");");
4945                         }
4946                     }
4947                     else
4948                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4949                 }
4950             }
4951             sv_catpvs(PL_linestr, "\n");
4952             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4953             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4954             PL_last_lop = PL_last_uni = NULL;
4955             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4956                 update_debugger_info(PL_linestr, NULL, 0);
4957             goto retry;
4958         }
4959         do {
4960             fake_eof = 0;
4961             bof = PL_rsfp ? TRUE : FALSE;
4962             if (0) {
4963               fake_eof:
4964                 fake_eof = LEX_FAKE_EOF;
4965             }
4966             PL_bufptr = PL_bufend;
4967             COPLINE_INC_WITH_HERELINES;
4968             if (!lex_next_chunk(fake_eof)) {
4969                 CopLINE_dec(PL_curcop);
4970                 s = PL_bufptr;
4971                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4972             }
4973             CopLINE_dec(PL_curcop);
4974 #ifdef PERL_MAD
4975             if (!PL_rsfp)
4976                 PL_realtokenstart = -1;
4977 #endif
4978             s = PL_bufptr;
4979             /* If it looks like the start of a BOM or raw UTF-16,
4980              * check if it in fact is. */
4981             if (bof && PL_rsfp &&
4982                      (*s == 0 ||
4983                       *(U8*)s == 0xEF ||
4984                       *(U8*)s >= 0xFE ||
4985                       s[1] == 0)) {
4986                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4987                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4988 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4989                 /* offset may include swallowed CR */
4990                 if (!bof)
4991                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4992 #endif
4993                 if (bof) {
4994                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4995                     s = swallow_bom((U8*)s);
4996                 }
4997             }
4998             if (PL_parser->in_pod) {
4999                 /* Incest with pod. */
5000 #ifdef PERL_MAD
5001                 if (PL_madskills)
5002                     sv_catsv(PL_thiswhite, PL_linestr);
5003 #endif
5004                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5005                     sv_setpvs(PL_linestr, "");
5006                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5007                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5008                     PL_last_lop = PL_last_uni = NULL;
5009                     PL_parser->in_pod = 0;
5010                 }
5011             }
5012             if (PL_rsfp || PL_parser->filtered)
5013                 incline(s);
5014         } while (PL_parser->in_pod);
5015         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5016         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5017         PL_last_lop = PL_last_uni = NULL;
5018         if (CopLINE(PL_curcop) == 1) {
5019             while (s < PL_bufend && isSPACE(*s))
5020                 s++;
5021             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5022                 s++;
5023 #ifdef PERL_MAD
5024             if (PL_madskills)
5025                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5026 #endif
5027             d = NULL;
5028             if (!PL_in_eval) {
5029                 if (*s == '#' && *(s+1) == '!')
5030                     d = s + 2;
5031 #ifdef ALTERNATE_SHEBANG
5032                 else {
5033                     static char const as[] = ALTERNATE_SHEBANG;
5034                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5035                         d = s + (sizeof(as) - 1);
5036                 }
5037 #endif /* ALTERNATE_SHEBANG */
5038             }
5039             if (d) {
5040                 char *ipath;
5041                 char *ipathend;
5042
5043                 while (isSPACE(*d))
5044                     d++;
5045                 ipath = d;
5046                 while (*d && !isSPACE(*d))
5047                     d++;
5048                 ipathend = d;
5049
5050 #ifdef ARG_ZERO_IS_SCRIPT
5051                 if (ipathend > ipath) {
5052                     /*
5053                      * HP-UX (at least) sets argv[0] to the script name,
5054                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5055                      * at least, set argv[0] to the basename of the Perl
5056                      * interpreter. So, having found "#!", we'll set it right.
5057                      */
5058                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5059                                                     SVt_PV)); /* $^X */
5060                     assert(SvPOK(x) || SvGMAGICAL(x));
5061                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5062                         sv_setpvn(x, ipath, ipathend - ipath);
5063                         SvSETMAGIC(x);
5064                     }
5065                     else {
5066                         STRLEN blen;
5067                         STRLEN llen;
5068                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5069                         const char * const lstart = SvPV_const(x,llen);
5070                         if (llen < blen) {
5071                             bstart += blen - llen;
5072                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5073                                 sv_setpvn(x, ipath, ipathend - ipath);
5074                                 SvSETMAGIC(x);
5075                             }
5076                         }
5077                     }
5078                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5079                 }
5080 #endif /* ARG_ZERO_IS_SCRIPT */
5081
5082                 /*
5083                  * Look for options.
5084                  */
5085                 d = instr(s,"perl -");
5086                 if (!d) {
5087                     d = instr(s,"perl");
5088 #if defined(DOSISH)
5089                     /* avoid getting into infinite loops when shebang
5090                      * line contains "Perl" rather than "perl" */
5091                     if (!d) {
5092                         for (d = ipathend-4; d >= ipath; --d) {
5093                             if ((*d == 'p' || *d == 'P')
5094                                 && !ibcmp(d, "perl", 4))
5095                             {
5096                                 break;
5097                             }
5098                         }
5099                         if (d < ipath)
5100                             d = NULL;
5101                     }
5102 #endif
5103                 }
5104 #ifdef ALTERNATE_SHEBANG
5105                 /*
5106                  * If the ALTERNATE_SHEBANG on this system starts with a
5107                  * character that can be part of a Perl expression, then if
5108                  * we see it but not "perl", we're probably looking at the
5109                  * start of Perl code, not a request to hand off to some
5110                  * other interpreter.  Similarly, if "perl" is there, but
5111                  * not in the first 'word' of the line, we assume the line
5112                  * contains the start of the Perl program.
5113                  */
5114                 if (d && *s != '#') {
5115                     const char *c = ipath;
5116                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5117                         c++;
5118                     if (c < d)
5119                         d = NULL;       /* "perl" not in first word; ignore */
5120                     else
5121                         *s = '#';       /* Don't try to parse shebang line */
5122                 }
5123 #endif /* ALTERNATE_SHEBANG */
5124                 if (!d &&
5125                     *s == '#' &&
5126                     ipathend > ipath &&
5127                     !PL_minus_c &&
5128                     !instr(s,"indir") &&
5129                     instr(PL_origargv[0],"perl"))
5130                 {
5131                     dVAR;
5132                     char **newargv;
5133
5134                     *ipathend = '\0';
5135                     s = ipathend + 1;
5136                     while (s < PL_bufend && isSPACE(*s))
5137                         s++;
5138                     if (s < PL_bufend) {
5139                         Newx(newargv,PL_origargc+3,char*);
5140                         newargv[1] = s;
5141                         while (s < PL_bufend && !isSPACE(*s))
5142                             s++;
5143                         *s = '\0';
5144                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5145                     }
5146                     else
5147                         newargv = PL_origargv;
5148                     newargv[0] = ipath;
5149                     PERL_FPU_PRE_EXEC
5150                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5151                     PERL_FPU_POST_EXEC
5152                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5153                 }
5154                 if (d) {
5155                     while (*d && !isSPACE(*d))
5156                         d++;
5157                     while (SPACE_OR_TAB(*d))
5158                         d++;
5159
5160                     if (*d++ == '-') {
5161                         const bool switches_done = PL_doswitches;
5162                         const U32 oldpdb = PL_perldb;
5163                         const bool oldn = PL_minus_n;
5164                         const bool oldp = PL_minus_p;
5165                         const char *d1 = d;
5166
5167                         do {
5168                             bool baduni = FALSE;
5169                             if (*d1 == 'C') {
5170                                 const char *d2 = d1 + 1;
5171                                 if (parse_unicode_opts((const char **)&d2)
5172                                     != PL_unicode)
5173                                     baduni = TRUE;
5174                             }
5175                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5176                                 const char * const m = d1;
5177                                 while (*d1 && !isSPACE(*d1))
5178                                     d1++;
5179                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5180                                       (int)(d1 - m), m);
5181                             }
5182                             d1 = moreswitches(d1);
5183                         } while (d1);
5184                         if (PL_doswitches && !switches_done) {
5185                             int argc = PL_origargc;
5186                             char **argv = PL_origargv;
5187                             do {
5188                                 argc--,argv++;
5189                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5190                             init_argv_symbols(argc,argv);
5191                         }
5192                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5193                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5194                               /* if we have already added "LINE: while (<>) {",
5195                                  we must not do it again */
5196                         {
5197                             sv_setpvs(PL_linestr, "");
5198                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5199                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5200                             PL_last_lop = PL_last_uni = NULL;
5201                             PL_preambled = FALSE;
5202                             if (PERLDB_LINE || PERLDB_SAVESRC)
5203                                 (void)gv_fetchfile(PL_origfilename);
5204                             goto retry;
5205                         }
5206                     }
5207                 }
5208             }
5209         }
5210         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5211             PL_lex_state = LEX_FORMLINE;
5212             start_force(PL_curforce);
5213             NEXTVAL_NEXTTOKE.ival = 0;
5214             force_next(FORMRBRACK);
5215             TOKEN(';');
5216         }
5217         goto retry;
5218     case '\r':
5219 #ifdef PERL_STRICT_CR
5220         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5221         Perl_croak(aTHX_
5222       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5223 #endif
5224     case ' ': case '\t': case '\f': case 013:
5225 #ifdef PERL_MAD
5226         PL_realtokenstart = -1;
5227         if (!PL_thiswhite)
5228             PL_thiswhite = newSVpvs("");
5229         sv_catpvn(PL_thiswhite, s, 1);
5230 #endif
5231         s++;
5232         goto retry;
5233     case '#':
5234     case '\n':
5235 #ifdef PERL_MAD
5236         PL_realtokenstart = -1;
5237         if (PL_madskills)
5238             PL_faketokens = 0;
5239 #endif
5240         if (PL_lex_state != LEX_NORMAL ||
5241              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5242             if (*s == '#' && s == PL_linestart && PL_in_eval
5243              && !PL_rsfp && !PL_parser->filtered) {
5244                 /* handle eval qq[#line 1 "foo"\n ...] */
5245                 CopLINE_dec(PL_curcop);
5246                 incline(s);
5247             }
5248             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5249                 s = SKIPSPACE0(s);
5250                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5251                     incline(s);
5252             }
5253             else {
5254                 const bool in_comment = *s == '#';
5255                 d = s;
5256                 while (d < PL_bufend && *d != '\n')
5257                     d++;
5258                 if (d < PL_bufend)
5259                     d++;
5260                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5261                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5262                                d, PL_bufend);
5263 #ifdef PERL_MAD
5264                 if (PL_madskills)
5265                     PL_thiswhite = newSVpvn(s, d - s);
5266 #endif
5267                 s = d;
5268                 if (in_comment && d == PL_bufend
5269                  && PL_lex_state == LEX_INTERPNORMAL
5270                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5271                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5272                 else incline(s);
5273             }
5274             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5275                 PL_lex_state = LEX_FORMLINE;
5276                 start_force(PL_curforce);
5277                 NEXTVAL_NEXTTOKE.ival = 0;
5278                 force_next(FORMRBRACK);
5279                 TOKEN(';');
5280             }
5281         }
5282         else {
5283 #ifdef PERL_MAD
5284             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5285                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5286                     PL_faketokens = 0;
5287                     s = SKIPSPACE0(s);
5288                     TOKEN(PEG); /* make sure any #! line is accessible */
5289                 }
5290                 s = SKIPSPACE0(s);
5291             }
5292             else {
5293 /*              if (PL_madskills && PL_lex_formbrack) { */
5294                     d = s;
5295                     while (d < PL_bufend && *d != '\n')
5296                         d++;
5297                     if (d < PL_bufend)
5298                         d++;
5299                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5300                       Perl_croak(aTHX_ "panic: input overflow");
5301                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5302                         if (!PL_thiswhite)
5303                             PL_thiswhite = newSVpvs("");
5304                         if (CopLINE(PL_curcop) == 1) {
5305                             sv_setpvs(PL_thiswhite, "");
5306                             PL_faketokens = 0;
5307                         }
5308                         sv_catpvn(PL_thiswhite, s, d - s);
5309                     }
5310                     s = d;
5311 /*              }
5312                 *s = '\0';
5313                 PL_bufend = s; */
5314             }
5315 #else
5316             *s = '\0';
5317             PL_bufend = s;
5318 #endif
5319         }
5320         goto retry;
5321     case '-':
5322         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5323             I32 ftst = 0;
5324             char tmp;
5325
5326             s++;
5327             PL_bufptr = s;
5328             tmp = *s++;
5329
5330             while (s < PL_bufend && SPACE_OR_TAB(*s))
5331                 s++;
5332
5333             if (strnEQ(s,"=>",2)) {
5334                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5335                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5336                 OPERATOR('-');          /* unary minus */
5337             }
5338             PL_last_uni = PL_oldbufptr;
5339             switch (tmp) {
5340             case 'r': ftst = OP_FTEREAD;        break;
5341             case 'w': ftst = OP_FTEWRITE;       break;
5342             case 'x': ftst = OP_FTEEXEC;        break;
5343             case 'o': ftst = OP_FTEOWNED;       break;
5344             case 'R': ftst = OP_FTRREAD;        break;
5345             case 'W': ftst = OP_FTRWRITE;       break;
5346             case 'X': ftst = OP_FTREXEC;        break;
5347             case 'O': ftst = OP_FTROWNED;       break;
5348             case 'e': ftst = OP_FTIS;           break;
5349             case 'z': ftst = OP_FTZERO;         break;
5350             case 's': ftst = OP_FTSIZE;         break;
5351             case 'f': ftst = OP_FTFILE;         break;
5352             case 'd': ftst = OP_FTDIR;          break;
5353             case 'l': ftst = OP_FTLINK;         break;
5354             case 'p': ftst = OP_FTPIPE;         break;
5355             case 'S': ftst = OP_FTSOCK;         break;
5356             case 'u': ftst = OP_FTSUID;         break;
5357             case 'g': ftst = OP_FTSGID;         break;
5358             case 'k': ftst = OP_FTSVTX;         break;
5359             case 'b': ftst = OP_FTBLK;          break;
5360             case 'c': ftst = OP_FTCHR;          break;
5361             case 't': ftst = OP_FTTTY;          break;
5362             case 'T': ftst = OP_FTTEXT;         break;
5363             case 'B': ftst = OP_FTBINARY;       break;
5364             case 'M': case 'A': case 'C':
5365                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5366                 switch (tmp) {
5367                 case 'M': ftst = OP_FTMTIME;    break;
5368                 case 'A': ftst = OP_FTATIME;    break;
5369                 case 'C': ftst = OP_FTCTIME;    break;
5370                 default:                        break;
5371                 }
5372                 break;
5373             default:
5374                 break;
5375             }
5376             if (ftst) {
5377                 PL_last_lop_op = (OPCODE)ftst;
5378                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5379                         "### Saw file test %c\n", (int)tmp);
5380                 } );
5381                 FTST(ftst);
5382             }
5383             else {
5384                 /* Assume it was a minus followed by a one-letter named
5385                  * subroutine call (or a -bareword), then. */
5386                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5387                         "### '-%c' looked like a file test but was not\n",
5388                         (int) tmp);
5389                 } );
5390                 s = --PL_bufptr;
5391             }
5392         }
5393         {
5394             const char tmp = *s++;
5395             if (*s == tmp) {
5396                 s++;
5397                 if (PL_expect == XOPERATOR)
5398                     TERM(POSTDEC);
5399                 else
5400                     OPERATOR(PREDEC);
5401             }
5402             else if (*s == '>') {
5403                 s++;
5404                 s = SKIPSPACE1(s);
5405                 if (isIDFIRST_lazy_if(s,UTF)) {
5406                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5407                     TOKEN(ARROW);
5408                 }
5409                 else if (*s == '$')
5410                     OPERATOR(ARROW);
5411                 else
5412                     TERM(ARROW);
5413             }
5414             if (PL_expect == XOPERATOR) {
5415                 if (*s == '=' && !PL_lex_allbrackets &&
5416                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5417                     s--;
5418                     TOKEN(0);
5419                 }
5420                 Aop(OP_SUBTRACT);
5421             }
5422             else {
5423                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5424                     check_uni();
5425                 OPERATOR('-');          /* unary minus */
5426             }
5427         }
5428
5429     case '+':
5430         {
5431             const char tmp = *s++;
5432             if (*s == tmp) {
5433                 s++;
5434                 if (PL_expect == XOPERATOR)
5435                     TERM(POSTINC);
5436                 else
5437                     OPERATOR(PREINC);
5438             }
5439             if (PL_expect == XOPERATOR) {
5440                 if (*s == '=' && !PL_lex_allbrackets &&
5441                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5442                     s--;
5443                     TOKEN(0);
5444                 }
5445                 Aop(OP_ADD);
5446             }
5447             else {
5448                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5449                     check_uni();
5450                 OPERATOR('+');
5451             }
5452         }
5453
5454     case '*':
5455         if (PL_expect != XOPERATOR) {
5456             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5457             PL_expect = XOPERATOR;
5458             force_ident(PL_tokenbuf, '*');
5459             if (!*PL_tokenbuf)
5460                 PREREF('*');
5461             TERM('*');
5462         }
5463         s++;
5464         if (*s == '*') {
5465             s++;
5466             if (*s == '=' && !PL_lex_allbrackets &&
5467                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5468                 s -= 2;
5469                 TOKEN(0);
5470             }
5471             PWop(OP_POW);
5472         }
5473         if (*s == '=' && !PL_lex_allbrackets &&
5474                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5475             s--;
5476             TOKEN(0);
5477         }
5478         Mop(OP_MULTIPLY);
5479
5480     case '%':
5481         if (PL_expect == XOPERATOR) {
5482             if (s[1] == '=' && !PL_lex_allbrackets &&
5483                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5484                 TOKEN(0);
5485             ++s;
5486             Mop(OP_MODULO);
5487         }
5488         PL_tokenbuf[0] = '%';
5489         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5490                 sizeof PL_tokenbuf - 1, FALSE);
5491         if (!PL_tokenbuf[1]) {
5492             PREREF('%');
5493         }
5494         PL_expect = XOPERATOR;
5495         force_ident_maybe_lex('%');
5496         TERM('%');
5497
5498     case '^':
5499         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5500                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5501             TOKEN(0);
5502         s++;
5503         BOop(OP_BIT_XOR);
5504     case '[':
5505         if (PL_lex_brackets > 100)
5506             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5507         PL_lex_brackstack[PL_lex_brackets++] = 0;
5508         PL_lex_allbrackets++;
5509         {
5510             const char tmp = *s++;
5511             OPERATOR(tmp);
5512         }
5513     case '~':
5514         if (s[1] == '~'
5515             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5516         {
5517             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5518                 TOKEN(0);
5519             s += 2;
5520             Eop(OP_SMARTMATCH);
5521         }
5522         s++;
5523         OPERATOR('~');
5524     case ',':
5525         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5526             TOKEN(0);
5527         s++;
5528         OPERATOR(',');
5529     case ':':
5530         if (s[1] == ':') {
5531             len = 0;
5532             goto just_a_word_zero_gv;
5533         }
5534         s++;
5535         switch (PL_expect) {
5536             OP *attrs;
5537 #ifdef PERL_MAD
5538             I32 stuffstart;
5539 #endif
5540         case XOPERATOR:
5541             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5542                 break;
5543             PL_bufptr = s;      /* update in case we back off */
5544             if (*s == '=') {
5545                 Perl_croak(aTHX_
5546                            "Use of := for an empty attribute list is not allowed");
5547             }
5548             goto grabattrs;
5549         case XATTRBLOCK:
5550             PL_expect = XBLOCK;
5551             goto grabattrs;
5552         case XATTRTERM:
5553             PL_expect = XTERMBLOCK;
5554          grabattrs:
5555 #ifdef PERL_MAD
5556             stuffstart = s - SvPVX(PL_linestr) - 1;
5557 #endif
5558             s = PEEKSPACE(s);
5559             attrs = NULL;
5560             while (isIDFIRST_lazy_if(s,UTF)) {
5561                 I32 tmp;
5562                 SV *sv;
5563                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5564                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5565                     if (tmp < 0) tmp = -tmp;
5566                     switch (tmp) {
5567                     case KEY_or:
5568                     case KEY_and:
5569                     case KEY_for:
5570                     case KEY_foreach:
5571                     case KEY_unless:
5572                     case KEY_if:
5573                     case KEY_while:
5574                     case KEY_until:
5575                         goto got_attrs;
5576                     default:
5577                         break;
5578                     }
5579                 }
5580                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5581                 if (*d == '(') {
5582                     d = scan_str(d,TRUE,TRUE,FALSE);
5583                     if (!d) {
5584                         /* MUST advance bufptr here to avoid bogus
5585                            "at end of line" context messages from yyerror().
5586                          */
5587                         PL_bufptr = s + len;
5588                         yyerror("Unterminated attribute parameter in attribute list");
5589                         if (attrs)
5590                             op_free(attrs);
5591                         sv_free(sv);
5592                         return REPORT(0);       /* EOF indicator */
5593                     }
5594                 }
5595                 if (PL_lex_stuff) {
5596                     sv_catsv(sv, PL_lex_stuff);
5597                     attrs = op_append_elem(OP_LIST, attrs,
5598                                         newSVOP(OP_CONST, 0, sv));
5599                     SvREFCNT_dec(PL_lex_stuff);
5600                     PL_lex_stuff = NULL;
5601                 }
5602                 else {
5603                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5604                         sv_free(sv);
5605                         if (PL_in_my == KEY_our) {
5606                             deprecate(":unique");
5607                         }
5608                         else
5609                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5610                     }
5611
5612                     /* NOTE: any CV attrs applied here need to be part of
5613                        the CVf_BUILTIN_ATTRS define in cv.h! */
5614                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5615                         sv_free(sv);
5616                         CvLVALUE_on(PL_compcv);
5617                     }
5618                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5619                         sv_free(sv);
5620                         deprecate(":locked");
5621                     }
5622                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5623                         sv_free(sv);
5624                         CvMETHOD_on(PL_compcv);
5625                     }
5626                     /* After we've set the flags, it could be argued that
5627                        we don't need to do the attributes.pm-based setting
5628                        process, and shouldn't bother appending recognized
5629                        flags.  To experiment with that, uncomment the
5630                        following "else".  (Note that's already been
5631                        uncommented.  That keeps the above-applied built-in
5632                        attributes from being intercepted (and possibly
5633                        rejected) by a package's attribute routines, but is
5634                        justified by the performance win for the common case
5635                        of applying only built-in attributes.) */
5636                     else
5637                         attrs = op_append_elem(OP_LIST, attrs,
5638                                             newSVOP(OP_CONST, 0,
5639                                                     sv));
5640                 }
5641                 s = PEEKSPACE(d);
5642                 if (*s == ':' && s[1] != ':')
5643                     s = PEEKSPACE(s+1);
5644                 else if (s == d)
5645                     break;      /* require real whitespace or :'s */
5646                 /* XXX losing whitespace on sequential attributes here */
5647             }
5648             {
5649                 const char tmp
5650                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5651                 if (*s != ';' && *s != '}' && *s != tmp
5652                     && (tmp != '=' || *s != ')')) {
5653                     const char q = ((*s == '\'') ? '"' : '\'');
5654                     /* If here for an expression, and parsed no attrs, back
5655                        off. */
5656                     if (tmp == '=' && !attrs) {
5657                         s = PL_bufptr;
5658                         break;
5659                     }
5660                     /* MUST advance bufptr here to avoid bogus "at end of line"
5661                        context messages from yyerror().
5662                     */
5663                     PL_bufptr = s;
5664                     yyerror( (const char *)
5665                              (*s
5666                               ? Perl_form(aTHX_ "Invalid separator character "
5667                                           "%c%c%c in attribute list", q, *s, q)
5668                               : "Unterminated attribute list" ) );
5669                     if (attrs)
5670                         op_free(attrs);
5671                     OPERATOR(':');
5672                 }
5673             }
5674         got_attrs:
5675             if (attrs) {
5676                 start_force(PL_curforce);
5677                 NEXTVAL_NEXTTOKE.opval = attrs;
5678                 CURMAD('_', PL_nextwhite);
5679                 force_next(THING);
5680             }
5681 #ifdef PERL_MAD
5682             if (PL_madskills) {
5683                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5684                                      (s - SvPVX(PL_linestr)) - stuffstart);
5685             }
5686 #endif
5687             TOKEN(COLONATTR);
5688         }
5689         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5690             s--;
5691             TOKEN(0);
5692         }
5693         PL_lex_allbrackets--;
5694         OPERATOR(':');
5695     case '(':
5696         s++;
5697         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5698             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5699         else
5700             PL_expect = XTERM;
5701         s = SKIPSPACE1(s);
5702         PL_lex_allbrackets++;
5703         TOKEN('(');
5704     case ';':
5705         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5706             TOKEN(0);
5707         CLINE;
5708         s++;
5709         OPERATOR(';');
5710     case ')':
5711         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5712             TOKEN(0);
5713         s++;
5714         PL_lex_allbrackets--;
5715         s = SKIPSPACE1(s);
5716         if (*s == '{')
5717             PREBLOCK(')');
5718         TERM(')');
5719     case ']':
5720         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5721             TOKEN(0);
5722         s++;
5723         if (PL_lex_brackets <= 0)
5724             yyerror("Unmatched right square bracket");
5725         else
5726             --PL_lex_brackets;
5727         PL_lex_allbrackets--;
5728         if (PL_lex_state == LEX_INTERPNORMAL) {
5729             if (PL_lex_brackets == 0) {
5730                 if (*s == '-' && s[1] == '>')
5731                     PL_lex_state = LEX_INTERPENDMAYBE;
5732                 else if (*s != '[' && *s != '{')
5733                     PL_lex_state = LEX_INTERPEND;
5734             }
5735         }
5736         TERM(']');
5737     case '{':
5738         s++;
5739       leftbracket:
5740         if (PL_lex_brackets > 100) {
5741             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5742         }
5743         switch (PL_expect) {
5744         case XTERM:
5745             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5746             PL_lex_allbrackets++;
5747             OPERATOR(HASHBRACK);
5748         case XOPERATOR:
5749             while (s < PL_bufend && SPACE_OR_TAB(*s))
5750                 s++;
5751             d = s;
5752             PL_tokenbuf[0] = '\0';
5753             if (d < PL_bufend && *d == '-') {
5754                 PL_tokenbuf[0] = '-';
5755                 d++;
5756                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5757                     d++;
5758             }
5759             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5760                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5761                               FALSE, &len);
5762                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5763                     d++;
5764                 if (*d == '}') {
5765                     const char minus = (PL_tokenbuf[0] == '-');
5766                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5767                     if (minus)
5768                         force_next('-');
5769                 }
5770             }
5771             /* FALL THROUGH */
5772         case XATTRBLOCK:
5773         case XBLOCK:
5774             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5775             PL_lex_allbrackets++;
5776             PL_expect = XSTATE;
5777             break;
5778         case XATTRTERM:
5779         case XTERMBLOCK:
5780             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5781             PL_lex_allbrackets++;
5782             PL_expect = XSTATE;
5783             break;
5784         default: {
5785                 const char *t;
5786                 if (PL_oldoldbufptr == PL_last_lop)
5787                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5788                 else
5789                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5790                 PL_lex_allbrackets++;
5791                 s = SKIPSPACE1(s);
5792                 if (*s == '}') {
5793                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5794                         PL_expect = XTERM;
5795                         /* This hack is to get the ${} in the message. */
5796                         PL_bufptr = s+1;
5797                         yyerror("syntax error");
5798                         break;
5799                     }
5800                     OPERATOR(HASHBRACK);
5801                 }
5802                 /* This hack serves to disambiguate a pair of curlies
5803                  * as being a block or an anon hash.  Normally, expectation
5804                  * determines that, but in cases where we're not in a
5805                  * position to expect anything in particular (like inside
5806                  * eval"") we have to resolve the ambiguity.  This code
5807                  * covers the case where the first term in the curlies is a
5808                  * quoted string.  Most other cases need to be explicitly
5809                  * disambiguated by prepending a "+" before the opening
5810                  * curly in order to force resolution as an anon hash.
5811                  *
5812                  * XXX should probably propagate the outer expectation
5813                  * into eval"" to rely less on this hack, but that could
5814                  * potentially break current behavior of eval"".
5815                  * GSAR 97-07-21
5816                  */
5817                 t = s;
5818                 if (*s == '\'' || *s == '"' || *s == '`') {
5819                     /* common case: get past first string, handling escapes */
5820                     for (t++; t < PL_bufend && *t != *s;)
5821                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5822                             t++;
5823                     t++;
5824                 }
5825                 else if (*s == 'q') {
5826                     if (++t < PL_bufend
5827                         && (!isALNUM(*t)
5828                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5829                                 && !isALNUM(*t))))
5830                     {
5831                         /* skip q//-like construct */
5832                         const char *tmps;
5833                         char open, close, term;
5834                         I32 brackets = 1;
5835
5836                         while (t < PL_bufend && isSPACE(*t))
5837                             t++;
5838                         /* check for q => */
5839                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5840                             OPERATOR(HASHBRACK);
5841                         }
5842                         term = *t;
5843                         open = term;
5844                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5845                             term = tmps[5];
5846                         close = term;
5847                         if (open == close)
5848                             for (t++; t < PL_bufend; t++) {
5849                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5850                                     t++;
5851                                 else if (*t == open)
5852                                     break;
5853                             }
5854                         else {
5855                             for (t++; t < PL_bufend; t++) {
5856                                 if (*t == '\\' && t+1 < PL_bufend)
5857                                     t++;
5858                                 else if (*t == close && --brackets <= 0)
5859                                     break;
5860                                 else if (*t == open)
5861                                     brackets++;
5862                             }
5863                         }
5864                         t++;
5865                     }
5866                     else
5867                         /* skip plain q word */
5868                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5869                              t += UTF8SKIP(t);
5870                 }
5871                 else if (isALNUM_lazy_if(t,UTF)) {
5872                     t += UTF8SKIP(t);
5873                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5874                          t += UTF8SKIP(t);
5875                 }
5876                 while (t < PL_bufend && isSPACE(*t))
5877                     t++;
5878                 /* if comma follows first term, call it an anon hash */
5879                 /* XXX it could be a comma expression with loop modifiers */
5880                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5881                                    || (*t == '=' && t[1] == '>')))
5882                     OPERATOR(HASHBRACK);
5883                 if (PL_expect == XREF)
5884                     PL_expect = XTERM;
5885                 else {
5886                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5887                     PL_expect = XSTATE;
5888                 }
5889             }
5890             break;
5891         }
5892         pl_yylval.ival = CopLINE(PL_curcop);
5893         if (isSPACE(*s) || *s == '#')
5894             PL_copline = NOLINE;   /* invalidate current command line number */
5895         TOKEN(formbrack ? '=' : '{');
5896     case '}':
5897         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5898             TOKEN(0);
5899       rightbracket:
5900         s++;
5901         if (PL_lex_brackets <= 0)
5902             yyerror("Unmatched right curly bracket");
5903         else
5904             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5905         PL_lex_allbrackets--;
5906         if (PL_lex_state == LEX_INTERPNORMAL) {
5907             if (PL_lex_brackets == 0) {
5908                 if (PL_expect & XFAKEBRACK) {
5909                     PL_expect &= XENUMMASK;
5910                     PL_lex_state = LEX_INTERPEND;
5911                     PL_bufptr = s;
5912 #if 0
5913                     if (PL_madskills) {
5914                         if (!PL_thiswhite)
5915                             PL_thiswhite = newSVpvs("");
5916                         sv_catpvs(PL_thiswhite,"}");
5917                     }
5918 #endif
5919                     return yylex();     /* ignore fake brackets */
5920                 }
5921                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5922                  && SvEVALED(PL_lex_repl))
5923                     PL_lex_state = LEX_INTERPEND;
5924                 else if (*s == '-' && s[1] == '>')
5925                     PL_lex_state = LEX_INTERPENDMAYBE;
5926                 else if (*s != '[' && *s != '{')
5927                     PL_lex_state = LEX_INTERPEND;
5928             }
5929         }
5930         if (PL_expect & XFAKEBRACK) {
5931             PL_expect &= XENUMMASK;
5932             PL_bufptr = s;
5933             return yylex();             /* ignore fake brackets */
5934         }
5935         start_force(PL_curforce);
5936         if (PL_madskills) {
5937             curmad('X', newSVpvn(s-1,1));
5938             CURMAD('_', PL_thiswhite);
5939         }
5940         force_next(formbrack ? '.' : '}');
5941         if (formbrack) LEAVE;
5942 #ifdef PERL_MAD
5943         if (!PL_thistoken)
5944             PL_thistoken = newSVpvs("");
5945 #endif
5946         if (formbrack == 2) { /* means . where arguments were expected */
5947             start_force(PL_curforce);
5948             force_next(';');
5949             TOKEN(FORMRBRACK);
5950         }
5951         TOKEN(';');
5952     case '&':
5953         s++;
5954         if (*s++ == '&') {
5955             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5956                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5957                 s -= 2;
5958                 TOKEN(0);
5959             }
5960             AOPERATOR(ANDAND);
5961         }
5962         s--;
5963         if (PL_expect == XOPERATOR) {
5964             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5965                 && isIDFIRST_lazy_if(s,UTF))
5966             {
5967                 CopLINE_dec(PL_curcop);
5968                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5969                 CopLINE_inc(PL_curcop);
5970             }
5971             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5972                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5973                 s--;
5974                 TOKEN(0);
5975             }
5976             BAop(OP_BIT_AND);
5977         }
5978
5979         PL_tokenbuf[0] = '&';
5980         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
5981                        sizeof PL_tokenbuf - 1, TRUE);
5982         if (PL_tokenbuf[1]) {
5983             PL_expect = XOPERATOR;
5984             force_ident_maybe_lex('&');
5985         }
5986         else
5987             PREREF('&');
5988         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5989         TERM('&');
5990
5991     case '|':
5992         s++;
5993         if (*s++ == '|') {
5994             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5995                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5996                 s -= 2;
5997                 TOKEN(0);
5998             }
5999             AOPERATOR(OROR);
6000         }
6001         s--;
6002         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6003                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6004             s--;
6005             TOKEN(0);
6006         }
6007         BOop(OP_BIT_OR);
6008     case '=':
6009         s++;
6010         {
6011             const char tmp = *s++;
6012             if (tmp == '=') {
6013                 if (!PL_lex_allbrackets &&
6014                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6015                     s -= 2;
6016                     TOKEN(0);
6017                 }
6018                 Eop(OP_EQ);
6019             }
6020             if (tmp == '>') {
6021                 if (!PL_lex_allbrackets &&
6022                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6023                     s -= 2;
6024                     TOKEN(0);
6025                 }
6026                 OPERATOR(',');
6027             }
6028             if (tmp == '~')
6029                 PMop(OP_MATCH);
6030             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6031                 && strchr("+-*/%.^&|<",tmp))
6032                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6033                             "Reversed %c= operator",(int)tmp);
6034             s--;
6035             if (PL_expect == XSTATE && isALPHA(tmp) &&
6036                 (s == PL_linestart+1 || s[-2] == '\n') )
6037                 {
6038                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6039                         || PL_lex_state != LEX_NORMAL) {
6040                         d = PL_bufend;
6041                         while (s < d) {
6042                             if (*s++ == '\n') {
6043                                 incline(s);
6044                                 if (strnEQ(s,"=cut",4)) {
6045                                     s = strchr(s,'\n');
6046                                     if (s)
6047                                         s++;
6048                                     else
6049                                         s = d;
6050                                     incline(s);
6051                                     goto retry;
6052                                 }
6053                             }
6054                         }
6055                         goto retry;
6056                     }
6057 #ifdef PERL_MAD
6058                     if (PL_madskills) {
6059                         if (!PL_thiswhite)
6060                             PL_thiswhite = newSVpvs("");
6061                         sv_catpvn(PL_thiswhite, PL_linestart,
6062                                   PL_bufend - PL_linestart);
6063                     }
6064 #endif
6065                     s = PL_bufend;
6066                     PL_parser->in_pod = 1;
6067                     goto retry;
6068                 }
6069         }
6070         if (PL_expect == XBLOCK) {
6071             const char *t = s;
6072 #ifdef PERL_STRICT_CR
6073             while (SPACE_OR_TAB(*t))
6074 #else
6075             while (SPACE_OR_TAB(*t) || *t == '\r')
6076 #endif
6077                 t++;
6078             if (*t == '\n' || *t == '#') {
6079                 formbrack = 1;
6080                 ENTER;
6081                 SAVEI8(PL_parser->form_lex_state);
6082                 SAVEI32(PL_lex_formbrack);
6083                 PL_parser->form_lex_state = PL_lex_state;
6084                 PL_lex_formbrack = PL_lex_brackets + 1;
6085                 goto leftbracket;
6086             }
6087         }
6088         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6089             s--;
6090             TOKEN(0);
6091         }
6092         pl_yylval.ival = 0;
6093         OPERATOR(ASSIGNOP);
6094     case '!':
6095         s++;
6096         {
6097             const char tmp = *s++;
6098             if (tmp == '=') {
6099                 /* was this !=~ where !~ was meant?
6100                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6101
6102                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6103                     const char *t = s+1;
6104
6105                     while (t < PL_bufend && isSPACE(*t))
6106                         ++t;
6107
6108                     if (*t == '/' || *t == '?' ||
6109                         ((*t == 'm' || *t == 's' || *t == 'y')
6110                          && !isALNUM(t[1])) ||
6111                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6112                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6113                                     "!=~ should be !~");
6114                 }
6115                 if (!PL_lex_allbrackets &&
6116                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6117                     s -= 2;
6118                     TOKEN(0);
6119                 }
6120                 Eop(OP_NE);
6121             }
6122             if (tmp == '~')
6123                 PMop(OP_NOT);
6124         }
6125         s--;
6126         OPERATOR('!');
6127     case '<':
6128         if (PL_expect != XOPERATOR) {
6129             if (s[1] != '<' && !strchr(s,'>'))
6130                 check_uni();
6131             if (s[1] == '<')
6132                 s = scan_heredoc(s);
6133             else
6134                 s = scan_inputsymbol(s);
6135             PL_expect = XOPERATOR;
6136             TOKEN(sublex_start());
6137         }
6138         s++;
6139         {
6140             char tmp = *s++;
6141             if (tmp == '<') {
6142                 if (*s == '=' && !PL_lex_allbrackets &&
6143                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6144                     s -= 2;
6145                     TOKEN(0);
6146                 }
6147                 SHop(OP_LEFT_SHIFT);
6148             }
6149             if (tmp == '=') {
6150                 tmp = *s++;
6151                 if (tmp == '>') {
6152                     if (!PL_lex_allbrackets &&
6153                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6154                         s -= 3;
6155                         TOKEN(0);
6156                     }
6157                     Eop(OP_NCMP);
6158                 }
6159                 s--;
6160                 if (!PL_lex_allbrackets &&
6161                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6162                     s -= 2;
6163                     TOKEN(0);
6164                 }
6165                 Rop(OP_LE);
6166             }
6167         }
6168         s--;
6169         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6170             s--;
6171             TOKEN(0);
6172         }
6173         Rop(OP_LT);
6174     case '>':
6175         s++;
6176         {
6177             const char tmp = *s++;
6178             if (tmp == '>') {
6179                 if (*s == '=' && !PL_lex_allbrackets &&
6180                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6181                     s -= 2;
6182                     TOKEN(0);
6183                 }
6184                 SHop(OP_RIGHT_SHIFT);
6185             }
6186             else if (tmp == '=') {
6187                 if (!PL_lex_allbrackets &&
6188                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6189                     s -= 2;
6190                     TOKEN(0);
6191                 }
6192                 Rop(OP_GE);
6193             }
6194         }
6195         s--;
6196         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6197             s--;
6198             TOKEN(0);
6199         }
6200         Rop(OP_GT);
6201
6202     case '$':
6203         CLINE;
6204
6205         if (PL_expect == XOPERATOR) {
6206             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6207                 return deprecate_commaless_var_list();
6208             }
6209         }
6210
6211         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6212             PL_tokenbuf[0] = '@';
6213             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6214                            sizeof PL_tokenbuf - 1, FALSE);
6215             if (PL_expect == XOPERATOR)
6216                 no_op("Array length", s);
6217             if (!PL_tokenbuf[1])
6218                 PREREF(DOLSHARP);
6219             PL_expect = XOPERATOR;
6220             force_ident_maybe_lex('#');
6221             TOKEN(DOLSHARP);
6222         }
6223
6224         PL_tokenbuf[0] = '$';
6225         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6226                        sizeof PL_tokenbuf - 1, FALSE);
6227         if (PL_expect == XOPERATOR)
6228             no_op("Scalar", s);
6229         if (!PL_tokenbuf[1]) {
6230             if (s == PL_bufend)
6231                 yyerror("Final $ should be \\$ or $name");
6232             PREREF('$');
6233         }
6234
6235         d = s;
6236         {
6237             const char tmp = *s;
6238             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6239                 s = SKIPSPACE1(s);
6240
6241             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6242                 && intuit_more(s)) {
6243                 if (*s == '[') {
6244                     PL_tokenbuf[0] = '@';
6245                     if (ckWARN(WARN_SYNTAX)) {
6246                         char *t = s+1;
6247
6248                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6249                             t++;
6250                         if (*t++ == ',') {
6251                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6252                             while (t < PL_bufend && *t != ']')
6253                                 t++;
6254                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6255                                         "Multidimensional syntax %.*s not supported",
6256                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6257                         }
6258                     }
6259                 }
6260                 else if (*s == '{') {
6261                     char *t;
6262                     PL_tokenbuf[0] = '%';
6263                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6264                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6265                         {
6266                             char tmpbuf[sizeof PL_tokenbuf];
6267                             do {
6268                                 t++;
6269                             } while (isSPACE(*t));
6270                             if (isIDFIRST_lazy_if(t,UTF)) {
6271                                 STRLEN len;
6272                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6273                                               &len);
6274                                 while (isSPACE(*t))
6275                                     t++;
6276                                 if (*t == ';'
6277                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6278                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6279                                                 "You need to quote \"%"SVf"\"",
6280                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6281                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6282                             }
6283                         }
6284                 }
6285             }
6286
6287             PL_expect = XOPERATOR;
6288             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6289                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6290                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6291                     PL_expect = XOPERATOR;
6292                 else if (strchr("$@\"'`q", *s))
6293                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6294                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6295                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6296                 else if (isIDFIRST_lazy_if(s,UTF)) {
6297                     char tmpbuf[sizeof PL_tokenbuf];
6298                     int t2;
6299                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6300                     if ((t2 = keyword(tmpbuf, len, 0))) {
6301                         /* binary operators exclude handle interpretations */
6302                         switch (t2) {
6303                         case -KEY_x:
6304                         case -KEY_eq:
6305                         case -KEY_ne:
6306                         case -KEY_gt:
6307                         case -KEY_lt:
6308                         case -KEY_ge:
6309                         case -KEY_le:
6310                         case -KEY_cmp:
6311                             break;
6312                         default:
6313                             PL_expect = XTERM;  /* e.g. print $fh length() */
6314                             break;
6315                         }
6316                     }
6317                     else {
6318                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6319                     }
6320                 }
6321                 else if (isDIGIT(*s))
6322                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6323                 else if (*s == '.' && isDIGIT(s[1]))
6324                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6325                 else if ((*s == '?' || *s == '-' || *s == '+')
6326                          && !isSPACE(s[1]) && s[1] != '=')
6327                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6328                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6329                          && s[1] != '/')
6330                     PL_expect = XTERM;          /* e.g. print $fh /.../
6331                                                    XXX except DORDOR operator
6332                                                 */
6333                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6334                          && s[2] != '=')
6335                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6336             }
6337         }
6338         force_ident_maybe_lex('$');
6339         TOKEN('$');
6340
6341     case '@':
6342         if (PL_expect == XOPERATOR)
6343             no_op("Array", s);
6344         PL_tokenbuf[0] = '@';
6345         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6346         if (!PL_tokenbuf[1]) {
6347             PREREF('@');
6348         }
6349         if (PL_lex_state == LEX_NORMAL)
6350             s = SKIPSPACE1(s);
6351         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6352             if (*s == '{')
6353                 PL_tokenbuf[0] = '%';
6354
6355             /* Warn about @ where they meant $. */
6356             if (*s == '[' || *s == '{') {
6357                 if (ckWARN(WARN_SYNTAX)) {
6358                     const char *t = s + 1;
6359                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6360                         t += UTF ? UTF8SKIP(t) : 1;
6361                     if (*t == '}' || *t == ']') {
6362                         t++;
6363                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6364        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6365                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6366                             "Scalar value %"SVf" better written as $%"SVf,
6367                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6368                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6369                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6370                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6371                     }
6372                 }
6373             }
6374         }
6375         PL_expect = XOPERATOR;
6376         force_ident_maybe_lex('@');
6377         TERM('@');
6378
6379      case '/':                  /* may be division, defined-or, or pattern */
6380         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6381             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6382                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6383                 TOKEN(0);
6384             s += 2;
6385             AOPERATOR(DORDOR);
6386         }
6387      case '?':                  /* may either be conditional or pattern */
6388         if (PL_expect == XOPERATOR) {
6389              char tmp = *s++;
6390              if(tmp == '?') {
6391                 if (!PL_lex_allbrackets &&
6392                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6393                     s--;
6394                     TOKEN(0);
6395                 }
6396                 PL_lex_allbrackets++;
6397                 OPERATOR('?');
6398              }
6399              else {
6400                  tmp = *s++;
6401                  if(tmp == '/') {
6402                      /* A // operator. */
6403                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6404                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6405                                             LEX_FAKEEOF_LOGIC)) {
6406                         s -= 2;
6407                         TOKEN(0);
6408                     }
6409                     AOPERATOR(DORDOR);
6410                  }
6411                  else {
6412                      s--;
6413                      if (*s == '=' && !PL_lex_allbrackets &&
6414                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6415                          s--;
6416                          TOKEN(0);
6417                      }
6418                      Mop(OP_DIVIDE);
6419                  }
6420              }
6421          }
6422          else {
6423              /* Disable warning on "study /blah/" */
6424              if (PL_oldoldbufptr == PL_last_uni
6425               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6426                   || memNE(PL_last_uni, "study", 5)
6427                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6428               ))
6429                  check_uni();
6430              if (*s == '?')
6431                  deprecate("?PATTERN? without explicit operator");
6432              s = scan_pat(s,OP_MATCH);
6433              TERM(sublex_start());
6434          }
6435
6436     case '.':
6437         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6438 #ifdef PERL_STRICT_CR
6439             && s[1] == '\n'
6440 #else
6441             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6442 #endif
6443             && (s == PL_linestart || s[-1] == '\n') )
6444         {
6445             PL_expect = XSTATE;
6446             formbrack = 2; /* dot seen where arguments expected */
6447             goto rightbracket;
6448         }
6449         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6450             s += 3;
6451             OPERATOR(YADAYADA);
6452         }
6453         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6454             char tmp = *s++;
6455             if (*s == tmp) {
6456                 if (!PL_lex_allbrackets &&
6457                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6458                     s--;
6459                     TOKEN(0);
6460                 }
6461                 s++;
6462                 if (*s == tmp) {
6463                     s++;
6464                     pl_yylval.ival = OPf_SPECIAL;
6465                 }
6466                 else
6467                     pl_yylval.ival = 0;
6468                 OPERATOR(DOTDOT);
6469             }
6470             if (*s == '=' && !PL_lex_allbrackets &&
6471                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6472                 s--;
6473                 TOKEN(0);
6474             }
6475             Aop(OP_CONCAT);
6476         }
6477         /* FALL THROUGH */
6478     case '0': case '1': case '2': case '3': case '4':
6479     case '5': case '6': case '7': case '8': case '9':
6480         s = scan_num(s, &pl_yylval);
6481         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6482         if (PL_expect == XOPERATOR)
6483             no_op("Number",s);
6484         TERM(THING);
6485
6486     case '\'':
6487         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6488         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6489         if (PL_expect == XOPERATOR) {
6490             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6491                 return deprecate_commaless_var_list();
6492             }
6493             else
6494                 no_op("String",s);
6495         }
6496         if (!s)
6497             missingterm(NULL);
6498         pl_yylval.ival = OP_CONST;
6499         TERM(sublex_start());
6500
6501     case '"':
6502         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6503         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6504         if (PL_expect == XOPERATOR) {
6505             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6506                 return deprecate_commaless_var_list();
6507             }
6508             else
6509                 no_op("String",s);
6510         }
6511         if (!s)
6512             missingterm(NULL);
6513         pl_yylval.ival = OP_CONST;
6514         /* FIXME. I think that this can be const if char *d is replaced by
6515            more localised variables.  */
6516         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6517             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6518                 pl_yylval.ival = OP_STRINGIFY;
6519                 break;
6520             }
6521         }
6522         TERM(sublex_start());
6523
6524     case '`':
6525         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6526         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6527         if (PL_expect == XOPERATOR)
6528             no_op("Backticks",s);
6529         if (!s)
6530             missingterm(NULL);
6531         readpipe_override();
6532         TERM(sublex_start());
6533
6534     case '\\':
6535         s++;
6536         if (PL_lex_inwhat && isDIGIT(*s))
6537             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6538                            *s, *s);
6539         if (PL_expect == XOPERATOR)
6540             no_op("Backslash",s);
6541         OPERATOR(REFGEN);
6542
6543     case 'v':
6544         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6545             char *start = s + 2;
6546             while (isDIGIT(*start) || *start == '_')
6547                 start++;
6548             if (*start == '.' && isDIGIT(start[1])) {
6549                 s = scan_num(s, &pl_yylval);
6550                 TERM(THING);
6551             }
6552             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6553             else if (!isALPHA(*start) && (PL_expect == XTERM
6554                         || PL_expect == XREF || PL_expect == XSTATE
6555                         || PL_expect == XTERMORDORDOR)) {
6556                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6557                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6558                 if (!gv) {
6559                     s = scan_num(s, &pl_yylval);
6560                     TERM(THING);
6561                 }
6562             }
6563         }
6564         goto keylookup;
6565     case 'x':
6566         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6567             s++;
6568             Mop(OP_REPEAT);
6569         }
6570         goto keylookup;
6571
6572     case '_':
6573     case 'a': case 'A':
6574     case 'b': case 'B':
6575     case 'c': case 'C':
6576     case 'd': case 'D':
6577     case 'e': case 'E':
6578     case 'f': case 'F':
6579     case 'g': case 'G':
6580     case 'h': case 'H':
6581     case 'i': case 'I':
6582     case 'j': case 'J':
6583     case 'k': case 'K':
6584     case 'l': case 'L':
6585     case 'm': case 'M':
6586     case 'n': case 'N':
6587     case 'o': case 'O':
6588     case 'p': case 'P':
6589     case 'q': case 'Q':
6590     case 'r': case 'R':
6591     case 's': case 'S':
6592     case 't': case 'T':
6593     case 'u': case 'U':
6594               case 'V':
6595     case 'w': case 'W':
6596               case 'X':
6597     case 'y': case 'Y':
6598     case 'z': case 'Z':
6599
6600       keylookup: {
6601         bool anydelim;
6602         I32 tmp;
6603
6604         orig_keyword = 0;
6605         gv = NULL;
6606         gvp = NULL;
6607
6608         PL_bufptr = s;
6609         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6610
6611         /* Some keywords can be followed by any delimiter, including ':' */
6612         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6613
6614         /* x::* is just a word, unless x is "CORE" */
6615         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6616             goto just_a_word;
6617
6618         d = s;
6619         while (d < PL_bufend && isSPACE(*d))
6620                 d++;    /* no comments skipped here, or s### is misparsed */
6621
6622         /* Is this a word before a => operator? */
6623         if (*d == '=' && d[1] == '>') {
6624             CLINE;
6625             pl_yylval.opval
6626                 = (OP*)newSVOP(OP_CONST, 0,
6627                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6628             pl_yylval.opval->op_private = OPpCONST_BARE;
6629             TERM(WORD);
6630         }
6631
6632         /* Check for plugged-in keyword */
6633         {
6634             OP *o;
6635             int result;
6636             char *saved_bufptr = PL_bufptr;
6637             PL_bufptr = s;
6638             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6639             s = PL_bufptr;
6640             if (result == KEYWORD_PLUGIN_DECLINE) {
6641                 /* not a plugged-in keyword */
6642                 PL_bufptr = saved_bufptr;
6643             } else if (result == KEYWORD_PLUGIN_STMT) {
6644                 pl_yylval.opval = o;
6645                 CLINE;
6646                 PL_expect = XSTATE;
6647                 return REPORT(PLUGSTMT);
6648             } else if (result == KEYWORD_PLUGIN_EXPR) {
6649                 pl_yylval.opval = o;
6650                 CLINE;
6651                 PL_expect = XOPERATOR;
6652                 return REPORT(PLUGEXPR);
6653             } else {
6654                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6655                                         PL_tokenbuf);
6656             }
6657         }
6658
6659         /* Check for built-in keyword */
6660         tmp = keyword(PL_tokenbuf, len, 0);
6661
6662         /* Is this a label? */
6663         if (!anydelim && PL_expect == XSTATE
6664               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6665             s = d + 1;
6666             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6667                                             newSVpvn_flags(PL_tokenbuf,
6668                                                         len, UTF ? SVf_UTF8 : 0));
6669             CLINE;
6670             TOKEN(LABEL);
6671         }
6672
6673         if (tmp < 0) {                  /* second-class keyword? */
6674             GV *ogv = NULL;     /* override (winner) */
6675             GV *hgv = NULL;     /* hidden (loser) */
6676             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6677                 CV *cv;
6678                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6679                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6680                     (cv = GvCVu(gv)))
6681                 {
6682                     if (GvIMPORTED_CV(gv))
6683                         ogv = gv;
6684                     else if (! CvMETHOD(cv))
6685                         hgv = gv;
6686                 }
6687                 if (!ogv &&
6688                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6689                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6690                     (gv = *gvp) && isGV_with_GP(gv) &&
6691                     GvCVu(gv) && GvIMPORTED_CV(gv))
6692                 {
6693                     ogv = gv;
6694                 }
6695             }
6696             if (ogv) {
6697                 orig_keyword = tmp;
6698                 tmp = 0;                /* overridden by import or by GLOBAL */
6699             }
6700             else if (gv && !gvp
6701                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6702                      && GvCVu(gv))
6703             {
6704                 tmp = 0;                /* any sub overrides "weak" keyword */
6705             }
6706             else {                      /* no override */
6707                 tmp = -tmp;
6708                 if (tmp == KEY_dump) {
6709                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6710                                    "dump() better written as CORE::dump()");
6711                 }
6712                 gv = NULL;
6713                 gvp = 0;
6714                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6715                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6716                                    "Ambiguous call resolved as CORE::%s(), "
6717                                    "qualify as such or use &",
6718                                    GvENAME(hgv));
6719             }
6720         }
6721
6722       reserved_word:
6723         switch (tmp) {
6724
6725         default:                        /* not a keyword */
6726             /* Trade off - by using this evil construction we can pull the
6727                variable gv into the block labelled keylookup. If not, then
6728                we have to give it function scope so that the goto from the
6729                earlier ':' case doesn't bypass the initialisation.  */
6730             if (0) {
6731             just_a_word_zero_gv:
6732                 gv = NULL;
6733                 gvp = NULL;
6734                 orig_keyword = 0;
6735             }
6736           just_a_word: {
6737                 SV *sv;
6738                 int pkgname = 0;
6739                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6740                 OP *rv2cv_op;
6741                 CV *cv;
6742 #ifdef PERL_MAD
6743                 SV *nextPL_nextwhite = 0;
6744 #endif
6745
6746
6747                 /* Get the rest if it looks like a package qualifier */
6748
6749                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6750                     STRLEN morelen;
6751                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6752                                   TRUE, &morelen);
6753                     if (!morelen)
6754                         Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6755                                         SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6756                                             (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6757                                 *s == '\'' ? "'" : "::");
6758                     len += morelen;
6759                     pkgname = 1;
6760                 }
6761
6762                 if (PL_expect == XOPERATOR) {
6763                     if (PL_bufptr == PL_linestart) {
6764                         CopLINE_dec(PL_curcop);
6765                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6766                         CopLINE_inc(PL_curcop);
6767                     }
6768                     else
6769                         no_op("Bareword",s);
6770                 }
6771
6772                 /* Look for a subroutine with this name in current package,
6773                    unless name is "Foo::", in which case Foo is a bareword
6774                    (and a package name). */
6775
6776                 if (len > 2 && !PL_madskills &&
6777                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6778                 {
6779                     if (ckWARN(WARN_BAREWORD)
6780                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6781                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6782                             "Bareword \"%"SVf"\" refers to nonexistent package",
6783                              SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6784                                         (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6785                     len -= 2;
6786                     PL_tokenbuf[len] = '\0';
6787                     gv = NULL;
6788                     gvp = 0;
6789                 }
6790                 else {
6791                     if (!gv) {
6792                         /* Mustn't actually add anything to a symbol table.
6793                            But also don't want to "initialise" any placeholder
6794                            constants that might already be there into full
6795                            blown PVGVs with attached PVCV.  */
6796                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6797                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6798                                                SVt_PVCV);
6799                     }
6800                     len = 0;
6801                 }
6802
6803                 /* if we saw a global override before, get the right name */
6804
6805                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6806                     len ? len : strlen(PL_tokenbuf));
6807                 if (gvp) {
6808                     SV * const tmp_sv = sv;
6809                     sv = newSVpvs("CORE::GLOBAL::");
6810                     sv_catsv(sv, tmp_sv);
6811                     SvREFCNT_dec(tmp_sv);
6812                 }
6813
6814 #ifdef PERL_MAD
6815                 if (PL_madskills && !PL_thistoken) {
6816                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6817                     PL_thistoken = newSVpvn(start,s - start);
6818                     PL_realtokenstart = s - SvPVX(PL_linestr);
6819                 }
6820 #endif
6821
6822                 /* Presume this is going to be a bareword of some sort. */
6823                 CLINE;
6824                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6825                 pl_yylval.opval->op_private = OPpCONST_BARE;
6826
6827                 /* And if "Foo::", then that's what it certainly is. */
6828                 if (len)
6829                     goto safe_bareword;
6830
6831                 {
6832                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6833                     const_op->op_private = OPpCONST_BARE;
6834                     rv2cv_op = newCVREF(0, const_op);
6835                 }
6836                 cv = rv2cv_op_cv(rv2cv_op, 0);
6837
6838                 /* See if it's the indirect object for a list operator. */
6839
6840                 if (PL_oldoldbufptr &&
6841                     PL_oldoldbufptr < PL_bufptr &&
6842                     (PL_oldoldbufptr == PL_last_lop
6843                      || PL_oldoldbufptr == PL_last_uni) &&
6844                     /* NO SKIPSPACE BEFORE HERE! */
6845                     (PL_expect == XREF ||
6846                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6847                 {
6848                     bool immediate_paren = *s == '(';
6849
6850                     /* (Now we can afford to cross potential line boundary.) */
6851                     s = SKIPSPACE2(s,nextPL_nextwhite);
6852 #ifdef PERL_MAD
6853                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6854 #endif
6855
6856                     /* Two barewords in a row may indicate method call. */
6857
6858                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6859                         (tmp = intuit_method(s, gv, cv))) {
6860                         op_free(rv2cv_op);
6861                         if (tmp == METHOD && !PL_lex_allbrackets &&
6862                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6863                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6864                         return REPORT(tmp);
6865                     }
6866
6867                     /* If not a declared subroutine, it's an indirect object. */
6868                     /* (But it's an indir obj regardless for sort.) */
6869                     /* Also, if "_" follows a filetest operator, it's a bareword */
6870
6871                     if (
6872                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6873                          (!cv &&
6874                         (PL_last_lop_op != OP_MAPSTART &&
6875                          PL_last_lop_op != OP_GREPSTART))))
6876                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6877                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6878                        )
6879                     {
6880                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6881                         goto bareword;
6882                     }
6883                 }
6884
6885                 PL_expect = XOPERATOR;
6886 #ifdef PERL_MAD
6887                 if (isSPACE(*s))
6888                     s = SKIPSPACE2(s,nextPL_nextwhite);
6889                 PL_nextwhite = nextPL_nextwhite;
6890 #else
6891                 s = skipspace(s);
6892 #endif
6893
6894                 /* Is this a word before a => operator? */
6895                 if (*s == '=' && s[1] == '>' && !pkgname) {
6896                     op_free(rv2cv_op);
6897                     CLINE;
6898                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6899                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6900                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6901                     TERM(WORD);
6902                 }
6903
6904                 /* If followed by a paren, it's certainly a subroutine. */
6905                 if (*s == '(') {
6906                     CLINE;
6907                     if (cv) {
6908                         d = s + 1;
6909                         while (SPACE_OR_TAB(*d))
6910                             d++;
6911                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6912                             s = d + 1;
6913                             goto its_constant;
6914                         }
6915                     }
6916 #ifdef PERL_MAD
6917                     if (PL_madskills) {
6918                         PL_nextwhite = PL_thiswhite;
6919                         PL_thiswhite = 0;
6920                     }
6921                     start_force(PL_curforce);
6922 #endif
6923                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6924                     PL_expect = XOPERATOR;
6925 #ifdef PERL_MAD
6926                     if (PL_madskills) {
6927                         PL_nextwhite = nextPL_nextwhite;
6928                         curmad('X', PL_thistoken);
6929                         PL_thistoken = newSVpvs("");
6930                     }
6931 #endif
6932                     op_free(rv2cv_op);
6933                     force_next(WORD);
6934                     pl_yylval.ival = 0;
6935                     TOKEN('&');
6936                 }
6937
6938                 /* If followed by var or block, call it a method (unless sub) */
6939
6940                 if ((*s == '$' || *s == '{') && !cv) {
6941                     op_free(rv2cv_op);
6942                     PL_last_lop = PL_oldbufptr;
6943                     PL_last_lop_op = OP_METHOD;
6944                     if (!PL_lex_allbrackets &&
6945                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6946                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6947                     PREBLOCK(METHOD);
6948                 }
6949
6950                 /* If followed by a bareword, see if it looks like indir obj. */
6951
6952                 if (!orig_keyword
6953                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6954                         && (tmp = intuit_method(s, gv, cv))) {
6955                     op_free(rv2cv_op);
6956                     if (tmp == METHOD && !PL_lex_allbrackets &&
6957                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6958                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6959                     return REPORT(tmp);
6960                 }
6961
6962                 /* Not a method, so call it a subroutine (if defined) */
6963
6964                 if (cv) {
6965                     if (lastchar == '-') {
6966                         const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
6967                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6968                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
6969                                 SVfARG(tmpsv), SVfARG(tmpsv));
6970                     }
6971                     /* Check for a constant sub */
6972                     if ((sv = cv_const_sv(cv))) {
6973                   its_constant:
6974                         op_free(rv2cv_op);
6975                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6976                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6977                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
6978                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6979                         TOKEN(WORD);
6980                     }
6981
6982                     op_free(pl_yylval.opval);
6983                     pl_yylval.opval = rv2cv_op;
6984                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6985                     PL_last_lop = PL_oldbufptr;
6986                     PL_last_lop_op = OP_ENTERSUB;
6987                     /* Is there a prototype? */
6988                     if (
6989 #ifdef PERL_MAD
6990                         cv &&
6991 #endif
6992                         SvPOK(cv))
6993                     {
6994                         STRLEN protolen = CvPROTOLEN(cv);
6995                         const char *proto = CvPROTO(cv);
6996                         bool optional;
6997                         if (!protolen)
6998                             TERM(FUNC0SUB);
6999                         if ((optional = *proto == ';'))
7000                           do
7001                             proto++;
7002                           while (*proto == ';');
7003                         if (
7004                             (
7005                                 (
7006                                     *proto == '$' || *proto == '_'
7007                                  || *proto == '*' || *proto == '+'
7008                                 )
7009                              && proto[1] == '\0'
7010                             )
7011                          || (
7012                              *proto == '\\' && proto[1] && proto[2] == '\0'
7013                             )
7014                         )
7015                             UNIPROTO(UNIOPSUB,optional);
7016                         if (*proto == '\\' && proto[1] == '[') {
7017                             const char *p = proto + 2;
7018                             while(*p && *p != ']')
7019                                 ++p;
7020                             if(*p == ']' && !p[1])
7021                                 UNIPROTO(UNIOPSUB,optional);
7022                         }
7023                         if (*proto == '&' && *s == '{') {
7024                             if (PL_curstash)
7025                                 sv_setpvs(PL_subname, "__ANON__");
7026                             else
7027                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7028                             if (!PL_lex_allbrackets &&
7029                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7030                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7031                             PREBLOCK(LSTOPSUB);
7032                         }
7033                     }
7034 #ifdef PERL_MAD
7035                     {
7036                         if (PL_madskills) {
7037                             PL_nextwhite = PL_thiswhite;
7038                             PL_thiswhite = 0;
7039                         }
7040                         start_force(PL_curforce);
7041                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7042                         PL_expect = XTERM;
7043                         if (PL_madskills) {
7044                             PL_nextwhite = nextPL_nextwhite;
7045                             curmad('X', PL_thistoken);
7046                             PL_thistoken = newSVpvs("");
7047                         }
7048                         force_next(WORD);
7049                         if (!PL_lex_allbrackets &&
7050                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7051                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7052                         TOKEN(NOAMP);
7053                     }
7054                 }
7055
7056                 /* Guess harder when madskills require "best effort". */
7057                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7058                     int probable_sub = 0;
7059                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7060                         probable_sub = 1;
7061                     else if (isALPHA(*s)) {
7062                         char tmpbuf[1024];
7063                         STRLEN tmplen;
7064                         d = s;
7065                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7066                         if (!keyword(tmpbuf, tmplen, 0))
7067                             probable_sub = 1;
7068                         else {
7069                             while (d < PL_bufend && isSPACE(*d))
7070                                 d++;
7071                             if (*d == '=' && d[1] == '>')
7072                                 probable_sub = 1;
7073                         }
7074                     }
7075                     if (probable_sub) {
7076                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7077                                         SVt_PVCV);
7078                         op_free(pl_yylval.opval);
7079                         pl_yylval.opval = rv2cv_op;
7080                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7081                         PL_last_lop = PL_oldbufptr;
7082                         PL_last_lop_op = OP_ENTERSUB;
7083                         PL_nextwhite = PL_thiswhite;
7084                         PL_thiswhite = 0;
7085                         start_force(PL_curforce);
7086                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7087                         PL_expect = XTERM;
7088                         PL_nextwhite = nextPL_nextwhite;
7089                         curmad('X', PL_thistoken);
7090                         PL_thistoken = newSVpvs("");
7091                         force_next(WORD);
7092                         if (!PL_lex_allbrackets &&
7093                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7094                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7095                         TOKEN(NOAMP);
7096                     }
7097 #else
7098                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7099                     PL_expect = XTERM;
7100                     force_next(WORD);
7101                     if (!PL_lex_allbrackets &&
7102                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7103                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7104                     TOKEN(NOAMP);
7105 #endif
7106                 }
7107
7108                 /* Call it a bare word */
7109
7110                 if (PL_hints & HINT_STRICT_SUBS)
7111                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7112                 else {
7113                 bareword:
7114                     /* after "print" and similar functions (corresponding to
7115                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7116                      * a filehandle should be subject to "strict subs".
7117                      * Likewise for the optional indirect-object argument to system
7118                      * or exec, which can't be a bareword */
7119                     if ((PL_last_lop_op == OP_PRINT
7120                             || PL_last_lop_op == OP_PRTF
7121                             || PL_last_lop_op == OP_SAY
7122                             || PL_last_lop_op == OP_SYSTEM
7123                             || PL_last_lop_op == OP_EXEC)
7124                             && (PL_hints & HINT_STRICT_SUBS))
7125                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7126                     if (lastchar != '-') {
7127                         if (ckWARN(WARN_RESERVED)) {
7128                             d = PL_tokenbuf;
7129                             while (isLOWER(*d))
7130                                 d++;
7131                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7132                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7133                                        PL_tokenbuf);
7134                         }
7135                     }
7136                 }
7137                 op_free(rv2cv_op);
7138
7139             safe_bareword:
7140                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7141                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7142                                      "Operator or semicolon missing before %c%"SVf,
7143                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7144                                                     strlen(PL_tokenbuf),
7145                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7146                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7147                                      "Ambiguous use of %c resolved as operator %c",
7148                                      lastchar, lastchar);
7149                 }
7150                 TOKEN(WORD);
7151             }
7152
7153         case KEY___FILE__:
7154             FUN0OP(
7155                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7156             );
7157
7158         case KEY___LINE__:
7159             FUN0OP(
7160                 (OP*)newSVOP(OP_CONST, 0,
7161                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7162             );
7163
7164         case KEY___PACKAGE__:
7165             FUN0OP(
7166                 (OP*)newSVOP(OP_CONST, 0,
7167                                         (PL_curstash
7168                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7169                                          : &PL_sv_undef))
7170             );
7171
7172         case KEY___DATA__:
7173         case KEY___END__: {
7174             GV *gv;
7175             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7176                 const char *pname = "main";
7177                 STRLEN plen = 4;
7178                 U32 putf8 = 0;
7179                 if (PL_tokenbuf[2] == 'D')
7180                 {
7181                     HV * const stash =
7182                         PL_curstash ? PL_curstash : PL_defstash;
7183                     pname = HvNAME_get(stash);
7184                     plen  = HvNAMELEN (stash);
7185                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7186                 }
7187                 gv = gv_fetchpvn_flags(
7188                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7189                         plen+6, GV_ADD|putf8, SVt_PVIO
7190                 );
7191                 GvMULTI_on(gv);
7192                 if (!GvIO(gv))
7193                     GvIOp(gv) = newIO();
7194                 IoIFP(GvIOp(gv)) = PL_rsfp;
7195 #if defined(HAS_FCNTL) && defined(F_SETFD)
7196                 {
7197                     const int fd = PerlIO_fileno(PL_rsfp);
7198                     fcntl(fd,F_SETFD,fd >= 3);
7199                 }
7200 #endif
7201                 /* Mark this internal pseudo-handle as clean */
7202                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7203                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7204                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7205                 else
7206                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7207 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7208                 /* if the script was opened in binmode, we need to revert
7209                  * it to text mode for compatibility; but only iff it has CRs
7210                  * XXX this is a questionable hack at best. */
7211                 if (PL_bufend-PL_bufptr > 2
7212                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7213                 {
7214                     Off_t loc = 0;
7215                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7216                         loc = PerlIO_tell(PL_rsfp);
7217                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7218                     }
7219 #ifdef NETWARE
7220                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7221 #else
7222                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7223 #endif  /* NETWARE */
7224                         if (loc > 0)
7225                             PerlIO_seek(PL_rsfp, loc, 0);
7226                     }
7227                 }
7228 #endif
7229 #ifdef PERLIO_LAYERS
7230                 if (!IN_BYTES) {
7231                     if (UTF)
7232                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7233                     else if (PL_encoding) {
7234                         SV *name;
7235                         dSP;
7236                         ENTER;
7237                         SAVETMPS;
7238                         PUSHMARK(sp);
7239                         EXTEND(SP, 1);
7240                         XPUSHs(PL_encoding);
7241                         PUTBACK;
7242                         call_method("name", G_SCALAR);
7243                         SPAGAIN;
7244                         name = POPs;
7245                         PUTBACK;
7246                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7247                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7248                                                       SVfARG(name)));
7249                         FREETMPS;
7250                         LEAVE;
7251                     }
7252                 }
7253 #endif
7254 #ifdef PERL_MAD
7255                 if (PL_madskills) {
7256                     if (PL_realtokenstart >= 0) {
7257                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7258                         if (!PL_endwhite)
7259                             PL_endwhite = newSVpvs("");
7260                         sv_catsv(PL_endwhite, PL_thiswhite);
7261                         PL_thiswhite = 0;
7262                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7263                         PL_realtokenstart = -1;
7264                     }
7265                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7266                            != NULL) ;
7267                 }
7268 #endif
7269                 PL_rsfp = NULL;
7270             }
7271             goto fake_eof;
7272         }
7273
7274         case KEY___SUB__:
7275             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7276
7277         case KEY_AUTOLOAD:
7278         case KEY_DESTROY:
7279         case KEY_BEGIN:
7280         case KEY_UNITCHECK:
7281         case KEY_CHECK:
7282         case KEY_INIT:
7283         case KEY_END:
7284             if (PL_expect == XSTATE) {
7285                 s = PL_bufptr;
7286                 goto really_sub;
7287             }
7288             goto just_a_word;
7289
7290         case KEY_CORE:
7291             if (*s == ':' && s[1] == ':') {
7292                 STRLEN olen = len;
7293                 d = s;
7294                 s += 2;
7295                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7296                 if ((*s == ':' && s[1] == ':')
7297                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7298                 {
7299                     s = d;
7300                     len = olen;
7301                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7302                     goto just_a_word;
7303                 }
7304                 if (!tmp)
7305                     Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7306                                     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7307                                                 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7308                 if (tmp < 0)
7309                     tmp = -tmp;
7310                 else if (tmp == KEY_require || tmp == KEY_do
7311                       || tmp == KEY_glob)
7312                     /* that's a way to remember we saw "CORE::" */
7313                     orig_keyword = tmp;
7314                 goto reserved_word;
7315             }
7316             goto just_a_word;
7317
7318         case KEY_abs:
7319             UNI(OP_ABS);
7320
7321         case KEY_alarm:
7322             UNI(OP_ALARM);
7323
7324         case KEY_accept:
7325             LOP(OP_ACCEPT,XTERM);
7326
7327         case KEY_and:
7328             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7329                 return REPORT(0);
7330             OPERATOR(ANDOP);
7331
7332         case KEY_atan2:
7333             LOP(OP_ATAN2,XTERM);
7334
7335         case KEY_bind:
7336             LOP(OP_BIND,XTERM);
7337
7338         case KEY_binmode:
7339             LOP(OP_BINMODE,XTERM);
7340
7341         case KEY_bless:
7342             LOP(OP_BLESS,XTERM);
7343
7344         case KEY_break:
7345             FUN0(OP_BREAK);
7346
7347         case KEY_chop:
7348             UNI(OP_CHOP);
7349
7350         case KEY_continue:
7351                     /* We have to disambiguate the two senses of
7352                       "continue". If the next token is a '{' then
7353                       treat it as the start of a continue block;
7354                       otherwise treat it as a control operator.
7355                      */
7356                     s = skipspace(s);
7357                     if (*s == '{')
7358             PREBLOCK(CONTINUE);
7359                     else
7360                         FUN0(OP_CONTINUE);
7361
7362         case KEY_chdir:
7363             /* may use HOME */
7364             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7365             UNI(OP_CHDIR);
7366
7367         case KEY_close:
7368             UNI(OP_CLOSE);
7369
7370         case KEY_closedir:
7371             UNI(OP_CLOSEDIR);
7372
7373         case KEY_cmp:
7374             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7375                 return REPORT(0);
7376             Eop(OP_SCMP);
7377
7378         case KEY_caller:
7379             UNI(OP_CALLER);
7380
7381         case KEY_crypt:
7382 #ifdef FCRYPT
7383             if (!PL_cryptseen) {
7384                 PL_cryptseen = TRUE;
7385                 init_des();
7386             }
7387 #endif
7388             LOP(OP_CRYPT,XTERM);
7389
7390         case KEY_chmod:
7391             LOP(OP_CHMOD,XTERM);
7392
7393         case KEY_chown:
7394             LOP(OP_CHOWN,XTERM);
7395
7396         case KEY_connect:
7397             LOP(OP_CONNECT,XTERM);
7398
7399         case KEY_chr:
7400             UNI(OP_CHR);
7401
7402         case KEY_cos:
7403             UNI(OP_COS);
7404
7405         case KEY_chroot:
7406             UNI(OP_CHROOT);
7407
7408         case KEY_default:
7409             PREBLOCK(DEFAULT);
7410
7411         case KEY_do:
7412             s = SKIPSPACE1(s);
7413             if (*s == '{')
7414                 PRETERMBLOCK(DO);
7415             if (*s != '\'') {
7416                 *PL_tokenbuf = '&';
7417                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7418                               1, &len);
7419                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7420                     d = SKIPSPACE1(d);
7421                     if (*d == '(') {
7422                         force_ident_maybe_lex('&');
7423                         s = d;
7424                     }
7425                 }
7426             }
7427             if (orig_keyword == KEY_do) {
7428                 orig_keyword = 0;
7429                 pl_yylval.ival = 1;
7430             }
7431             else
7432                 pl_yylval.ival = 0;
7433             OPERATOR(DO);
7434
7435         case KEY_die:
7436             PL_hints |= HINT_BLOCK_SCOPE;
7437             LOP(OP_DIE,XTERM);
7438
7439         case KEY_defined:
7440             UNI(OP_DEFINED);
7441
7442         case KEY_delete:
7443             UNI(OP_DELETE);
7444
7445         case KEY_dbmopen:
7446             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7447                               STR_WITH_LEN("NDBM_File::"),
7448                               STR_WITH_LEN("DB_File::"),
7449                               STR_WITH_LEN("GDBM_File::"),
7450                               STR_WITH_LEN("SDBM_File::"),
7451                               STR_WITH_LEN("ODBM_File::"),
7452                               NULL);
7453             LOP(OP_DBMOPEN,XTERM);
7454
7455         case KEY_dbmclose:
7456             UNI(OP_DBMCLOSE);
7457
7458         case KEY_dump:
7459             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7460             LOOPX(OP_DUMP);
7461
7462         case KEY_else:
7463             PREBLOCK(ELSE);
7464
7465         case KEY_elsif:
7466             pl_yylval.ival = CopLINE(PL_curcop);
7467             OPERATOR(ELSIF);
7468
7469         case KEY_eq:
7470             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7471                 return REPORT(0);
7472             Eop(OP_SEQ);
7473
7474         case KEY_exists:
7475             UNI(OP_EXISTS);
7476
7477         case KEY_exit:
7478             if (PL_madskills)
7479                 UNI(OP_INT);
7480             UNI(OP_EXIT);
7481
7482         case KEY_eval:
7483             s = SKIPSPACE1(s);
7484             if (*s == '{') { /* block eval */
7485                 PL_expect = XTERMBLOCK;
7486                 UNIBRACK(OP_ENTERTRY);
7487             }
7488             else { /* string eval */
7489                 PL_expect = XTERM;
7490                 UNIBRACK(OP_ENTEREVAL);
7491             }
7492
7493         case KEY_evalbytes:
7494             PL_expect = XTERM;
7495             UNIBRACK(-OP_ENTEREVAL);
7496
7497         case KEY_eof:
7498             UNI(OP_EOF);
7499
7500         case KEY_exp:
7501             UNI(OP_EXP);
7502
7503         case KEY_each:
7504             UNI(OP_EACH);
7505
7506         case KEY_exec:
7507             LOP(OP_EXEC,XREF);
7508
7509         case KEY_endhostent:
7510             FUN0(OP_EHOSTENT);
7511
7512         case KEY_endnetent:
7513             FUN0(OP_ENETENT);
7514
7515         case KEY_endservent:
7516             FUN0(OP_ESERVENT);
7517
7518         case KEY_endprotoent:
7519             FUN0(OP_EPROTOENT);
7520
7521         case KEY_endpwent:
7522             FUN0(OP_EPWENT);
7523
7524         case KEY_endgrent:
7525             FUN0(OP_EGRENT);
7526
7527         case KEY_for:
7528         case KEY_foreach:
7529             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7530                 return REPORT(0);
7531             pl_yylval.ival = CopLINE(PL_curcop);
7532             s = SKIPSPACE1(s);
7533             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7534                 char *p = s;
7535 #ifdef PERL_MAD
7536                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7537 #endif
7538
7539                 if ((PL_bufend - p) >= 3 &&
7540                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7541                     p += 2;
7542                 else if ((PL_bufend - p) >= 4 &&
7543                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7544                     p += 3;
7545                 p = PEEKSPACE(p);
7546                 if (isIDFIRST_lazy_if(p,UTF)) {
7547                     p = scan_ident(p, PL_bufend,
7548                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7549                     p = PEEKSPACE(p);
7550                 }
7551                 if (*p != '$')
7552                     Perl_croak(aTHX_ "Missing $ on loop variable");
7553 #ifdef PERL_MAD
7554                 s = SvPVX(PL_linestr) + soff;
7555 #endif
7556             }
7557             OPERATOR(FOR);
7558
7559         case KEY_formline:
7560             LOP(OP_FORMLINE,XTERM);
7561
7562         case KEY_fork:
7563             FUN0(OP_FORK);
7564
7565         case KEY_fc:
7566             UNI(OP_FC);
7567
7568         case KEY_fcntl:
7569             LOP(OP_FCNTL,XTERM);
7570
7571         case KEY_fileno:
7572             UNI(OP_FILENO);
7573
7574         case KEY_flock:
7575             LOP(OP_FLOCK,XTERM);
7576
7577         case KEY_gt:
7578             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7579                 return REPORT(0);
7580             Rop(OP_SGT);
7581
7582         case KEY_ge:
7583             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7584                 return REPORT(0);
7585             Rop(OP_SGE);
7586
7587         case KEY_grep:
7588             LOP(OP_GREPSTART, XREF);
7589
7590         case KEY_goto:
7591             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7592             LOOPX(OP_GOTO);
7593
7594         case KEY_gmtime:
7595             UNI(OP_GMTIME);
7596
7597         case KEY_getc:
7598             UNIDOR(OP_GETC);
7599
7600         case KEY_getppid:
7601             FUN0(OP_GETPPID);
7602
7603         case KEY_getpgrp:
7604             UNI(OP_GETPGRP);
7605
7606         case KEY_getpriority:
7607             LOP(OP_GETPRIORITY,XTERM);
7608
7609         case KEY_getprotobyname:
7610             UNI(OP_GPBYNAME);
7611
7612         case KEY_getprotobynumber:
7613             LOP(OP_GPBYNUMBER,XTERM);
7614
7615         case KEY_getprotoent:
7616             FUN0(OP_GPROTOENT);
7617
7618         case KEY_getpwent:
7619             FUN0(OP_GPWENT);
7620
7621         case KEY_getpwnam:
7622             UNI(OP_GPWNAM);
7623
7624         case KEY_getpwuid:
7625             UNI(OP_GPWUID);
7626
7627         case KEY_getpeername:
7628             UNI(OP_GETPEERNAME);
7629
7630         case KEY_gethostbyname:
7631             UNI(OP_GHBYNAME);
7632
7633         case KEY_gethostbyaddr:
7634             LOP(OP_GHBYADDR,XTERM);
7635
7636         case KEY_gethostent:
7637             FUN0(OP_GHOSTENT);
7638
7639         case KEY_getnetbyname:
7640             UNI(OP_GNBYNAME);
7641
7642         case KEY_getnetbyaddr:
7643             LOP(OP_GNBYADDR,XTERM);
7644
7645         case KEY_getnetent:
7646             FUN0(OP_GNETENT);
7647
7648         case KEY_getservbyname:
7649             LOP(OP_GSBYNAME,XTERM);
7650
7651         case KEY_getservbyport:
7652             LOP(OP_GSBYPORT,XTERM);
7653
7654         case KEY_getservent:
7655             FUN0(OP_GSERVENT);
7656
7657         case KEY_getsockname:
7658             UNI(OP_GETSOCKNAME);
7659
7660         case KEY_getsockopt:
7661             LOP(OP_GSOCKOPT,XTERM);
7662
7663         case KEY_getgrent:
7664             FUN0(OP_GGRENT);
7665
7666         case KEY_getgrnam:
7667             UNI(OP_GGRNAM);
7668
7669         case KEY_getgrgid:
7670             UNI(OP_GGRGID);
7671
7672         case KEY_getlogin:
7673             FUN0(OP_GETLOGIN);
7674
7675         case KEY_given:
7676             pl_yylval.ival = CopLINE(PL_curcop);
7677             OPERATOR(GIVEN);
7678
7679         case KEY_glob:
7680             LOP(
7681              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7682              XTERM
7683             );
7684
7685         case KEY_hex:
7686             UNI(OP_HEX);
7687
7688         case KEY_if:
7689             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7690                 return REPORT(0);
7691             pl_yylval.ival = CopLINE(PL_curcop);
7692             OPERATOR(IF);
7693
7694         case KEY_index:
7695             LOP(OP_INDEX,XTERM);
7696
7697         case KEY_int:
7698             UNI(OP_INT);
7699
7700         case KEY_ioctl:
7701             LOP(OP_IOCTL,XTERM);
7702
7703         case KEY_join:
7704             LOP(OP_JOIN,XTERM);
7705
7706         case KEY_keys:
7707             UNI(OP_KEYS);
7708
7709         case KEY_kill:
7710             LOP(OP_KILL,XTERM);
7711
7712         case KEY_last:
7713             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7714             LOOPX(OP_LAST);
7715
7716         case KEY_lc:
7717             UNI(OP_LC);
7718
7719         case KEY_lcfirst:
7720             UNI(OP_LCFIRST);
7721
7722         case KEY_local:
7723             pl_yylval.ival = 0;
7724             OPERATOR(LOCAL);
7725
7726         case KEY_length:
7727             UNI(OP_LENGTH);
7728
7729         case KEY_lt:
7730             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7731                 return REPORT(0);
7732             Rop(OP_SLT);
7733
7734         case KEY_le:
7735             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7736                 return REPORT(0);
7737             Rop(OP_SLE);
7738
7739         case KEY_localtime:
7740             UNI(OP_LOCALTIME);
7741
7742         case KEY_log:
7743             UNI(OP_LOG);
7744
7745         case KEY_link:
7746             LOP(OP_LINK,XTERM);
7747
7748         case KEY_listen:
7749             LOP(OP_LISTEN,XTERM);
7750
7751         case KEY_lock:
7752             UNI(OP_LOCK);
7753
7754         case KEY_lstat:
7755             UNI(OP_LSTAT);
7756
7757         case KEY_m:
7758             s = scan_pat(s,OP_MATCH);
7759             TERM(sublex_start());
7760
7761         case KEY_map:
7762             LOP(OP_MAPSTART, XREF);
7763
7764         case KEY_mkdir:
7765             LOP(OP_MKDIR,XTERM);
7766
7767         case KEY_msgctl:
7768             LOP(OP_MSGCTL,XTERM);
7769
7770         case KEY_msgget:
7771             LOP(OP_MSGGET,XTERM);
7772
7773         case KEY_msgrcv:
7774             LOP(OP_MSGRCV,XTERM);
7775
7776         case KEY_msgsnd:
7777             LOP(OP_MSGSND,XTERM);
7778
7779         case KEY_our:
7780         case KEY_my:
7781         case KEY_state:
7782             PL_in_my = (U16)tmp;
7783             s = SKIPSPACE1(s);
7784             if (isIDFIRST_lazy_if(s,UTF)) {
7785 #ifdef PERL_MAD
7786                 char* start = s;
7787 #endif
7788                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7789                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7790                     goto really_sub;
7791                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7792                 if (!PL_in_my_stash) {
7793                     char tmpbuf[1024];
7794                     PL_bufptr = s;
7795                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7796                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7797                 }
7798 #ifdef PERL_MAD
7799                 if (PL_madskills) {     /* just add type to declarator token */
7800                     sv_catsv(PL_thistoken, PL_nextwhite);
7801                     PL_nextwhite = 0;
7802                     sv_catpvn(PL_thistoken, start, s - start);
7803                 }
7804 #endif
7805             }
7806             pl_yylval.ival = 1;
7807             OPERATOR(MY);
7808
7809         case KEY_next:
7810             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7811             LOOPX(OP_NEXT);
7812
7813         case KEY_ne:
7814             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7815                 return REPORT(0);
7816             Eop(OP_SNE);
7817
7818         case KEY_no:
7819             s = tokenize_use(0, s);
7820             TERM(USE);
7821
7822         case KEY_not:
7823             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7824                 FUN1(OP_NOT);
7825             else {
7826                 if (!PL_lex_allbrackets &&
7827                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7828                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7829                 OPERATOR(NOTOP);
7830             }
7831
7832         case KEY_open:
7833             s = SKIPSPACE1(s);
7834             if (isIDFIRST_lazy_if(s,UTF)) {
7835                 const char *t;
7836                 for (d = s; isALNUM_lazy_if(d,UTF);) {
7837                     d += UTF ? UTF8SKIP(d) : 1;
7838                     if (UTF) {
7839                         while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7840                             d += UTF ? UTF8SKIP(d) : 1;
7841                         }
7842                     }
7843                 }
7844                 for (t=d; isSPACE(*t);)
7845                     t++;
7846                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7847                     /* [perl #16184] */
7848                     && !(t[0] == '=' && t[1] == '>')
7849                     && !(t[0] == ':' && t[1] == ':')
7850                     && !keyword(s, d-s, 0)
7851                 ) {
7852                     SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7853                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7854                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7855                            "Precedence problem: open %"SVf" should be open(%"SVf")",
7856                             SVfARG(tmpsv), SVfARG(tmpsv));
7857                 }
7858             }
7859             LOP(OP_OPEN,XTERM);
7860
7861         case KEY_or:
7862             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7863                 return REPORT(0);
7864             pl_yylval.ival = OP_OR;
7865             OPERATOR(OROP);
7866
7867         case KEY_ord:
7868             UNI(OP_ORD);
7869
7870         case KEY_oct:
7871             UNI(OP_OCT);
7872
7873         case KEY_opendir:
7874             LOP(OP_OPEN_DIR,XTERM);
7875
7876         case KEY_print:
7877             checkcomma(s,PL_tokenbuf,"filehandle");
7878             LOP(OP_PRINT,XREF);
7879
7880         case KEY_printf:
7881             checkcomma(s,PL_tokenbuf,"filehandle");
7882             LOP(OP_PRTF,XREF);
7883
7884         case KEY_prototype:
7885             UNI(OP_PROTOTYPE);
7886
7887         case KEY_push:
7888             LOP(OP_PUSH,XTERM);
7889
7890         case KEY_pop:
7891             UNIDOR(OP_POP);
7892
7893         case KEY_pos:
7894             UNIDOR(OP_POS);
7895
7896         case KEY_pack:
7897             LOP(OP_PACK,XTERM);
7898
7899         case KEY_package:
7900             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7901             s = SKIPSPACE1(s);
7902             s = force_strict_version(s);
7903             PL_lex_expect = XBLOCK;
7904             OPERATOR(PACKAGE);
7905
7906         case KEY_pipe:
7907             LOP(OP_PIPE_OP,XTERM);
7908
7909         case KEY_q:
7910             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7911             if (!s)
7912                 missingterm(NULL);
7913             pl_yylval.ival = OP_CONST;
7914             TERM(sublex_start());
7915
7916         case KEY_quotemeta:
7917             UNI(OP_QUOTEMETA);
7918
7919         case KEY_qw: {
7920             OP *words = NULL;
7921             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7922             if (!s)
7923                 missingterm(NULL);
7924             PL_expect = XOPERATOR;
7925             if (SvCUR(PL_lex_stuff)) {
7926                 int warned_comma = !ckWARN(WARN_QW);
7927                 int warned_comment = warned_comma;
7928                 d = SvPV_force(PL_lex_stuff, len);
7929                 while (len) {
7930                     for (; isSPACE(*d) && len; --len, ++d)
7931                         /**/;
7932                     if (len) {
7933                         SV *sv;
7934                         const char *b = d;
7935                         if (!warned_comma || !warned_comment) {
7936                             for (; !isSPACE(*d) && len; --len, ++d) {
7937                                 if (!warned_comma && *d == ',') {
7938                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7939                                         "Possible attempt to separate words with commas");
7940                                     ++warned_comma;
7941                                 }
7942                                 else if (!warned_comment && *d == '#') {
7943                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7944                                         "Possible attempt to put comments in qw() list");
7945                                     ++warned_comment;
7946                                 }
7947                             }
7948                         }
7949                         else {
7950                             for (; !isSPACE(*d) && len; --len, ++d)
7951                                 /**/;
7952                         }
7953                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7954                         words = op_append_elem(OP_LIST, words,
7955                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7956                     }
7957                 }
7958             }
7959             if (!words)
7960                 words = newNULLLIST();
7961             if (PL_lex_stuff) {
7962                 SvREFCNT_dec(PL_lex_stuff);
7963                 PL_lex_stuff = NULL;
7964             }
7965             PL_expect = XOPERATOR;
7966             pl_yylval.opval = sawparens(words);
7967             TOKEN(QWLIST);
7968         }
7969
7970         case KEY_qq:
7971             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7972             if (!s)
7973                 missingterm(NULL);
7974             pl_yylval.ival = OP_STRINGIFY;
7975             if (SvIVX(PL_lex_stuff) == '\'')
7976                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7977             TERM(sublex_start());
7978
7979         case KEY_qr:
7980             s = scan_pat(s,OP_QR);
7981             TERM(sublex_start());
7982
7983         case KEY_qx:
7984             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7985             if (!s)
7986                 missingterm(NULL);
7987             readpipe_override();
7988             TERM(sublex_start());
7989
7990         case KEY_return:
7991             OLDLOP(OP_RETURN);
7992
7993         case KEY_require:
7994             s = SKIPSPACE1(s);
7995             if (isDIGIT(*s)) {
7996                 s = force_version(s, FALSE);
7997             }
7998             else if (*s != 'v' || !isDIGIT(s[1])
7999                     || (s = force_version(s, TRUE), *s == 'v'))
8000             {
8001                 *PL_tokenbuf = '\0';
8002                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8003                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8004                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8005                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8006                 else if (*s == '<')
8007                     yyerror("<> should be quotes");
8008             }
8009             if (orig_keyword == KEY_require) {
8010                 orig_keyword = 0;
8011                 pl_yylval.ival = 1;
8012             }
8013             else
8014                 pl_yylval.ival = 0;
8015             PL_expect = XTERM;
8016             PL_bufptr = s;
8017             PL_last_uni = PL_oldbufptr;
8018             PL_last_lop_op = OP_REQUIRE;
8019             s = skipspace(s);
8020             return REPORT( (int)REQUIRE );
8021
8022         case KEY_reset:
8023             UNI(OP_RESET);
8024
8025         case KEY_redo:
8026             s = force_word(s,WORD,TRUE,FALSE,FALSE);
8027             LOOPX(OP_REDO);
8028
8029         case KEY_rename:
8030             LOP(OP_RENAME,XTERM);
8031
8032         case KEY_rand:
8033             UNI(OP_RAND);
8034
8035         case KEY_rmdir:
8036             UNI(OP_RMDIR);
8037
8038         case KEY_rindex:
8039             LOP(OP_RINDEX,XTERM);
8040
8041         case KEY_read:
8042             LOP(OP_READ,XTERM);
8043
8044         case KEY_readdir:
8045             UNI(OP_READDIR);
8046
8047         case KEY_readline:
8048             UNIDOR(OP_READLINE);
8049
8050         case KEY_readpipe:
8051             UNIDOR(OP_BACKTICK);
8052
8053         case KEY_rewinddir:
8054             UNI(OP_REWINDDIR);
8055
8056         case KEY_recv:
8057             LOP(OP_RECV,XTERM);
8058
8059         case KEY_reverse:
8060             LOP(OP_REVERSE,XTERM);
8061
8062         case KEY_readlink:
8063             UNIDOR(OP_READLINK);
8064
8065         case KEY_ref:
8066             UNI(OP_REF);
8067
8068         case KEY_s:
8069             s = scan_subst(s);
8070             if (pl_yylval.opval)
8071                 TERM(sublex_start());
8072             else
8073                 TOKEN(1);       /* force error */
8074
8075         case KEY_say:
8076             checkcomma(s,PL_tokenbuf,"filehandle");
8077             LOP(OP_SAY,XREF);
8078
8079         case KEY_chomp:
8080             UNI(OP_CHOMP);
8081
8082         case KEY_scalar:
8083             UNI(OP_SCALAR);
8084
8085         case KEY_select:
8086             LOP(OP_SELECT,XTERM);
8087
8088         case KEY_seek:
8089             LOP(OP_SEEK,XTERM);
8090
8091         case KEY_semctl:
8092             LOP(OP_SEMCTL,XTERM);
8093
8094         case KEY_semget:
8095             LOP(OP_SEMGET,XTERM);
8096
8097         case KEY_semop:
8098             LOP(OP_SEMOP,XTERM);
8099
8100         case KEY_send:
8101             LOP(OP_SEND,XTERM);
8102
8103         case KEY_setpgrp:
8104             LOP(OP_SETPGRP,XTERM);
8105
8106         case KEY_setpriority:
8107             LOP(OP_SETPRIORITY,XTERM);
8108
8109         case KEY_sethostent:
8110             UNI(OP_SHOSTENT);
8111
8112         case KEY_setnetent:
8113             UNI(OP_SNETENT);
8114
8115         case KEY_setservent:
8116             UNI(OP_SSERVENT);
8117
8118         case KEY_setprotoent:
8119             UNI(OP_SPROTOENT);
8120
8121         case KEY_setpwent:
8122             FUN0(OP_SPWENT);
8123
8124         case KEY_setgrent:
8125             FUN0(OP_SGRENT);
8126
8127         case KEY_seekdir:
8128             LOP(OP_SEEKDIR,XTERM);
8129
8130         case KEY_setsockopt:
8131             LOP(OP_SSOCKOPT,XTERM);
8132
8133         case KEY_shift:
8134             UNIDOR(OP_SHIFT);
8135
8136         case KEY_shmctl:
8137             LOP(OP_SHMCTL,XTERM);
8138
8139         case KEY_shmget:
8140             LOP(OP_SHMGET,XTERM);
8141
8142         case KEY_shmread:
8143             LOP(OP_SHMREAD,XTERM);
8144
8145         case KEY_shmwrite:
8146             LOP(OP_SHMWRITE,XTERM);
8147
8148         case KEY_shutdown:
8149             LOP(OP_SHUTDOWN,XTERM);
8150
8151         case KEY_sin:
8152             UNI(OP_SIN);
8153
8154         case KEY_sleep:
8155             UNI(OP_SLEEP);
8156
8157         case KEY_socket:
8158             LOP(OP_SOCKET,XTERM);
8159
8160         case KEY_socketpair:
8161             LOP(OP_SOCKPAIR,XTERM);
8162
8163         case KEY_sort:
8164             checkcomma(s,PL_tokenbuf,"subroutine name");
8165             s = SKIPSPACE1(s);
8166             PL_expect = XTERM;
8167             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8168             LOP(OP_SORT,XREF);
8169
8170         case KEY_split:
8171             LOP(OP_SPLIT,XTERM);
8172
8173         case KEY_sprintf:
8174             LOP(OP_SPRINTF,XTERM);
8175
8176         case KEY_splice:
8177             LOP(OP_SPLICE,XTERM);
8178
8179         case KEY_sqrt:
8180             UNI(OP_SQRT);
8181
8182         case KEY_srand:
8183             UNI(OP_SRAND);
8184
8185         case KEY_stat:
8186             UNI(OP_STAT);
8187
8188         case KEY_study:
8189             UNI(OP_STUDY);
8190
8191         case KEY_substr:
8192             LOP(OP_SUBSTR,XTERM);
8193
8194         case KEY_format:
8195         case KEY_sub:
8196           really_sub:
8197             {
8198                 char * const tmpbuf = PL_tokenbuf + 1;
8199                 SSize_t tboffset = 0;
8200                 expectation attrful;
8201                 bool have_name, have_proto;
8202                 const int key = tmp;
8203
8204 #ifdef PERL_MAD
8205                 SV *tmpwhite = 0;
8206
8207                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8208                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8209                 PL_thistoken = 0;
8210
8211                 d = s;
8212                 s = SKIPSPACE2(s,tmpwhite);
8213 #else
8214                 d = s;
8215                 s = skipspace(s);
8216 #endif
8217
8218                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8219                     (*s == ':' && s[1] == ':'))
8220                 {
8221 #ifdef PERL_MAD
8222                     SV *nametoke = NULL;
8223 #endif
8224
8225                     PL_expect = XBLOCK;
8226                     attrful = XATTRBLOCK;
8227                     /* remember buffer pos'n for later force_word */
8228                     tboffset = s - PL_oldbufptr;
8229                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8230                                   &len);
8231 #ifdef PERL_MAD
8232                     if (PL_madskills)
8233                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8234 #endif
8235                     if (memchr(tmpbuf, ':', len) || key == KEY_our)
8236                         sv_setpvn(PL_subname, tmpbuf, len);
8237                     else {
8238                         sv_setsv(PL_subname,PL_curstname);
8239                         sv_catpvs(PL_subname,"::");
8240                         sv_catpvn(PL_subname,tmpbuf,len);
8241                     }
8242                     if (SvUTF8(PL_linestr))
8243                         SvUTF8_on(PL_subname);
8244                     have_name = TRUE;
8245
8246                     *PL_tokenbuf = '&';
8247
8248 #ifdef PERL_MAD
8249                     start_force(0);
8250                     CURMAD('X', nametoke);
8251                     CURMAD('_', tmpwhite);
8252                     force_ident_maybe_lex('&');
8253
8254                     s = SKIPSPACE2(d,tmpwhite);
8255 #else
8256                     s = skipspace(d);
8257 #endif
8258                 }
8259                 else {
8260                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8261                     {
8262                         *d = '\0';
8263                         /* diag_listed_as: Missing name in "%s sub" */
8264                         Perl_croak(aTHX_
8265                                   "Missing name in \"%s\"", PL_bufptr);
8266                     }
8267                     PL_expect = XTERMBLOCK;
8268                     attrful = XATTRTERM;
8269                     sv_setpvs(PL_subname,"?");
8270                     have_name = FALSE;
8271                 }
8272
8273                 if (key == KEY_format) {
8274 #ifdef PERL_MAD
8275                     PL_thistoken = subtoken;
8276                     s = d;
8277 #else
8278                     if (have_name)
8279                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8280                                           FALSE, TRUE, TRUE);
8281 #endif
8282                     PREBLOCK(FORMAT);
8283                 }
8284
8285                 /* Look for a prototype */
8286                 if (*s == '(') {
8287                     char *p;
8288                     bool bad_proto = FALSE;
8289                     bool in_brackets = FALSE;
8290                     char greedy_proto = ' ';
8291                     bool proto_after_greedy_proto = FALSE;
8292                     bool must_be_last = FALSE;
8293                     bool underscore = FALSE;
8294                     bool seen_underscore = FALSE;
8295                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8296                     STRLEN tmplen;
8297
8298                     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8299                     if (!s)
8300                         Perl_croak(aTHX_ "Prototype not terminated");
8301                     /* strip spaces and check for bad characters */
8302                     d = SvPV(PL_lex_stuff, tmplen);
8303                     tmp = 0;
8304                     for (p = d; tmplen; tmplen--, ++p) {
8305                         if (!isSPACE(*p)) {
8306                             d[tmp++] = *p;
8307
8308                             if (warnillegalproto) {
8309                                 if (must_be_last)
8310                                     proto_after_greedy_proto = TRUE;
8311                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8312                                     bad_proto = TRUE;
8313                                 }
8314                                 else {
8315                                     if ( underscore ) {
8316                                         if ( !strchr(";@%", *p) )
8317                                             bad_proto = TRUE;
8318                                         underscore = FALSE;
8319                                     }
8320                                     if ( *p == '[' ) {
8321                                         in_brackets = TRUE;
8322                                     }
8323                                     else if ( *p == ']' ) {
8324                                         in_brackets = FALSE;
8325                                     }
8326                                     else if ( (*p == '@' || *p == '%') &&
8327                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8328                                          !in_brackets ) {
8329                                         must_be_last = TRUE;
8330                                         greedy_proto = *p;
8331                                     }
8332                                     else if ( *p == '_' ) {
8333                                         underscore = seen_underscore = TRUE;
8334                                     }
8335                                 }
8336                             }
8337                         }
8338                     }
8339                     d[tmp] = '\0';
8340                     if (proto_after_greedy_proto)
8341                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8342                                     "Prototype after '%c' for %"SVf" : %s",
8343                                     greedy_proto, SVfARG(PL_subname), d);
8344                     if (bad_proto) {
8345                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8346                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8347                                     "Illegal character %sin prototype for %"SVf" : %s",
8348                                     seen_underscore ? "after '_' " : "",
8349                                     SVfARG(PL_subname),
8350                                     SvUTF8(PL_lex_stuff)
8351                                         ? sv_uni_display(dsv,
8352                                             newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8353                                             tmp,
8354                                             UNI_DISPLAY_ISPRINT)
8355                                         : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8356                                             PERL_PV_ESCAPE_NONASCII));
8357                     }
8358                     SvCUR_set(PL_lex_stuff, tmp);
8359                     have_proto = TRUE;
8360
8361 #ifdef PERL_MAD
8362                     start_force(0);
8363                     CURMAD('q', PL_thisopen);
8364                     CURMAD('_', tmpwhite);
8365                     CURMAD('=', PL_thisstuff);
8366                     CURMAD('Q', PL_thisclose);
8367                     NEXTVAL_NEXTTOKE.opval =
8368                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8369                     PL_lex_stuff = NULL;
8370                     force_next(THING);
8371
8372                     s = SKIPSPACE2(s,tmpwhite);
8373 #else
8374                     s = skipspace(s);
8375 #endif
8376                 }
8377                 else
8378                     have_proto = FALSE;
8379
8380                 if (*s == ':' && s[1] != ':')
8381                     PL_expect = attrful;
8382                 else if (*s != '{' && key == KEY_sub) {
8383                     if (!have_name)
8384                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8385                     else if (*s != ';' && *s != '}')
8386                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8387                 }
8388
8389 #ifdef PERL_MAD
8390                 start_force(0);
8391                 if (tmpwhite) {
8392                     if (PL_madskills)
8393                         curmad('^', newSVpvs(""));
8394                     CURMAD('_', tmpwhite);
8395                 }
8396                 force_next(0);
8397
8398                 PL_thistoken = subtoken;
8399 #else
8400                 if (have_proto) {
8401                     NEXTVAL_NEXTTOKE.opval =
8402                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8403                     PL_lex_stuff = NULL;
8404                     force_next(THING);
8405                 }
8406 #endif
8407                 if (!have_name) {
8408                     if (PL_curstash)
8409                         sv_setpvs(PL_subname, "__ANON__");
8410                     else
8411                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8412                     TOKEN(ANONSUB);
8413                 }
8414 #ifndef PERL_MAD
8415                 force_ident_maybe_lex('&');
8416 #endif
8417                 if (key == KEY_my)
8418                     TOKEN(MYSUB);
8419                 TOKEN(SUB);
8420             }
8421
8422         case KEY_system:
8423             LOP(OP_SYSTEM,XREF);
8424
8425         case KEY_symlink:
8426             LOP(OP_SYMLINK,XTERM);
8427
8428         case KEY_syscall:
8429             LOP(OP_SYSCALL,XTERM);
8430
8431         case KEY_sysopen:
8432             LOP(OP_SYSOPEN,XTERM);
8433
8434         case KEY_sysseek:
8435             LOP(OP_SYSSEEK,XTERM);
8436
8437         case KEY_sysread:
8438             LOP(OP_SYSREAD,XTERM);
8439
8440         case KEY_syswrite:
8441             LOP(OP_SYSWRITE,XTERM);
8442
8443         case KEY_tr:
8444         case KEY_y:
8445             s = scan_trans(s);
8446             TERM(sublex_start());
8447
8448         case KEY_tell:
8449             UNI(OP_TELL);
8450
8451         case KEY_telldir:
8452             UNI(OP_TELLDIR);
8453
8454         case KEY_tie:
8455             LOP(OP_TIE,XTERM);
8456
8457         case KEY_tied:
8458             UNI(OP_TIED);
8459
8460         case KEY_time:
8461             FUN0(OP_TIME);
8462
8463         case KEY_times:
8464             FUN0(OP_TMS);
8465
8466         case KEY_truncate:
8467             LOP(OP_TRUNCATE,XTERM);
8468
8469         case KEY_uc:
8470             UNI(OP_UC);
8471
8472         case KEY_ucfirst:
8473             UNI(OP_UCFIRST);
8474
8475         case KEY_untie:
8476             UNI(OP_UNTIE);
8477
8478         case KEY_until:
8479             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8480                 return REPORT(0);
8481             pl_yylval.ival = CopLINE(PL_curcop);
8482             OPERATOR(UNTIL);
8483
8484         case KEY_unless:
8485             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8486                 return REPORT(0);
8487             pl_yylval.ival = CopLINE(PL_curcop);
8488             OPERATOR(UNLESS);
8489
8490         case KEY_unlink:
8491             LOP(OP_UNLINK,XTERM);
8492
8493         case KEY_undef:
8494             UNIDOR(OP_UNDEF);
8495
8496         case KEY_unpack:
8497             LOP(OP_UNPACK,XTERM);
8498
8499         case KEY_utime:
8500             LOP(OP_UTIME,XTERM);
8501
8502         case KEY_umask:
8503             UNIDOR(OP_UMASK);
8504
8505         case KEY_unshift:
8506             LOP(OP_UNSHIFT,XTERM);
8507
8508         case KEY_use:
8509             s = tokenize_use(1, s);
8510             OPERATOR(USE);
8511
8512         case KEY_values:
8513             UNI(OP_VALUES);
8514
8515         case KEY_vec:
8516             LOP(OP_VEC,XTERM);
8517
8518         case KEY_when:
8519             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8520                 return REPORT(0);
8521             pl_yylval.ival = CopLINE(PL_curcop);
8522             OPERATOR(WHEN);
8523
8524         case KEY_while:
8525             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8526                 return REPORT(0);
8527             pl_yylval.ival = CopLINE(PL_curcop);
8528             OPERATOR(WHILE);
8529
8530         case KEY_warn:
8531             PL_hints |= HINT_BLOCK_SCOPE;
8532             LOP(OP_WARN,XTERM);
8533
8534         case KEY_wait:
8535             FUN0(OP_WAIT);
8536
8537         case KEY_waitpid:
8538             LOP(OP_WAITPID,XTERM);
8539
8540         case KEY_wantarray:
8541             FUN0(OP_WANTARRAY);
8542
8543         case KEY_write:
8544 #ifdef EBCDIC
8545         {
8546             char ctl_l[2];
8547             ctl_l[0] = toCTRL('L');
8548             ctl_l[1] = '\0';
8549             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8550         }
8551 #else
8552             /* Make sure $^L is defined */
8553             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8554 #endif
8555             UNI(OP_ENTERWRITE);
8556
8557         case KEY_x:
8558             if (PL_expect == XOPERATOR) {
8559                 if (*s == '=' && !PL_lex_allbrackets &&
8560                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8561                     return REPORT(0);
8562                 Mop(OP_REPEAT);
8563             }
8564             check_uni();
8565             goto just_a_word;
8566
8567         case KEY_xor:
8568             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8569                 return REPORT(0);
8570             pl_yylval.ival = OP_XOR;
8571             OPERATOR(OROP);
8572         }
8573     }}
8574 }
8575 #ifdef __SC__
8576 #pragma segment Main
8577 #endif
8578
8579 static void
8580 S_force_ident_maybe_lex(pTHX_ char pit)
8581 {
8582     dVAR;
8583     OP *o;
8584     int force_type;
8585     PADOFFSET tmp = 0;
8586     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8587     /* All routes through this function want to know if there is a colon.  */
8588     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8589
8590     start_force(PL_curforce);
8591
8592     /* if we're in a my(), we can't allow dynamics here.
8593        $foo'bar has already been turned into $foo::bar, so
8594        just check for colons.
8595
8596        if it's a legal name, the OP is a PADANY.
8597     */
8598     if (PL_in_my) {
8599         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8600             if (has_colon)
8601                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8602                                   "variable %s in \"our\"",
8603                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8604             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8605         }
8606         else {
8607             if (has_colon)
8608                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8609                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8610                             UTF ? SVf_UTF8 : 0);
8611
8612             o = newOP(OP_PADANY, 0);
8613             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8614                                                         UTF ? SVf_UTF8 : 0);
8615             force_type = PRIVATEREF;
8616             goto doforce;
8617         }
8618     }
8619
8620     /*
8621        build the ops for accesses to a my() variable.
8622     */
8623
8624     if (!has_colon) {
8625         if (!PL_in_my)
8626             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8627                                     UTF ? SVf_UTF8 : 0);
8628         if (tmp != NOT_IN_PAD) {
8629             /* might be an "our" variable" */
8630             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8631                 /* build ops for a bareword */
8632                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8633                 HEK * const stashname = HvNAME_HEK(stash);
8634                 SV *  const sym = newSVhek(stashname);
8635                 sv_catpvs(sym, "::");
8636                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8637                 o = (OP*)newSVOP(OP_CONST, 0, sym);
8638                 o->op_private = OPpCONST_ENTERED;
8639                 if (pit != '&')
8640                   gv_fetchsv(sym,
8641                     (PL_in_eval
8642                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8643                         : GV_ADDMULTI
8644                     ),
8645                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8646                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8647                      : SVt_PVHV));
8648                 force_type = WORD;
8649                 goto doforce;
8650             }
8651
8652             o = newOP(OP_PADANY, 0);
8653             o->op_targ = tmp;
8654             force_type = PRIVATEREF;
8655             goto doforce;
8656         }
8657     }
8658
8659     /*
8660        Whine if they've said @foo in a doublequoted string,
8661        and @foo isn't a variable we can find in the symbol
8662        table.
8663     */
8664     if (ckWARN(WARN_AMBIGUOUS) &&
8665         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8666         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8667                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8668         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8669                 /* DO NOT warn for @- and @+ */
8670                 && !( PL_tokenbuf[2] == '\0' &&
8671                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8672            )
8673         {
8674             /* Downgraded from fatal to warning 20000522 mjd */
8675             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8676                         "Possible unintended interpolation of %"SVf" in string",
8677                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8678                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8679         }
8680     }
8681
8682     /* build ops for a bareword */
8683     o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8684                                                       tokenbuf_len - 1,
8685                                                       UTF ? SVf_UTF8 : 0 ));
8686     o->op_private = OPpCONST_ENTERED;
8687     if (pit != '&')
8688         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8689                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8690                      | ( UTF ? SVf_UTF8 : 0 ),
8691                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8692                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8693                       : SVt_PVHV));
8694     force_type = WORD;
8695
8696    doforce:
8697     NEXTVAL_NEXTTOKE.opval = o;
8698     force_next(force_type);
8699 }
8700
8701 STATIC void
8702 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8703 {
8704     dVAR;
8705
8706     PERL_ARGS_ASSERT_CHECKCOMMA;
8707
8708     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8709         if (ckWARN(WARN_SYNTAX)) {
8710             int level = 1;
8711             const char *w;
8712             for (w = s+2; *w && level; w++) {
8713                 if (*w == '(')
8714                     ++level;
8715                 else if (*w == ')')
8716                     --level;
8717             }
8718             while (isSPACE(*w))
8719                 ++w;
8720             /* the list of chars below is for end of statements or
8721              * block / parens, boolean operators (&&, ||, //) and branch
8722              * constructs (or, and, if, until, unless, while, err, for).
8723              * Not a very solid hack... */
8724             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8725                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8726                             "%s (...) interpreted as function",name);
8727         }
8728     }
8729     while (s < PL_bufend && isSPACE(*s))
8730         s++;
8731     if (*s == '(')
8732         s++;
8733     while (s < PL_bufend && isSPACE(*s))
8734         s++;
8735     if (isIDFIRST_lazy_if(s,UTF)) {
8736         const char * const w = s;
8737         s += UTF ? UTF8SKIP(s) : 1;
8738         while (isALNUM_lazy_if(s,UTF))
8739             s += UTF ? UTF8SKIP(s) : 1;
8740         while (s < PL_bufend && isSPACE(*s))
8741             s++;
8742         if (*s == ',') {
8743             GV* gv;
8744             if (keyword(w, s - w, 0))
8745                 return;
8746
8747             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8748             if (gv && GvCVu(gv))
8749                 return;
8750             Perl_croak(aTHX_ "No comma allowed after %s", what);
8751         }
8752     }
8753 }
8754
8755 /* Either returns sv, or mortalizes sv and returns a new SV*.
8756    Best used as sv=new_constant(..., sv, ...).
8757    If s, pv are NULL, calls subroutine with one argument,
8758    and type is used with error messages only. */
8759
8760 STATIC SV *
8761 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8762                SV *sv, SV *pv, const char *type, STRLEN typelen)
8763 {
8764     dVAR; dSP;
8765     HV * table = GvHV(PL_hintgv);                /* ^H */
8766     SV *res;
8767     SV **cvp;
8768     SV *cv, *typesv;
8769     const char *why1 = "", *why2 = "", *why3 = "";
8770
8771     PERL_ARGS_ASSERT_NEW_CONSTANT;
8772
8773     /* charnames doesn't work well if there have been errors found */
8774     if (PL_error_count > 0 && strEQ(key,"charnames"))
8775         return &PL_sv_undef;
8776
8777     if (!table
8778         || ! (PL_hints & HINT_LOCALIZE_HH)
8779         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8780         || ! SvOK(*cvp))
8781     {
8782         SV *msg;
8783
8784         /* Here haven't found what we're looking for.  If it is charnames,
8785          * perhaps it needs to be loaded.  Try doing that before giving up */
8786         if (strEQ(key,"charnames")) {
8787             Perl_load_module(aTHX_
8788                             0,
8789                             newSVpvs("_charnames"),
8790                              /* version parameter; no need to specify it, as if
8791                               * we get too early a version, will fail anyway,
8792                               * not being able to find '_charnames' */
8793                             NULL,
8794                             newSVpvs(":full"),
8795                             newSVpvs(":short"),
8796                             NULL);
8797             SPAGAIN;
8798             table = GvHV(PL_hintgv);
8799             if (table
8800                 && (PL_hints & HINT_LOCALIZE_HH)
8801                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8802                 && SvOK(*cvp))
8803             {
8804                 goto now_ok;
8805             }
8806         }
8807         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8808             msg = Perl_newSVpvf(aTHX_
8809                             "Constant(%s) unknown", (type ? type: "undef"));
8810         }
8811         else {
8812         why1 = "$^H{";
8813         why2 = key;
8814         why3 = "} is not defined";
8815     report:
8816         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8817                             (type ? type: "undef"), why1, why2, why3);
8818         }
8819         yyerror(SvPVX_const(msg));
8820         SvREFCNT_dec(msg);
8821         return sv;
8822     }
8823 now_ok:
8824     sv_2mortal(sv);                     /* Parent created it permanently */
8825     cv = *cvp;
8826     if (!pv && s)
8827         pv = newSVpvn_flags(s, len, SVs_TEMP);
8828     if (type && pv)
8829         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8830     else
8831         typesv = &PL_sv_undef;
8832
8833     PUSHSTACKi(PERLSI_OVERLOAD);
8834     ENTER ;
8835     SAVETMPS;
8836
8837     PUSHMARK(SP) ;
8838     EXTEND(sp, 3);
8839     if (pv)
8840         PUSHs(pv);
8841     PUSHs(sv);
8842     if (pv)
8843         PUSHs(typesv);
8844     PUTBACK;
8845     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8846
8847     SPAGAIN ;
8848
8849     /* Check the eval first */
8850     if (!PL_in_eval && SvTRUE(ERRSV)) {
8851         sv_catpvs(ERRSV, "Propagated");
8852         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8853         (void)POPs;
8854         res = SvREFCNT_inc_simple(sv);
8855     }
8856     else {
8857         res = POPs;
8858         SvREFCNT_inc_simple_void(res);
8859     }
8860
8861     PUTBACK ;
8862     FREETMPS ;
8863     LEAVE ;
8864     POPSTACK;
8865
8866     if (!SvOK(res)) {
8867         why1 = "Call to &{$^H{";
8868         why2 = key;
8869         why3 = "}} did not return a defined value";
8870         sv = res;
8871         goto report;
8872     }
8873
8874     return res;
8875 }
8876
8877 /* Returns a NUL terminated string, with the length of the string written to
8878    *slp
8879    */
8880 STATIC char *
8881 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8882 {
8883     dVAR;
8884     char *d = dest;
8885     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8886
8887     PERL_ARGS_ASSERT_SCAN_WORD;
8888
8889     for (;;) {
8890         if (d >= e)
8891             Perl_croak(aTHX_ ident_too_long);
8892         if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
8893             *d++ = *s++;
8894         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8895             *d++ = ':';
8896             *d++ = ':';
8897             s++;
8898         }
8899         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8900             *d++ = *s++;
8901             *d++ = *s++;
8902         }
8903         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8904             char *t = s + UTF8SKIP(s);
8905             size_t len;
8906             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8907                 t += UTF8SKIP(t);
8908             len = t - s;
8909             if (d + len > e)
8910                 Perl_croak(aTHX_ ident_too_long);
8911             Copy(s, d, len, char);
8912             d += len;
8913             s = t;
8914         }
8915         else {
8916             *d = '\0';
8917             *slp = d - dest;
8918             return s;
8919         }
8920     }
8921 }
8922
8923 STATIC char *
8924 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8925 {
8926     dVAR;
8927     char *bracket = NULL;
8928     char funny = *s++;
8929     char *d = dest;
8930     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8931
8932     PERL_ARGS_ASSERT_SCAN_IDENT;
8933
8934     if (isSPACE(*s))
8935         s = PEEKSPACE(s);
8936     if (isDIGIT(*s)) {
8937         while (isDIGIT(*s)) {
8938             if (d >= e)
8939                 Perl_croak(aTHX_ ident_too_long);
8940             *d++ = *s++;
8941         }
8942     }
8943     else {
8944         for (;;) {
8945             if (d >= e)
8946                 Perl_croak(aTHX_ ident_too_long);
8947             if (isALNUM(*s))    /* UTF handled below */
8948                 *d++ = *s++;
8949             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8950                 *d++ = ':';
8951                 *d++ = ':';
8952                 s++;
8953             }
8954             else if (*s == ':' && s[1] == ':') {
8955                 *d++ = *s++;
8956                 *d++ = *s++;
8957             }
8958             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8959                 char *t = s + UTF8SKIP(s);
8960                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8961                     t += UTF8SKIP(t);
8962                 if (d + (t - s) > e)
8963                     Perl_croak(aTHX_ ident_too_long);
8964                 Copy(s, d, t - s, char);
8965                 d += t - s;
8966                 s = t;
8967             }
8968             else
8969                 break;
8970         }
8971     }
8972     *d = '\0';
8973     d = dest;
8974     if (*d) {
8975         if (PL_lex_state != LEX_NORMAL)
8976             PL_lex_state = LEX_INTERPENDMAYBE;
8977         return s;
8978     }
8979     if (*s == '$' && s[1] &&
8980         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8981     {
8982         return s;
8983     }
8984     if (*s == '{') {
8985         bracket = s;
8986         s++;
8987     }
8988     if (s < send) {
8989         if (UTF) {
8990             const STRLEN skip = UTF8SKIP(s);
8991             STRLEN i;
8992             d[skip] = '\0';
8993             for ( i = 0; i < skip; i++ )
8994                 d[i] = *s++;
8995         }
8996         else {
8997             *d = *s++;
8998             d[1] = '\0';
8999         }
9000     }
9001     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9002         *d = toCTRL(*s);
9003         s++;
9004     }
9005     else if (ck_uni && !bracket)
9006         check_uni();
9007     if (bracket) {
9008         if (isSPACE(s[-1])) {
9009             while (s < send) {
9010                 const char ch = *s++;
9011                 if (!SPACE_OR_TAB(ch)) {
9012                     *d = ch;
9013                     break;
9014                 }
9015             }
9016         }
9017         if (isIDFIRST_lazy_if(d,UTF)) {
9018             d += UTF8SKIP(d);
9019             if (UTF) {
9020                 char *end = s;
9021                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9022                     end += UTF8SKIP(end);
9023                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9024                         end += UTF8SKIP(end);
9025                 }
9026                 Copy(s, d, end - s, char);
9027                 d += end - s;
9028                 s = end;
9029             }
9030             else {
9031                 while ((isALNUM(*s) || *s == ':') && d < e)
9032                     *d++ = *s++;
9033                 if (d >= e)
9034                     Perl_croak(aTHX_ ident_too_long);
9035             }
9036             *d = '\0';
9037             while (s < send && SPACE_OR_TAB(*s))
9038                 s++;
9039             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9040                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9041                     const char * const brack =
9042                         (const char *)
9043                         ((*s == '[') ? "[...]" : "{...}");
9044    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9045                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9046                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9047                         funny, dest, brack, funny, dest, brack);
9048                 }
9049                 bracket++;
9050                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9051                 PL_lex_allbrackets++;
9052                 return s;
9053             }
9054         }
9055         /* Handle extended ${^Foo} variables
9056          * 1999-02-27 mjd-perl-patch@plover.com */
9057         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9058                  && isALNUM(*s))
9059         {
9060             d++;
9061             while (isALNUM(*s) && d < e) {
9062                 *d++ = *s++;
9063             }
9064             if (d >= e)
9065                 Perl_croak(aTHX_ ident_too_long);
9066             *d = '\0';
9067         }
9068         if (*s == '}') {
9069             s++;
9070             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9071                 PL_lex_state = LEX_INTERPEND;
9072                 PL_expect = XREF;
9073             }
9074             if (PL_lex_state == LEX_NORMAL) {
9075                 if (ckWARN(WARN_AMBIGUOUS) &&
9076                     (keyword(dest, d - dest, 0)
9077                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9078                 {
9079                     SV *tmp = newSVpvn_flags( dest, d - dest,
9080                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9081                     if (funny == '#')
9082                         funny = '@';
9083                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9084                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9085                         funny, tmp, funny, tmp);
9086                 }
9087             }
9088         }
9089         else {
9090             s = bracket;                /* let the parser handle it */
9091             *dest = '\0';
9092         }
9093     }
9094     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9095         PL_lex_state = LEX_INTERPEND;
9096     return s;
9097 }
9098
9099 static bool
9100 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9101
9102     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9103      * the parse starting at 's', based on the subset that are valid in this
9104      * context input to this routine in 'valid_flags'. Advances s.  Returns
9105      * TRUE if the input should be treated as a valid flag, so the next char
9106      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9107      * first call on the current regex.  This routine will set it to any
9108      * charset modifier found.  The caller shouldn't change it.  This way,
9109      * another charset modifier encountered in the parse can be detected as an
9110      * error, as we have decided to allow only one */
9111
9112     const char c = **s;
9113     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9114
9115     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9116         if (isALNUM_lazy_if(*s, UTF)) {
9117             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9118                        UTF ? SVf_UTF8 : 0);
9119             (*s) += charlen;
9120             /* Pretend that it worked, so will continue processing before
9121              * dieing */
9122             return TRUE;
9123         }
9124         return FALSE;
9125     }
9126
9127     switch (c) {
9128
9129         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9130         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9131         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9132         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9133         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9134         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9135         case LOCALE_PAT_MOD:
9136             if (*charset) {
9137                 goto multiple_charsets;
9138             }
9139             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9140             *charset = c;
9141             break;
9142         case UNICODE_PAT_MOD:
9143             if (*charset) {
9144                 goto multiple_charsets;
9145             }
9146             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9147             *charset = c;
9148             break;
9149         case ASCII_RESTRICT_PAT_MOD:
9150             if (! *charset) {
9151                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9152             }
9153             else {
9154
9155                 /* Error if previous modifier wasn't an 'a', but if it was, see
9156                  * if, and accept, a second occurrence (only) */
9157                 if (*charset != 'a'
9158                     || get_regex_charset(*pmfl)
9159                         != REGEX_ASCII_RESTRICTED_CHARSET)
9160                 {
9161                         goto multiple_charsets;
9162                 }
9163                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9164             }
9165             *charset = c;
9166             break;
9167         case DEPENDS_PAT_MOD:
9168             if (*charset) {
9169                 goto multiple_charsets;
9170             }
9171             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9172             *charset = c;
9173             break;
9174     }
9175
9176     (*s)++;
9177     return TRUE;
9178
9179     multiple_charsets:
9180         if (*charset != c) {
9181             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9182         }
9183         else if (c == 'a') {
9184             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9185         }
9186         else {
9187             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9188         }
9189
9190         /* Pretend that it worked, so will continue processing before dieing */
9191         (*s)++;
9192         return TRUE;
9193 }
9194
9195 STATIC char *
9196 S_scan_pat(pTHX_ char *start, I32 type)
9197 {
9198     dVAR;
9199     PMOP *pm;
9200     char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9201     const char * const valid_flags =
9202         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9203     char charset = '\0';    /* character set modifier */
9204 #ifdef PERL_MAD
9205     char *modstart;
9206 #endif
9207
9208     PERL_ARGS_ASSERT_SCAN_PAT;
9209
9210     /* this was only needed for the initial scan_str; set it to false
9211      * so that any (?{}) code blocks etc are parsed normally */
9212     PL_reg_state.re_reparsing = FALSE;
9213     if (!s) {
9214         const char * const delimiter = skipspace(start);
9215         Perl_croak(aTHX_
9216                    (const char *)
9217                    (*delimiter == '?'
9218                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9219                     : "Search pattern not terminated" ));
9220     }
9221
9222     pm = (PMOP*)newPMOP(type, 0);
9223     if (PL_multi_open == '?') {
9224         /* This is the only point in the code that sets PMf_ONCE:  */
9225         pm->op_pmflags |= PMf_ONCE;
9226
9227         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9228            allows us to restrict the list needed by reset to just the ??
9229            matches.  */
9230         assert(type != OP_TRANS);
9231         if (PL_curstash) {
9232             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9233             U32 elements;
9234             if (!mg) {
9235                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9236                                  0);
9237             }
9238             elements = mg->mg_len / sizeof(PMOP**);
9239             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9240             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9241             mg->mg_len = elements * sizeof(PMOP**);
9242             PmopSTASH_set(pm,PL_curstash);
9243         }
9244     }
9245 #ifdef PERL_MAD
9246     modstart = s;
9247 #endif
9248
9249     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9250      * anon CV. False positives like qr/[(?{]/ are harmless */
9251
9252     if (type == OP_QR) {
9253         STRLEN len;
9254         char *e, *p = SvPV(PL_lex_stuff, len);
9255         e = p + len;
9256         for (; p < e; p++) {
9257             if (p[0] == '(' && p[1] == '?'
9258                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9259             {
9260                 pm->op_pmflags |= PMf_HAS_CV;
9261                 break;
9262             }
9263         }
9264         pm->op_pmflags |= PMf_IS_QR;
9265     }
9266
9267     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9268 #ifdef PERL_MAD
9269     if (PL_madskills && modstart != s) {
9270         SV* tmptoken = newSVpvn(modstart, s - modstart);
9271         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9272     }
9273 #endif
9274     /* issue a warning if /c is specified,but /g is not */
9275     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9276     {
9277         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9278                        "Use of /c modifier is meaningless without /g" );
9279     }
9280
9281     PL_lex_op = (OP*)pm;
9282     pl_yylval.ival = OP_MATCH;
9283     return s;
9284 }
9285
9286 STATIC char *
9287 S_scan_subst(pTHX_ char *start)
9288 {
9289     dVAR;
9290     char *s;
9291     PMOP *pm;
9292     I32 first_start;
9293     I32 es = 0;
9294     char charset = '\0';    /* character set modifier */
9295 #ifdef PERL_MAD
9296     char *modstart;
9297 #endif
9298
9299     PERL_ARGS_ASSERT_SCAN_SUBST;
9300
9301     pl_yylval.ival = OP_NULL;
9302
9303     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9304
9305     if (!s)
9306         Perl_croak(aTHX_ "Substitution pattern not terminated");
9307
9308     if (s[-1] == PL_multi_open)
9309         s--;
9310 #ifdef PERL_MAD
9311     if (PL_madskills) {
9312         CURMAD('q', PL_thisopen);
9313         CURMAD('_', PL_thiswhite);
9314         CURMAD('E', PL_thisstuff);
9315         CURMAD('Q', PL_thisclose);
9316         PL_realtokenstart = s - SvPVX(PL_linestr);
9317     }
9318 #endif
9319
9320     first_start = PL_multi_start;
9321     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9322     if (!s) {
9323         if (PL_lex_stuff) {
9324             SvREFCNT_dec(PL_lex_stuff);
9325             PL_lex_stuff = NULL;
9326         }
9327         Perl_croak(aTHX_ "Substitution replacement not terminated");
9328     }
9329     PL_multi_start = first_start;       /* so whole substitution is taken together */
9330
9331     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9332
9333 #ifdef PERL_MAD
9334     if (PL_madskills) {
9335         CURMAD('z', PL_thisopen);
9336         CURMAD('R', PL_thisstuff);
9337         CURMAD('Z', PL_thisclose);
9338     }
9339     modstart = s;
9340 #endif
9341
9342     while (*s) {
9343         if (*s == EXEC_PAT_MOD) {
9344             s++;
9345             es++;
9346         }
9347         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9348         {
9349             break;
9350         }
9351     }
9352
9353 #ifdef PERL_MAD
9354     if (PL_madskills) {
9355         if (modstart != s)
9356             curmad('m', newSVpvn(modstart, s - modstart));
9357         append_madprops(PL_thismad, (OP*)pm, 0);
9358         PL_thismad = 0;
9359     }
9360 #endif
9361     if ((pm->op_pmflags & PMf_CONTINUE)) {
9362         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9363     }
9364
9365     if (es) {
9366         SV * const repl = newSVpvs("");
9367
9368         PL_multi_end = 0;
9369         pm->op_pmflags |= PMf_EVAL;
9370         while (es-- > 0) {
9371             if (es)
9372                 sv_catpvs(repl, "eval ");
9373             else
9374                 sv_catpvs(repl, "do ");
9375         }
9376         sv_catpvs(repl, "{");
9377         sv_catsv(repl, PL_sublex_info.repl);
9378         sv_catpvs(repl, "}");
9379         SvEVALED_on(repl);
9380         SvREFCNT_dec(PL_sublex_info.repl);
9381         PL_sublex_info.repl = repl;
9382     }
9383
9384     PL_lex_op = (OP*)pm;
9385     pl_yylval.ival = OP_SUBST;
9386     return s;
9387 }
9388
9389 STATIC char *
9390 S_scan_trans(pTHX_ char *start)
9391 {
9392     dVAR;
9393     char* s;
9394     OP *o;
9395     U8 squash;
9396     U8 del;
9397     U8 complement;
9398     bool nondestruct = 0;
9399 #ifdef PERL_MAD
9400     char *modstart;
9401 #endif
9402
9403     PERL_ARGS_ASSERT_SCAN_TRANS;
9404
9405     pl_yylval.ival = OP_NULL;
9406
9407     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9408     if (!s)
9409         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9410
9411     if (s[-1] == PL_multi_open)
9412         s--;
9413 #ifdef PERL_MAD
9414     if (PL_madskills) {
9415         CURMAD('q', PL_thisopen);
9416         CURMAD('_', PL_thiswhite);
9417         CURMAD('E', PL_thisstuff);
9418         CURMAD('Q', PL_thisclose);
9419         PL_realtokenstart = s - SvPVX(PL_linestr);
9420     }
9421 #endif
9422
9423     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9424     if (!s) {
9425         if (PL_lex_stuff) {
9426             SvREFCNT_dec(PL_lex_stuff);
9427             PL_lex_stuff = NULL;
9428         }
9429         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9430     }
9431     if (PL_madskills) {
9432         CURMAD('z', PL_thisopen);
9433         CURMAD('R', PL_thisstuff);
9434         CURMAD('Z', PL_thisclose);
9435     }
9436
9437     complement = del = squash = 0;
9438 #ifdef PERL_MAD
9439     modstart = s;
9440 #endif
9441     while (1) {
9442         switch (*s) {
9443         case 'c':
9444             complement = OPpTRANS_COMPLEMENT;
9445             break;
9446         case 'd':
9447             del = OPpTRANS_DELETE;
9448             break;
9449         case 's':
9450             squash = OPpTRANS_SQUASH;
9451             break;
9452         case 'r':
9453             nondestruct = 1;
9454             break;
9455         default:
9456             goto no_more;
9457         }
9458         s++;
9459     }
9460   no_more:
9461
9462     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9463     o->op_private &= ~OPpTRANS_ALL;
9464     o->op_private |= del|squash|complement|
9465       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9466       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9467
9468     PL_lex_op = o;
9469     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9470
9471 #ifdef PERL_MAD
9472     if (PL_madskills) {
9473         if (modstart != s)
9474             curmad('m', newSVpvn(modstart, s - modstart));
9475         append_madprops(PL_thismad, o, 0);
9476         PL_thismad = 0;
9477     }
9478 #endif
9479
9480     return s;
9481 }
9482
9483 /* scan_heredoc
9484    Takes a pointer to the first < in <<FOO.
9485    Returns a pointer to the byte following <<FOO.
9486
9487    This function scans a heredoc, which involves different methods
9488    depending on whether we are in a string eval, quoted construct, etc.
9489    This is because PL_linestr could containing a single line of input, or
9490    a whole string being evalled, or the contents of the current quote-
9491    like operator.
9492
9493    The two basic methods are:
9494     - Steal lines from the input stream
9495     - Scan the heredoc in PL_linestr and remove it therefrom
9496
9497    In a file scope or filtered eval, the first method is used; in a
9498    string eval, the second.
9499
9500    In a quote-like operator, we have to choose between the two,
9501    depending on where we can find a newline.  We peek into outer lex-
9502    ing scopes until we find one with a newline in it.  If we reach the
9503    outermost lexing scope and it is a file, we use the stream method.
9504    Otherwise it is treated as an eval.
9505 */
9506
9507 STATIC char *
9508 S_scan_heredoc(pTHX_ register char *s)
9509 {
9510     dVAR;
9511     I32 op_type = OP_SCALAR;
9512     I32 len;
9513     SV *tmpstr;
9514     char term;
9515     char *d;
9516     char *e;
9517     char *peek;
9518     const bool infile = PL_rsfp || PL_parser->filtered;
9519     LEXSHARED *shared = PL_parser->lex_shared;
9520 #ifdef PERL_MAD
9521     I32 stuffstart = s - SvPVX(PL_linestr);
9522     char *tstart;
9523
9524     PL_realtokenstart = -1;
9525 #endif
9526
9527     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9528
9529     s += 2;
9530     d = PL_tokenbuf + 1;
9531     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9532     *PL_tokenbuf = '\n';
9533     peek = s;
9534     while (SPACE_OR_TAB(*peek))
9535         peek++;
9536     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9537         s = peek;
9538         term = *s++;
9539         s = delimcpy(d, e, s, PL_bufend, term, &len);
9540         if (s == PL_bufend)
9541             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9542         d += len;
9543         s++;
9544     }
9545     else {
9546         if (*s == '\\')
9547             /* <<\FOO is equivalent to <<'FOO' */
9548             s++, term = '\'';
9549         else
9550             term = '"';
9551         if (!isALNUM_lazy_if(s,UTF))
9552             deprecate("bare << to mean <<\"\"");
9553         for (; isALNUM_lazy_if(s,UTF); s++) {
9554             if (d < e)
9555                 *d++ = *s;
9556         }
9557     }
9558     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9559         Perl_croak(aTHX_ "Delimiter for here document is too long");
9560     *d++ = '\n';
9561     *d = '\0';
9562     len = d - PL_tokenbuf;
9563
9564 #ifdef PERL_MAD
9565     if (PL_madskills) {
9566         tstart = PL_tokenbuf + 1;
9567         PL_thisclose = newSVpvn(tstart, len - 1);
9568         tstart = SvPVX(PL_linestr) + stuffstart;
9569         PL_thisopen = newSVpvn(tstart, s - tstart);
9570         stuffstart = s - SvPVX(PL_linestr);
9571     }
9572 #endif
9573 #ifndef PERL_STRICT_CR
9574     d = strchr(s, '\r');
9575     if (d) {
9576         char * const olds = s;
9577         s = d;
9578         while (s < PL_bufend) {
9579             if (*s == '\r') {
9580                 *d++ = '\n';
9581                 if (*++s == '\n')
9582                     s++;
9583             }
9584             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9585                 *d++ = *s++;
9586                 s++;
9587             }
9588             else
9589                 *d++ = *s++;
9590         }
9591         *d = '\0';
9592         PL_bufend = d;
9593         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9594         s = olds;
9595     }
9596 #endif
9597 #ifdef PERL_MAD
9598     if (PL_madskills) {
9599         tstart = SvPVX(PL_linestr) + stuffstart;
9600         if (PL_thisstuff)
9601             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9602         else
9603             PL_thisstuff = newSVpvn(tstart, s - tstart);
9604     }
9605
9606     stuffstart = s - SvPVX(PL_linestr);
9607 #endif
9608
9609     tmpstr = newSV_type(SVt_PVIV);
9610     SvGROW(tmpstr, 80);
9611     if (term == '\'') {
9612         op_type = OP_CONST;
9613         SvIV_set(tmpstr, -1);
9614     }
9615     else if (term == '`') {
9616         op_type = OP_BACKTICK;
9617         SvIV_set(tmpstr, '\\');
9618     }
9619
9620     PL_multi_start = CopLINE(PL_curcop) + 1;
9621     PL_multi_open = PL_multi_close = '<';
9622     /* inside a string eval or quote-like operator */
9623     if (!infile || PL_lex_inwhat) {
9624         SV *linestr;
9625         char *bufend;
9626         char * const olds = s;
9627         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9628         /* These two fields are not set until an inner lexing scope is
9629            entered.  But we need them set here. */
9630         shared->ls_bufptr  = s;
9631         shared->ls_linestr = PL_linestr;
9632         if (PL_lex_inwhat)
9633           /* Look for a newline.  If the current buffer does not have one,
9634              peek into the line buffer of the parent lexing scope, going
9635              up as many levels as necessary to find one with a newline
9636              after bufptr.
9637            */
9638           while (!(s = (char *)memchr(
9639                     (void *)shared->ls_bufptr, '\n',
9640                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9641                 ))) {
9642             shared = shared->ls_prev;
9643             /* shared is only null if we have gone beyond the outermost
9644                lexing scope.  In a file, we will have broken out of the
9645                loop in the previous iteration.  In an eval, the string buf-
9646                fer ends with "\n;", so the while condition below will have
9647                evaluated to false.  So shared can never be null. */
9648             assert(shared);
9649             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9650                most lexing scope.  In a file, shared->ls_linestr at that
9651                level is just one line, so there is no body to steal. */
9652             if (infile && !shared->ls_prev) {
9653                 s = olds;
9654                 goto streaming;
9655             }
9656           }
9657         else {  /* eval */
9658             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9659             assert(s);
9660         }
9661         linestr = shared->ls_linestr;
9662         bufend = SvEND(linestr);
9663         d = s;
9664         while (s < bufend &&
9665           (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9666             if (*s++ == '\n')
9667                 ++shared->herelines;
9668         }
9669         if (s >= bufend) {
9670             goto interminable;
9671         }
9672         sv_setpvn(tmpstr,d+1,s-d);
9673 #ifdef PERL_MAD
9674         if (PL_madskills) {
9675             if (PL_thisstuff)
9676                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9677             else
9678                 PL_thisstuff = newSVpvn(d + 1, s - d);
9679             stuffstart = s - SvPVX(PL_linestr);
9680         }
9681 #endif
9682         s += len - 1;
9683         /* the preceding stmt passes a newline */
9684         shared->herelines++;
9685
9686         /* s now points to the newline after the heredoc terminator.
9687            d points to the newline before the body of the heredoc.
9688          */
9689
9690         /* We are going to modify linestr in place here, so set
9691            aside copies of the string if necessary for re-evals or
9692            (caller $n)[6]. */
9693         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9694            check shared->re_eval_str. */
9695         if (shared->re_eval_start || shared->re_eval_str) {
9696             /* Set aside the rest of the regexp */
9697             if (!shared->re_eval_str)
9698                 shared->re_eval_str =
9699                        newSVpvn(shared->re_eval_start,
9700                                 bufend - shared->re_eval_start);
9701             shared->re_eval_start -= s-d;
9702         }
9703         if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9704          && cx->blk_eval.cur_text == linestr) {
9705             cx->blk_eval.cur_text = newSVsv(linestr);
9706             SvSCREAM_on(cx->blk_eval.cur_text);
9707         }
9708         /* Copy everything from s onwards back to d. */
9709         Move(s,d,bufend-s + 1,char);
9710         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9711         /* Setting PL_bufend only applies when we have not dug deeper
9712            into other scopes, because sublex_done sets PL_bufend to
9713            SvEND(PL_linestr). */
9714         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9715         s = olds;
9716     }
9717     else
9718     {
9719       SV *linestr_save;
9720      streaming:
9721       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9722       term = PL_tokenbuf[1];
9723       len--;
9724       linestr_save = PL_linestr; /* must restore this afterwards */
9725       d = s;                     /* and this */
9726       PL_linestr = newSVpvs("");
9727       PL_bufend = SvPVX(PL_linestr);
9728       while (1) {
9729 #ifdef PERL_MAD
9730         if (PL_madskills) {
9731             tstart = SvPVX(PL_linestr) + stuffstart;
9732             if (PL_thisstuff)
9733                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9734             else
9735                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9736         }
9737 #endif
9738         PL_bufptr = PL_bufend;
9739         CopLINE_set(PL_curcop,
9740                     PL_multi_start + shared->herelines);
9741         if (!lex_next_chunk(LEX_NO_TERM)
9742          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9743             SvREFCNT_dec(linestr_save);
9744             goto interminable;
9745         }
9746         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9747         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9748             lex_grow_linestr(SvCUR(PL_linestr) + 2);
9749             sv_catpvs(PL_linestr, "\n\0");
9750         }
9751         s = PL_bufptr;
9752 #ifdef PERL_MAD
9753         stuffstart = s - SvPVX(PL_linestr);
9754 #endif
9755         shared->herelines++;
9756         PL_last_lop = PL_last_uni = NULL;
9757 #ifndef PERL_STRICT_CR
9758         if (PL_bufend - PL_linestart >= 2) {
9759             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9760                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9761             {
9762                 PL_bufend[-2] = '\n';
9763                 PL_bufend--;
9764                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9765             }
9766             else if (PL_bufend[-1] == '\r')
9767                 PL_bufend[-1] = '\n';
9768         }
9769         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9770             PL_bufend[-1] = '\n';
9771 #endif
9772         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9773             SvREFCNT_dec(PL_linestr);
9774             PL_linestr = linestr_save;
9775             PL_linestart = SvPVX(linestr_save);
9776             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9777             s = d;
9778             break;
9779         }
9780         else {
9781             sv_catsv(tmpstr,PL_linestr);
9782         }
9783       }
9784     }
9785     PL_multi_end = CopLINE(PL_curcop);
9786     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9787         SvPV_shrink_to_cur(tmpstr);
9788     }
9789     if (!IN_BYTES) {
9790         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9791             SvUTF8_on(tmpstr);
9792         else if (PL_encoding)
9793             sv_recode_to_utf8(tmpstr, PL_encoding);
9794     }
9795     PL_lex_stuff = tmpstr;
9796     pl_yylval.ival = op_type;
9797     return s;
9798
9799   interminable:
9800     SvREFCNT_dec(tmpstr);
9801     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9802     missingterm(PL_tokenbuf + 1);
9803 }
9804
9805 /* scan_inputsymbol
9806    takes: current position in input buffer
9807    returns: new position in input buffer
9808    side-effects: pl_yylval and lex_op are set.
9809
9810    This code handles:
9811
9812    <>           read from ARGV
9813    <FH>         read from filehandle
9814    <pkg::FH>    read from package qualified filehandle
9815    <pkg'FH>     read from package qualified filehandle
9816    <$fh>        read from filehandle in $fh
9817    <*.h>        filename glob
9818
9819 */
9820
9821 STATIC char *
9822 S_scan_inputsymbol(pTHX_ char *start)
9823 {
9824     dVAR;
9825     char *s = start;            /* current position in buffer */
9826     char *end;
9827     I32 len;
9828     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9829     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9830
9831     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9832
9833     end = strchr(s, '\n');
9834     if (!end)
9835         end = PL_bufend;
9836     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9837
9838     /* die if we didn't have space for the contents of the <>,
9839        or if it didn't end, or if we see a newline
9840     */
9841
9842     if (len >= (I32)sizeof PL_tokenbuf)
9843         Perl_croak(aTHX_ "Excessively long <> operator");
9844     if (s >= end)
9845         Perl_croak(aTHX_ "Unterminated <> operator");
9846
9847     s++;
9848
9849     /* check for <$fh>
9850        Remember, only scalar variables are interpreted as filehandles by
9851        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9852        treated as a glob() call.
9853        This code makes use of the fact that except for the $ at the front,
9854        a scalar variable and a filehandle look the same.
9855     */
9856     if (*d == '$' && d[1]) d++;
9857
9858     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9859     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9860         d += UTF ? UTF8SKIP(d) : 1;
9861
9862     /* If we've tried to read what we allow filehandles to look like, and
9863        there's still text left, then it must be a glob() and not a getline.
9864        Use scan_str to pull out the stuff between the <> and treat it
9865        as nothing more than a string.
9866     */
9867
9868     if (d - PL_tokenbuf != len) {
9869         pl_yylval.ival = OP_GLOB;
9870         s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9871         if (!s)
9872            Perl_croak(aTHX_ "Glob not terminated");
9873         return s;
9874     }
9875     else {
9876         bool readline_overriden = FALSE;
9877         GV *gv_readline;
9878         GV **gvp;
9879         /* we're in a filehandle read situation */
9880         d = PL_tokenbuf;
9881
9882         /* turn <> into <ARGV> */
9883         if (!len)
9884             Copy("ARGV",d,5,char);
9885
9886         /* Check whether readline() is overriden */
9887         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9888         if ((gv_readline
9889                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9890                 ||
9891                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9892                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9893                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9894             readline_overriden = TRUE;
9895
9896         /* if <$fh>, create the ops to turn the variable into a
9897            filehandle
9898         */
9899         if (*d == '$') {
9900             /* try to find it in the pad for this block, otherwise find
9901                add symbol table ops
9902             */
9903             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9904             if (tmp != NOT_IN_PAD) {
9905                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9906                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9907                     HEK * const stashname = HvNAME_HEK(stash);
9908                     SV * const sym = sv_2mortal(newSVhek(stashname));
9909                     sv_catpvs(sym, "::");
9910                     sv_catpv(sym, d+1);
9911                     d = SvPVX(sym);
9912                     goto intro_sym;
9913                 }
9914                 else {
9915                     OP * const o = newOP(OP_PADSV, 0);
9916                     o->op_targ = tmp;
9917                     PL_lex_op = readline_overriden
9918                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9919                                 op_append_elem(OP_LIST, o,
9920                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9921                         : (OP*)newUNOP(OP_READLINE, 0, o);
9922                 }
9923             }
9924             else {
9925                 GV *gv;
9926                 ++d;
9927 intro_sym:
9928                 gv = gv_fetchpv(d,
9929                                 (PL_in_eval
9930                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9931                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9932                                 SVt_PV);
9933                 PL_lex_op = readline_overriden
9934                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9935                             op_append_elem(OP_LIST,
9936                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9937                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9938                     : (OP*)newUNOP(OP_READLINE, 0,
9939                             newUNOP(OP_RV2SV, 0,
9940                                 newGVOP(OP_GV, 0, gv)));
9941             }
9942             if (!readline_overriden)
9943                 PL_lex_op->op_flags |= OPf_SPECIAL;
9944             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9945             pl_yylval.ival = OP_NULL;
9946         }
9947
9948         /* If it's none of the above, it must be a literal filehandle
9949            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9950         else {
9951             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9952             PL_lex_op = readline_overriden
9953                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9954                         op_append_elem(OP_LIST,
9955                             newGVOP(OP_GV, 0, gv),
9956                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9957                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9958             pl_yylval.ival = OP_NULL;
9959         }
9960     }
9961
9962     return s;
9963 }
9964
9965
9966 /* scan_str
9967    takes: start position in buffer
9968           keep_quoted preserve \ on the embedded delimiter(s)
9969           keep_delims preserve the delimiters around the string
9970           re_reparse  compiling a run-time /(?{})/:
9971                         collapse // to /,  and skip encoding src
9972    returns: position to continue reading from buffer
9973    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9974         updates the read buffer.
9975
9976    This subroutine pulls a string out of the input.  It is called for:
9977         q               single quotes           q(literal text)
9978         '               single quotes           'literal text'
9979         qq              double quotes           qq(interpolate $here please)
9980         "               double quotes           "interpolate $here please"
9981         qx              backticks               qx(/bin/ls -l)
9982         `               backticks               `/bin/ls -l`
9983         qw              quote words             @EXPORT_OK = qw( func() $spam )
9984         m//             regexp match            m/this/
9985         s///            regexp substitute       s/this/that/
9986         tr///           string transliterate    tr/this/that/
9987         y///            string transliterate    y/this/that/
9988         ($*@)           sub prototypes          sub foo ($)
9989         (stuff)         sub attr parameters     sub foo : attr(stuff)
9990         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9991
9992    In most of these cases (all but <>, patterns and transliterate)
9993    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9994    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9995    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9996    calls scan_str().
9997
9998    It skips whitespace before the string starts, and treats the first
9999    character as the delimiter.  If the delimiter is one of ([{< then
10000    the corresponding "close" character )]}> is used as the closing
10001    delimiter.  It allows quoting of delimiters, and if the string has
10002    balanced delimiters ([{<>}]) it allows nesting.
10003
10004    On success, the SV with the resulting string is put into lex_stuff or,
10005    if that is already non-NULL, into lex_repl. The second case occurs only
10006    when parsing the RHS of the special constructs s/// and tr/// (y///).
10007    For convenience, the terminating delimiter character is stuffed into
10008    SvIVX of the SV.
10009 */
10010
10011 STATIC char *
10012 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10013 {
10014     dVAR;
10015     SV *sv;                             /* scalar value: string */
10016     const char *tmps;                   /* temp string, used for delimiter matching */
10017     char *s = start;            /* current position in the buffer */
10018     char term;                  /* terminating character */
10019     char *to;                   /* current position in the sv's data */
10020     I32 brackets = 1;                   /* bracket nesting level */
10021     bool has_utf8 = FALSE;              /* is there any utf8 content? */
10022     I32 termcode;                       /* terminating char. code */
10023     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
10024     STRLEN termlen;                     /* length of terminating string */
10025     int last_off = 0;                   /* last position for nesting bracket */
10026 #ifdef PERL_MAD
10027     int stuffstart;
10028     char *tstart;
10029 #endif
10030
10031     PERL_ARGS_ASSERT_SCAN_STR;
10032
10033     /* skip space before the delimiter */
10034     if (isSPACE(*s)) {
10035         s = PEEKSPACE(s);
10036     }
10037
10038 #ifdef PERL_MAD
10039     if (PL_realtokenstart >= 0) {
10040         stuffstart = PL_realtokenstart;
10041         PL_realtokenstart = -1;
10042     }
10043     else
10044         stuffstart = start - SvPVX(PL_linestr);
10045 #endif
10046     /* mark where we are, in case we need to report errors */
10047     CLINE;
10048
10049     /* after skipping whitespace, the next character is the terminator */
10050     term = *s;
10051     if (!UTF) {
10052         termcode = termstr[0] = term;
10053         termlen = 1;
10054     }
10055     else {
10056         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10057         Copy(s, termstr, termlen, U8);
10058         if (!UTF8_IS_INVARIANT(term))
10059             has_utf8 = TRUE;
10060     }
10061
10062     /* mark where we are */
10063     PL_multi_start = CopLINE(PL_curcop);
10064     PL_multi_open = term;
10065
10066     /* find corresponding closing delimiter */
10067     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10068         termcode = termstr[0] = term = tmps[5];
10069
10070     PL_multi_close = term;
10071
10072     /* create a new SV to hold the contents.  79 is the SV's initial length.
10073        What a random number. */
10074     sv = newSV_type(SVt_PVIV);
10075     SvGROW(sv, 80);
10076     SvIV_set(sv, termcode);
10077     (void)SvPOK_only(sv);               /* validate pointer */
10078
10079     /* move past delimiter and try to read a complete string */
10080     if (keep_delims)
10081         sv_catpvn(sv, s, termlen);
10082     s += termlen;
10083 #ifdef PERL_MAD
10084     tstart = SvPVX(PL_linestr) + stuffstart;
10085     if (!PL_thisopen && !keep_delims) {
10086         PL_thisopen = newSVpvn(tstart, s - tstart);
10087         stuffstart = s - SvPVX(PL_linestr);
10088     }
10089 #endif
10090     for (;;) {
10091         if (PL_encoding && !UTF && !re_reparse) {
10092             bool cont = TRUE;
10093
10094             while (cont) {
10095                 int offset = s - SvPVX_const(PL_linestr);
10096                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10097                                            &offset, (char*)termstr, termlen);
10098                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10099                 char * const svlast = SvEND(sv) - 1;
10100
10101                 for (; s < ns; s++) {
10102                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10103                         COPLINE_INC_WITH_HERELINES;
10104                 }
10105                 if (!found)
10106                     goto read_more_line;
10107                 else {
10108                     /* handle quoted delimiters */
10109                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10110                         const char *t;
10111                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10112                             t--;
10113                         if ((svlast-1 - t) % 2) {
10114                             if (!keep_quoted) {
10115                                 *(svlast-1) = term;
10116                                 *svlast = '\0';
10117                                 SvCUR_set(sv, SvCUR(sv) - 1);
10118                             }
10119                             continue;
10120                         }
10121                     }
10122                     if (PL_multi_open == PL_multi_close) {
10123                         cont = FALSE;
10124                     }
10125                     else {
10126                         const char *t;
10127                         char *w;
10128                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10129                             /* At here, all closes are "was quoted" one,
10130                                so we don't check PL_multi_close. */
10131                             if (*t == '\\') {
10132                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10133                                     t++;
10134                                 else
10135                                     *w++ = *t++;
10136                             }
10137                             else if (*t == PL_multi_open)
10138                                 brackets++;
10139
10140                             *w = *t;
10141                         }
10142                         if (w < t) {
10143                             *w++ = term;
10144                             *w = '\0';
10145                             SvCUR_set(sv, w - SvPVX_const(sv));
10146                         }
10147                         last_off = w - SvPVX(sv);
10148                         if (--brackets <= 0)
10149                             cont = FALSE;
10150                     }
10151                 }
10152             }
10153             if (!keep_delims) {
10154                 SvCUR_set(sv, SvCUR(sv) - 1);
10155                 *SvEND(sv) = '\0';
10156             }
10157             break;
10158         }
10159
10160         /* extend sv if need be */
10161         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10162         /* set 'to' to the next character in the sv's string */
10163         to = SvPVX(sv)+SvCUR(sv);
10164
10165         /* if open delimiter is the close delimiter read unbridle */
10166         if (PL_multi_open == PL_multi_close) {
10167             for (; s < PL_bufend; s++,to++) {
10168                 /* embedded newlines increment the current line number */
10169                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10170                     COPLINE_INC_WITH_HERELINES;
10171                 /* handle quoted delimiters */
10172                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10173                     if (!keep_quoted
10174                         && (s[1] == term
10175                             || (re_reparse && s[1] == '\\'))
10176                     )
10177                         s++;
10178                     /* any other quotes are simply copied straight through */
10179                     else
10180                         *to++ = *s++;
10181                 }
10182                 /* terminate when run out of buffer (the for() condition), or
10183                    have found the terminator */
10184                 else if (*s == term) {
10185                     if (termlen == 1)
10186                         break;
10187                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10188                         break;
10189                 }
10190                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10191                     has_utf8 = TRUE;
10192                 *to = *s;
10193             }
10194         }
10195
10196         /* if the terminator isn't the same as the start character (e.g.,
10197            matched brackets), we have to allow more in the quoting, and
10198            be prepared for nested brackets.
10199         */
10200         else {
10201             /* read until we run out of string, or we find the terminator */
10202             for (; s < PL_bufend; s++,to++) {
10203                 /* embedded newlines increment the line count */
10204                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10205                     COPLINE_INC_WITH_HERELINES;
10206                 /* backslashes can escape the open or closing characters */
10207                 if (*s == '\\' && s+1 < PL_bufend) {
10208                     if (!keep_quoted &&
10209                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10210                         s++;
10211                     else
10212                         *to++ = *s++;
10213                 }
10214                 /* allow nested opens and closes */
10215                 else if (*s == PL_multi_close && --brackets <= 0)
10216                     break;
10217                 else if (*s == PL_multi_open)
10218                     brackets++;
10219                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10220                     has_utf8 = TRUE;
10221                 *to = *s;
10222             }
10223         }
10224         /* terminate the copied string and update the sv's end-of-string */
10225         *to = '\0';
10226         SvCUR_set(sv, to - SvPVX_const(sv));
10227
10228         /*
10229          * this next chunk reads more into the buffer if we're not done yet
10230          */
10231
10232         if (s < PL_bufend)
10233             break;              /* handle case where we are done yet :-) */
10234
10235 #ifndef PERL_STRICT_CR
10236         if (to - SvPVX_const(sv) >= 2) {
10237             if ((to[-2] == '\r' && to[-1] == '\n') ||
10238                 (to[-2] == '\n' && to[-1] == '\r'))
10239             {
10240                 to[-2] = '\n';
10241                 to--;
10242                 SvCUR_set(sv, to - SvPVX_const(sv));
10243             }
10244             else if (to[-1] == '\r')
10245                 to[-1] = '\n';
10246         }
10247         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10248             to[-1] = '\n';
10249 #endif
10250
10251      read_more_line:
10252         /* if we're out of file, or a read fails, bail and reset the current
10253            line marker so we can report where the unterminated string began
10254         */
10255 #ifdef PERL_MAD
10256         if (PL_madskills) {
10257             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10258             if (PL_thisstuff)
10259                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10260             else
10261                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10262         }
10263 #endif
10264         COPLINE_INC_WITH_HERELINES;
10265         PL_bufptr = PL_bufend;
10266         if (!lex_next_chunk(0)) {
10267             sv_free(sv);
10268             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10269             return NULL;
10270         }
10271         s = PL_bufptr;
10272 #ifdef PERL_MAD
10273         stuffstart = 0;
10274 #endif
10275     }
10276
10277     /* at this point, we have successfully read the delimited string */
10278
10279     if (!PL_encoding || UTF || re_reparse) {
10280 #ifdef PERL_MAD
10281         if (PL_madskills) {
10282             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10283             const int len = s - tstart;
10284             if (PL_thisstuff)
10285                 sv_catpvn(PL_thisstuff, tstart, len);
10286             else
10287                 PL_thisstuff = newSVpvn(tstart, len);
10288             if (!PL_thisclose && !keep_delims)
10289                 PL_thisclose = newSVpvn(s,termlen);
10290         }
10291 #endif
10292
10293         if (keep_delims)
10294             sv_catpvn(sv, s, termlen);
10295         s += termlen;
10296     }
10297 #ifdef PERL_MAD
10298     else {
10299         if (PL_madskills) {
10300             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10301             const int len = s - tstart - termlen;
10302             if (PL_thisstuff)
10303                 sv_catpvn(PL_thisstuff, tstart, len);
10304             else
10305                 PL_thisstuff = newSVpvn(tstart, len);
10306             if (!PL_thisclose && !keep_delims)
10307                 PL_thisclose = newSVpvn(s - termlen,termlen);
10308         }
10309     }
10310 #endif
10311     if (has_utf8 || (PL_encoding && !re_reparse))
10312         SvUTF8_on(sv);
10313
10314     PL_multi_end = CopLINE(PL_curcop);
10315
10316     /* if we allocated too much space, give some back */
10317     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10318         SvLEN_set(sv, SvCUR(sv) + 1);
10319         SvPV_renew(sv, SvLEN(sv));
10320     }
10321
10322     /* decide whether this is the first or second quoted string we've read
10323        for this op
10324     */
10325
10326     if (PL_lex_stuff)
10327         PL_sublex_info.repl = sv;
10328     else
10329         PL_lex_stuff = sv;
10330     return s;
10331 }
10332
10333 /*
10334   scan_num
10335   takes: pointer to position in buffer
10336   returns: pointer to new position in buffer
10337   side-effects: builds ops for the constant in pl_yylval.op
10338
10339   Read a number in any of the formats that Perl accepts:
10340
10341   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10342   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10343   0b[01](_?[01])*
10344   0[0-7](_?[0-7])*
10345   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10346
10347   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10348   thing it reads.
10349
10350   If it reads a number without a decimal point or an exponent, it will
10351   try converting the number to an integer and see if it can do so
10352   without loss of precision.
10353 */
10354
10355 char *
10356 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10357 {
10358     dVAR;
10359     const char *s = start;      /* current position in buffer */
10360     char *d;                    /* destination in temp buffer */
10361     char *e;                    /* end of temp buffer */
10362     NV nv;                              /* number read, as a double */
10363     SV *sv = NULL;                      /* place to put the converted number */
10364     bool floatit;                       /* boolean: int or float? */
10365     const char *lastub = NULL;          /* position of last underbar */
10366     static char const number_too_long[] = "Number too long";
10367
10368     PERL_ARGS_ASSERT_SCAN_NUM;
10369
10370     /* We use the first character to decide what type of number this is */
10371
10372     switch (*s) {
10373     default:
10374         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10375
10376     /* if it starts with a 0, it could be an octal number, a decimal in
10377        0.13 disguise, or a hexadecimal number, or a binary number. */
10378     case '0':
10379         {
10380           /* variables:
10381              u          holds the "number so far"
10382              shift      the power of 2 of the base
10383                         (hex == 4, octal == 3, binary == 1)
10384              overflowed was the number more than we can hold?
10385
10386              Shift is used when we add a digit.  It also serves as an "are
10387              we in octal/hex/binary?" indicator to disallow hex characters
10388              when in octal mode.
10389            */
10390             NV n = 0.0;
10391             UV u = 0;
10392             I32 shift;
10393             bool overflowed = FALSE;
10394             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10395             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10396             static const char* const bases[5] =
10397               { "", "binary", "", "octal", "hexadecimal" };
10398             static const char* const Bases[5] =
10399               { "", "Binary", "", "Octal", "Hexadecimal" };
10400             static const char* const maxima[5] =
10401               { "",
10402                 "0b11111111111111111111111111111111",
10403                 "",
10404                 "037777777777",
10405                 "0xffffffff" };
10406             const char *base, *Base, *max;
10407
10408             /* check for hex */
10409             if (s[1] == 'x' || s[1] == 'X') {
10410                 shift = 4;
10411                 s += 2;
10412                 just_zero = FALSE;
10413             } else if (s[1] == 'b' || s[1] == 'B') {
10414                 shift = 1;
10415                 s += 2;
10416                 just_zero = FALSE;
10417             }
10418             /* check for a decimal in disguise */
10419             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10420                 goto decimal;
10421             /* so it must be octal */
10422             else {
10423                 shift = 3;
10424                 s++;
10425             }
10426
10427             if (*s == '_') {
10428                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10429                                "Misplaced _ in number");
10430                lastub = s++;
10431             }
10432
10433             base = bases[shift];
10434             Base = Bases[shift];
10435             max  = maxima[shift];
10436
10437             /* read the rest of the number */
10438             for (;;) {
10439                 /* x is used in the overflow test,
10440                    b is the digit we're adding on. */
10441                 UV x, b;
10442
10443                 switch (*s) {
10444
10445                 /* if we don't mention it, we're done */
10446                 default:
10447                     goto out;
10448
10449                 /* _ are ignored -- but warned about if consecutive */
10450                 case '_':
10451                     if (lastub && s == lastub + 1)
10452                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10453                                        "Misplaced _ in number");
10454                     lastub = s++;
10455                     break;
10456
10457                 /* 8 and 9 are not octal */
10458                 case '8': case '9':
10459                     if (shift == 3)
10460                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10461                     /* FALL THROUGH */
10462
10463                 /* octal digits */
10464                 case '2': case '3': case '4':
10465                 case '5': case '6': case '7':
10466                     if (shift == 1)
10467                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10468                     /* FALL THROUGH */
10469
10470                 case '0': case '1':
10471                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10472                     goto digit;
10473
10474                 /* hex digits */
10475                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10476                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10477                     /* make sure they said 0x */
10478                     if (shift != 4)
10479                         goto out;
10480                     b = (*s++ & 7) + 9;
10481
10482                     /* Prepare to put the digit we have onto the end
10483                        of the number so far.  We check for overflows.
10484                     */
10485
10486                   digit:
10487                     just_zero = FALSE;
10488                     if (!overflowed) {
10489                         x = u << shift; /* make room for the digit */
10490
10491                         if ((x >> shift) != u
10492                             && !(PL_hints & HINT_NEW_BINARY)) {
10493                             overflowed = TRUE;
10494                             n = (NV) u;
10495                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10496                                              "Integer overflow in %s number",
10497                                              base);
10498                         } else
10499                             u = x | b;          /* add the digit to the end */
10500                     }
10501                     if (overflowed) {
10502                         n *= nvshift[shift];
10503                         /* If an NV has not enough bits in its
10504                          * mantissa to represent an UV this summing of
10505                          * small low-order numbers is a waste of time
10506                          * (because the NV cannot preserve the
10507                          * low-order bits anyway): we could just
10508                          * remember when did we overflow and in the
10509                          * end just multiply n by the right
10510                          * amount. */
10511                         n += (NV) b;
10512                     }
10513                     break;
10514                 }
10515             }
10516
10517           /* if we get here, we had success: make a scalar value from
10518              the number.
10519           */
10520           out:
10521
10522             /* final misplaced underbar check */
10523             if (s[-1] == '_') {
10524                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10525             }
10526
10527             if (overflowed) {
10528                 if (n > 4294967295.0)
10529                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10530                                    "%s number > %s non-portable",
10531                                    Base, max);
10532                 sv = newSVnv(n);
10533             }
10534             else {
10535 #if UVSIZE > 4
10536                 if (u > 0xffffffff)
10537                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10538                                    "%s number > %s non-portable",
10539                                    Base, max);
10540 #endif
10541                 sv = newSVuv(u);
10542             }
10543             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10544                 sv = new_constant(start, s - start, "integer",
10545                                   sv, NULL, NULL, 0);
10546             else if (PL_hints & HINT_NEW_BINARY)
10547                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10548         }
10549         break;
10550
10551     /*
10552       handle decimal numbers.
10553       we're also sent here when we read a 0 as the first digit
10554     */
10555     case '1': case '2': case '3': case '4': case '5':
10556     case '6': case '7': case '8': case '9': case '.':
10557       decimal:
10558         d = PL_tokenbuf;
10559         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10560         floatit = FALSE;
10561
10562         /* read next group of digits and _ and copy into d */
10563         while (isDIGIT(*s) || *s == '_') {
10564             /* skip underscores, checking for misplaced ones
10565                if -w is on
10566             */
10567             if (*s == '_') {
10568                 if (lastub && s == lastub + 1)
10569                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10570                                    "Misplaced _ in number");
10571                 lastub = s++;
10572             }
10573             else {
10574                 /* check for end of fixed-length buffer */
10575                 if (d >= e)
10576                     Perl_croak(aTHX_ number_too_long);
10577                 /* if we're ok, copy the character */
10578                 *d++ = *s++;
10579             }
10580         }
10581
10582         /* final misplaced underbar check */
10583         if (lastub && s == lastub + 1) {
10584             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10585         }
10586
10587         /* read a decimal portion if there is one.  avoid
10588            3..5 being interpreted as the number 3. followed
10589            by .5
10590         */
10591         if (*s == '.' && s[1] != '.') {
10592             floatit = TRUE;
10593             *d++ = *s++;
10594
10595             if (*s == '_') {
10596                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10597                                "Misplaced _ in number");
10598                 lastub = s;
10599             }
10600
10601             /* copy, ignoring underbars, until we run out of digits.
10602             */
10603             for (; isDIGIT(*s) || *s == '_'; s++) {
10604                 /* fixed length buffer check */
10605                 if (d >= e)
10606                     Perl_croak(aTHX_ number_too_long);
10607                 if (*s == '_') {
10608                    if (lastub && s == lastub + 1)
10609                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10610                                       "Misplaced _ in number");
10611                    lastub = s;
10612                 }
10613                 else
10614                     *d++ = *s;
10615             }
10616             /* fractional part ending in underbar? */
10617             if (s[-1] == '_') {
10618                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10619                                "Misplaced _ in number");
10620             }
10621             if (*s == '.' && isDIGIT(s[1])) {
10622                 /* oops, it's really a v-string, but without the "v" */
10623                 s = start;
10624                 goto vstring;
10625             }
10626         }
10627
10628         /* read exponent part, if present */
10629         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10630             floatit = TRUE;
10631             s++;
10632
10633             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10634             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10635
10636             /* stray preinitial _ */
10637             if (*s == '_') {
10638                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10639                                "Misplaced _ in number");
10640                 lastub = s++;
10641             }
10642
10643             /* allow positive or negative exponent */
10644             if (*s == '+' || *s == '-')
10645                 *d++ = *s++;
10646
10647             /* stray initial _ */
10648             if (*s == '_') {
10649                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10650                                "Misplaced _ in number");
10651                 lastub = s++;
10652             }
10653
10654             /* read digits of exponent */
10655             while (isDIGIT(*s) || *s == '_') {
10656                 if (isDIGIT(*s)) {
10657                     if (d >= e)
10658                         Perl_croak(aTHX_ number_too_long);
10659                     *d++ = *s++;
10660                 }
10661                 else {
10662                    if (((lastub && s == lastub + 1) ||
10663                         (!isDIGIT(s[1]) && s[1] != '_')))
10664                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10665                                       "Misplaced _ in number");
10666                    lastub = s++;
10667                 }
10668             }
10669         }
10670
10671
10672         /*
10673            We try to do an integer conversion first if no characters
10674            indicating "float" have been found.
10675          */
10676
10677         if (!floatit) {
10678             UV uv;
10679             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10680
10681             if (flags == IS_NUMBER_IN_UV) {
10682               if (uv <= IV_MAX)
10683                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10684               else
10685                 sv = newSVuv(uv);
10686             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10687               if (uv <= (UV) IV_MIN)
10688                 sv = newSViv(-(IV)uv);
10689               else
10690                 floatit = TRUE;
10691             } else
10692               floatit = TRUE;
10693         }
10694         if (floatit) {
10695             /* terminate the string */
10696             *d = '\0';
10697             nv = Atof(PL_tokenbuf);
10698             sv = newSVnv(nv);
10699         }
10700
10701         if ( floatit
10702              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10703             const char *const key = floatit ? "float" : "integer";
10704             const STRLEN keylen = floatit ? 5 : 7;
10705             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10706                                 key, keylen, sv, NULL, NULL, 0);
10707         }
10708         break;
10709
10710     /* if it starts with a v, it could be a v-string */
10711     case 'v':
10712 vstring:
10713                 sv = newSV(5); /* preallocate storage space */
10714                 s = scan_vstring(s, PL_bufend, sv);
10715         break;
10716     }
10717
10718     /* make the op for the constant and return */
10719
10720     if (sv)
10721         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10722     else
10723         lvalp->opval = NULL;
10724
10725     return (char *)s;
10726 }
10727
10728 STATIC char *
10729 S_scan_formline(pTHX_ register char *s)
10730 {
10731     dVAR;
10732     char *eol;
10733     char *t;
10734     SV * const stuff = newSVpvs("");
10735     bool needargs = FALSE;
10736     bool eofmt = FALSE;
10737 #ifdef PERL_MAD
10738     char *tokenstart = s;
10739     SV* savewhite = NULL;
10740
10741     if (PL_madskills) {
10742         savewhite = PL_thiswhite;
10743         PL_thiswhite = 0;
10744     }
10745 #endif
10746
10747     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10748
10749     while (!needargs) {
10750         if (*s == '.') {
10751             t = s+1;
10752 #ifdef PERL_STRICT_CR
10753             while (SPACE_OR_TAB(*t))
10754                 t++;
10755 #else
10756             while (SPACE_OR_TAB(*t) || *t == '\r')
10757                 t++;
10758 #endif
10759             if (*t == '\n' || t == PL_bufend) {
10760                 eofmt = TRUE;
10761                 break;
10762             }
10763         }
10764         eol = (char *) memchr(s,'\n',PL_bufend-s);
10765         if (!eol++)
10766                 eol = PL_bufend;
10767         if (*s != '#') {
10768             for (t = s; t < eol; t++) {
10769                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10770                     needargs = FALSE;
10771                     goto enough;        /* ~~ must be first line in formline */
10772                 }
10773                 if (*t == '@' || *t == '^')
10774                     needargs = TRUE;
10775             }
10776             if (eol > s) {
10777                 sv_catpvn(stuff, s, eol-s);
10778 #ifndef PERL_STRICT_CR
10779                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10780                     char *end = SvPVX(stuff) + SvCUR(stuff);
10781                     end[-2] = '\n';
10782                     end[-1] = '\0';
10783                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10784                 }
10785 #endif
10786             }
10787             else
10788               break;
10789         }
10790         s = (char*)eol;
10791         if ((PL_rsfp || PL_parser->filtered)
10792          && PL_parser->form_lex_state == LEX_NORMAL) {
10793             bool got_some;
10794 #ifdef PERL_MAD
10795             if (PL_madskills) {
10796                 if (PL_thistoken)
10797                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10798                 else
10799                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10800             }
10801 #endif
10802             PL_bufptr = PL_bufend;
10803             COPLINE_INC_WITH_HERELINES;
10804             got_some = lex_next_chunk(0);
10805             CopLINE_dec(PL_curcop);
10806             s = PL_bufptr;
10807 #ifdef PERL_MAD
10808             tokenstart = PL_bufptr;
10809 #endif
10810             if (!got_some)
10811                 break;
10812         }
10813         incline(s);
10814     }
10815   enough:
10816     if (!SvCUR(stuff) || needargs)
10817         PL_lex_state = PL_parser->form_lex_state;
10818     if (SvCUR(stuff)) {
10819         PL_expect = XSTATE;
10820         if (needargs) {
10821             start_force(PL_curforce);
10822             NEXTVAL_NEXTTOKE.ival = 0;
10823             force_next(FORMLBRACK);
10824         }
10825         if (!IN_BYTES) {
10826             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10827                 SvUTF8_on(stuff);
10828             else if (PL_encoding)
10829                 sv_recode_to_utf8(stuff, PL_encoding);
10830         }
10831         start_force(PL_curforce);
10832         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10833         force_next(THING);
10834     }
10835     else {
10836         SvREFCNT_dec(stuff);
10837         if (eofmt)
10838             PL_lex_formbrack = 0;
10839     }
10840 #ifdef PERL_MAD
10841     if (PL_madskills) {
10842         if (PL_thistoken)
10843             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10844         else
10845             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10846         PL_thiswhite = savewhite;
10847     }
10848 #endif
10849     return s;
10850 }
10851
10852 I32
10853 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10854 {
10855     dVAR;
10856     const I32 oldsavestack_ix = PL_savestack_ix;
10857     CV* const outsidecv = PL_compcv;
10858
10859     SAVEI32(PL_subline);
10860     save_item(PL_subname);
10861     SAVESPTR(PL_compcv);
10862
10863     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10864     CvFLAGS(PL_compcv) |= flags;
10865
10866     PL_subline = CopLINE(PL_curcop);
10867     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10868     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10869     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10870     if (outsidecv && CvPADLIST(outsidecv))
10871         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10872
10873     return oldsavestack_ix;
10874 }
10875
10876 #ifdef __SC__
10877 #pragma segment Perl_yylex
10878 #endif
10879 static int
10880 S_yywarn(pTHX_ const char *const s, U32 flags)
10881 {
10882     dVAR;
10883
10884     PERL_ARGS_ASSERT_YYWARN;
10885
10886     PL_in_eval |= EVAL_WARNONLY;
10887     yyerror_pv(s, flags);
10888     PL_in_eval &= ~EVAL_WARNONLY;
10889     return 0;
10890 }
10891
10892 int
10893 Perl_yyerror(pTHX_ const char *const s)
10894 {
10895     PERL_ARGS_ASSERT_YYERROR;
10896     return yyerror_pvn(s, strlen(s), 0);
10897 }
10898
10899 int
10900 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10901 {
10902     PERL_ARGS_ASSERT_YYERROR_PV;
10903     return yyerror_pvn(s, strlen(s), flags);
10904 }
10905
10906 int
10907 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10908 {
10909     dVAR;
10910     const char *context = NULL;
10911     int contlen = -1;
10912     SV *msg;
10913     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10914     int yychar  = PL_parser->yychar;
10915     U32 is_utf8 = flags & SVf_UTF8;
10916
10917     PERL_ARGS_ASSERT_YYERROR_PVN;
10918
10919     if (!yychar || (yychar == ';' && !PL_rsfp))
10920         sv_catpvs(where_sv, "at EOF");
10921     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10922       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10923       PL_oldbufptr != PL_bufptr) {
10924         /*
10925                 Only for NetWare:
10926                 The code below is removed for NetWare because it abends/crashes on NetWare
10927                 when the script has error such as not having the closing quotes like:
10928                     if ($var eq "value)
10929                 Checking of white spaces is anyway done in NetWare code.
10930         */
10931 #ifndef NETWARE
10932         while (isSPACE(*PL_oldoldbufptr))
10933             PL_oldoldbufptr++;
10934 #endif
10935         context = PL_oldoldbufptr;
10936         contlen = PL_bufptr - PL_oldoldbufptr;
10937     }
10938     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10939       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10940         /*
10941                 Only for NetWare:
10942                 The code below is removed for NetWare because it abends/crashes on NetWare
10943                 when the script has error such as not having the closing quotes like:
10944                     if ($var eq "value)
10945                 Checking of white spaces is anyway done in NetWare code.
10946         */
10947 #ifndef NETWARE
10948         while (isSPACE(*PL_oldbufptr))
10949             PL_oldbufptr++;
10950 #endif
10951         context = PL_oldbufptr;
10952         contlen = PL_bufptr - PL_oldbufptr;
10953     }
10954     else if (yychar > 255)
10955         sv_catpvs(where_sv, "next token ???");
10956     else if (yychar == -2) { /* YYEMPTY */
10957         if (PL_lex_state == LEX_NORMAL ||
10958            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10959             sv_catpvs(where_sv, "at end of line");
10960         else if (PL_lex_inpat)
10961             sv_catpvs(where_sv, "within pattern");
10962         else
10963             sv_catpvs(where_sv, "within string");
10964     }
10965     else {
10966         sv_catpvs(where_sv, "next char ");
10967         if (yychar < 32)
10968             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10969         else if (isPRINT_LC(yychar)) {
10970             const char string = yychar;
10971             sv_catpvn(where_sv, &string, 1);
10972         }
10973         else
10974             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10975     }
10976     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
10977     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10978         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10979     if (context)
10980         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10981                             SVfARG(newSVpvn_flags(context, contlen,
10982                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
10983     else
10984         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10985     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10986         Perl_sv_catpvf(aTHX_ msg,
10987         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10988                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10989         PL_multi_end = 0;
10990     }
10991     if (PL_in_eval & EVAL_WARNONLY) {
10992         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10993     }
10994     else
10995         qerror(msg);
10996     if (PL_error_count >= 10) {
10997         if (PL_in_eval && SvCUR(ERRSV))
10998             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10999                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
11000         else
11001             Perl_croak(aTHX_ "%s has too many errors.\n",
11002             OutCopFILE(PL_curcop));
11003     }
11004     PL_in_my = 0;
11005     PL_in_my_stash = NULL;
11006     return 0;
11007 }
11008 #ifdef __SC__
11009 #pragma segment Main
11010 #endif
11011
11012 STATIC char*
11013 S_swallow_bom(pTHX_ U8 *s)
11014 {
11015     dVAR;
11016     const STRLEN slen = SvCUR(PL_linestr);
11017
11018     PERL_ARGS_ASSERT_SWALLOW_BOM;
11019
11020     switch (s[0]) {
11021     case 0xFF:
11022         if (s[1] == 0xFE) {
11023             /* UTF-16 little-endian? (or UTF-32LE?) */
11024             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11025                 /* diag_listed_as: Unsupported script encoding %s */
11026                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11027 #ifndef PERL_NO_UTF16_FILTER
11028             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11029             s += 2;
11030             if (PL_bufend > (char*)s) {
11031                 s = add_utf16_textfilter(s, TRUE);
11032             }
11033 #else
11034             /* diag_listed_as: Unsupported script encoding %s */
11035             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11036 #endif
11037         }
11038         break;
11039     case 0xFE:
11040         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11041 #ifndef PERL_NO_UTF16_FILTER
11042             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11043             s += 2;
11044             if (PL_bufend > (char *)s) {
11045                 s = add_utf16_textfilter(s, FALSE);
11046             }
11047 #else
11048             /* diag_listed_as: Unsupported script encoding %s */
11049             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11050 #endif
11051         }
11052         break;
11053     case 0xEF:
11054         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11055             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11056             s += 3;                      /* UTF-8 */
11057         }
11058         break;
11059     case 0:
11060         if (slen > 3) {
11061              if (s[1] == 0) {
11062                   if (s[2] == 0xFE && s[3] == 0xFF) {
11063                        /* UTF-32 big-endian */
11064                        /* diag_listed_as: Unsupported script encoding %s */
11065                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11066                   }
11067              }
11068              else if (s[2] == 0 && s[3] != 0) {
11069                   /* Leading bytes
11070                    * 00 xx 00 xx
11071                    * are a good indicator of UTF-16BE. */
11072 #ifndef PERL_NO_UTF16_FILTER
11073                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11074                   s = add_utf16_textfilter(s, FALSE);
11075 #else
11076                   /* diag_listed_as: Unsupported script encoding %s */
11077                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11078 #endif
11079              }
11080         }
11081 #ifdef EBCDIC
11082     case 0xDD:
11083         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11084             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11085             s += 4;                      /* UTF-8 */
11086         }
11087         break;
11088 #endif
11089
11090     default:
11091          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11092                   /* Leading bytes
11093                    * xx 00 xx 00
11094                    * are a good indicator of UTF-16LE. */
11095 #ifndef PERL_NO_UTF16_FILTER
11096               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11097               s = add_utf16_textfilter(s, TRUE);
11098 #else
11099               /* diag_listed_as: Unsupported script encoding %s */
11100               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11101 #endif
11102          }
11103     }
11104     return (char*)s;
11105 }
11106
11107
11108 #ifndef PERL_NO_UTF16_FILTER
11109 static I32
11110 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11111 {
11112     dVAR;
11113     SV *const filter = FILTER_DATA(idx);
11114     /* We re-use this each time round, throwing the contents away before we
11115        return.  */
11116     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11117     SV *const utf8_buffer = filter;
11118     IV status = IoPAGE(filter);
11119     const bool reverse = cBOOL(IoLINES(filter));
11120     I32 retval;
11121
11122     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11123
11124     /* As we're automatically added, at the lowest level, and hence only called
11125        from this file, we can be sure that we're not called in block mode. Hence
11126        don't bother writing code to deal with block mode.  */
11127     if (maxlen) {
11128         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11129     }
11130     if (status < 0) {
11131         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11132     }
11133     DEBUG_P(PerlIO_printf(Perl_debug_log,
11134                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11135                           FPTR2DPTR(void *, S_utf16_textfilter),
11136                           reverse ? 'l' : 'b', idx, maxlen, status,
11137                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11138
11139     while (1) {
11140         STRLEN chars;
11141         STRLEN have;
11142         I32 newlen;
11143         U8 *end;
11144         /* First, look in our buffer of existing UTF-8 data:  */
11145         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11146
11147         if (nl) {
11148             ++nl;
11149         } else if (status == 0) {
11150             /* EOF */
11151             IoPAGE(filter) = 0;
11152             nl = SvEND(utf8_buffer);
11153         }
11154         if (nl) {
11155             STRLEN got = nl - SvPVX(utf8_buffer);
11156             /* Did we have anything to append?  */
11157             retval = got != 0;
11158             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11159             /* Everything else in this code works just fine if SVp_POK isn't
11160                set.  This, however, needs it, and we need it to work, else
11161                we loop infinitely because the buffer is never consumed.  */
11162             sv_chop(utf8_buffer, nl);
11163             break;
11164         }
11165
11166         /* OK, not a complete line there, so need to read some more UTF-16.
11167            Read an extra octect if the buffer currently has an odd number. */
11168         while (1) {
11169             if (status <= 0)
11170                 break;
11171             if (SvCUR(utf16_buffer) >= 2) {
11172                 /* Location of the high octet of the last complete code point.
11173                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11174                    *coupled* with all the benefits of partial reads and
11175                    endianness.  */
11176                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11177                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11178
11179                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11180                     break;
11181                 }
11182
11183                 /* We have the first half of a surrogate. Read more.  */
11184                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11185             }
11186
11187             status = FILTER_READ(idx + 1, utf16_buffer,
11188                                  160 + (SvCUR(utf16_buffer) & 1));
11189             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11190             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11191             if (status < 0) {
11192                 /* Error */
11193                 IoPAGE(filter) = status;
11194                 return status;
11195             }
11196         }
11197
11198         chars = SvCUR(utf16_buffer) >> 1;
11199         have = SvCUR(utf8_buffer);
11200         SvGROW(utf8_buffer, have + chars * 3 + 1);
11201
11202         if (reverse) {
11203             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11204                                          (U8*)SvPVX_const(utf8_buffer) + have,
11205                                          chars * 2, &newlen);
11206         } else {
11207             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11208                                 (U8*)SvPVX_const(utf8_buffer) + have,
11209                                 chars * 2, &newlen);
11210         }
11211         SvCUR_set(utf8_buffer, have + newlen);
11212         *end = '\0';
11213
11214         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11215            it's private to us, and utf16_to_utf8{,reversed} take a
11216            (pointer,length) pair, rather than a NUL-terminated string.  */
11217         if(SvCUR(utf16_buffer) & 1) {
11218             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11219             SvCUR_set(utf16_buffer, 1);
11220         } else {
11221             SvCUR_set(utf16_buffer, 0);
11222         }
11223     }
11224     DEBUG_P(PerlIO_printf(Perl_debug_log,
11225                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11226                           status,
11227                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11228     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11229     return retval;
11230 }
11231
11232 static U8 *
11233 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11234 {
11235     SV *filter = filter_add(S_utf16_textfilter, NULL);
11236
11237     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11238
11239     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11240     sv_setpvs(filter, "");
11241     IoLINES(filter) = reversed;
11242     IoPAGE(filter) = 1; /* Not EOF */
11243
11244     /* Sadly, we have to return a valid pointer, come what may, so we have to
11245        ignore any error return from this.  */
11246     SvCUR_set(PL_linestr, 0);
11247     if (FILTER_READ(0, PL_linestr, 0)) {
11248         SvUTF8_on(PL_linestr);
11249     } else {
11250         SvUTF8_on(PL_linestr);
11251     }
11252     PL_bufend = SvEND(PL_linestr);
11253     return (U8*)SvPVX(PL_linestr);
11254 }
11255 #endif
11256
11257 /*
11258 Returns a pointer to the next character after the parsed
11259 vstring, as well as updating the passed in sv.
11260
11261 Function must be called like
11262
11263         sv = newSV(5);
11264         s = scan_vstring(s,e,sv);
11265
11266 where s and e are the start and end of the string.
11267 The sv should already be large enough to store the vstring
11268 passed in, for performance reasons.
11269
11270 */
11271
11272 char *
11273 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11274 {
11275     dVAR;
11276     const char *pos = s;
11277     const char *start = s;
11278
11279     PERL_ARGS_ASSERT_SCAN_VSTRING;
11280
11281     if (*pos == 'v') pos++;  /* get past 'v' */
11282     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11283         pos++;
11284     if ( *pos != '.') {
11285         /* this may not be a v-string if followed by => */
11286         const char *next = pos;
11287         while (next < e && isSPACE(*next))
11288             ++next;
11289         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11290             /* return string not v-string */
11291             sv_setpvn(sv,(char *)s,pos-s);
11292             return (char *)pos;
11293         }
11294     }
11295
11296     if (!isALPHA(*pos)) {
11297         U8 tmpbuf[UTF8_MAXBYTES+1];
11298
11299         if (*s == 'v')
11300             s++;  /* get past 'v' */
11301
11302         sv_setpvs(sv, "");
11303
11304         for (;;) {
11305             /* this is atoi() that tolerates underscores */
11306             U8 *tmpend;
11307             UV rev = 0;
11308             const char *end = pos;
11309             UV mult = 1;
11310             while (--end >= s) {
11311                 if (*end != '_') {
11312                     const UV orev = rev;
11313                     rev += (*end - '0') * mult;
11314                     mult *= 10;
11315                     if (orev > rev)
11316                         /* diag_listed_as: Integer overflow in %s number */
11317                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11318                                          "Integer overflow in decimal number");
11319                 }
11320             }
11321 #ifdef EBCDIC
11322             if (rev > 0x7FFFFFFF)
11323                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11324 #endif
11325             /* Append native character for the rev point */
11326             tmpend = uvchr_to_utf8(tmpbuf, rev);
11327             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11328             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11329                  SvUTF8_on(sv);
11330             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11331                  s = ++pos;
11332             else {
11333                  s = pos;
11334                  break;
11335             }
11336             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11337                  pos++;
11338         }
11339         SvPOK_on(sv);
11340         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11341         SvRMAGICAL_on(sv);
11342     }
11343     return (char *)s;
11344 }
11345
11346 int
11347 Perl_keyword_plugin_standard(pTHX_
11348         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11349 {
11350     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11351     PERL_UNUSED_CONTEXT;
11352     PERL_UNUSED_ARG(keyword_ptr);
11353     PERL_UNUSED_ARG(keyword_len);
11354     PERL_UNUSED_ARG(op_ptr);
11355     return KEYWORD_PLUGIN_DECLINE;
11356 }
11357
11358 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11359 static void
11360 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11361 {
11362     SAVEI32(PL_lex_brackets);
11363     if (PL_lex_brackets > 100)
11364         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11365     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11366     SAVEI32(PL_lex_allbrackets);
11367     PL_lex_allbrackets = 0;
11368     SAVEI8(PL_lex_fakeeof);
11369     PL_lex_fakeeof = (U8)fakeeof;
11370     if(yyparse(gramtype) && !PL_parser->error_count)
11371         qerror(Perl_mess(aTHX_ "Parse error"));
11372 }
11373
11374 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11375 static OP *
11376 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11377 {
11378     OP *o;
11379     ENTER;
11380     SAVEVPTR(PL_eval_root);
11381     PL_eval_root = NULL;
11382     parse_recdescent(gramtype, fakeeof);
11383     o = PL_eval_root;
11384     LEAVE;
11385     return o;
11386 }
11387
11388 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11389 static OP *
11390 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11391 {
11392     OP *exprop;
11393     if (flags & ~PARSE_OPTIONAL)
11394         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11395     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11396     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11397         if (!PL_parser->error_count)
11398             qerror(Perl_mess(aTHX_ "Parse error"));
11399         exprop = newOP(OP_NULL, 0);
11400     }
11401     return exprop;
11402 }
11403
11404 /*
11405 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11406
11407 Parse a Perl arithmetic expression.  This may contain operators of precedence
11408 down to the bit shift operators.  The expression must be followed (and thus
11409 terminated) either by a comparison or lower-precedence operator or by
11410 something that would normally terminate an expression such as semicolon.
11411 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11412 otherwise it is mandatory.  It is up to the caller to ensure that the
11413 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11414 the source of the code to be parsed and the lexical context for the
11415 expression.
11416
11417 The op tree representing the expression is returned.  If an optional
11418 expression is absent, a null pointer is returned, otherwise the pointer
11419 will be non-null.
11420
11421 If an error occurs in parsing or compilation, in most cases a valid op
11422 tree is returned anyway.  The error is reflected in the parser state,
11423 normally resulting in a single exception at the top level of parsing
11424 which covers all the compilation errors that occurred.  Some compilation
11425 errors, however, will throw an exception immediately.
11426
11427 =cut
11428 */
11429
11430 OP *
11431 Perl_parse_arithexpr(pTHX_ U32 flags)
11432 {
11433     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11434 }
11435
11436 /*
11437 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11438
11439 Parse a Perl term expression.  This may contain operators of precedence
11440 down to the assignment operators.  The expression must be followed (and thus
11441 terminated) either by a comma or lower-precedence operator or by
11442 something that would normally terminate an expression such as semicolon.
11443 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11444 otherwise it is mandatory.  It is up to the caller to ensure that the
11445 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11446 the source of the code to be parsed and the lexical context for the
11447 expression.
11448
11449 The op tree representing the expression is returned.  If an optional
11450 expression is absent, a null pointer is returned, otherwise the pointer
11451 will be non-null.
11452
11453 If an error occurs in parsing or compilation, in most cases a valid op
11454 tree is returned anyway.  The error is reflected in the parser state,
11455 normally resulting in a single exception at the top level of parsing
11456 which covers all the compilation errors that occurred.  Some compilation
11457 errors, however, will throw an exception immediately.
11458
11459 =cut
11460 */
11461
11462 OP *
11463 Perl_parse_termexpr(pTHX_ U32 flags)
11464 {
11465     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11466 }
11467
11468 /*
11469 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11470
11471 Parse a Perl list expression.  This may contain operators of precedence
11472 down to the comma operator.  The expression must be followed (and thus
11473 terminated) either by a low-precedence logic operator such as C<or> or by
11474 something that would normally terminate an expression such as semicolon.
11475 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11476 otherwise it is mandatory.  It is up to the caller to ensure that the
11477 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11478 the source of the code to be parsed and the lexical context for the
11479 expression.
11480
11481 The op tree representing the expression is returned.  If an optional
11482 expression is absent, a null pointer is returned, otherwise the pointer
11483 will be non-null.
11484
11485 If an error occurs in parsing or compilation, in most cases a valid op
11486 tree is returned anyway.  The error is reflected in the parser state,
11487 normally resulting in a single exception at the top level of parsing
11488 which covers all the compilation errors that occurred.  Some compilation
11489 errors, however, will throw an exception immediately.
11490
11491 =cut
11492 */
11493
11494 OP *
11495 Perl_parse_listexpr(pTHX_ U32 flags)
11496 {
11497     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11498 }
11499
11500 /*
11501 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11502
11503 Parse a single complete Perl expression.  This allows the full
11504 expression grammar, including the lowest-precedence operators such
11505 as C<or>.  The expression must be followed (and thus terminated) by a
11506 token that an expression would normally be terminated by: end-of-file,
11507 closing bracketing punctuation, semicolon, or one of the keywords that
11508 signals a postfix expression-statement modifier.  If I<flags> includes
11509 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11510 mandatory.  It is up to the caller to ensure that the dynamic parser
11511 state (L</PL_parser> et al) is correctly set to reflect the source of
11512 the code to be parsed and the lexical context for the expression.
11513
11514 The op tree representing the expression is returned.  If an optional
11515 expression is absent, a null pointer is returned, otherwise the pointer
11516 will be non-null.
11517
11518 If an error occurs in parsing or compilation, in most cases a valid op
11519 tree is returned anyway.  The error is reflected in the parser state,
11520 normally resulting in a single exception at the top level of parsing
11521 which covers all the compilation errors that occurred.  Some compilation
11522 errors, however, will throw an exception immediately.
11523
11524 =cut
11525 */
11526
11527 OP *
11528 Perl_parse_fullexpr(pTHX_ U32 flags)
11529 {
11530     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11531 }
11532
11533 /*
11534 =for apidoc Amx|OP *|parse_block|U32 flags
11535
11536 Parse a single complete Perl code block.  This consists of an opening
11537 brace, a sequence of statements, and a closing brace.  The block
11538 constitutes a lexical scope, so C<my> variables and various compile-time
11539 effects can be contained within it.  It is up to the caller to ensure
11540 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11541 reflect the source of the code to be parsed and the lexical context for
11542 the statement.
11543
11544 The op tree representing the code block is returned.  This is always a
11545 real op, never a null pointer.  It will normally be a C<lineseq> list,
11546 including C<nextstate> or equivalent ops.  No ops to construct any kind
11547 of runtime scope are included by virtue of it being a block.
11548
11549 If an error occurs in parsing or compilation, in most cases a valid op
11550 tree (most likely null) is returned anyway.  The error is reflected in
11551 the parser state, normally resulting in a single exception at the top
11552 level of parsing which covers all the compilation errors that occurred.
11553 Some compilation errors, however, will throw an exception immediately.
11554
11555 The I<flags> parameter is reserved for future use, and must always
11556 be zero.
11557
11558 =cut
11559 */
11560
11561 OP *
11562 Perl_parse_block(pTHX_ U32 flags)
11563 {
11564     if (flags)
11565         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11566     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11567 }
11568
11569 /*
11570 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11571
11572 Parse a single unadorned Perl statement.  This may be a normal imperative
11573 statement or a declaration that has compile-time effect.  It does not
11574 include any label or other affixture.  It is up to the caller to ensure
11575 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11576 reflect the source of the code to be parsed and the lexical context for
11577 the statement.
11578
11579 The op tree representing the statement is returned.  This may be a
11580 null pointer if the statement is null, for example if it was actually
11581 a subroutine definition (which has compile-time side effects).  If not
11582 null, it will be ops directly implementing the statement, suitable to
11583 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11584 equivalent op (except for those embedded in a scope contained entirely
11585 within the statement).
11586
11587 If an error occurs in parsing or compilation, in most cases a valid op
11588 tree (most likely null) is returned anyway.  The error is reflected in
11589 the parser state, normally resulting in a single exception at the top
11590 level of parsing which covers all the compilation errors that occurred.
11591 Some compilation errors, however, will throw an exception immediately.
11592
11593 The I<flags> parameter is reserved for future use, and must always
11594 be zero.
11595
11596 =cut
11597 */
11598
11599 OP *
11600 Perl_parse_barestmt(pTHX_ U32 flags)
11601 {
11602     if (flags)
11603         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11604     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11605 }
11606
11607 /*
11608 =for apidoc Amx|SV *|parse_label|U32 flags
11609
11610 Parse a single label, possibly optional, of the type that may prefix a
11611 Perl statement.  It is up to the caller to ensure that the dynamic parser
11612 state (L</PL_parser> et al) is correctly set to reflect the source of
11613 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11614 label is optional, otherwise it is mandatory.
11615
11616 The name of the label is returned in the form of a fresh scalar.  If an
11617 optional label is absent, a null pointer is returned.
11618
11619 If an error occurs in parsing, which can only occur if the label is
11620 mandatory, a valid label is returned anyway.  The error is reflected in
11621 the parser state, normally resulting in a single exception at the top
11622 level of parsing which covers all the compilation errors that occurred.
11623
11624 =cut
11625 */
11626
11627 SV *
11628 Perl_parse_label(pTHX_ U32 flags)
11629 {
11630     if (flags & ~PARSE_OPTIONAL)
11631         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11632     if (PL_lex_state == LEX_KNOWNEXT) {
11633         PL_parser->yychar = yylex();
11634         if (PL_parser->yychar == LABEL) {
11635             SV *lsv;
11636             PL_parser->yychar = YYEMPTY;
11637             lsv = newSV_type(SVt_PV);
11638             sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11639             return lsv;
11640         } else {
11641             yyunlex();
11642             goto no_label;
11643         }
11644     } else {
11645         char *s, *t;
11646         STRLEN wlen, bufptr_pos;
11647         lex_read_space(0);
11648         t = s = PL_bufptr;
11649         if (!isIDFIRST_lazy_if(s, UTF))
11650             goto no_label;
11651         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11652         if (word_takes_any_delimeter(s, wlen))
11653             goto no_label;
11654         bufptr_pos = s - SvPVX(PL_linestr);
11655         PL_bufptr = t;
11656         lex_read_space(LEX_KEEP_PREVIOUS);
11657         t = PL_bufptr;
11658         s = SvPVX(PL_linestr) + bufptr_pos;
11659         if (t[0] == ':' && t[1] != ':') {
11660             PL_oldoldbufptr = PL_oldbufptr;
11661             PL_oldbufptr = s;
11662             PL_bufptr = t+1;
11663             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11664         } else {
11665             PL_bufptr = s;
11666             no_label:
11667             if (flags & PARSE_OPTIONAL) {
11668                 return NULL;
11669             } else {
11670                 qerror(Perl_mess(aTHX_ "Parse error"));
11671                 return newSVpvs("x");
11672             }
11673         }
11674     }
11675 }
11676
11677 /*
11678 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11679
11680 Parse a single complete Perl statement.  This may be a normal imperative
11681 statement or a declaration that has compile-time effect, and may include
11682 optional labels.  It is up to the caller to ensure that the dynamic
11683 parser state (L</PL_parser> et al) is correctly set to reflect the source
11684 of the code to be parsed and the lexical context for the statement.
11685
11686 The op tree representing the statement is returned.  This may be a
11687 null pointer if the statement is null, for example if it was actually
11688 a subroutine definition (which has compile-time side effects).  If not
11689 null, it will be the result of a L</newSTATEOP> call, normally including
11690 a C<nextstate> or equivalent op.
11691
11692 If an error occurs in parsing or compilation, in most cases a valid op
11693 tree (most likely null) is returned anyway.  The error is reflected in
11694 the parser state, normally resulting in a single exception at the top
11695 level of parsing which covers all the compilation errors that occurred.
11696 Some compilation errors, however, will throw an exception immediately.
11697
11698 The I<flags> parameter is reserved for future use, and must always
11699 be zero.
11700
11701 =cut
11702 */
11703
11704 OP *
11705 Perl_parse_fullstmt(pTHX_ U32 flags)
11706 {
11707     if (flags)
11708         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11709     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11710 }
11711
11712 /*
11713 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11714
11715 Parse a sequence of zero or more Perl statements.  These may be normal
11716 imperative statements, including optional labels, or declarations
11717 that have compile-time effect, or any mixture thereof.  The statement
11718 sequence ends when a closing brace or end-of-file is encountered in a
11719 place where a new statement could have validly started.  It is up to
11720 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11721 is correctly set to reflect the source of the code to be parsed and the
11722 lexical context for the statements.
11723
11724 The op tree representing the statement sequence is returned.  This may
11725 be a null pointer if the statements were all null, for example if there
11726 were no statements or if there were only subroutine definitions (which
11727 have compile-time side effects).  If not null, it will be a C<lineseq>
11728 list, normally including C<nextstate> or equivalent ops.
11729
11730 If an error occurs in parsing or compilation, in most cases a valid op
11731 tree is returned anyway.  The error is reflected in the parser state,
11732 normally resulting in a single exception at the top level of parsing
11733 which covers all the compilation errors that occurred.  Some compilation
11734 errors, however, will throw an exception immediately.
11735
11736 The I<flags> parameter is reserved for future use, and must always
11737 be zero.
11738
11739 =cut
11740 */
11741
11742 OP *
11743 Perl_parse_stmtseq(pTHX_ U32 flags)
11744 {
11745     OP *stmtseqop;
11746     I32 c;
11747     if (flags)
11748         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11749     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11750     c = lex_peek_unichar(0);
11751     if (c != -1 && c != /*{*/'}')
11752         qerror(Perl_mess(aTHX_ "Parse error"));
11753     return stmtseqop;
11754 }
11755
11756 /*
11757  * Local variables:
11758  * c-indentation-style: bsd
11759  * c-basic-offset: 4
11760  * indent-tabs-mode: nil
11761  * End:
11762  *
11763  * ex: set ts=8 sts=4 sw=4 et:
11764  */