toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN1         : not used, except for not, which isn't a UNIOP
 228  * BOop         : bitwise or or xor
 229  * BAop         : bitwise and
 230  * SHop         : shift operator
 231  * PWop         : power operator
 232  * PMop         : pattern-matching operator
 233  * Aop          : addition-level operator
 234  * Mop          : multiplication-level operator
 235  * Eop          : equality-testing operator
 236  * Rop          : relational operator <= != gt
 237  *
 238  * Also see LOP and lop() below.
 239  */
 240
 241 #ifdef DEBUGGING /* Serve -DT. */
 242 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 243 #else
 244 #   define REPORT(retval) (retval)
 245 #endif
 246
 247 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 248 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 249 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 250 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 253 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 254 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 255 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 256 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 257 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 258 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 259 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 260 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 261 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 262 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 263 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 264 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 265 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 266 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 267
 268 /* This bit of chicanery makes a unary function followed by
 269  * a parenthesis into a function with one argument, highest precedence.
 270  * The UNIDOR macro is for unary functions that can be followed by the //
 271  * operator (such as C<shift // 0>).
 272  */
 273 #define UNI2(f,x) { \
 274         pl_yylval.ival = f; \
 275         PL_expect = x; \
 276         PL_bufptr = s; \
 277         PL_last_uni = PL_oldbufptr; \
 278         PL_last_lop_op = f; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284 #define UNI(f)    UNI2(f,XTERM)
 285 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 286
 287 #define UNIBRACK(f) { \
 288         pl_yylval.ival = f; \
 289         PL_bufptr = s; \
 290         PL_last_uni = PL_oldbufptr; \
 291         if (*s == '(') \
 292             return REPORT( (int)FUNC1 ); \
 293         s = PEEKSPACE(s); \
 294         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 295         }
 296
 297 /* grandfather return to old style */
 298 #define OLDLOP(f) \
 299         do { \
 300             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 301                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 302             pl_yylval.ival = (f); \
 303             PL_expect = XTERM; \
 304             PL_bufptr = s; \
 305             return (int)LSTOP; \
 306         } while(0)
 307
 308 #ifdef DEBUGGING
 309
 310 /* how to interpret the pl_yylval associated with the token */
 311 enum token_type {
 312     TOKENTYPE_NONE,
 313     TOKENTYPE_IVAL,
 314     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 315     TOKENTYPE_PVAL,
 316     TOKENTYPE_OPVAL,
 317     TOKENTYPE_GVVAL
 318 };
 319
 320 static struct debug_tokens {
 321     const int token;
 322     enum token_type type;
 323     const char *name;
 324 } const debug_tokens[] =
 325 {
 326     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 327     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 328     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 329     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 330     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 331     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 332     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 333     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 334     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 335     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 336     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 337     { DO,               TOKENTYPE_NONE,         "DO" },
 338     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 339     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 340     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 341     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 342     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 343     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 344     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 345     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 346     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 347     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 348     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 349     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 350     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 351     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 352     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 353     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 354     { IF,               TOKENTYPE_IVAL,         "IF" },
 355     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 356     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 357     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 358     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 359     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 360     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 361     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 362     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 363     { MY,               TOKENTYPE_IVAL,         "MY" },
 364     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 365     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 366     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 367     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 368     { OROR,             TOKENTYPE_NONE,         "OROR" },
 369     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 370     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 371     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 372     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 373     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 374     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 375     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 376     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 377     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 378     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 379     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 380     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 381     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 382     { SUB,              TOKENTYPE_NONE,         "SUB" },
 383     { THING,            TOKENTYPE_OPVAL,        "THING" },
 384     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 385     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 386     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 387     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 388     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 389     { USE,              TOKENTYPE_IVAL,         "USE" },
 390     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 391     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 392     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 393     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 394     { 0,                TOKENTYPE_NONE,         NULL }
 395 };
 396
 397 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 398
 399 STATIC int
 400 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 401 {
 402     dVAR;
 403
 404     PERL_ARGS_ASSERT_TOKEREPORT;
 405
 406     if (DEBUG_T_TEST) {
 407         const char *name = NULL;
 408         enum token_type type = TOKENTYPE_NONE;
 409         const struct debug_tokens *p;
 410         SV* const report = newSVpvs("<== ");
 411
 412         for (p = debug_tokens; p->token; p++) {
 413             if (p->token == (int)rv) {
 414                 name = p->name;
 415                 type = p->type;
 416                 break;
 417             }
 418         }
 419         if (name)
 420             Perl_sv_catpv(aTHX_ report, name);
 421         else if ((char)rv > ' ' && (char)rv < '~')
 422             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 423         else if (!rv)
 424             sv_catpvs(report, "EOF");
 425         else
 426             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 427         switch (type) {
 428         case TOKENTYPE_NONE:
 429         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 430             break;
 431         case TOKENTYPE_IVAL:
 432             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 433             break;
 434         case TOKENTYPE_OPNUM:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 436                                     PL_op_name[lvalp->ival]);
 437             break;
 438         case TOKENTYPE_PVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 440             break;
 441         case TOKENTYPE_OPVAL:
 442             if (lvalp->opval) {
 443                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 444                                     PL_op_name[lvalp->opval->op_type]);
 445                 if (lvalp->opval->op_type == OP_CONST) {
 446                     Perl_sv_catpvf(aTHX_ report, " %s",
 447                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 448                 }
 449
 450             }
 451             else
 452                 sv_catpvs(report, "(opval=null)");
 453             break;
 454         }
 455         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 456     };
 457     return (int)rv;
 458 }
 459
 460
 461 /* print the buffer with suitable escapes */
 462
 463 STATIC void
 464 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 465 {
 466     SV* const tmp = newSVpvs("");
 467
 468     PERL_ARGS_ASSERT_PRINTBUF;
 469
 470     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 471     SvREFCNT_dec(tmp);
 472 }
 473
 474 #endif
 475
 476 static int
 477 S_deprecate_commaless_var_list(pTHX) {
 478     PL_expect = XTERM;
 479     deprecate("comma-less variable list");
 480     return REPORT(','); /* grandfather non-comma-format format */
 481 }
 482
 483 /*
 484  * S_ao
 485  *
 486  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 487  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 488  */
 489
 490 STATIC int
 491 S_ao(pTHX_ int toketype)
 492 {
 493     dVAR;
 494     if (*PL_bufptr == '=') {
 495         PL_bufptr++;
 496         if (toketype == ANDAND)
 497             pl_yylval.ival = OP_ANDASSIGN;
 498         else if (toketype == OROR)
 499             pl_yylval.ival = OP_ORASSIGN;
 500         else if (toketype == DORDOR)
 501             pl_yylval.ival = OP_DORASSIGN;
 502         toketype = ASSIGNOP;
 503     }
 504     return toketype;
 505 }
 506
 507 /*
 508  * S_no_op
 509  * When Perl expects an operator and finds something else, no_op
 510  * prints the warning.  It always prints "<something> found where
 511  * operator expected.  It prints "Missing semicolon on previous line?"
 512  * if the surprise occurs at the start of the line.  "do you need to
 513  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 514  * where the compiler doesn't know if foo is a method call or a function.
 515  * It prints "Missing operator before end of line" if there's nothing
 516  * after the missing operator, or "... before <...>" if there is something
 517  * after the missing operator.
 518  */
 519
 520 STATIC void
 521 S_no_op(pTHX_ const char *const what, char *s)
 522 {
 523     dVAR;
 524     char * const oldbp = PL_bufptr;
 525     const bool is_first = (PL_oldbufptr == PL_linestart);
 526
 527     PERL_ARGS_ASSERT_NO_OP;
 528
 529     if (!s)
 530         s = oldbp;
 531     else
 532         PL_bufptr = s;
 533     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 534     if (ckWARN_d(WARN_SYNTAX)) {
 535         if (is_first)
 536             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 537                     "\t(Missing semicolon on previous line?)\n");
 538         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 539             const char *t;
 540             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 541                 NOOP;
 542             if (t < PL_bufptr && isSPACE(*t))
 543                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                         "\t(Do you need to predeclare %.*s?)\n",
 545                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 546         }
 547         else {
 548             assert(s >= oldbp);
 549             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 550                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 551         }
 552     }
 553     PL_bufptr = oldbp;
 554 }
 555
 556 /*
 557  * S_missingterm
 558  * Complain about missing quote/regexp/heredoc terminator.
 559  * If it's called with NULL then it cauterizes the line buffer.
 560  * If we're in a delimited string and the delimiter is a control
 561  * character, it's reformatted into a two-char sequence like ^C.
 562  * This is fatal.
 563  */
 564
 565 STATIC void
 566 S_missingterm(pTHX_ char *s)
 567 {
 568     dVAR;
 569     char tmpbuf[3];
 570     char q;
 571     if (s) {
 572         char * const nl = strrchr(s,'\n');
 573         if (nl)
 574             *nl = '\0';
 575     }
 576     else if (isCNTRL(PL_multi_close)) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581     }
 582     else {
 583         *tmpbuf = (char)PL_multi_close;
 584         tmpbuf[1] = '\0';
 585         s = tmpbuf;
 586     }
 587     q = strchr(s,'"') ? '\'' : '"';
 588     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 589 }
 590
 591 /*
 592  * Check whether the named feature is enabled.
 593  */
 594 bool
 595 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 596 {
 597     dVAR;
 598     HV * const hinthv = GvHV(PL_hintgv);
 599     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 600
 601     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 602
 603     if (namelen > MAX_FEATURE_LEN)
 604         return FALSE;
 605     memcpy(&he_name[8], name, namelen);
 606
 607     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 608 }
 609
 610 /*
 611  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 612  * utf16-to-utf8-reversed.
 613  */
 614
 615 #ifdef PERL_CR_FILTER
 616 static void
 617 strip_return(SV *sv)
 618 {
 619     register const char *s = SvPVX_const(sv);
 620     register const char * const e = s + SvCUR(sv);
 621
 622     PERL_ARGS_ASSERT_STRIP_RETURN;
 623
 624     /* outer loop optimized to do nothing if there are no CR-LFs */
 625     while (s < e) {
 626         if (*s++ == '\r' && *s == '\n') {
 627             /* hit a CR-LF, need to copy the rest */
 628             register char *d = s - 1;
 629             *d++ = *s++;
 630             while (s < e) {
 631                 if (*s == '\r' && s[1] == '\n')
 632                     s++;
 633                 *d++ = *s++;
 634             }
 635             SvCUR(sv) -= s - d;
 636             return;
 637         }
 638     }
 639 }
 640
 641 STATIC I32
 642 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 643 {
 644     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 645     if (count > 0 && !maxlen)
 646         strip_return(sv);
 647     return count;
 648 }
 649 #endif
 650
 651 /*
 652 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 653
 654 Creates and initialises a new lexer/parser state object, supplying
 655 a context in which to lex and parse from a new source of Perl code.
 656 A pointer to the new state object is placed in L</PL_parser>.  An entry
 657 is made on the save stack so that upon unwinding the new state object
 658 will be destroyed and the former value of L</PL_parser> will be restored.
 659 Nothing else need be done to clean up the parsing context.
 660
 661 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 662 non-null, provides a string (in SV form) containing code to be parsed.
 663 A copy of the string is made, so subsequent modification of I<line>
 664 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 665 from which code will be read to be parsed.  If both are non-null, the
 666 code in I<line> comes first and must consist of complete lines of input,
 667 and I<rsfp> supplies the remainder of the source.
 668
 669 The I<flags> parameter is reserved for future use, and must always
 670 be zero.
 671
 672 =cut
 673 */
 674
 675 void
 676 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 677 {
 678     dVAR;
 679     const char *s = NULL;
 680     STRLEN len;
 681     yy_parser *parser, *oparser;
 682     if (flags)
 683         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 684
 685     /* create and initialise a parser */
 686
 687     Newxz(parser, 1, yy_parser);
 688     parser->old_parser = oparser = PL_parser;
 689     PL_parser = parser;
 690
 691     parser->stack = NULL;
 692     parser->ps = NULL;
 693     parser->stack_size = 0;
 694
 695     /* on scope exit, free this parser and restore any outer one */
 696     SAVEPARSER(parser);
 697     parser->saved_curcop = PL_curcop;
 698
 699     /* initialise lexer state */
 700
 701 #ifdef PERL_MAD
 702     parser->curforce = -1;
 703 #else
 704     parser->nexttoke = 0;
 705 #endif
 706     parser->error_count = oparser ? oparser->error_count : 0;
 707     parser->copline = NOLINE;
 708     parser->lex_state = LEX_NORMAL;
 709     parser->expect = XSTATE;
 710     parser->rsfp = rsfp;
 711     parser->rsfp_filters = newAV();
 712
 713     Newx(parser->lex_brackstack, 120, char);
 714     Newx(parser->lex_casestack, 12, char);
 715     *parser->lex_casestack = '\0';
 716
 717     if (line) {
 718         s = SvPV_const(line, len);
 719     } else {
 720         len = 0;
 721     }
 722
 723     if (!len) {
 724         parser->linestr = newSVpvs("\n;");
 725     } else {
 726         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 727         if (s[len-1] != ';')
 728             sv_catpvs(parser->linestr, "\n;");
 729     }
 730     parser->oldoldbufptr =
 731         parser->oldbufptr =
 732         parser->bufptr =
 733         parser->linestart = SvPVX(parser->linestr);
 734     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 735     parser->last_lop = parser->last_uni = NULL;
 736
 737     parser->in_pod = 0;
 738 }
 739
 740
 741 /* delete a parser object */
 742
 743 void
 744 Perl_parser_free(pTHX_  const yy_parser *parser)
 745 {
 746     PERL_ARGS_ASSERT_PARSER_FREE;
 747
 748     PL_curcop = parser->saved_curcop;
 749     SvREFCNT_dec(parser->linestr);
 750
 751     if (parser->rsfp == PerlIO_stdin())
 752         PerlIO_clearerr(parser->rsfp);
 753     else if (parser->rsfp && (!parser->old_parser ||
 754                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 755         PerlIO_close(parser->rsfp);
 756     SvREFCNT_dec(parser->rsfp_filters);
 757
 758     Safefree(parser->lex_brackstack);
 759     Safefree(parser->lex_casestack);
 760     PL_parser = parser->old_parser;
 761     Safefree(parser);
 762 }
 763
 764
 765 /*
 766 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 767
 768 Buffer scalar containing the chunk currently under consideration of the
 769 text currently being lexed.  This is always a plain string scalar (for
 770 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 771 normal scalar means; instead refer to the buffer directly by the pointer
 772 variables described below.
 773
 774 The lexer maintains various C<char*> pointers to things in the
 775 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 776 reallocated, all of these pointers must be updated.  Don't attempt to
 777 do this manually, but rather use L</lex_grow_linestr> if you need to
 778 reallocate the buffer.
 779
 780 The content of the text chunk in the buffer is commonly exactly one
 781 complete line of input, up to and including a newline terminator,
 782 but there are situations where it is otherwise.  The octets of the
 783 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 784 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 785 flag on this scalar, which may disagree with it.
 786
 787 For direct examination of the buffer, the variable
 788 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 789 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 790 of these pointers is usually preferable to examination of the scalar
 791 through normal scalar means.
 792
 793 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 794
 795 Direct pointer to the end of the chunk of text currently being lexed, the
 796 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 797 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 798 always located at the end of the buffer, and does not count as part of
 799 the buffer's contents.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 802
 803 Points to the current position of lexing inside the lexer buffer.
 804 Characters around this point may be freely examined, within
 805 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 806 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 807 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 808
 809 Lexing code (whether in the Perl core or not) moves this pointer past
 810 the characters that it consumes.  It is also expected to perform some
 811 bookkeeping whenever a newline character is consumed.  This movement
 812 can be more conveniently performed by the function L</lex_read_to>,
 813 which handles newlines appropriately.
 814
 815 Interpretation of the buffer's octets can be abstracted out by
 816 using the slightly higher-level functions L</lex_peek_unichar> and
 817 L</lex_read_unichar>.
 818
 819 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 820
 821 Points to the start of the current line inside the lexer buffer.
 822 This is useful for indicating at which column an error occurred, and
 823 not much else.  This must be updated by any lexing code that consumes
 824 a newline; the function L</lex_read_to> handles this detail.
 825
 826 =cut
 827 */
 828
 829 /*
 830 =for apidoc Amx|bool|lex_bufutf8
 831
 832 Indicates whether the octets in the lexer buffer
 833 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 834 of Unicode characters.  If not, they should be interpreted as Latin-1
 835 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 836
 837 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 838 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 839 encoding.
 840
 841 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 842 is significant, but not the whole story regarding the input character
 843 encoding.  Normally, when a file is being read, the scalar contains octets
 844 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 845 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 846 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 847 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 848 is in effect.  This logic may change in the future; use this function
 849 instead of implementing the logic yourself.
 850
 851 =cut
 852 */
 853
 854 bool
 855 Perl_lex_bufutf8(pTHX)
 856 {
 857     return UTF;
 858 }
 859
 860 /*
 861 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 862
 863 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 864 at least I<len> octets (including terminating NUL).  Returns a
 865 pointer to the reallocated buffer.  This is necessary before making
 866 any direct modification of the buffer that would increase its length.
 867 L</lex_stuff_pvn> provides a more convenient way to insert text into
 868 the buffer.
 869
 870 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 871 this function updates all of the lexer's variables that point directly
 872 into the buffer.
 873
 874 =cut
 875 */
 876
 877 char *
 878 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 879 {
 880     SV *linestr;
 881     char *buf;
 882     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 883     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 884     linestr = PL_parser->linestr;
 885     buf = SvPVX(linestr);
 886     if (len <= SvLEN(linestr))
 887         return buf;
 888     bufend_pos = PL_parser->bufend - buf;
 889     bufptr_pos = PL_parser->bufptr - buf;
 890     oldbufptr_pos = PL_parser->oldbufptr - buf;
 891     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 892     linestart_pos = PL_parser->linestart - buf;
 893     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 894     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 895     buf = sv_grow(linestr, len);
 896     PL_parser->bufend = buf + bufend_pos;
 897     PL_parser->bufptr = buf + bufptr_pos;
 898     PL_parser->oldbufptr = buf + oldbufptr_pos;
 899     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 900     PL_parser->linestart = buf + linestart_pos;
 901     if (PL_parser->last_uni)
 902         PL_parser->last_uni = buf + last_uni_pos;
 903     if (PL_parser->last_lop)
 904         PL_parser->last_lop = buf + last_lop_pos;
 905     return buf;
 906 }
 907
 908 /*
 909 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 910
 911 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 912 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 913 reallocating the buffer if necessary.  This means that lexing code that
 914 runs later will see the characters as if they had appeared in the input.
 915 It is not recommended to do this as part of normal parsing, and most
 916 uses of this facility run the risk of the inserted characters being
 917 interpreted in an unintended manner.
 918
 919 The string to be inserted is represented by I<len> octets starting
 920 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 921 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 922 The characters are recoded for the lexer buffer, according to how the
 923 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 924 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 925 function is more convenient.
 926
 927 =cut
 928 */
 929
 930 void
 931 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 932 {
 933     dVAR;
 934     char *bufptr;
 935     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 936     if (flags & ~(LEX_STUFF_UTF8))
 937         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 938     if (UTF) {
 939         if (flags & LEX_STUFF_UTF8) {
 940             goto plain_copy;
 941         } else {
 942             STRLEN highhalf = 0;
 943             const char *p, *e = pv+len;
 944             for (p = pv; p != e; p++)
 945                 highhalf += !!(((U8)*p) & 0x80);
 946             if (!highhalf)
 947                 goto plain_copy;
 948             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 949             bufptr = PL_parser->bufptr;
 950             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 951             SvCUR_set(PL_parser->linestr,
 952                 SvCUR(PL_parser->linestr) + len+highhalf);
 953             PL_parser->bufend += len+highhalf;
 954             for (p = pv; p != e; p++) {
 955                 U8 c = (U8)*p;
 956                 if (c & 0x80) {
 957                     *bufptr++ = (char)(0xc0 | (c >> 6));
 958                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 959                 } else {
 960                     *bufptr++ = (char)c;
 961                 }
 962             }
 963         }
 964     } else {
 965         if (flags & LEX_STUFF_UTF8) {
 966             STRLEN highhalf = 0;
 967             const char *p, *e = pv+len;
 968             for (p = pv; p != e; p++) {
 969                 U8 c = (U8)*p;
 970                 if (c >= 0xc4) {
 971                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 972                                 "non-Latin-1 character into Latin-1 input");
 973                 } else if (c >= 0xc2 && p+1 != e &&
 974                             (((U8)p[1]) & 0xc0) == 0x80) {
 975                     p++;
 976                     highhalf++;
 977                 } else if (c >= 0x80) {
 978                     /* malformed UTF-8 */
 979                     ENTER;
 980                     SAVESPTR(PL_warnhook);
 981                     PL_warnhook = PERL_WARNHOOK_FATAL;
 982                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 983                     LEAVE;
 984                 }
 985             }
 986             if (!highhalf)
 987                 goto plain_copy;
 988             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 989             bufptr = PL_parser->bufptr;
 990             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 991             SvCUR_set(PL_parser->linestr,
 992                 SvCUR(PL_parser->linestr) + len-highhalf);
 993             PL_parser->bufend += len-highhalf;
 994             for (p = pv; p != e; p++) {
 995                 U8 c = (U8)*p;
 996                 if (c & 0x80) {
 997                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
 998                     p++;
 999                 } else {
1000                     *bufptr++ = (char)c;
1001                 }
1002             }
1003         } else {
1004             plain_copy:
1005             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1006             bufptr = PL_parser->bufptr;
1007             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1008             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1009             PL_parser->bufend += len;
1010             Copy(pv, bufptr, len, char);
1011         }
1012     }
1013 }
1014
1015 /*
1016 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1017
1018 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1019 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1020 reallocating the buffer if necessary.  This means that lexing code that
1021 runs later will see the characters as if they had appeared in the input.
1022 It is not recommended to do this as part of normal parsing, and most
1023 uses of this facility run the risk of the inserted characters being
1024 interpreted in an unintended manner.
1025
1026 The string to be inserted is represented by octets starting at I<pv>
1027 and continuing to the first nul.  These octets are interpreted as either
1028 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1029 in I<flags>.  The characters are recoded for the lexer buffer, according
1030 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1031 If it is not convenient to nul-terminate a string to be inserted, the
1032 L</lex_stuff_pvn> function is more appropriate.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1039 {
1040     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1041     lex_stuff_pvn(pv, strlen(pv), flags);
1042 }
1043
1044 /*
1045 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1046
1047 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049 reallocating the buffer if necessary.  This means that lexing code that
1050 runs later will see the characters as if they had appeared in the input.
1051 It is not recommended to do this as part of normal parsing, and most
1052 uses of this facility run the risk of the inserted characters being
1053 interpreted in an unintended manner.
1054
1055 The string to be inserted is the string value of I<sv>.  The characters
1056 are recoded for the lexer buffer, according to how the buffer is currently
1057 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1058 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1059 need to construct a scalar.
1060
1061 =cut
1062 */
1063
1064 void
1065 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1066 {
1067     char *pv;
1068     STRLEN len;
1069     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1070     if (flags)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1072     pv = SvPV(sv, len);
1073     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1074 }
1075
1076 /*
1077 =for apidoc Amx|void|lex_unstuff|char *ptr
1078
1079 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1080 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1081 This hides the discarded text from any lexing code that runs later,
1082 as if the text had never appeared.
1083
1084 This is not the normal way to consume lexed text.  For that, use
1085 L</lex_read_to>.
1086
1087 =cut
1088 */
1089
1090 void
1091 Perl_lex_unstuff(pTHX_ char *ptr)
1092 {
1093     char *buf, *bufend;
1094     STRLEN unstuff_len;
1095     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1096     buf = PL_parser->bufptr;
1097     if (ptr < buf)
1098         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1099     if (ptr == buf)
1100         return;
1101     bufend = PL_parser->bufend;
1102     if (ptr > bufend)
1103         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1104     unstuff_len = ptr - buf;
1105     Move(ptr, buf, bufend+1-ptr, char);
1106     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1107     PL_parser->bufend = bufend - unstuff_len;
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_read_to|char *ptr
1112
1113 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1114 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1115 performing the correct bookkeeping whenever a newline character is passed.
1116 This is the normal way to consume lexed text.
1117
1118 Interpretation of the buffer's octets can be abstracted out by
1119 using the slightly higher-level functions L</lex_peek_unichar> and
1120 L</lex_read_unichar>.
1121
1122 =cut
1123 */
1124
1125 void
1126 Perl_lex_read_to(pTHX_ char *ptr)
1127 {
1128     char *s;
1129     PERL_ARGS_ASSERT_LEX_READ_TO;
1130     s = PL_parser->bufptr;
1131     if (ptr < s || ptr > PL_parser->bufend)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1133     for (; s != ptr; s++)
1134         if (*s == '\n') {
1135             CopLINE_inc(PL_curcop);
1136             PL_parser->linestart = s+1;
1137         }
1138     PL_parser->bufptr = ptr;
1139 }
1140
1141 /*
1142 =for apidoc Amx|void|lex_discard_to|char *ptr
1143
1144 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1145 up to I<ptr>.  The remaining content of the buffer will be moved, and
1146 all pointers into the buffer updated appropriately.  I<ptr> must not
1147 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1148 it is not permitted to discard text that has yet to be lexed.
1149
1150 Normally it is not necessarily to do this directly, because it suffices to
1151 use the implicit discarding behaviour of L</lex_next_chunk> and things
1152 based on it.  However, if a token stretches across multiple lines,
1153 and the lexing code has kept multiple lines of text in the buffer for
1154 that purpose, then after completion of the token it would be wise to
1155 explicitly discard the now-unneeded earlier lines, to avoid future
1156 multi-line tokens growing the buffer without bound.
1157
1158 =cut
1159 */
1160
1161 void
1162 Perl_lex_discard_to(pTHX_ char *ptr)
1163 {
1164     char *buf;
1165     STRLEN discard_len;
1166     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1167     buf = SvPVX(PL_parser->linestr);
1168     if (ptr < buf)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170     if (ptr == buf)
1171         return;
1172     if (ptr > PL_parser->bufptr)
1173         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174     discard_len = ptr - buf;
1175     if (PL_parser->oldbufptr < ptr)
1176         PL_parser->oldbufptr = ptr;
1177     if (PL_parser->oldoldbufptr < ptr)
1178         PL_parser->oldoldbufptr = ptr;
1179     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1180         PL_parser->last_uni = NULL;
1181     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1182         PL_parser->last_lop = NULL;
1183     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1184     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1185     PL_parser->bufend -= discard_len;
1186     PL_parser->bufptr -= discard_len;
1187     PL_parser->oldbufptr -= discard_len;
1188     PL_parser->oldoldbufptr -= discard_len;
1189     if (PL_parser->last_uni)
1190         PL_parser->last_uni -= discard_len;
1191     if (PL_parser->last_lop)
1192         PL_parser->last_lop -= discard_len;
1193 }
1194
1195 /*
1196 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1197
1198 Reads in the next chunk of text to be lexed, appending it to
1199 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1200 looked to the end of the current chunk and wants to know more.  It is
1201 usual, but not necessary, for lexing to have consumed the entirety of
1202 the current chunk at this time.
1203
1204 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1205 chunk (i.e., the current chunk has been entirely consumed), normally the
1206 current chunk will be discarded at the same time that the new chunk is
1207 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1208 will not be discarded.  If the current chunk has not been entirely
1209 consumed, then it will not be discarded regardless of the flag.
1210
1211 Returns true if some new text was added to the buffer, or false if the
1212 buffer has reached the end of the input text.
1213
1214 =cut
1215 */
1216
1217 #define LEX_FAKE_EOF 0x80000000
1218
1219 bool
1220 Perl_lex_next_chunk(pTHX_ U32 flags)
1221 {
1222     SV *linestr;
1223     char *buf;
1224     STRLEN old_bufend_pos, new_bufend_pos;
1225     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1226     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1227     bool got_some_for_debugger = 0;
1228     bool got_some;
1229     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1230         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1231     linestr = PL_parser->linestr;
1232     buf = SvPVX(linestr);
1233     if (!(flags & LEX_KEEP_PREVIOUS) &&
1234             PL_parser->bufptr == PL_parser->bufend) {
1235         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1236         linestart_pos = 0;
1237         if (PL_parser->last_uni != PL_parser->bufend)
1238             PL_parser->last_uni = NULL;
1239         if (PL_parser->last_lop != PL_parser->bufend)
1240             PL_parser->last_lop = NULL;
1241         last_uni_pos = last_lop_pos = 0;
1242         *buf = 0;
1243         SvCUR(linestr) = 0;
1244     } else {
1245         old_bufend_pos = PL_parser->bufend - buf;
1246         bufptr_pos = PL_parser->bufptr - buf;
1247         oldbufptr_pos = PL_parser->oldbufptr - buf;
1248         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1249         linestart_pos = PL_parser->linestart - buf;
1250         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1251         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1252     }
1253     if (flags & LEX_FAKE_EOF) {
1254         goto eof;
1255     } else if (!PL_parser->rsfp) {
1256         got_some = 0;
1257     } else if (filter_gets(linestr, old_bufend_pos)) {
1258         got_some = 1;
1259         got_some_for_debugger = 1;
1260     } else {
1261         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1262             sv_setpvs(linestr, "");
1263         eof:
1264         /* End of real input.  Close filehandle (unless it was STDIN),
1265          * then add implicit termination.
1266          */
1267         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1268             PerlIO_clearerr(PL_parser->rsfp);
1269         else if (PL_parser->rsfp)
1270             (void)PerlIO_close(PL_parser->rsfp);
1271         PL_parser->rsfp = NULL;
1272         PL_parser->in_pod = 0;
1273 #ifdef PERL_MAD
1274         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1275             PL_faketokens = 1;
1276 #endif
1277         if (!PL_in_eval && PL_minus_p) {
1278             sv_catpvs(linestr,
1279                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1280             PL_minus_n = PL_minus_p = 0;
1281         } else if (!PL_in_eval && PL_minus_n) {
1282             sv_catpvs(linestr, /*{*/";}");
1283             PL_minus_n = 0;
1284         } else
1285             sv_catpvs(linestr, ";");
1286         got_some = 1;
1287     }
1288     buf = SvPVX(linestr);
1289     new_bufend_pos = SvCUR(linestr);
1290     PL_parser->bufend = buf + new_bufend_pos;
1291     PL_parser->bufptr = buf + bufptr_pos;
1292     PL_parser->oldbufptr = buf + oldbufptr_pos;
1293     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1294     PL_parser->linestart = buf + linestart_pos;
1295     if (PL_parser->last_uni)
1296         PL_parser->last_uni = buf + last_uni_pos;
1297     if (PL_parser->last_lop)
1298         PL_parser->last_lop = buf + last_lop_pos;
1299     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1300             PL_curstash != PL_debstash) {
1301         /* debugger active and we're not compiling the debugger code,
1302          * so store the line into the debugger's array of lines
1303          */
1304         update_debugger_info(NULL, buf+old_bufend_pos,
1305             new_bufend_pos-old_bufend_pos);
1306     }
1307     return got_some;
1308 }
1309
1310 /*
1311 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1312
1313 Looks ahead one (Unicode) character in the text currently being lexed.
1314 Returns the codepoint (unsigned integer value) of the next character,
1315 or -1 if lexing has reached the end of the input text.  To consume the
1316 peeked character, use L</lex_read_unichar>.
1317
1318 If the next character is in (or extends into) the next chunk of input
1319 text, the next chunk will be read in.  Normally the current chunk will be
1320 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1321 then the current chunk will not be discarded.
1322
1323 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1324 is encountered, an exception is generated.
1325
1326 =cut
1327 */
1328
1329 I32
1330 Perl_lex_peek_unichar(pTHX_ U32 flags)
1331 {
1332     dVAR;
1333     char *s, *bufend;
1334     if (flags & ~(LEX_KEEP_PREVIOUS))
1335         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1336     s = PL_parser->bufptr;
1337     bufend = PL_parser->bufend;
1338     if (UTF) {
1339         U8 head;
1340         I32 unichar;
1341         STRLEN len, retlen;
1342         if (s == bufend) {
1343             if (!lex_next_chunk(flags))
1344                 return -1;
1345             s = PL_parser->bufptr;
1346             bufend = PL_parser->bufend;
1347         }
1348         head = (U8)*s;
1349         if (!(head & 0x80))
1350             return head;
1351         if (head & 0x40) {
1352             len = PL_utf8skip[head];
1353             while ((STRLEN)(bufend-s) < len) {
1354                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1355                     break;
1356                 s = PL_parser->bufptr;
1357                 bufend = PL_parser->bufend;
1358             }
1359         }
1360         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1361         if (retlen == (STRLEN)-1) {
1362             /* malformed UTF-8 */
1363             ENTER;
1364             SAVESPTR(PL_warnhook);
1365             PL_warnhook = PERL_WARNHOOK_FATAL;
1366             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1367             LEAVE;
1368         }
1369         return unichar;
1370     } else {
1371         if (s == bufend) {
1372             if (!lex_next_chunk(flags))
1373                 return -1;
1374             s = PL_parser->bufptr;
1375         }
1376         return (U8)*s;
1377     }
1378 }
1379
1380 /*
1381 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1382
1383 Reads the next (Unicode) character in the text currently being lexed.
1384 Returns the codepoint (unsigned integer value) of the character read,
1385 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1386 if lexing has reached the end of the input text.  To non-destructively
1387 examine the next character, use L</lex_peek_unichar> instead.
1388
1389 If the next character is in (or extends into) the next chunk of input
1390 text, the next chunk will be read in.  Normally the current chunk will be
1391 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1392 then the current chunk will not be discarded.
1393
1394 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1395 is encountered, an exception is generated.
1396
1397 =cut
1398 */
1399
1400 I32
1401 Perl_lex_read_unichar(pTHX_ U32 flags)
1402 {
1403     I32 c;
1404     if (flags & ~(LEX_KEEP_PREVIOUS))
1405         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1406     c = lex_peek_unichar(flags);
1407     if (c != -1) {
1408         if (c == '\n')
1409             CopLINE_inc(PL_curcop);
1410         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1411     }
1412     return c;
1413 }
1414
1415 /*
1416 =for apidoc Amx|void|lex_read_space|U32 flags
1417
1418 Reads optional spaces, in Perl style, in the text currently being
1419 lexed.  The spaces may include ordinary whitespace characters and
1420 Perl-style comments.  C<#line> directives are processed if encountered.
1421 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1422 at a non-space character (or the end of the input text).
1423
1424 If spaces extend into the next chunk of input text, the next chunk will
1425 be read in.  Normally the current chunk will be discarded at the same
1426 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1427 chunk will not be discarded.
1428
1429 =cut
1430 */
1431
1432 #define LEX_NO_NEXT_CHUNK 0x80000000
1433
1434 void
1435 Perl_lex_read_space(pTHX_ U32 flags)
1436 {
1437     char *s, *bufend;
1438     bool need_incline = 0;
1439     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1440         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1441 #ifdef PERL_MAD
1442     if (PL_skipwhite) {
1443         sv_free(PL_skipwhite);
1444         PL_skipwhite = NULL;
1445     }
1446     if (PL_madskills)
1447         PL_skipwhite = newSVpvs("");
1448 #endif /* PERL_MAD */
1449     s = PL_parser->bufptr;
1450     bufend = PL_parser->bufend;
1451     while (1) {
1452         char c = *s;
1453         if (c == '#') {
1454             do {
1455                 c = *++s;
1456             } while (!(c == '\n' || (c == 0 && s == bufend)));
1457         } else if (c == '\n') {
1458             s++;
1459             PL_parser->linestart = s;
1460             if (s == bufend)
1461                 need_incline = 1;
1462             else
1463                 incline(s);
1464         } else if (isSPACE(c)) {
1465             s++;
1466         } else if (c == 0 && s == bufend) {
1467             bool got_more;
1468 #ifdef PERL_MAD
1469             if (PL_madskills)
1470                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1471 #endif /* PERL_MAD */
1472             if (flags & LEX_NO_NEXT_CHUNK)
1473                 break;
1474             PL_parser->bufptr = s;
1475             CopLINE_inc(PL_curcop);
1476             got_more = lex_next_chunk(flags);
1477             CopLINE_dec(PL_curcop);
1478             s = PL_parser->bufptr;
1479             bufend = PL_parser->bufend;
1480             if (!got_more)
1481                 break;
1482             if (need_incline && PL_parser->rsfp) {
1483                 incline(s);
1484                 need_incline = 0;
1485             }
1486         } else {
1487             break;
1488         }
1489     }
1490 #ifdef PERL_MAD
1491     if (PL_madskills)
1492         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493 #endif /* PERL_MAD */
1494     PL_parser->bufptr = s;
1495 }
1496
1497 /*
1498  * S_incline
1499  * This subroutine has nothing to do with tilting, whether at windmills
1500  * or pinball tables.  Its name is short for "increment line".  It
1501  * increments the current line number in CopLINE(PL_curcop) and checks
1502  * to see whether the line starts with a comment of the form
1503  *    # line 500 "foo.pm"
1504  * If so, it sets the current line number and file to the values in the comment.
1505  */
1506
1507 STATIC void
1508 S_incline(pTHX_ const char *s)
1509 {
1510     dVAR;
1511     const char *t;
1512     const char *n;
1513     const char *e;
1514     line_t line_num;
1515
1516     PERL_ARGS_ASSERT_INCLINE;
1517
1518     CopLINE_inc(PL_curcop);
1519     if (*s++ != '#')
1520         return;
1521     while (SPACE_OR_TAB(*s))
1522         s++;
1523     if (strnEQ(s, "line", 4))
1524         s += 4;
1525     else
1526         return;
1527     if (SPACE_OR_TAB(*s))
1528         s++;
1529     else
1530         return;
1531     while (SPACE_OR_TAB(*s))
1532         s++;
1533     if (!isDIGIT(*s))
1534         return;
1535
1536     n = s;
1537     while (isDIGIT(*s))
1538         s++;
1539     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1540         return;
1541     while (SPACE_OR_TAB(*s))
1542         s++;
1543     if (*s == '"' && (t = strchr(s+1, '"'))) {
1544         s++;
1545         e = t + 1;
1546     }
1547     else {
1548         t = s;
1549         while (!isSPACE(*t))
1550             t++;
1551         e = t;
1552     }
1553     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1554         e++;
1555     if (*e != '\n' && *e != '\0')
1556         return;         /* false alarm */
1557
1558     line_num = atoi(n)-1;
1559
1560     if (t - s > 0) {
1561         const STRLEN len = t - s;
1562         SV *const temp_sv = CopFILESV(PL_curcop);
1563         const char *cf;
1564         STRLEN tmplen;
1565
1566         if (temp_sv) {
1567             cf = SvPVX(temp_sv);
1568             tmplen = SvCUR(temp_sv);
1569         } else {
1570             cf = NULL;
1571             tmplen = 0;
1572         }
1573
1574         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1575             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576              * to *{"::_<newfilename"} */
1577             /* However, the long form of evals is only turned on by the
1578                debugger - usually they're "(eval %lu)" */
1579             char smallbuf[128];
1580             char *tmpbuf;
1581             GV **gvp;
1582             STRLEN tmplen2 = len;
1583             if (tmplen + 2 <= sizeof smallbuf)
1584                 tmpbuf = smallbuf;
1585             else
1586                 Newx(tmpbuf, tmplen + 2, char);
1587             tmpbuf[0] = '_';
1588             tmpbuf[1] = '<';
1589             memcpy(tmpbuf + 2, cf, tmplen);
1590             tmplen += 2;
1591             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1592             if (gvp) {
1593                 char *tmpbuf2;
1594                 GV *gv2;
1595
1596                 if (tmplen2 + 2 <= sizeof smallbuf)
1597                     tmpbuf2 = smallbuf;
1598                 else
1599                     Newx(tmpbuf2, tmplen2 + 2, char);
1600
1601                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602                     /* Either they malloc'd it, or we malloc'd it,
1603                        so no prefix is present in ours.  */
1604                     tmpbuf2[0] = '_';
1605                     tmpbuf2[1] = '<';
1606                 }
1607
1608                 memcpy(tmpbuf2 + 2, s, tmplen2);
1609                 tmplen2 += 2;
1610
1611                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1612                 if (!isGV(gv2)) {
1613                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1614                     /* adjust ${"::_<newfilename"} to store the new file name */
1615                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1616                     /* The line number may differ. If that is the case,
1617                        alias the saved lines that are in the array.
1618                        Otherwise alias the whole array. */
1619                     if (CopLINE(PL_curcop) == line_num) {
1620                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1622                     }
1623                     else if (GvAV(*gvp)) {
1624                         AV * const av = GvAV(*gvp);
1625                         const I32 start = CopLINE(PL_curcop)+1;
1626                         I32 items = AvFILLp(av) - start;
1627                         if (items > 0) {
1628                             AV * const av2 = GvAVn(gv2);
1629                             SV **svp = AvARRAY(av) + start;
1630                             I32 l = (I32)line_num+1;
1631                             while (items--)
1632                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1633                         }
1634                     }
1635                 }
1636
1637                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1638             }
1639             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1640         }
1641         CopFILE_free(PL_curcop);
1642         CopFILE_setn(PL_curcop, s, len);
1643     }
1644     CopLINE_set(PL_curcop, line_num);
1645 }
1646
1647 #ifdef PERL_MAD
1648 /* skip space before PL_thistoken */
1649
1650 STATIC char *
1651 S_skipspace0(pTHX_ register char *s)
1652 {
1653     PERL_ARGS_ASSERT_SKIPSPACE0;
1654
1655     s = skipspace(s);
1656     if (!PL_madskills)
1657         return s;
1658     if (PL_skipwhite) {
1659         if (!PL_thiswhite)
1660             PL_thiswhite = newSVpvs("");
1661         sv_catsv(PL_thiswhite, PL_skipwhite);
1662         sv_free(PL_skipwhite);
1663         PL_skipwhite = 0;
1664     }
1665     PL_realtokenstart = s - SvPVX(PL_linestr);
1666     return s;
1667 }
1668
1669 /* skip space after PL_thistoken */
1670
1671 STATIC char *
1672 S_skipspace1(pTHX_ register char *s)
1673 {
1674     const char *start = s;
1675     I32 startoff = start - SvPVX(PL_linestr);
1676
1677     PERL_ARGS_ASSERT_SKIPSPACE1;
1678
1679     s = skipspace(s);
1680     if (!PL_madskills)
1681         return s;
1682     start = SvPVX(PL_linestr) + startoff;
1683     if (!PL_thistoken && PL_realtokenstart >= 0) {
1684         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1685         PL_thistoken = newSVpvn(tstart, start - tstart);
1686     }
1687     PL_realtokenstart = -1;
1688     if (PL_skipwhite) {
1689         if (!PL_nextwhite)
1690             PL_nextwhite = newSVpvs("");
1691         sv_catsv(PL_nextwhite, PL_skipwhite);
1692         sv_free(PL_skipwhite);
1693         PL_skipwhite = 0;
1694     }
1695     return s;
1696 }
1697
1698 STATIC char *
1699 S_skipspace2(pTHX_ register char *s, SV **svp)
1700 {
1701     char *start;
1702     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1703     const I32 startoff = s - SvPVX(PL_linestr);
1704
1705     PERL_ARGS_ASSERT_SKIPSPACE2;
1706
1707     s = skipspace(s);
1708     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1709     if (!PL_madskills || !svp)
1710         return s;
1711     start = SvPVX(PL_linestr) + startoff;
1712     if (!PL_thistoken && PL_realtokenstart >= 0) {
1713         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1714         PL_thistoken = newSVpvn(tstart, start - tstart);
1715         PL_realtokenstart = -1;
1716     }
1717     if (PL_skipwhite) {
1718         if (!*svp)
1719             *svp = newSVpvs("");
1720         sv_setsv(*svp, PL_skipwhite);
1721         sv_free(PL_skipwhite);
1722         PL_skipwhite = 0;
1723     }
1724
1725     return s;
1726 }
1727 #endif
1728
1729 STATIC void
1730 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1731 {
1732     AV *av = CopFILEAVx(PL_curcop);
1733     if (av) {
1734         SV * const sv = newSV_type(SVt_PVMG);
1735         if (orig_sv)
1736             sv_setsv(sv, orig_sv);
1737         else
1738             sv_setpvn(sv, buf, len);
1739         (void)SvIOK_on(sv);
1740         SvIV_set(sv, 0);
1741         av_store(av, (I32)CopLINE(PL_curcop), sv);
1742     }
1743 }
1744
1745 /*
1746  * S_skipspace
1747  * Called to gobble the appropriate amount and type of whitespace.
1748  * Skips comments as well.
1749  */
1750
1751 STATIC char *
1752 S_skipspace(pTHX_ register char *s)
1753 {
1754 #ifdef PERL_MAD
1755     char *start = s;
1756 #endif /* PERL_MAD */
1757     PERL_ARGS_ASSERT_SKIPSPACE;
1758 #ifdef PERL_MAD
1759     if (PL_skipwhite) {
1760         sv_free(PL_skipwhite);
1761         PL_skipwhite = NULL;
1762     }
1763 #endif /* PERL_MAD */
1764     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1765         while (s < PL_bufend && SPACE_OR_TAB(*s))
1766             s++;
1767     } else {
1768         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1769         PL_bufptr = s;
1770         lex_read_space(LEX_KEEP_PREVIOUS |
1771                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1772                     LEX_NO_NEXT_CHUNK : 0));
1773         s = PL_bufptr;
1774         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1775         if (PL_linestart > PL_bufptr)
1776             PL_bufptr = PL_linestart;
1777         return s;
1778     }
1779 #ifdef PERL_MAD
1780     if (PL_madskills)
1781         PL_skipwhite = newSVpvn(start, s-start);
1782 #endif /* PERL_MAD */
1783     return s;
1784 }
1785
1786 /*
1787  * S_check_uni
1788  * Check the unary operators to ensure there's no ambiguity in how they're
1789  * used.  An ambiguous piece of code would be:
1790  *     rand + 5
1791  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1792  * the +5 is its argument.
1793  */
1794
1795 STATIC void
1796 S_check_uni(pTHX)
1797 {
1798     dVAR;
1799     const char *s;
1800     const char *t;
1801
1802     if (PL_oldoldbufptr != PL_last_uni)
1803         return;
1804     while (isSPACE(*PL_last_uni))
1805         PL_last_uni++;
1806     s = PL_last_uni;
1807     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1808         s++;
1809     if ((t = strchr(s, '(')) && t < PL_bufptr)
1810         return;
1811
1812     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1813                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1814                      (int)(s - PL_last_uni), PL_last_uni);
1815 }
1816
1817 /*
1818  * LOP : macro to build a list operator.  Its behaviour has been replaced
1819  * with a subroutine, S_lop() for which LOP is just another name.
1820  */
1821
1822 #define LOP(f,x) return lop(f,x,s)
1823
1824 /*
1825  * S_lop
1826  * Build a list operator (or something that might be one).  The rules:
1827  *  - if we have a next token, then it's a list operator [why?]
1828  *  - if the next thing is an opening paren, then it's a function
1829  *  - else it's a list operator
1830  */
1831
1832 STATIC I32
1833 S_lop(pTHX_ I32 f, int x, char *s)
1834 {
1835     dVAR;
1836
1837     PERL_ARGS_ASSERT_LOP;
1838
1839     pl_yylval.ival = f;
1840     CLINE;
1841     PL_expect = x;
1842     PL_bufptr = s;
1843     PL_last_lop = PL_oldbufptr;
1844     PL_last_lop_op = (OPCODE)f;
1845 #ifdef PERL_MAD
1846     if (PL_lasttoke)
1847         goto lstop;
1848 #else
1849     if (PL_nexttoke)
1850         goto lstop;
1851 #endif
1852     if (*s == '(')
1853         return REPORT(FUNC);
1854     s = PEEKSPACE(s);
1855     if (*s == '(')
1856         return REPORT(FUNC);
1857     else {
1858         lstop:
1859         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1860             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1861         return REPORT(LSTOP);
1862     }
1863 }
1864
1865 #ifdef PERL_MAD
1866  /*
1867  * S_start_force
1868  * Sets up for an eventual force_next().  start_force(0) basically does
1869  * an unshift, while start_force(-1) does a push.  yylex removes items
1870  * on the "pop" end.
1871  */
1872
1873 STATIC void
1874 S_start_force(pTHX_ int where)
1875 {
1876     int i;
1877
1878     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1879         where = PL_lasttoke;
1880     assert(PL_curforce < 0 || PL_curforce == where);
1881     if (PL_curforce != where) {
1882         for (i = PL_lasttoke; i > where; --i) {
1883             PL_nexttoke[i] = PL_nexttoke[i-1];
1884         }
1885         PL_lasttoke++;
1886     }
1887     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1888         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1889     PL_curforce = where;
1890     if (PL_nextwhite) {
1891         if (PL_madskills)
1892             curmad('^', newSVpvs(""));
1893         CURMAD('_', PL_nextwhite);
1894     }
1895 }
1896
1897 STATIC void
1898 S_curmad(pTHX_ char slot, SV *sv)
1899 {
1900     MADPROP **where;
1901
1902     if (!sv)
1903         return;
1904     if (PL_curforce < 0)
1905         where = &PL_thismad;
1906     else
1907         where = &PL_nexttoke[PL_curforce].next_mad;
1908
1909     if (PL_faketokens)
1910         sv_setpvs(sv, "");
1911     else {
1912         if (!IN_BYTES) {
1913             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1914                 SvUTF8_on(sv);
1915             else if (PL_encoding) {
1916                 sv_recode_to_utf8(sv, PL_encoding);
1917             }
1918         }
1919     }
1920
1921     /* keep a slot open for the head of the list? */
1922     if (slot != '_' && *where && (*where)->mad_key == '^') {
1923         (*where)->mad_key = slot;
1924         sv_free(MUTABLE_SV(((*where)->mad_val)));
1925         (*where)->mad_val = (void*)sv;
1926     }
1927     else
1928         addmad(newMADsv(slot, sv), where, 0);
1929 }
1930 #else
1931 #  define start_force(where)    NOOP
1932 #  define curmad(slot, sv)      NOOP
1933 #endif
1934
1935 /*
1936  * S_force_next
1937  * When the lexer realizes it knows the next token (for instance,
1938  * it is reordering tokens for the parser) then it can call S_force_next
1939  * to know what token to return the next time the lexer is called.  Caller
1940  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1941  * and possibly PL_expect to ensure the lexer handles the token correctly.
1942  */
1943
1944 STATIC void
1945 S_force_next(pTHX_ I32 type)
1946 {
1947     dVAR;
1948 #ifdef DEBUGGING
1949     if (DEBUG_T_TEST) {
1950         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1951         tokereport(type, &NEXTVAL_NEXTTOKE);
1952     }
1953 #endif
1954 #ifdef PERL_MAD
1955     if (PL_curforce < 0)
1956         start_force(PL_lasttoke);
1957     PL_nexttoke[PL_curforce].next_type = type;
1958     if (PL_lex_state != LEX_KNOWNEXT)
1959         PL_lex_defer = PL_lex_state;
1960     PL_lex_state = LEX_KNOWNEXT;
1961     PL_lex_expect = PL_expect;
1962     PL_curforce = -1;
1963 #else
1964     PL_nexttype[PL_nexttoke] = type;
1965     PL_nexttoke++;
1966     if (PL_lex_state != LEX_KNOWNEXT) {
1967         PL_lex_defer = PL_lex_state;
1968         PL_lex_expect = PL_expect;
1969         PL_lex_state = LEX_KNOWNEXT;
1970     }
1971 #endif
1972 }
1973
1974 void
1975 Perl_yyunlex(pTHX)
1976 {
1977     int yyc = PL_parser->yychar;
1978     if (yyc != YYEMPTY) {
1979         if (yyc) {
1980             start_force(-1);
1981             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1982             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1983                 PL_lex_allbrackets--;
1984                 PL_lex_brackets--;
1985                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1986             } else if (yyc == '('/*)*/) {
1987                 PL_lex_allbrackets--;
1988                 yyc |= (2<<24);
1989             }
1990             force_next(yyc);
1991         }
1992         PL_parser->yychar = YYEMPTY;
1993     }
1994 }
1995
1996 STATIC SV *
1997 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1998 {
1999     dVAR;
2000     SV * const sv = newSVpvn_utf8(start, len,
2001                                   !IN_BYTES
2002                                   && UTF
2003                                   && !is_ascii_string((const U8*)start, len)
2004                                   && is_utf8_string((const U8*)start, len));
2005     return sv;
2006 }
2007
2008 /*
2009  * S_force_word
2010  * When the lexer knows the next thing is a word (for instance, it has
2011  * just seen -> and it knows that the next char is a word char, then
2012  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2013  * lookahead.
2014  *
2015  * Arguments:
2016  *   char *start : buffer position (must be within PL_linestr)
2017  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2018  *   int check_keyword : if true, Perl checks to make sure the word isn't
2019  *       a keyword (do this if the word is a label, e.g. goto FOO)
2020  *   int allow_pack : if true, : characters will also be allowed (require,
2021  *       use, etc. do this)
2022  *   int allow_initial_tick : used by the "sub" lexer only.
2023  */
2024
2025 STATIC char *
2026 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2027 {
2028     dVAR;
2029     register char *s;
2030     STRLEN len;
2031
2032     PERL_ARGS_ASSERT_FORCE_WORD;
2033
2034     start = SKIPSPACE1(start);
2035     s = start;
2036     if (isIDFIRST_lazy_if(s,UTF) ||
2037         (allow_pack && *s == ':') ||
2038         (allow_initial_tick && *s == '\'') )
2039     {
2040         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2041         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2042             return start;
2043         start_force(PL_curforce);
2044         if (PL_madskills)
2045             curmad('X', newSVpvn(start,s-start));
2046         if (token == METHOD) {
2047             s = SKIPSPACE1(s);
2048             if (*s == '(')
2049                 PL_expect = XTERM;
2050             else {
2051                 PL_expect = XOPERATOR;
2052             }
2053         }
2054         if (PL_madskills)
2055             curmad('g', newSVpvs( "forced" ));
2056         NEXTVAL_NEXTTOKE.opval
2057             = (OP*)newSVOP(OP_CONST,0,
2058                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2059         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2060         force_next(token);
2061     }
2062     return s;
2063 }
2064
2065 /*
2066  * S_force_ident
2067  * Called when the lexer wants $foo *foo &foo etc, but the program
2068  * text only contains the "foo" portion.  The first argument is a pointer
2069  * to the "foo", and the second argument is the type symbol to prefix.
2070  * Forces the next token to be a "WORD".
2071  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2072  */
2073
2074 STATIC void
2075 S_force_ident(pTHX_ register const char *s, int kind)
2076 {
2077     dVAR;
2078
2079     PERL_ARGS_ASSERT_FORCE_IDENT;
2080
2081     if (*s) {
2082         const STRLEN len = strlen(s);
2083         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2084         start_force(PL_curforce);
2085         NEXTVAL_NEXTTOKE.opval = o;
2086         force_next(WORD);
2087         if (kind) {
2088             o->op_private = OPpCONST_ENTERED;
2089             /* XXX see note in pp_entereval() for why we forgo typo
2090                warnings if the symbol must be introduced in an eval.
2091                GSAR 96-10-12 */
2092             gv_fetchpvn_flags(s, len,
2093                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2094                               : GV_ADD,
2095                               kind == '$' ? SVt_PV :
2096                               kind == '@' ? SVt_PVAV :
2097                               kind == '%' ? SVt_PVHV :
2098                               SVt_PVGV
2099                               );
2100         }
2101     }
2102 }
2103
2104 NV
2105 Perl_str_to_version(pTHX_ SV *sv)
2106 {
2107     NV retval = 0.0;
2108     NV nshift = 1.0;
2109     STRLEN len;
2110     const char *start = SvPV_const(sv,len);
2111     const char * const end = start + len;
2112     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2113
2114     PERL_ARGS_ASSERT_STR_TO_VERSION;
2115
2116     while (start < end) {
2117         STRLEN skip;
2118         UV n;
2119         if (utf)
2120             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2121         else {
2122             n = *(U8*)start;
2123             skip = 1;
2124         }
2125         retval += ((NV)n)/nshift;
2126         start += skip;
2127         nshift *= 1000;
2128     }
2129     return retval;
2130 }
2131
2132 /*
2133  * S_force_version
2134  * Forces the next token to be a version number.
2135  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2136  * and if "guessing" is TRUE, then no new token is created (and the caller
2137  * must use an alternative parsing method).
2138  */
2139
2140 STATIC char *
2141 S_force_version(pTHX_ char *s, int guessing)
2142 {
2143     dVAR;
2144     OP *version = NULL;
2145     char *d;
2146 #ifdef PERL_MAD
2147     I32 startoff = s - SvPVX(PL_linestr);
2148 #endif
2149
2150     PERL_ARGS_ASSERT_FORCE_VERSION;
2151
2152     s = SKIPSPACE1(s);
2153
2154     d = s;
2155     if (*d == 'v')
2156         d++;
2157     if (isDIGIT(*d)) {
2158         while (isDIGIT(*d) || *d == '_' || *d == '.')
2159             d++;
2160 #ifdef PERL_MAD
2161         if (PL_madskills) {
2162             start_force(PL_curforce);
2163             curmad('X', newSVpvn(s,d-s));
2164         }
2165 #endif
2166         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2167             SV *ver;
2168 #ifdef USE_LOCALE_NUMERIC
2169             char *loc = setlocale(LC_NUMERIC, "C");
2170 #endif
2171             s = scan_num(s, &pl_yylval);
2172 #ifdef USE_LOCALE_NUMERIC
2173             setlocale(LC_NUMERIC, loc);
2174 #endif
2175             version = pl_yylval.opval;
2176             ver = cSVOPx(version)->op_sv;
2177             if (SvPOK(ver) && !SvNIOK(ver)) {
2178                 SvUPGRADE(ver, SVt_PVNV);
2179                 SvNV_set(ver, str_to_version(ver));
2180                 SvNOK_on(ver);          /* hint that it is a version */
2181             }
2182         }
2183         else if (guessing) {
2184 #ifdef PERL_MAD
2185             if (PL_madskills) {
2186                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2187                 PL_nextwhite = 0;
2188                 s = SvPVX(PL_linestr) + startoff;
2189             }
2190 #endif
2191             return s;
2192         }
2193     }
2194
2195 #ifdef PERL_MAD
2196     if (PL_madskills && !version) {
2197         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2198         PL_nextwhite = 0;
2199         s = SvPVX(PL_linestr) + startoff;
2200     }
2201 #endif
2202     /* NOTE: The parser sees the package name and the VERSION swapped */
2203     start_force(PL_curforce);
2204     NEXTVAL_NEXTTOKE.opval = version;
2205     force_next(WORD);
2206
2207     return s;
2208 }
2209
2210 /*
2211  * S_force_strict_version
2212  * Forces the next token to be a version number using strict syntax rules.
2213  */
2214
2215 STATIC char *
2216 S_force_strict_version(pTHX_ char *s)
2217 {
2218     dVAR;
2219     OP *version = NULL;
2220 #ifdef PERL_MAD
2221     I32 startoff = s - SvPVX(PL_linestr);
2222 #endif
2223     const char *errstr = NULL;
2224
2225     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2226
2227     while (isSPACE(*s)) /* leading whitespace */
2228         s++;
2229
2230     if (is_STRICT_VERSION(s,&errstr)) {
2231         SV *ver = newSV(0);
2232         s = (char *)scan_version(s, ver, 0);
2233         version = newSVOP(OP_CONST, 0, ver);
2234     }
2235     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2236             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2237     {
2238         PL_bufptr = s;
2239         if (errstr)
2240             yyerror(errstr); /* version required */
2241         return s;
2242     }
2243
2244 #ifdef PERL_MAD
2245     if (PL_madskills && !version) {
2246         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2247         PL_nextwhite = 0;
2248         s = SvPVX(PL_linestr) + startoff;
2249     }
2250 #endif
2251     /* NOTE: The parser sees the package name and the VERSION swapped */
2252     start_force(PL_curforce);
2253     NEXTVAL_NEXTTOKE.opval = version;
2254     force_next(WORD);
2255
2256     return s;
2257 }
2258
2259 /*
2260  * S_tokeq
2261  * Tokenize a quoted string passed in as an SV.  It finds the next
2262  * chunk, up to end of string or a backslash.  It may make a new
2263  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2264  * turns \\ into \.
2265  */
2266
2267 STATIC SV *
2268 S_tokeq(pTHX_ SV *sv)
2269 {
2270     dVAR;
2271     register char *s;
2272     register char *send;
2273     register char *d;
2274     STRLEN len = 0;
2275     SV *pv = sv;
2276
2277     PERL_ARGS_ASSERT_TOKEQ;
2278
2279     if (!SvLEN(sv))
2280         goto finish;
2281
2282     s = SvPV_force(sv, len);
2283     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2284         goto finish;
2285     send = s + len;
2286     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2287     while (s < send && !(*s == '\\' && s[1] == '\\'))
2288         s++;
2289     if (s == send)
2290         goto finish;
2291     d = s;
2292     if ( PL_hints & HINT_NEW_STRING ) {
2293         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2294     }
2295     while (s < send) {
2296         if (*s == '\\') {
2297             if (s + 1 < send && (s[1] == '\\'))
2298                 s++;            /* all that, just for this */
2299         }
2300         *d++ = *s++;
2301     }
2302     *d = '\0';
2303     SvCUR_set(sv, d - SvPVX_const(sv));
2304   finish:
2305     if ( PL_hints & HINT_NEW_STRING )
2306        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2307     return sv;
2308 }
2309
2310 /*
2311  * Now come three functions related to double-quote context,
2312  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2313  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2314  * interact with PL_lex_state, and create fake ( ... ) argument lists
2315  * to handle functions and concatenation.
2316  * They assume that whoever calls them will be setting up a fake
2317  * join call, because each subthing puts a ',' after it.  This lets
2318  *   "lower \luPpEr"
2319  * become
2320  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2321  *
2322  * (I'm not sure whether the spurious commas at the end of lcfirst's
2323  * arguments and join's arguments are created or not).
2324  */
2325
2326 /*
2327  * S_sublex_start
2328  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2329  *
2330  * Pattern matching will set PL_lex_op to the pattern-matching op to
2331  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2332  *
2333  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2334  *
2335  * Everything else becomes a FUNC.
2336  *
2337  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2338  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2339  * call to S_sublex_push().
2340  */
2341
2342 STATIC I32
2343 S_sublex_start(pTHX)
2344 {
2345     dVAR;
2346     register const I32 op_type = pl_yylval.ival;
2347
2348     if (op_type == OP_NULL) {
2349         pl_yylval.opval = PL_lex_op;
2350         PL_lex_op = NULL;
2351         return THING;
2352     }
2353     if (op_type == OP_CONST || op_type == OP_READLINE) {
2354         SV *sv = tokeq(PL_lex_stuff);
2355
2356         if (SvTYPE(sv) == SVt_PVIV) {
2357             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2358             STRLEN len;
2359             const char * const p = SvPV_const(sv, len);
2360             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2361             SvREFCNT_dec(sv);
2362             sv = nsv;
2363         }
2364         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2365         PL_lex_stuff = NULL;
2366         /* Allow <FH> // "foo" */
2367         if (op_type == OP_READLINE)
2368             PL_expect = XTERMORDORDOR;
2369         return THING;
2370     }
2371     else if (op_type == OP_BACKTICK && PL_lex_op) {
2372         /* readpipe() vas overriden */
2373         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2374         pl_yylval.opval = PL_lex_op;
2375         PL_lex_op = NULL;
2376         PL_lex_stuff = NULL;
2377         return THING;
2378     }
2379
2380     PL_sublex_info.super_state = PL_lex_state;
2381     PL_sublex_info.sub_inwhat = (U16)op_type;
2382     PL_sublex_info.sub_op = PL_lex_op;
2383     PL_lex_state = LEX_INTERPPUSH;
2384
2385     PL_expect = XTERM;
2386     if (PL_lex_op) {
2387         pl_yylval.opval = PL_lex_op;
2388         PL_lex_op = NULL;
2389         return PMFUNC;
2390     }
2391     else
2392         return FUNC;
2393 }
2394
2395 /*
2396  * S_sublex_push
2397  * Create a new scope to save the lexing state.  The scope will be
2398  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2399  * to the uc, lc, etc. found before.
2400  * Sets PL_lex_state to LEX_INTERPCONCAT.
2401  */
2402
2403 STATIC I32
2404 S_sublex_push(pTHX)
2405 {
2406     dVAR;
2407     ENTER;
2408
2409     PL_lex_state = PL_sublex_info.super_state;
2410     SAVEBOOL(PL_lex_dojoin);
2411     SAVEI32(PL_lex_brackets);
2412     SAVEI32(PL_lex_allbrackets);
2413     SAVEI8(PL_lex_fakeeof);
2414     SAVEI32(PL_lex_casemods);
2415     SAVEI32(PL_lex_starts);
2416     SAVEI8(PL_lex_state);
2417     SAVEVPTR(PL_lex_inpat);
2418     SAVEI16(PL_lex_inwhat);
2419     SAVECOPLINE(PL_curcop);
2420     SAVEPPTR(PL_bufptr);
2421     SAVEPPTR(PL_bufend);
2422     SAVEPPTR(PL_oldbufptr);
2423     SAVEPPTR(PL_oldoldbufptr);
2424     SAVEPPTR(PL_last_lop);
2425     SAVEPPTR(PL_last_uni);
2426     SAVEPPTR(PL_linestart);
2427     SAVESPTR(PL_linestr);
2428     SAVEGENERICPV(PL_lex_brackstack);
2429     SAVEGENERICPV(PL_lex_casestack);
2430
2431     PL_linestr = PL_lex_stuff;
2432     PL_lex_stuff = NULL;
2433
2434     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2435         = SvPVX(PL_linestr);
2436     PL_bufend += SvCUR(PL_linestr);
2437     PL_last_lop = PL_last_uni = NULL;
2438     SAVEFREESV(PL_linestr);
2439
2440     PL_lex_dojoin = FALSE;
2441     PL_lex_brackets = 0;
2442     PL_lex_allbrackets = 0;
2443     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2444     Newx(PL_lex_brackstack, 120, char);
2445     Newx(PL_lex_casestack, 12, char);
2446     PL_lex_casemods = 0;
2447     *PL_lex_casestack = '\0';
2448     PL_lex_starts = 0;
2449     PL_lex_state = LEX_INTERPCONCAT;
2450     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2451
2452     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2453     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2454     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2455         PL_lex_inpat = PL_sublex_info.sub_op;
2456     else
2457         PL_lex_inpat = NULL;
2458
2459     return '(';
2460 }
2461
2462 /*
2463  * S_sublex_done
2464  * Restores lexer state after a S_sublex_push.
2465  */
2466
2467 STATIC I32
2468 S_sublex_done(pTHX)
2469 {
2470     dVAR;
2471     if (!PL_lex_starts++) {
2472         SV * const sv = newSVpvs("");
2473         if (SvUTF8(PL_linestr))
2474             SvUTF8_on(sv);
2475         PL_expect = XOPERATOR;
2476         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2477         return THING;
2478     }
2479
2480     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2481         PL_lex_state = LEX_INTERPCASEMOD;
2482         return yylex();
2483     }
2484
2485     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2486     assert(PL_lex_inwhat != OP_TRANSR);
2487     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2488         PL_linestr = PL_lex_repl;
2489         PL_lex_inpat = 0;
2490         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2491         PL_bufend += SvCUR(PL_linestr);
2492         PL_last_lop = PL_last_uni = NULL;
2493         SAVEFREESV(PL_linestr);
2494         PL_lex_dojoin = FALSE;
2495         PL_lex_brackets = 0;
2496         PL_lex_allbrackets = 0;
2497         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2498         PL_lex_casemods = 0;
2499         *PL_lex_casestack = '\0';
2500         PL_lex_starts = 0;
2501         if (SvEVALED(PL_lex_repl)) {
2502             PL_lex_state = LEX_INTERPNORMAL;
2503             PL_lex_starts++;
2504             /*  we don't clear PL_lex_repl here, so that we can check later
2505                 whether this is an evalled subst; that means we rely on the
2506                 logic to ensure sublex_done() is called again only via the
2507                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2508         }
2509         else {
2510             PL_lex_state = LEX_INTERPCONCAT;
2511             PL_lex_repl = NULL;
2512         }
2513         return ',';
2514     }
2515     else {
2516 #ifdef PERL_MAD
2517         if (PL_madskills) {
2518             if (PL_thiswhite) {
2519                 if (!PL_endwhite)
2520                     PL_endwhite = newSVpvs("");
2521                 sv_catsv(PL_endwhite, PL_thiswhite);
2522                 PL_thiswhite = 0;
2523             }
2524             if (PL_thistoken)
2525                 sv_setpvs(PL_thistoken,"");
2526             else
2527                 PL_realtokenstart = -1;
2528         }
2529 #endif
2530         LEAVE;
2531         PL_bufend = SvPVX(PL_linestr);
2532         PL_bufend += SvCUR(PL_linestr);
2533         PL_expect = XOPERATOR;
2534         PL_sublex_info.sub_inwhat = 0;
2535         return ')';
2536     }
2537 }
2538
2539 /*
2540   scan_const
2541
2542   Extracts a pattern, double-quoted string, or transliteration.  This
2543   is terrifying code.
2544
2545   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2546   processing a pattern (PL_lex_inpat is true), a transliteration
2547   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2548
2549   Returns a pointer to the character scanned up to. If this is
2550   advanced from the start pointer supplied (i.e. if anything was
2551   successfully parsed), will leave an OP for the substring scanned
2552   in pl_yylval. Caller must intuit reason for not parsing further
2553   by looking at the next characters herself.
2554
2555   In patterns:
2556     backslashes:
2557       constants: \N{NAME} only
2558       case and quoting: \U \Q \E
2559     stops on @ and $, but not for $ as tail anchor
2560
2561   In transliterations:
2562     characters are VERY literal, except for - not at the start or end
2563     of the string, which indicates a range. If the range is in bytes,
2564     scan_const expands the range to the full set of intermediate
2565     characters. If the range is in utf8, the hyphen is replaced with
2566     a certain range mark which will be handled by pmtrans() in op.c.
2567
2568   In double-quoted strings:
2569     backslashes:
2570       double-quoted style: \r and \n
2571       constants: \x31, etc.
2572       deprecated backrefs: \1 (in substitution replacements)
2573       case and quoting: \U \Q \E
2574     stops on @ and $
2575
2576   scan_const does *not* construct ops to handle interpolated strings.
2577   It stops processing as soon as it finds an embedded $ or @ variable
2578   and leaves it to the caller to work out what's going on.
2579
2580   embedded arrays (whether in pattern or not) could be:
2581       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2582
2583   $ in double-quoted strings must be the symbol of an embedded scalar.
2584
2585   $ in pattern could be $foo or could be tail anchor.  Assumption:
2586   it's a tail anchor if $ is the last thing in the string, or if it's
2587   followed by one of "()| \r\n\t"
2588
2589   \1 (backreferences) are turned into $1
2590
2591   The structure of the code is
2592       while (there's a character to process) {
2593           handle transliteration ranges
2594           skip regexp comments /(?#comment)/ and codes /(?{code})/
2595           skip #-initiated comments in //x patterns
2596           check for embedded arrays
2597           check for embedded scalars
2598           if (backslash) {
2599               deprecate \1 in substitution replacements
2600               handle string-changing backslashes \l \U \Q \E, etc.
2601               switch (what was escaped) {
2602                   handle \- in a transliteration (becomes a literal -)
2603                   if a pattern and not \N{, go treat as regular character
2604                   handle \132 (octal characters)
2605                   handle \x15 and \x{1234} (hex characters)
2606                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2607                   handle \cV (control characters)
2608                   handle printf-style backslashes (\f, \r, \n, etc)
2609               } (end switch)
2610               continue
2611           } (end if backslash)
2612           handle regular character
2613     } (end while character to read)
2614
2615 */
2616
2617 STATIC char *
2618 S_scan_const(pTHX_ char *start)
2619 {
2620     dVAR;
2621     register char *send = PL_bufend;            /* end of the constant */
2622     SV *sv = newSV(send - start);               /* sv for the constant.  See
2623                                                    note below on sizing. */
2624     register char *s = start;                   /* start of the constant */
2625     register char *d = SvPVX(sv);               /* destination for copies */
2626     bool dorange = FALSE;                       /* are we in a translit range? */
2627     bool didrange = FALSE;                      /* did we just finish a range? */
2628     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2629     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2630                                                    to be UTF8?  But, this can
2631                                                    show as true when the source
2632                                                    isn't utf8, as for example
2633                                                    when it is entirely composed
2634                                                    of hex constants */
2635
2636     /* Note on sizing:  The scanned constant is placed into sv, which is
2637      * initialized by newSV() assuming one byte of output for every byte of
2638      * input.  This routine expects newSV() to allocate an extra byte for a
2639      * trailing NUL, which this routine will append if it gets to the end of
2640      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2641      * CAPITAL LETTER A}), or more output than input if the constant ends up
2642      * recoded to utf8, but each time a construct is found that might increase
2643      * the needed size, SvGROW() is called.  Its size parameter each time is
2644      * based on the best guess estimate at the time, namely the length used so
2645      * far, plus the length the current construct will occupy, plus room for
2646      * the trailing NUL, plus one byte for every input byte still unscanned */
2647
2648     UV uv;
2649 #ifdef EBCDIC
2650     UV literal_endpoint = 0;
2651     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2652 #endif
2653
2654     PERL_ARGS_ASSERT_SCAN_CONST;
2655
2656     assert(PL_lex_inwhat != OP_TRANSR);
2657     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2658         /* If we are doing a trans and we know we want UTF8 set expectation */
2659         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2660         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2661     }
2662
2663
2664     while (s < send || dorange) {
2665
2666         /* get transliterations out of the way (they're most literal) */
2667         if (PL_lex_inwhat == OP_TRANS) {
2668             /* expand a range A-Z to the full set of characters.  AIE! */
2669             if (dorange) {
2670                 I32 i;                          /* current expanded character */
2671                 I32 min;                        /* first character in range */
2672                 I32 max;                        /* last character in range */
2673
2674 #ifdef EBCDIC
2675                 UV uvmax = 0;
2676 #endif
2677
2678                 if (has_utf8
2679 #ifdef EBCDIC
2680                     && !native_range
2681 #endif
2682                     ) {
2683                     char * const c = (char*)utf8_hop((U8*)d, -1);
2684                     char *e = d++;
2685                     while (e-- > c)
2686                         *(e + 1) = *e;
2687                     *c = (char)UTF_TO_NATIVE(0xff);
2688                     /* mark the range as done, and continue */
2689                     dorange = FALSE;
2690                     didrange = TRUE;
2691                     continue;
2692                 }
2693
2694                 i = d - SvPVX_const(sv);                /* remember current offset */
2695 #ifdef EBCDIC
2696                 SvGROW(sv,
2697                        SvLEN(sv) + (has_utf8 ?
2698                                     (512 - UTF_CONTINUATION_MARK +
2699                                      UNISKIP(0x100))
2700                                     : 256));
2701                 /* How many two-byte within 0..255: 128 in UTF-8,
2702                  * 96 in UTF-8-mod. */
2703 #else
2704                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2705 #endif
2706                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2707 #ifdef EBCDIC
2708                 if (has_utf8) {
2709                     int j;
2710                     for (j = 0; j <= 1; j++) {
2711                         char * const c = (char*)utf8_hop((U8*)d, -1);
2712                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2713                         if (j)
2714                             min = (U8)uv;
2715                         else if (uv < 256)
2716                             max = (U8)uv;
2717                         else {
2718                             max = (U8)0xff; /* only to \xff */
2719                             uvmax = uv; /* \x{100} to uvmax */
2720                         }
2721                         d = c; /* eat endpoint chars */
2722                      }
2723                 }
2724                else {
2725 #endif
2726                    d -= 2;              /* eat the first char and the - */
2727                    min = (U8)*d;        /* first char in range */
2728                    max = (U8)d[1];      /* last char in range  */
2729 #ifdef EBCDIC
2730                }
2731 #endif
2732
2733                 if (min > max) {
2734                     Perl_croak(aTHX_
2735                                "Invalid range \"%c-%c\" in transliteration operator",
2736                                (char)min, (char)max);
2737                 }
2738
2739 #ifdef EBCDIC
2740                 if (literal_endpoint == 2 &&
2741                     ((isLOWER(min) && isLOWER(max)) ||
2742                      (isUPPER(min) && isUPPER(max)))) {
2743                     if (isLOWER(min)) {
2744                         for (i = min; i <= max; i++)
2745                             if (isLOWER(i))
2746                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2747                     } else {
2748                         for (i = min; i <= max; i++)
2749                             if (isUPPER(i))
2750                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2751                     }
2752                 }
2753                 else
2754 #endif
2755                     for (i = min; i <= max; i++)
2756 #ifdef EBCDIC
2757                         if (has_utf8) {
2758                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2759                             if (UNI_IS_INVARIANT(ch))
2760                                 *d++ = (U8)i;
2761                             else {
2762                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2763                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2764                             }
2765                         }
2766                         else
2767 #endif
2768                             *d++ = (char)i;
2769
2770 #ifdef EBCDIC
2771                 if (uvmax) {
2772                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2773                     if (uvmax > 0x101)
2774                         *d++ = (char)UTF_TO_NATIVE(0xff);
2775                     if (uvmax > 0x100)
2776                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2777                 }
2778 #endif
2779
2780                 /* mark the range as done, and continue */
2781                 dorange = FALSE;
2782                 didrange = TRUE;
2783 #ifdef EBCDIC
2784                 literal_endpoint = 0;
2785 #endif
2786                 continue;
2787             }
2788
2789             /* range begins (ignore - as first or last char) */
2790             else if (*s == '-' && s+1 < send  && s != start) {
2791                 if (didrange) {
2792                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2793                 }
2794                 if (has_utf8
2795 #ifdef EBCDIC
2796                     && !native_range
2797 #endif
2798                     ) {
2799                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2800                     s++;
2801                     continue;
2802                 }
2803                 dorange = TRUE;
2804                 s++;
2805             }
2806             else {
2807                 didrange = FALSE;
2808 #ifdef EBCDIC
2809                 literal_endpoint = 0;
2810                 native_range = TRUE;
2811 #endif
2812             }
2813         }
2814
2815         /* if we get here, we're not doing a transliteration */
2816
2817         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2818            except for the last char, which will be done separately. */
2819         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2820             if (s[2] == '#') {
2821                 while (s+1 < send && *s != ')')
2822                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2823             }
2824             else if (s[2] == '{' /* This should match regcomp.c */
2825                     || (s[2] == '?' && s[3] == '{'))
2826             {
2827                 I32 count = 1;
2828                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2829                 char c;
2830
2831                 while (count && (c = *regparse)) {
2832                     if (c == '\\' && regparse[1])
2833                         regparse++;
2834                     else if (c == '{')
2835                         count++;
2836                     else if (c == '}')
2837                         count--;
2838                     regparse++;
2839                 }
2840                 if (*regparse != ')')
2841                     regparse--;         /* Leave one char for continuation. */
2842                 while (s < regparse)
2843                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2844             }
2845         }
2846
2847         /* likewise skip #-initiated comments in //x patterns */
2848         else if (*s == '#' && PL_lex_inpat &&
2849           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2850             while (s+1 < send && *s != '\n')
2851                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2852         }
2853
2854         /* check for embedded arrays
2855            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2856            */
2857         else if (*s == '@' && s[1]) {
2858             if (isALNUM_lazy_if(s+1,UTF))
2859                 break;
2860             if (strchr(":'{$", s[1]))
2861                 break;
2862             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2863                 break; /* in regexp, neither @+ nor @- are interpolated */
2864         }
2865
2866         /* check for embedded scalars.  only stop if we're sure it's a
2867            variable.
2868         */
2869         else if (*s == '$') {
2870             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2871                 break;
2872             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2873                 if (s[1] == '\\') {
2874                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2875                                    "Possible unintended interpolation of $\\ in regex");
2876                 }
2877                 break;          /* in regexp, $ might be tail anchor */
2878             }
2879         }
2880
2881         /* End of else if chain - OP_TRANS rejoin rest */
2882
2883         /* backslashes */
2884         if (*s == '\\' && s+1 < send) {
2885             char* e;    /* Can be used for ending '}', etc. */
2886
2887             s++;
2888
2889             /* warn on \1 - \9 in substitution replacements, but note that \11
2890              * is an octal; and \19 is \1 followed by '9' */
2891             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2892                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2893             {
2894                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2895                 *--s = '$';
2896                 break;
2897             }
2898
2899             /* string-change backslash escapes */
2900             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2901                 --s;
2902                 break;
2903             }
2904             /* In a pattern, process \N, but skip any other backslash escapes.
2905              * This is because we don't want to translate an escape sequence
2906              * into a meta symbol and have the regex compiler use the meta
2907              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2908              * in spite of this, we do have to process \N here while the proper
2909              * charnames handler is in scope.  See bugs #56444 and #62056.
2910              * There is a complication because \N in a pattern may also stand
2911              * for 'match a non-nl', and not mean a charname, in which case its
2912              * processing should be deferred to the regex compiler.  To be a
2913              * charname it must be followed immediately by a '{', and not look
2914              * like \N followed by a curly quantifier, i.e., not something like
2915              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2916              * quantifier */
2917             else if (PL_lex_inpat
2918                     && (*s != 'N'
2919                         || s[1] != '{'
2920                         || regcurly(s + 1)))
2921             {
2922                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2923                 goto default_action;
2924             }
2925
2926             switch (*s) {
2927
2928             /* quoted - in transliterations */
2929             case '-':
2930                 if (PL_lex_inwhat == OP_TRANS) {
2931                     *d++ = *s++;
2932                     continue;
2933                 }
2934                 /* FALL THROUGH */
2935             default:
2936                 {
2937                     if ((isALPHA(*s) || isDIGIT(*s)))
2938                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2939                                        "Unrecognized escape \\%c passed through",
2940                                        *s);
2941                     /* default action is to copy the quoted character */
2942                     goto default_action;
2943                 }
2944
2945             /* eg. \132 indicates the octal constant 0132 */
2946             case '0': case '1': case '2': case '3':
2947             case '4': case '5': case '6': case '7':
2948                 {
2949                     I32 flags = 0;
2950                     STRLEN len = 3;
2951                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2952                     s += len;
2953                 }
2954                 goto NUM_ESCAPE_INSERT;
2955
2956             /* eg. \o{24} indicates the octal constant \024 */
2957             case 'o':
2958                 {
2959                     STRLEN len;
2960                     const char* error;
2961
2962                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2963                     s += len;
2964                     if (! valid) {
2965                         yyerror(error);
2966                         continue;
2967                     }
2968                     goto NUM_ESCAPE_INSERT;
2969                 }
2970
2971             /* eg. \x24 indicates the hex constant 0x24 */
2972             case 'x':
2973                 ++s;
2974                 if (*s == '{') {
2975                     char* const e = strchr(s, '}');
2976                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2977                       PERL_SCAN_DISALLOW_PREFIX;
2978                     STRLEN len;
2979
2980                     ++s;
2981                     if (!e) {
2982                         yyerror("Missing right brace on \\x{}");
2983                         continue;
2984                     }
2985                     len = e - s;
2986                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2987                     s = e + 1;
2988                 }
2989                 else {
2990                     {
2991                         STRLEN len = 2;
2992                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2993                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2994                         s += len;
2995                     }
2996                 }
2997
2998               NUM_ESCAPE_INSERT:
2999                 /* Insert oct or hex escaped character.  There will always be
3000                  * enough room in sv since such escapes will be longer than any
3001                  * UTF-8 sequence they can end up as, except if they force us
3002                  * to recode the rest of the string into utf8 */
3003
3004                 /* Here uv is the ordinal of the next character being added in
3005                  * unicode (converted from native). */
3006                 if (!UNI_IS_INVARIANT(uv)) {
3007                     if (!has_utf8 && uv > 255) {
3008                         /* Might need to recode whatever we have accumulated so
3009                          * far if it contains any chars variant in utf8 or
3010                          * utf-ebcdic. */
3011
3012                         SvCUR_set(sv, d - SvPVX_const(sv));
3013                         SvPOK_on(sv);
3014                         *d = '\0';
3015                         /* See Note on sizing above.  */
3016                         sv_utf8_upgrade_flags_grow(sv,
3017                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3018                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3019                         d = SvPVX(sv) + SvCUR(sv);
3020                         has_utf8 = TRUE;
3021                     }
3022
3023                     if (has_utf8) {
3024                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3025                         if (PL_lex_inwhat == OP_TRANS &&
3026                             PL_sublex_info.sub_op) {
3027                             PL_sublex_info.sub_op->op_private |=
3028                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3029                                              : OPpTRANS_TO_UTF);
3030                         }
3031 #ifdef EBCDIC
3032                         if (uv > 255 && !dorange)
3033                             native_range = FALSE;
3034 #endif
3035                     }
3036                     else {
3037                         *d++ = (char)uv;
3038                     }
3039                 }
3040                 else {
3041                     *d++ = (char) uv;
3042                 }
3043                 continue;
3044
3045             case 'N':
3046                 /* In a non-pattern \N must be a named character, like \N{LATIN
3047                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3048                  * mean to match a non-newline.  For non-patterns, named
3049                  * characters are converted to their string equivalents. In
3050                  * patterns, named characters are not converted to their
3051                  * ultimate forms for the same reasons that other escapes
3052                  * aren't.  Instead, they are converted to the \N{U+...} form
3053                  * to get the value from the charnames that is in effect right
3054                  * now, while preserving the fact that it was a named character
3055                  * so that the regex compiler knows this */
3056
3057                 /* This section of code doesn't generally use the
3058                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3059                  * a close examination of this macro and determined it is a
3060                  * no-op except on utfebcdic variant characters.  Every
3061                  * character generated by this that would normally need to be
3062                  * enclosed by this macro is invariant, so the macro is not
3063                  * needed, and would complicate use of copy().  XXX There are
3064                  * other parts of this file where the macro is used
3065                  * inconsistently, but are saved by it being a no-op */
3066
3067                 /* The structure of this section of code (besides checking for
3068                  * errors and upgrading to utf8) is:
3069                  *  Further disambiguate between the two meanings of \N, and if
3070                  *      not a charname, go process it elsewhere
3071                  *  If of form \N{U+...}, pass it through if a pattern;
3072                  *      otherwise convert to utf8
3073                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3074                  *  pattern; otherwise convert to utf8 */
3075
3076                 /* Here, s points to the 'N'; the test below is guaranteed to
3077                  * succeed if we are being called on a pattern as we already
3078                  * know from a test above that the next character is a '{'.
3079                  * On a non-pattern \N must mean 'named sequence, which
3080                  * requires braces */
3081                 s++;
3082                 if (*s != '{') {
3083                     yyerror("Missing braces on \\N{}");
3084                     continue;
3085                 }
3086                 s++;
3087
3088                 /* If there is no matching '}', it is an error. */
3089                 if (! (e = strchr(s, '}'))) {
3090                     if (! PL_lex_inpat) {
3091                         yyerror("Missing right brace on \\N{}");
3092                     } else {
3093                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3094                     }
3095                     continue;
3096                 }
3097
3098                 /* Here it looks like a named character */
3099
3100                 if (PL_lex_inpat) {
3101
3102                     /* XXX This block is temporary code.  \N{} implies that the
3103                      * pattern is to have Unicode semantics, and therefore
3104                      * currently has to be encoded in utf8.  By putting it in
3105                      * utf8 now, we save a whole pass in the regular expression
3106                      * compiler.  Once that code is changed so Unicode
3107                      * semantics doesn't necessarily have to be in utf8, this
3108                      * block should be removed.  However, the code that parses
3109                      * the output of this would have to be changed to not
3110                      * necessarily expect utf8 */
3111                     if (!has_utf8) {
3112                         SvCUR_set(sv, d - SvPVX_const(sv));
3113                         SvPOK_on(sv);
3114                         *d = '\0';
3115                         /* See Note on sizing above.  */
3116                         sv_utf8_upgrade_flags_grow(sv,
3117                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3118                                         /* 5 = '\N{' + cur char + NUL */
3119                                         (STRLEN)(send - s) + 5);
3120                         d = SvPVX(sv) + SvCUR(sv);
3121                         has_utf8 = TRUE;
3122                     }
3123                 }
3124
3125                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3126                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3127                                 | PERL_SCAN_DISALLOW_PREFIX;
3128                     STRLEN len;
3129
3130                     /* For \N{U+...}, the '...' is a unicode value even on
3131                      * EBCDIC machines */
3132                     s += 2;         /* Skip to next char after the 'U+' */
3133                     len = e - s;
3134                     uv = grok_hex(s, &len, &flags, NULL);
3135                     if (len == 0 || len != (STRLEN)(e - s)) {
3136                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3137                         s = e + 1;
3138                         continue;
3139                     }
3140
3141                     if (PL_lex_inpat) {
3142
3143                         /* On non-EBCDIC platforms, pass through to the regex
3144                          * compiler unchanged.  The reason we evaluated the
3145                          * number above is to make sure there wasn't a syntax
3146                          * error.  But on EBCDIC we convert to native so
3147                          * downstream code can continue to assume it's native
3148                          */
3149                         s -= 5;     /* Include the '\N{U+' */
3150 #ifdef EBCDIC
3151                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3152                                                                and the \0 */
3153                                     "\\N{U+%X}",
3154                                     (unsigned int) UNI_TO_NATIVE(uv));
3155 #else
3156                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3157                         d += e - s + 1;
3158 #endif
3159                     }
3160                     else {  /* Not a pattern: convert the hex to string */
3161
3162                          /* If destination is not in utf8, unconditionally
3163                           * recode it to be so.  This is because \N{} implies
3164                           * Unicode semantics, and scalars have to be in utf8
3165                           * to guarantee those semantics */
3166                         if (! has_utf8) {
3167                             SvCUR_set(sv, d - SvPVX_const(sv));
3168                             SvPOK_on(sv);
3169                             *d = '\0';
3170                             /* See Note on sizing above.  */
3171                             sv_utf8_upgrade_flags_grow(
3172                                         sv,
3173                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3174                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3175                             d = SvPVX(sv) + SvCUR(sv);
3176                             has_utf8 = TRUE;
3177                         }
3178
3179                         /* Add the string to the output */
3180                         if (UNI_IS_INVARIANT(uv)) {
3181                             *d++ = (char) uv;
3182                         }
3183                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3184                     }
3185                 }
3186                 else { /* Here is \N{NAME} but not \N{U+...}. */
3187
3188                     SV *res;            /* result from charnames */
3189                     const char *str;    /* the string in 'res' */
3190                     STRLEN len;         /* its length */
3191
3192                     /* Get the value for NAME */
3193                     res = newSVpvn(s, e - s);
3194                     res = new_constant( NULL, 0, "charnames",
3195                                         /* includes all of: \N{...} */
3196                                         res, NULL, s - 3, e - s + 4 );
3197
3198                     /* Most likely res will be in utf8 already since the
3199                      * standard charnames uses pack U, but a custom translator
3200                      * can leave it otherwise, so make sure.  XXX This can be
3201                      * revisited to not have charnames use utf8 for characters
3202                      * that don't need it when regexes don't have to be in utf8
3203                      * for Unicode semantics.  If doing so, remember EBCDIC */
3204                     sv_utf8_upgrade(res);
3205                     str = SvPV_const(res, len);
3206
3207                     /* Don't accept malformed input */
3208                     if (! is_utf8_string((U8 *) str, len)) {
3209                         yyerror("Malformed UTF-8 returned by \\N");
3210                     }
3211                     else if (PL_lex_inpat) {
3212
3213                         if (! len) { /* The name resolved to an empty string */
3214                             Copy("\\N{}", d, 4, char);
3215                             d += 4;
3216                         }
3217                         else {
3218                             /* In order to not lose information for the regex
3219                             * compiler, pass the result in the specially made
3220                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3221                             * the code points in hex of each character
3222                             * returned by charnames */
3223
3224                             const char *str_end = str + len;
3225                             STRLEN char_length;     /* cur char's byte length */
3226                             STRLEN output_length;   /* and the number of bytes
3227                                                        after this is translated
3228                                                        into hex digits */
3229                             const STRLEN off = d - SvPVX_const(sv);
3230
3231                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3232                              * max('U+', '.'); and 1 for NUL */
3233                             char hex_string[2 * UTF8_MAXBYTES + 5];
3234
3235                             /* Get the first character of the result. */
3236                             U32 uv = utf8n_to_uvuni((U8 *) str,
3237                                                     len,
3238                                                     &char_length,
3239                                                     UTF8_ALLOW_ANYUV);
3240
3241                             /* The call to is_utf8_string() above hopefully
3242                              * guarantees that there won't be an error.  But
3243                              * it's easy here to make sure.  The function just
3244                              * above warns and returns 0 if invalid utf8, but
3245                              * it can also return 0 if the input is validly a
3246                              * NUL. Disambiguate */
3247                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3248                                 uv = UNICODE_REPLACEMENT;
3249                             }
3250
3251                             /* Convert first code point to hex, including the
3252                              * boiler plate before it.  For all these, we
3253                              * convert to native format so that downstream code
3254                              * can continue to assume the input is native */
3255                             output_length =
3256                                 my_snprintf(hex_string, sizeof(hex_string),
3257                                             "\\N{U+%X",
3258                                             (unsigned int) UNI_TO_NATIVE(uv));
3259
3260                             /* Make sure there is enough space to hold it */
3261                             d = off + SvGROW(sv, off
3262                                                  + output_length
3263                                                  + (STRLEN)(send - e)
3264                                                  + 2);  /* '}' + NUL */
3265                             /* And output it */
3266                             Copy(hex_string, d, output_length, char);
3267                             d += output_length;
3268
3269                             /* For each subsequent character, append dot and
3270                              * its ordinal in hex */
3271                             while ((str += char_length) < str_end) {
3272                                 const STRLEN off = d - SvPVX_const(sv);
3273                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3274                                                         str_end - str,
3275                                                         &char_length,
3276                                                         UTF8_ALLOW_ANYUV);
3277                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3278                                     uv = UNICODE_REPLACEMENT;
3279                                 }
3280
3281                                 output_length =
3282                                     my_snprintf(hex_string, sizeof(hex_string),
3283                                             ".%X",
3284                                             (unsigned int) UNI_TO_NATIVE(uv));
3285
3286                                 d = off + SvGROW(sv, off
3287                                                      + output_length
3288                                                      + (STRLEN)(send - e)
3289                                                      + 2);      /* '}' +  NUL */
3290                                 Copy(hex_string, d, output_length, char);
3291                                 d += output_length;
3292                             }
3293
3294                             *d++ = '}'; /* Done.  Add the trailing brace */
3295                         }
3296                     }
3297                     else { /* Here, not in a pattern.  Convert the name to a
3298                             * string. */
3299
3300                          /* If destination is not in utf8, unconditionally
3301                           * recode it to be so.  This is because \N{} implies
3302                           * Unicode semantics, and scalars have to be in utf8
3303                           * to guarantee those semantics */
3304                         if (! has_utf8) {
3305                             SvCUR_set(sv, d - SvPVX_const(sv));
3306                             SvPOK_on(sv);
3307                             *d = '\0';
3308                             /* See Note on sizing above.  */
3309                             sv_utf8_upgrade_flags_grow(sv,
3310                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3311                                                 len + (STRLEN)(send - s) + 1);
3312                             d = SvPVX(sv) + SvCUR(sv);
3313                             has_utf8 = TRUE;
3314                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3315
3316                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3317                              * set correctly here). */
3318                             const STRLEN off = d - SvPVX_const(sv);
3319                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3320                         }
3321                         Copy(str, d, len, char);
3322                         d += len;
3323                     }
3324                     SvREFCNT_dec(res);
3325
3326                     /* Deprecate non-approved name syntax */
3327                     if (ckWARN_d(WARN_DEPRECATED)) {
3328                         bool problematic = FALSE;
3329                         char* i = s;
3330
3331                         /* For non-ut8 input, look to see that the first
3332                          * character is an alpha, then loop through the rest
3333                          * checking that each is a continuation */
3334                         if (! this_utf8) {
3335                             if (! isALPHAU(*i)) problematic = TRUE;
3336                             else for (i = s + 1; i < e; i++) {
3337                                 if (isCHARNAME_CONT(*i)) continue;
3338                                 problematic = TRUE;
3339                                 break;
3340                             }
3341                         }
3342                         else {
3343                             /* Similarly for utf8.  For invariants can check
3344                              * directly.  We accept anything above the latin1
3345                              * range because it is immaterial to Perl if it is
3346                              * correct or not, and is expensive to check.  But
3347                              * it is fairly easy in the latin1 range to convert
3348                              * the variants into a single character and check
3349                              * those */
3350                             if (UTF8_IS_INVARIANT(*i)) {
3351                                 if (! isALPHAU(*i)) problematic = TRUE;
3352                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3353                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3354                                                                             *(i+1)))))
3355                                 {
3356                                     problematic = TRUE;
3357                                 }
3358                             }
3359                             if (! problematic) for (i = s + UTF8SKIP(s);
3360                                                     i < e;
3361                                                     i+= UTF8SKIP(i))
3362                             {
3363                                 if (UTF8_IS_INVARIANT(*i)) {
3364                                     if (isCHARNAME_CONT(*i)) continue;
3365                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3366                                     continue;
3367                                 } else if (isCHARNAME_CONT(
3368                                             UNI_TO_NATIVE(
3369                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3370                                 {
3371                                     continue;
3372                                 }
3373                                 problematic = TRUE;
3374                                 break;
3375                             }
3376                         }
3377                         if (problematic) {
3378                             /* The e-i passed to the final %.*s makes sure that
3379                              * should the trailing NUL be missing that this
3380                              * print won't run off the end of the string */
3381                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3382                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3383                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3384                         }
3385                     }
3386                 } /* End \N{NAME} */
3387 #ifdef EBCDIC
3388                 if (!dorange)
3389                     native_range = FALSE; /* \N{} is defined to be Unicode */
3390 #endif
3391                 s = e + 1;  /* Point to just after the '}' */
3392                 continue;
3393
3394             /* \c is a control character */
3395             case 'c':
3396                 s++;
3397                 if (s < send) {
3398                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3399                 }
3400                 else {
3401                     yyerror("Missing control char name in \\c");
3402                 }
3403                 continue;
3404
3405             /* printf-style backslashes, formfeeds, newlines, etc */
3406             case 'b':
3407                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3408                 break;
3409             case 'n':
3410                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3411                 break;
3412             case 'r':
3413                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3414                 break;
3415             case 'f':
3416                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3417                 break;
3418             case 't':
3419                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3420                 break;
3421             case 'e':
3422                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3423                 break;
3424             case 'a':
3425                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3426                 break;
3427             } /* end switch */
3428
3429             s++;
3430             continue;
3431         } /* end if (backslash) */
3432 #ifdef EBCDIC
3433         else
3434             literal_endpoint++;
3435 #endif
3436
3437     default_action:
3438         /* If we started with encoded form, or already know we want it,
3439            then encode the next character */
3440         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3441             STRLEN len  = 1;
3442
3443
3444             /* One might think that it is wasted effort in the case of the
3445              * source being utf8 (this_utf8 == TRUE) to take the next character
3446              * in the source, convert it to an unsigned value, and then convert
3447              * it back again.  But the source has not been validated here.  The
3448              * routine that does the conversion checks for errors like
3449              * malformed utf8 */
3450
3451             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3452             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3453             if (!has_utf8) {
3454                 SvCUR_set(sv, d - SvPVX_const(sv));
3455                 SvPOK_on(sv);
3456                 *d = '\0';
3457                 /* See Note on sizing above.  */
3458                 sv_utf8_upgrade_flags_grow(sv,
3459                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3460                                         need + (STRLEN)(send - s) + 1);
3461                 d = SvPVX(sv) + SvCUR(sv);
3462                 has_utf8 = TRUE;
3463             } else if (need > len) {
3464                 /* encoded value larger than old, may need extra space (NOTE:
3465                  * SvCUR() is not set correctly here).   See Note on sizing
3466                  * above.  */
3467                 const STRLEN off = d - SvPVX_const(sv);
3468                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3469             }
3470             s += len;
3471
3472             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3473 #ifdef EBCDIC
3474             if (uv > 255 && !dorange)
3475                 native_range = FALSE;
3476 #endif
3477         }
3478         else {
3479             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3480         }
3481     } /* while loop to process each character */
3482
3483     /* terminate the string and set up the sv */
3484     *d = '\0';
3485     SvCUR_set(sv, d - SvPVX_const(sv));
3486     if (SvCUR(sv) >= SvLEN(sv))
3487         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3488
3489     SvPOK_on(sv);
3490     if (PL_encoding && !has_utf8) {
3491         sv_recode_to_utf8(sv, PL_encoding);
3492         if (SvUTF8(sv))
3493             has_utf8 = TRUE;
3494     }
3495     if (has_utf8) {
3496         SvUTF8_on(sv);
3497         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3498             PL_sublex_info.sub_op->op_private |=
3499                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3500         }
3501     }
3502
3503     /* shrink the sv if we allocated more than we used */
3504     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3505         SvPV_shrink_to_cur(sv);
3506     }
3507
3508     /* return the substring (via pl_yylval) only if we parsed anything */
3509     if (s > PL_bufptr) {
3510         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3511             const char *const key = PL_lex_inpat ? "qr" : "q";
3512             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3513             const char *type;
3514             STRLEN typelen;
3515
3516             if (PL_lex_inwhat == OP_TRANS) {
3517                 type = "tr";
3518                 typelen = 2;
3519             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3520                 type = "s";
3521                 typelen = 1;
3522             } else  {
3523                 type = "qq";
3524                 typelen = 2;
3525             }
3526
3527             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3528                                 type, typelen);
3529         }
3530         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3531     } else
3532         SvREFCNT_dec(sv);
3533     return s;
3534 }
3535
3536 /* S_intuit_more
3537  * Returns TRUE if there's more to the expression (e.g., a subscript),
3538  * FALSE otherwise.
3539  *
3540  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3541  *
3542  * ->[ and ->{ return TRUE
3543  * { and [ outside a pattern are always subscripts, so return TRUE
3544  * if we're outside a pattern and it's not { or [, then return FALSE
3545  * if we're in a pattern and the first char is a {
3546  *   {4,5} (any digits around the comma) returns FALSE
3547  * if we're in a pattern and the first char is a [
3548  *   [] returns FALSE
3549  *   [SOMETHING] has a funky algorithm to decide whether it's a
3550  *      character class or not.  It has to deal with things like
3551  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3552  * anything else returns TRUE
3553  */
3554
3555 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3556
3557 STATIC int
3558 S_intuit_more(pTHX_ register char *s)
3559 {
3560     dVAR;
3561
3562     PERL_ARGS_ASSERT_INTUIT_MORE;
3563
3564     if (PL_lex_brackets)
3565         return TRUE;
3566     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3567         return TRUE;
3568     if (*s != '{' && *s != '[')
3569         return FALSE;
3570     if (!PL_lex_inpat)
3571         return TRUE;
3572
3573     /* In a pattern, so maybe we have {n,m}. */
3574     if (*s == '{') {
3575         if (regcurly(s)) {
3576             return FALSE;
3577         }
3578         return TRUE;
3579     }
3580
3581     /* On the other hand, maybe we have a character class */
3582
3583     s++;
3584     if (*s == ']' || *s == '^')
3585         return FALSE;
3586     else {
3587         /* this is terrifying, and it works */
3588         int weight = 2;         /* let's weigh the evidence */
3589         char seen[256];
3590         unsigned char un_char = 255, last_un_char;
3591         const char * const send = strchr(s,']');
3592         char tmpbuf[sizeof PL_tokenbuf * 4];
3593
3594         if (!send)              /* has to be an expression */
3595             return TRUE;
3596
3597         Zero(seen,256,char);
3598         if (*s == '$')
3599             weight -= 3;
3600         else if (isDIGIT(*s)) {
3601             if (s[1] != ']') {
3602                 if (isDIGIT(s[1]) && s[2] == ']')
3603                     weight -= 10;
3604             }
3605             else
3606                 weight -= 100;
3607         }
3608         for (; s < send; s++) {
3609             last_un_char = un_char;
3610             un_char = (unsigned char)*s;
3611             switch (*s) {
3612             case '@':
3613             case '&':
3614             case '$':
3615                 weight -= seen[un_char] * 10;
3616                 if (isALNUM_lazy_if(s+1,UTF)) {
3617                     int len;
3618                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3619                     len = (int)strlen(tmpbuf);
3620                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3621                         weight -= 100;
3622                     else
3623                         weight -= 10;
3624                 }
3625                 else if (*s == '$' && s[1] &&
3626                   strchr("[#!%*<>()-=",s[1])) {
3627                     if (/*{*/ strchr("])} =",s[2]))
3628                         weight -= 10;
3629                     else
3630                         weight -= 1;
3631                 }
3632                 break;
3633             case '\\':
3634                 un_char = 254;
3635                 if (s[1]) {
3636                     if (strchr("wds]",s[1]))
3637                         weight += 100;
3638                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3639                         weight += 1;
3640                     else if (strchr("rnftbxcav",s[1]))
3641                         weight += 40;
3642                     else if (isDIGIT(s[1])) {
3643                         weight += 40;
3644                         while (s[1] && isDIGIT(s[1]))
3645                             s++;
3646                     }
3647                 }
3648                 else
3649                     weight += 100;
3650                 break;
3651             case '-':
3652                 if (s[1] == '\\')
3653                     weight += 50;
3654                 if (strchr("aA01! ",last_un_char))
3655                     weight += 30;
3656                 if (strchr("zZ79~",s[1]))
3657                     weight += 30;
3658                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3659                     weight -= 5;        /* cope with negative subscript */
3660                 break;
3661             default:
3662                 if (!isALNUM(last_un_char)
3663                     && !(last_un_char == '$' || last_un_char == '@'
3664                          || last_un_char == '&')
3665                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3666                     char *d = tmpbuf;
3667                     while (isALPHA(*s))
3668                         *d++ = *s++;
3669                     *d = '\0';
3670                     if (keyword(tmpbuf, d - tmpbuf, 0))
3671                         weight -= 150;
3672                 }
3673                 if (un_char == last_un_char + 1)
3674                     weight += 5;
3675                 weight -= seen[un_char];
3676                 break;
3677             }
3678             seen[un_char]++;
3679         }
3680         if (weight >= 0)        /* probably a character class */
3681             return FALSE;
3682     }
3683
3684     return TRUE;
3685 }
3686
3687 /*
3688  * S_intuit_method
3689  *
3690  * Does all the checking to disambiguate
3691  *   foo bar
3692  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3693  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3694  *
3695  * First argument is the stuff after the first token, e.g. "bar".
3696  *
3697  * Not a method if bar is a filehandle.
3698  * Not a method if foo is a subroutine prototyped to take a filehandle.
3699  * Not a method if it's really "Foo $bar"
3700  * Method if it's "foo $bar"
3701  * Not a method if it's really "print foo $bar"
3702  * Method if it's really "foo package::" (interpreted as package->foo)
3703  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3704  * Not a method if bar is a filehandle or package, but is quoted with
3705  *   =>
3706  */
3707
3708 STATIC int
3709 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3710 {
3711     dVAR;
3712     char *s = start + (*start == '$');
3713     char tmpbuf[sizeof PL_tokenbuf];
3714     STRLEN len;
3715     GV* indirgv;
3716 #ifdef PERL_MAD
3717     int soff;
3718 #endif
3719
3720     PERL_ARGS_ASSERT_INTUIT_METHOD;
3721
3722     if (gv) {
3723         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3724             return 0;
3725         if (cv) {
3726             if (SvPOK(cv)) {
3727                 const char *proto = SvPVX_const(cv);
3728                 if (proto) {
3729                     if (*proto == ';')
3730                         proto++;
3731                     if (*proto == '*')
3732                         return 0;
3733                 }
3734             }
3735         } else
3736             gv = NULL;
3737     }
3738     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3739     /* start is the beginning of the possible filehandle/object,
3740      * and s is the end of it
3741      * tmpbuf is a copy of it
3742      */
3743
3744     if (*start == '$') {
3745         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3746                 isUPPER(*PL_tokenbuf))
3747             return 0;
3748 #ifdef PERL_MAD
3749         len = start - SvPVX(PL_linestr);
3750 #endif
3751         s = PEEKSPACE(s);
3752 #ifdef PERL_MAD
3753         start = SvPVX(PL_linestr) + len;
3754 #endif
3755         PL_bufptr = start;
3756         PL_expect = XREF;
3757         return *s == '(' ? FUNCMETH : METHOD;
3758     }
3759     if (!keyword(tmpbuf, len, 0)) {
3760         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3761             len -= 2;
3762             tmpbuf[len] = '\0';
3763 #ifdef PERL_MAD
3764             soff = s - SvPVX(PL_linestr);
3765 #endif
3766             goto bare_package;
3767         }
3768         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3769         if (indirgv && GvCVu(indirgv))
3770             return 0;
3771         /* filehandle or package name makes it a method */
3772         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3773 #ifdef PERL_MAD
3774             soff = s - SvPVX(PL_linestr);
3775 #endif
3776             s = PEEKSPACE(s);
3777             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3778                 return 0;       /* no assumptions -- "=>" quotes bareword */
3779       bare_package:
3780             start_force(PL_curforce);
3781             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3782                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3783             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3784             if (PL_madskills)
3785                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3786             PL_expect = XTERM;
3787             force_next(WORD);
3788             PL_bufptr = s;
3789 #ifdef PERL_MAD
3790             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3791 #endif
3792             return *s == '(' ? FUNCMETH : METHOD;
3793         }
3794     }
3795     return 0;
3796 }
3797
3798 /* Encoded script support. filter_add() effectively inserts a
3799  * 'pre-processing' function into the current source input stream.
3800  * Note that the filter function only applies to the current source file
3801  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3802  *
3803  * The datasv parameter (which may be NULL) can be used to pass
3804  * private data to this instance of the filter. The filter function
3805  * can recover the SV using the FILTER_DATA macro and use it to
3806  * store private buffers and state information.
3807  *
3808  * The supplied datasv parameter is upgraded to a PVIO type
3809  * and the IoDIRP/IoANY field is used to store the function pointer,
3810  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3811  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3812  * private use must be set using malloc'd pointers.
3813  */
3814
3815 SV *
3816 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3817 {
3818     dVAR;
3819     if (!funcp)
3820         return NULL;
3821
3822     if (!PL_parser)
3823         return NULL;
3824
3825     if (!PL_rsfp_filters)
3826         PL_rsfp_filters = newAV();
3827     if (!datasv)
3828         datasv = newSV(0);
3829     SvUPGRADE(datasv, SVt_PVIO);
3830     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3831     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3832     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3833                           FPTR2DPTR(void *, IoANY(datasv)),
3834                           SvPV_nolen(datasv)));
3835     av_unshift(PL_rsfp_filters, 1);
3836     av_store(PL_rsfp_filters, 0, datasv) ;
3837     return(datasv);
3838 }
3839
3840
3841 /* Delete most recently added instance of this filter function. */
3842 void
3843 Perl_filter_del(pTHX_ filter_t funcp)
3844 {
3845     dVAR;
3846     SV *datasv;
3847
3848     PERL_ARGS_ASSERT_FILTER_DEL;
3849
3850 #ifdef DEBUGGING
3851     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3852                           FPTR2DPTR(void*, funcp)));
3853 #endif
3854     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3855         return;
3856     /* if filter is on top of stack (usual case) just pop it off */
3857     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3858     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3859         sv_free(av_pop(PL_rsfp_filters));
3860
3861         return;
3862     }
3863     /* we need to search for the correct entry and clear it     */
3864     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3865 }
3866
3867
3868 /* Invoke the idxth filter function for the current rsfp.        */
3869 /* maxlen 0 = read one text line */
3870 I32
3871 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3872 {
3873     dVAR;
3874     filter_t funcp;
3875     SV *datasv = NULL;
3876     /* This API is bad. It should have been using unsigned int for maxlen.
3877        Not sure if we want to change the API, but if not we should sanity
3878        check the value here.  */
3879     const unsigned int correct_length
3880         = maxlen < 0 ?
3881 #ifdef PERL_MICRO
3882         0x7FFFFFFF
3883 #else
3884         INT_MAX
3885 #endif
3886         : maxlen;
3887
3888     PERL_ARGS_ASSERT_FILTER_READ;
3889
3890     if (!PL_parser || !PL_rsfp_filters)
3891         return -1;
3892     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3893         /* Provide a default input filter to make life easy.    */
3894         /* Note that we append to the line. This is handy.      */
3895         DEBUG_P(PerlIO_printf(Perl_debug_log,
3896                               "filter_read %d: from rsfp\n", idx));
3897         if (correct_length) {
3898             /* Want a block */
3899             int len ;
3900             const int old_len = SvCUR(buf_sv);
3901
3902             /* ensure buf_sv is large enough */
3903             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3904             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3905                                    correct_length)) <= 0) {
3906                 if (PerlIO_error(PL_rsfp))
3907                     return -1;          /* error */
3908                 else
3909                     return 0 ;          /* end of file */
3910             }
3911             SvCUR_set(buf_sv, old_len + len) ;
3912             SvPVX(buf_sv)[old_len + len] = '\0';
3913         } else {
3914             /* Want a line */
3915             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3916                 if (PerlIO_error(PL_rsfp))
3917                     return -1;          /* error */
3918                 else
3919                     return 0 ;          /* end of file */
3920             }
3921         }
3922         return SvCUR(buf_sv);
3923     }
3924     /* Skip this filter slot if filter has been deleted */
3925     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3926         DEBUG_P(PerlIO_printf(Perl_debug_log,
3927                               "filter_read %d: skipped (filter deleted)\n",
3928                               idx));
3929         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3930     }
3931     /* Get function pointer hidden within datasv        */
3932     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3933     DEBUG_P(PerlIO_printf(Perl_debug_log,
3934                           "filter_read %d: via function %p (%s)\n",
3935                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3936     /* Call function. The function is expected to       */
3937     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3938     /* Return: <0:error, =0:eof, >0:not eof             */
3939     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3940 }
3941
3942 STATIC char *
3943 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3944 {
3945     dVAR;
3946
3947     PERL_ARGS_ASSERT_FILTER_GETS;
3948
3949 #ifdef PERL_CR_FILTER
3950     if (!PL_rsfp_filters) {
3951         filter_add(S_cr_textfilter,NULL);
3952     }
3953 #endif
3954     if (PL_rsfp_filters) {
3955         if (!append)
3956             SvCUR_set(sv, 0);   /* start with empty line        */
3957         if (FILTER_READ(0, sv, 0) > 0)
3958             return ( SvPVX(sv) ) ;
3959         else
3960             return NULL ;
3961     }
3962     else
3963         return (sv_gets(sv, PL_rsfp, append));
3964 }
3965
3966 STATIC HV *
3967 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3968 {
3969     dVAR;
3970     GV *gv;
3971
3972     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3973
3974     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3975         return PL_curstash;
3976
3977     if (len > 2 &&
3978         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3979         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3980     {
3981         return GvHV(gv);                        /* Foo:: */
3982     }
3983
3984     /* use constant CLASS => 'MyClass' */
3985     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3986     if (gv && GvCV(gv)) {
3987         SV * const sv = cv_const_sv(GvCV(gv));
3988         if (sv)
3989             pkgname = SvPV_const(sv, len);
3990     }
3991
3992     return gv_stashpvn(pkgname, len, 0);
3993 }
3994
3995 /*
3996  * S_readpipe_override
3997  * Check whether readpipe() is overridden, and generates the appropriate
3998  * optree, provided sublex_start() is called afterwards.
3999  */
4000 STATIC void
4001 S_readpipe_override(pTHX)
4002 {
4003     GV **gvp;
4004     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4005     pl_yylval.ival = OP_BACKTICK;
4006     if ((gv_readpipe
4007                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4008             ||
4009             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4010              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4011              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4012     {
4013         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4014             op_append_elem(OP_LIST,
4015                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4016                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4017     }
4018 }
4019
4020 #ifdef PERL_MAD
4021  /*
4022  * Perl_madlex
4023  * The intent of this yylex wrapper is to minimize the changes to the
4024  * tokener when we aren't interested in collecting madprops.  It remains
4025  * to be seen how successful this strategy will be...
4026  */
4027
4028 int
4029 Perl_madlex(pTHX)
4030 {
4031     int optype;
4032     char *s = PL_bufptr;
4033
4034     /* make sure PL_thiswhite is initialized */
4035     PL_thiswhite = 0;
4036     PL_thismad = 0;
4037
4038     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4039     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4040         return S_pending_ident(aTHX);
4041
4042     /* previous token ate up our whitespace? */
4043     if (!PL_lasttoke && PL_nextwhite) {
4044         PL_thiswhite = PL_nextwhite;
4045         PL_nextwhite = 0;
4046     }
4047
4048     /* isolate the token, and figure out where it is without whitespace */
4049     PL_realtokenstart = -1;
4050     PL_thistoken = 0;
4051     optype = yylex();
4052     s = PL_bufptr;
4053     assert(PL_curforce < 0);
4054
4055     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4056         if (!PL_thistoken) {
4057             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4058                 PL_thistoken = newSVpvs("");
4059             else {
4060                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4061                 PL_thistoken = newSVpvn(tstart, s - tstart);
4062             }
4063         }
4064         if (PL_thismad) /* install head */
4065             CURMAD('X', PL_thistoken);
4066     }
4067
4068     /* last whitespace of a sublex? */
4069     if (optype == ')' && PL_endwhite) {
4070         CURMAD('X', PL_endwhite);
4071     }
4072
4073     if (!PL_thismad) {
4074
4075         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4076         if (!PL_thiswhite && !PL_endwhite && !optype) {
4077             sv_free(PL_thistoken);
4078             PL_thistoken = 0;
4079             return 0;
4080         }
4081
4082         /* put off final whitespace till peg */
4083         if (optype == ';' && !PL_rsfp) {
4084             PL_nextwhite = PL_thiswhite;
4085             PL_thiswhite = 0;
4086         }
4087         else if (PL_thisopen) {
4088             CURMAD('q', PL_thisopen);
4089             if (PL_thistoken)
4090                 sv_free(PL_thistoken);
4091             PL_thistoken = 0;
4092         }
4093         else {
4094             /* Store actual token text as madprop X */
4095             CURMAD('X', PL_thistoken);
4096         }
4097
4098         if (PL_thiswhite) {
4099             /* add preceding whitespace as madprop _ */
4100             CURMAD('_', PL_thiswhite);
4101         }
4102
4103         if (PL_thisstuff) {
4104             /* add quoted material as madprop = */
4105             CURMAD('=', PL_thisstuff);
4106         }
4107
4108         if (PL_thisclose) {
4109             /* add terminating quote as madprop Q */
4110             CURMAD('Q', PL_thisclose);
4111         }
4112     }
4113
4114     /* special processing based on optype */
4115
4116     switch (optype) {
4117
4118     /* opval doesn't need a TOKEN since it can already store mp */
4119     case WORD:
4120     case METHOD:
4121     case FUNCMETH:
4122     case THING:
4123     case PMFUNC:
4124     case PRIVATEREF:
4125     case FUNC0SUB:
4126     case UNIOPSUB:
4127     case LSTOPSUB:
4128         if (pl_yylval.opval)
4129             append_madprops(PL_thismad, pl_yylval.opval, 0);
4130         PL_thismad = 0;
4131         return optype;
4132
4133     /* fake EOF */
4134     case 0:
4135         optype = PEG;
4136         if (PL_endwhite) {
4137             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4138             PL_endwhite = 0;
4139         }
4140         break;
4141
4142     case ']':
4143     case '}':
4144         if (PL_faketokens)
4145             break;
4146         /* remember any fake bracket that lexer is about to discard */
4147         if (PL_lex_brackets == 1 &&
4148             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4149         {
4150             s = PL_bufptr;
4151             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4152                 s++;
4153             if (*s == '}') {
4154                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4155                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4156                 PL_thiswhite = 0;
4157                 PL_bufptr = s - 1;
4158                 break;  /* don't bother looking for trailing comment */
4159             }
4160             else
4161                 s = PL_bufptr;
4162         }
4163         if (optype == ']')
4164             break;
4165         /* FALLTHROUGH */
4166
4167     /* attach a trailing comment to its statement instead of next token */
4168     case ';':
4169         if (PL_faketokens)
4170             break;
4171         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4172             s = PL_bufptr;
4173             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4174                 s++;
4175             if (*s == '\n' || *s == '#') {
4176                 while (s < PL_bufend && *s != '\n')
4177                     s++;
4178                 if (s < PL_bufend)
4179                     s++;
4180                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4181                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4182                 PL_thiswhite = 0;
4183                 PL_bufptr = s;
4184             }
4185         }
4186         break;
4187
4188     /* pval */
4189     case LABEL:
4190         break;
4191
4192     /* ival */
4193     default:
4194         break;
4195
4196     }
4197
4198     /* Create new token struct.  Note: opvals return early above. */
4199     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4200     PL_thismad = 0;
4201     return optype;
4202 }
4203 #endif
4204
4205 STATIC char *
4206 S_tokenize_use(pTHX_ int is_use, char *s) {
4207     dVAR;
4208
4209     PERL_ARGS_ASSERT_TOKENIZE_USE;
4210
4211     if (PL_expect != XSTATE)
4212         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4213                     is_use ? "use" : "no"));
4214     s = SKIPSPACE1(s);
4215     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4216         s = force_version(s, TRUE);
4217         if (*s == ';' || *s == '}'
4218                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4219             start_force(PL_curforce);
4220             NEXTVAL_NEXTTOKE.opval = NULL;
4221             force_next(WORD);
4222         }
4223         else if (*s == 'v') {
4224             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4225             s = force_version(s, FALSE);
4226         }
4227     }
4228     else {
4229         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4230         s = force_version(s, FALSE);
4231     }
4232     pl_yylval.ival = is_use;
4233     return s;
4234 }
4235 #ifdef DEBUGGING
4236     static const char* const exp_name[] =
4237         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4238           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4239         };
4240 #endif
4241
4242 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4243 STATIC bool
4244 S_word_takes_any_delimeter(char *p, STRLEN len)
4245 {
4246     return (len == 1 && strchr("msyq", p[0])) ||
4247            (len == 2 && (
4248             (p[0] == 't' && p[1] == 'r') ||
4249             (p[0] == 'q' && strchr("qwxr", p[1]))));
4250 }
4251
4252 /*
4253   yylex
4254
4255   Works out what to call the token just pulled out of the input
4256   stream.  The yacc parser takes care of taking the ops we return and
4257   stitching them into a tree.
4258
4259   Returns:
4260     PRIVATEREF
4261
4262   Structure:
4263       if read an identifier
4264           if we're in a my declaration
4265               croak if they tried to say my($foo::bar)
4266               build the ops for a my() declaration
4267           if it's an access to a my() variable
4268               are we in a sort block?
4269                   croak if my($a); $a <=> $b
4270               build ops for access to a my() variable
4271           if in a dq string, and they've said @foo and we can't find @foo
4272               croak
4273           build ops for a bareword
4274       if we already built the token before, use it.
4275 */
4276
4277
4278 #ifdef __SC__
4279 #pragma segment Perl_yylex
4280 #endif
4281 int
4282 Perl_yylex(pTHX)
4283 {
4284     dVAR;
4285     register char *s = PL_bufptr;
4286     register char *d;
4287     STRLEN len;
4288     bool bof = FALSE;
4289     U32 fake_eof = 0;
4290
4291     /* orig_keyword, gvp, and gv are initialized here because
4292      * jump to the label just_a_word_zero can bypass their
4293      * initialization later. */
4294     I32 orig_keyword = 0;
4295     GV *gv = NULL;
4296     GV **gvp = NULL;
4297
4298     DEBUG_T( {
4299         SV* tmp = newSVpvs("");
4300         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4301             (IV)CopLINE(PL_curcop),
4302             lex_state_names[PL_lex_state],
4303             exp_name[PL_expect],
4304             pv_display(tmp, s, strlen(s), 0, 60));
4305         SvREFCNT_dec(tmp);
4306     } );
4307     /* check if there's an identifier for us to look at */
4308     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4309         return REPORT(S_pending_ident(aTHX));
4310
4311     /* no identifier pending identification */
4312
4313     switch (PL_lex_state) {
4314 #ifdef COMMENTARY
4315     case LEX_NORMAL:            /* Some compilers will produce faster */
4316     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4317         break;
4318 #endif
4319
4320     /* when we've already built the next token, just pull it out of the queue */
4321     case LEX_KNOWNEXT:
4322 #ifdef PERL_MAD
4323         PL_lasttoke--;
4324         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4325         if (PL_madskills) {
4326             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4327             PL_nexttoke[PL_lasttoke].next_mad = 0;
4328             if (PL_thismad && PL_thismad->mad_key == '_') {
4329                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4330                 PL_thismad->mad_val = 0;
4331                 mad_free(PL_thismad);
4332                 PL_thismad = 0;
4333             }
4334         }
4335         if (!PL_lasttoke) {
4336             PL_lex_state = PL_lex_defer;
4337             PL_expect = PL_lex_expect;
4338             PL_lex_defer = LEX_NORMAL;
4339             if (!PL_nexttoke[PL_lasttoke].next_type)
4340                 return yylex();
4341         }
4342 #else
4343         PL_nexttoke--;
4344         pl_yylval = PL_nextval[PL_nexttoke];
4345         if (!PL_nexttoke) {
4346             PL_lex_state = PL_lex_defer;
4347             PL_expect = PL_lex_expect;
4348             PL_lex_defer = LEX_NORMAL;
4349         }
4350 #endif
4351         {
4352             I32 next_type;
4353 #ifdef PERL_MAD
4354             next_type = PL_nexttoke[PL_lasttoke].next_type;
4355 #else
4356             next_type = PL_nexttype[PL_nexttoke];
4357 #endif
4358             if (next_type & (7<<24)) {
4359                 if (next_type & (1<<24)) {
4360                     if (PL_lex_brackets > 100)
4361                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4362                     PL_lex_brackstack[PL_lex_brackets++] =
4363                         (char) ((next_type >> 16) & 0xff);
4364                 }
4365                 if (next_type & (2<<24))
4366                     PL_lex_allbrackets++;
4367                 if (next_type & (4<<24))
4368                     PL_lex_allbrackets--;
4369                 next_type &= 0xffff;
4370             }
4371 #ifdef PERL_MAD
4372             /* FIXME - can these be merged?  */
4373             return next_type;
4374 #else
4375             return REPORT(next_type);
4376 #endif
4377         }
4378
4379     /* interpolated case modifiers like \L \U, including \Q and \E.
4380        when we get here, PL_bufptr is at the \
4381     */
4382     case LEX_INTERPCASEMOD:
4383 #ifdef DEBUGGING
4384         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4385             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4386 #endif
4387         /* handle \E or end of string */
4388         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4389             /* if at a \E */
4390             if (PL_lex_casemods) {
4391                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4392                 PL_lex_casestack[PL_lex_casemods] = '\0';
4393
4394                 if (PL_bufptr != PL_bufend
4395                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4396                     PL_bufptr += 2;
4397                     PL_lex_state = LEX_INTERPCONCAT;
4398 #ifdef PERL_MAD
4399                     if (PL_madskills)
4400                         PL_thistoken = newSVpvs("\\E");
4401 #endif
4402                 }
4403                 PL_lex_allbrackets--;
4404                 return REPORT(')');
4405             }
4406 #ifdef PERL_MAD
4407             while (PL_bufptr != PL_bufend &&
4408               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4409                 if (!PL_thiswhite)
4410                     PL_thiswhite = newSVpvs("");
4411                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4412                 PL_bufptr += 2;
4413             }
4414 #else
4415             if (PL_bufptr != PL_bufend)
4416                 PL_bufptr += 2;
4417 #endif
4418             PL_lex_state = LEX_INTERPCONCAT;
4419             return yylex();
4420         }
4421         else {
4422             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4423               "### Saw case modifier\n"); });
4424             s = PL_bufptr + 1;
4425             if (s[1] == '\\' && s[2] == 'E') {
4426 #ifdef PERL_MAD
4427                 if (!PL_thiswhite)
4428                     PL_thiswhite = newSVpvs("");
4429                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4430 #endif
4431                 PL_bufptr = s + 3;
4432                 PL_lex_state = LEX_INTERPCONCAT;
4433                 return yylex();
4434             }
4435             else {
4436                 I32 tmp;
4437                 if (!PL_madskills) /* when just compiling don't need correct */
4438                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4439                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4440                 if ((*s == 'L' || *s == 'U') &&
4441                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4442                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4443                     PL_lex_allbrackets--;
4444                     return REPORT(')');
4445                 }
4446                 if (PL_lex_casemods > 10)
4447                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4448                 PL_lex_casestack[PL_lex_casemods++] = *s;
4449                 PL_lex_casestack[PL_lex_casemods] = '\0';
4450                 PL_lex_state = LEX_INTERPCONCAT;
4451                 start_force(PL_curforce);
4452                 NEXTVAL_NEXTTOKE.ival = 0;
4453                 force_next((2<<24)|'(');
4454                 start_force(PL_curforce);
4455                 if (*s == 'l')
4456                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4457                 else if (*s == 'u')
4458                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4459                 else if (*s == 'L')
4460                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4461                 else if (*s == 'U')
4462                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4463                 else if (*s == 'Q')
4464                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4465                 else
4466                     Perl_croak(aTHX_ "panic: yylex");
4467                 if (PL_madskills) {
4468                     SV* const tmpsv = newSVpvs("\\ ");
4469                     /* replace the space with the character we want to escape
4470                      */
4471                     SvPVX(tmpsv)[1] = *s;
4472                     curmad('_', tmpsv);
4473                 }
4474                 PL_bufptr = s + 1;
4475             }
4476             force_next(FUNC);
4477             if (PL_lex_starts) {
4478                 s = PL_bufptr;
4479                 PL_lex_starts = 0;
4480 #ifdef PERL_MAD
4481                 if (PL_madskills) {
4482                     if (PL_thistoken)
4483                         sv_free(PL_thistoken);
4484                     PL_thistoken = newSVpvs("");
4485                 }
4486 #endif
4487                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4488                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4489                     OPERATOR(',');
4490                 else
4491                     Aop(OP_CONCAT);
4492             }
4493             else
4494                 return yylex();
4495         }
4496
4497     case LEX_INTERPPUSH:
4498         return REPORT(sublex_push());
4499
4500     case LEX_INTERPSTART:
4501         if (PL_bufptr == PL_bufend)
4502             return REPORT(sublex_done());
4503         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4504               "### Interpolated variable\n"); });
4505         PL_expect = XTERM;
4506         PL_lex_dojoin = (*PL_bufptr == '@');
4507         PL_lex_state = LEX_INTERPNORMAL;
4508         if (PL_lex_dojoin) {
4509             start_force(PL_curforce);
4510             NEXTVAL_NEXTTOKE.ival = 0;
4511             force_next(',');
4512             start_force(PL_curforce);
4513             force_ident("\"", '$');
4514             start_force(PL_curforce);
4515             NEXTVAL_NEXTTOKE.ival = 0;
4516             force_next('$');
4517             start_force(PL_curforce);
4518             NEXTVAL_NEXTTOKE.ival = 0;
4519             force_next((2<<24)|'(');
4520             start_force(PL_curforce);
4521             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4522             force_next(FUNC);
4523         }
4524         if (PL_lex_starts++) {
4525             s = PL_bufptr;
4526 #ifdef PERL_MAD
4527             if (PL_madskills) {
4528                 if (PL_thistoken)
4529                     sv_free(PL_thistoken);
4530                 PL_thistoken = newSVpvs("");
4531             }
4532 #endif
4533             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4534             if (!PL_lex_casemods && PL_lex_inpat)
4535                 OPERATOR(',');
4536             else
4537                 Aop(OP_CONCAT);
4538         }
4539         return yylex();
4540
4541     case LEX_INTERPENDMAYBE:
4542         if (intuit_more(PL_bufptr)) {
4543             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4544             break;
4545         }
4546         /* FALL THROUGH */
4547
4548     case LEX_INTERPEND:
4549         if (PL_lex_dojoin) {
4550             PL_lex_dojoin = FALSE;
4551             PL_lex_state = LEX_INTERPCONCAT;
4552 #ifdef PERL_MAD
4553             if (PL_madskills) {
4554                 if (PL_thistoken)
4555                     sv_free(PL_thistoken);
4556                 PL_thistoken = newSVpvs("");
4557             }
4558 #endif
4559             PL_lex_allbrackets--;
4560             return REPORT(')');
4561         }
4562         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4563             && SvEVALED(PL_lex_repl))
4564         {
4565             if (PL_bufptr != PL_bufend)
4566                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4567             PL_lex_repl = NULL;
4568         }
4569         /* FALLTHROUGH */
4570     case LEX_INTERPCONCAT:
4571 #ifdef DEBUGGING
4572         if (PL_lex_brackets)
4573             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4574 #endif
4575         if (PL_bufptr == PL_bufend)
4576             return REPORT(sublex_done());
4577
4578         if (SvIVX(PL_linestr) == '\'') {
4579             SV *sv = newSVsv(PL_linestr);
4580             if (!PL_lex_inpat)
4581                 sv = tokeq(sv);
4582             else if ( PL_hints & HINT_NEW_RE )
4583                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4584             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4585             s = PL_bufend;
4586         }
4587         else {
4588             s = scan_const(PL_bufptr);
4589             if (*s == '\\')
4590                 PL_lex_state = LEX_INTERPCASEMOD;
4591             else
4592                 PL_lex_state = LEX_INTERPSTART;
4593         }
4594
4595         if (s != PL_bufptr) {
4596             start_force(PL_curforce);
4597             if (PL_madskills) {
4598                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4599             }
4600             NEXTVAL_NEXTTOKE = pl_yylval;
4601             PL_expect = XTERM;
4602             force_next(THING);
4603             if (PL_lex_starts++) {
4604 #ifdef PERL_MAD
4605                 if (PL_madskills) {
4606                     if (PL_thistoken)
4607                         sv_free(PL_thistoken);
4608                     PL_thistoken = newSVpvs("");
4609                 }
4610 #endif
4611                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4612                 if (!PL_lex_casemods && PL_lex_inpat)
4613                     OPERATOR(',');
4614                 else
4615                     Aop(OP_CONCAT);
4616             }
4617             else {
4618                 PL_bufptr = s;
4619                 return yylex();
4620             }
4621         }
4622
4623         return yylex();
4624     case LEX_FORMLINE:
4625         PL_lex_state = LEX_NORMAL;
4626         s = scan_formline(PL_bufptr);
4627         if (!PL_lex_formbrack)
4628             goto rightbracket;
4629         OPERATOR(';');
4630     }
4631
4632     s = PL_bufptr;
4633     PL_oldoldbufptr = PL_oldbufptr;
4634     PL_oldbufptr = s;
4635
4636   retry:
4637 #ifdef PERL_MAD
4638     if (PL_thistoken) {
4639         sv_free(PL_thistoken);
4640         PL_thistoken = 0;
4641     }
4642     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4643 #endif
4644     switch (*s) {
4645     default:
4646         if (isIDFIRST_lazy_if(s,UTF))
4647             goto keylookup;
4648         {
4649         unsigned char c = *s;
4650         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4651         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4652             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4653         } else {
4654             d = PL_linestart;
4655         }
4656         *s = '\0';
4657         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4658     }
4659     case 4:
4660     case 26:
4661         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4662     case 0:
4663 #ifdef PERL_MAD
4664         if (PL_madskills)
4665             PL_faketokens = 0;
4666 #endif
4667         if (!PL_rsfp) {
4668             PL_last_uni = 0;
4669             PL_last_lop = 0;
4670             if (PL_lex_brackets &&
4671                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4672                 yyerror((const char *)
4673                         (PL_lex_formbrack
4674                          ? "Format not terminated"
4675                          : "Missing right curly or square bracket"));
4676             }
4677             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4678                         "### Tokener got EOF\n");
4679             } );
4680             TOKEN(0);
4681         }
4682         if (s++ < PL_bufend)
4683             goto retry;                 /* ignore stray nulls */
4684         PL_last_uni = 0;
4685         PL_last_lop = 0;
4686         if (!PL_in_eval && !PL_preambled) {
4687             PL_preambled = TRUE;
4688 #ifdef PERL_MAD
4689             if (PL_madskills)
4690                 PL_faketokens = 1;
4691 #endif
4692             if (PL_perldb) {
4693                 /* Generate a string of Perl code to load the debugger.
4694                  * If PERL5DB is set, it will return the contents of that,
4695                  * otherwise a compile-time require of perl5db.pl.  */
4696
4697                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4698
4699                 if (pdb) {
4700                     sv_setpv(PL_linestr, pdb);
4701                     sv_catpvs(PL_linestr,";");
4702                 } else {
4703                     SETERRNO(0,SS_NORMAL);
4704                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4705                 }
4706             } else
4707                 sv_setpvs(PL_linestr,"");
4708             if (PL_preambleav) {
4709                 SV **svp = AvARRAY(PL_preambleav);
4710                 SV **const end = svp + AvFILLp(PL_preambleav);
4711                 while(svp <= end) {
4712                     sv_catsv(PL_linestr, *svp);
4713                     ++svp;
4714                     sv_catpvs(PL_linestr, ";");
4715                 }
4716                 sv_free(MUTABLE_SV(PL_preambleav));
4717                 PL_preambleav = NULL;
4718             }
4719             if (PL_minus_E)
4720                 sv_catpvs(PL_linestr,
4721                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4722             if (PL_minus_n || PL_minus_p) {
4723                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4724                 if (PL_minus_l)
4725                     sv_catpvs(PL_linestr,"chomp;");
4726                 if (PL_minus_a) {
4727                     if (PL_minus_F) {
4728                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4729                              || *PL_splitstr == '"')
4730                               && strchr(PL_splitstr + 1, *PL_splitstr))
4731                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4732                         else {
4733                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4734                                bytes can be used as quoting characters.  :-) */
4735                             const char *splits = PL_splitstr;
4736                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4737                             do {
4738                                 /* Need to \ \s  */
4739                                 if (*splits == '\\')
4740                                     sv_catpvn(PL_linestr, splits, 1);
4741                                 sv_catpvn(PL_linestr, splits, 1);
4742                             } while (*splits++);
4743                             /* This loop will embed the trailing NUL of
4744                                PL_linestr as the last thing it does before
4745                                terminating.  */
4746                             sv_catpvs(PL_linestr, ");");
4747                         }
4748                     }
4749                     else
4750                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4751                 }
4752             }
4753             sv_catpvs(PL_linestr, "\n");
4754             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4755             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4756             PL_last_lop = PL_last_uni = NULL;
4757             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4758                 update_debugger_info(PL_linestr, NULL, 0);
4759             goto retry;
4760         }
4761         do {
4762             fake_eof = 0;
4763             bof = PL_rsfp ? TRUE : FALSE;
4764             if (0) {
4765               fake_eof:
4766                 fake_eof = LEX_FAKE_EOF;
4767             }
4768             PL_bufptr = PL_bufend;
4769             CopLINE_inc(PL_curcop);
4770             if (!lex_next_chunk(fake_eof)) {
4771                 CopLINE_dec(PL_curcop);
4772                 s = PL_bufptr;
4773                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4774             }
4775             CopLINE_dec(PL_curcop);
4776 #ifdef PERL_MAD
4777             if (!PL_rsfp)
4778                 PL_realtokenstart = -1;
4779 #endif
4780             s = PL_bufptr;
4781             /* If it looks like the start of a BOM or raw UTF-16,
4782              * check if it in fact is. */
4783             if (bof && PL_rsfp &&
4784                      (*s == 0 ||
4785                       *(U8*)s == 0xEF ||
4786                       *(U8*)s >= 0xFE ||
4787                       s[1] == 0)) {
4788                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4789                 if (bof) {
4790                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4791                     s = swallow_bom((U8*)s);
4792                 }
4793             }
4794             if (PL_parser->in_pod) {
4795                 /* Incest with pod. */
4796 #ifdef PERL_MAD
4797                 if (PL_madskills)
4798                     sv_catsv(PL_thiswhite, PL_linestr);
4799 #endif
4800                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4801                     sv_setpvs(PL_linestr, "");
4802                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4803                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4804                     PL_last_lop = PL_last_uni = NULL;
4805                     PL_parser->in_pod = 0;
4806                 }
4807             }
4808             if (PL_rsfp)
4809                 incline(s);
4810         } while (PL_parser->in_pod);
4811         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4812         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4813         PL_last_lop = PL_last_uni = NULL;
4814         if (CopLINE(PL_curcop) == 1) {
4815             while (s < PL_bufend && isSPACE(*s))
4816                 s++;
4817             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4818                 s++;
4819 #ifdef PERL_MAD
4820             if (PL_madskills)
4821                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4822 #endif
4823             d = NULL;
4824             if (!PL_in_eval) {
4825                 if (*s == '#' && *(s+1) == '!')
4826                     d = s + 2;
4827 #ifdef ALTERNATE_SHEBANG
4828                 else {
4829                     static char const as[] = ALTERNATE_SHEBANG;
4830                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4831                         d = s + (sizeof(as) - 1);
4832                 }
4833 #endif /* ALTERNATE_SHEBANG */
4834             }
4835             if (d) {
4836                 char *ipath;
4837                 char *ipathend;
4838
4839                 while (isSPACE(*d))
4840                     d++;
4841                 ipath = d;
4842                 while (*d && !isSPACE(*d))
4843                     d++;
4844                 ipathend = d;
4845
4846 #ifdef ARG_ZERO_IS_SCRIPT
4847                 if (ipathend > ipath) {
4848                     /*
4849                      * HP-UX (at least) sets argv[0] to the script name,
4850                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4851                      * at least, set argv[0] to the basename of the Perl
4852                      * interpreter. So, having found "#!", we'll set it right.
4853                      */
4854                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4855                                                     SVt_PV)); /* $^X */
4856                     assert(SvPOK(x) || SvGMAGICAL(x));
4857                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4858                         sv_setpvn(x, ipath, ipathend - ipath);
4859                         SvSETMAGIC(x);
4860                     }
4861                     else {
4862                         STRLEN blen;
4863                         STRLEN llen;
4864                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4865                         const char * const lstart = SvPV_const(x,llen);
4866                         if (llen < blen) {
4867                             bstart += blen - llen;
4868                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4869                                 sv_setpvn(x, ipath, ipathend - ipath);
4870                                 SvSETMAGIC(x);
4871                             }
4872                         }
4873                     }
4874                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4875                 }
4876 #endif /* ARG_ZERO_IS_SCRIPT */
4877
4878                 /*
4879                  * Look for options.
4880                  */
4881                 d = instr(s,"perl -");
4882                 if (!d) {
4883                     d = instr(s,"perl");
4884 #if defined(DOSISH)
4885                     /* avoid getting into infinite loops when shebang
4886                      * line contains "Perl" rather than "perl" */
4887                     if (!d) {
4888                         for (d = ipathend-4; d >= ipath; --d) {
4889                             if ((*d == 'p' || *d == 'P')
4890                                 && !ibcmp(d, "perl", 4))
4891                             {
4892                                 break;
4893                             }
4894                         }
4895                         if (d < ipath)
4896                             d = NULL;
4897                     }
4898 #endif
4899                 }
4900 #ifdef ALTERNATE_SHEBANG
4901                 /*
4902                  * If the ALTERNATE_SHEBANG on this system starts with a
4903                  * character that can be part of a Perl expression, then if
4904                  * we see it but not "perl", we're probably looking at the
4905                  * start of Perl code, not a request to hand off to some
4906                  * other interpreter.  Similarly, if "perl" is there, but
4907                  * not in the first 'word' of the line, we assume the line
4908                  * contains the start of the Perl program.
4909                  */
4910                 if (d && *s != '#') {
4911                     const char *c = ipath;
4912                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4913                         c++;
4914                     if (c < d)
4915                         d = NULL;       /* "perl" not in first word; ignore */
4916                     else
4917                         *s = '#';       /* Don't try to parse shebang line */
4918                 }
4919 #endif /* ALTERNATE_SHEBANG */
4920                 if (!d &&
4921                     *s == '#' &&
4922                     ipathend > ipath &&
4923                     !PL_minus_c &&
4924                     !instr(s,"indir") &&
4925                     instr(PL_origargv[0],"perl"))
4926                 {
4927                     dVAR;
4928                     char **newargv;
4929
4930                     *ipathend = '\0';
4931                     s = ipathend + 1;
4932                     while (s < PL_bufend && isSPACE(*s))
4933                         s++;
4934                     if (s < PL_bufend) {
4935                         Newx(newargv,PL_origargc+3,char*);
4936                         newargv[1] = s;
4937                         while (s < PL_bufend && !isSPACE(*s))
4938                             s++;
4939                         *s = '\0';
4940                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4941                     }
4942                     else
4943                         newargv = PL_origargv;
4944                     newargv[0] = ipath;
4945                     PERL_FPU_PRE_EXEC
4946                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4947                     PERL_FPU_POST_EXEC
4948                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4949                 }
4950                 if (d) {
4951                     while (*d && !isSPACE(*d))
4952                         d++;
4953                     while (SPACE_OR_TAB(*d))
4954                         d++;
4955
4956                     if (*d++ == '-') {
4957                         const bool switches_done = PL_doswitches;
4958                         const U32 oldpdb = PL_perldb;
4959                         const bool oldn = PL_minus_n;
4960                         const bool oldp = PL_minus_p;
4961                         const char *d1 = d;
4962
4963                         do {
4964                             bool baduni = FALSE;
4965                             if (*d1 == 'C') {
4966                                 const char *d2 = d1 + 1;
4967                                 if (parse_unicode_opts((const char **)&d2)
4968                                     != PL_unicode)
4969                                     baduni = TRUE;
4970                             }
4971                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4972                                 const char * const m = d1;
4973                                 while (*d1 && !isSPACE(*d1))
4974                                     d1++;
4975                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4976                                       (int)(d1 - m), m);
4977                             }
4978                             d1 = moreswitches(d1);
4979                         } while (d1);
4980                         if (PL_doswitches && !switches_done) {
4981                             int argc = PL_origargc;
4982                             char **argv = PL_origargv;
4983                             do {
4984                                 argc--,argv++;
4985                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4986                             init_argv_symbols(argc,argv);
4987                         }
4988                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4989                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4990                               /* if we have already added "LINE: while (<>) {",
4991                                  we must not do it again */
4992                         {
4993                             sv_setpvs(PL_linestr, "");
4994                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4995                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4996                             PL_last_lop = PL_last_uni = NULL;
4997                             PL_preambled = FALSE;
4998                             if (PERLDB_LINE || PERLDB_SAVESRC)
4999                                 (void)gv_fetchfile(PL_origfilename);
5000                             goto retry;
5001                         }
5002                     }
5003                 }
5004             }
5005         }
5006         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5007             PL_bufptr = s;
5008             PL_lex_state = LEX_FORMLINE;
5009             return yylex();
5010         }
5011         goto retry;
5012     case '\r':
5013 #ifdef PERL_STRICT_CR
5014         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5015         Perl_croak(aTHX_
5016       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5017 #endif
5018     case ' ': case '\t': case '\f': case 013:
5019 #ifdef PERL_MAD
5020         PL_realtokenstart = -1;
5021         if (!PL_thiswhite)
5022             PL_thiswhite = newSVpvs("");
5023         sv_catpvn(PL_thiswhite, s, 1);
5024 #endif
5025         s++;
5026         goto retry;
5027     case '#':
5028     case '\n':
5029 #ifdef PERL_MAD
5030         PL_realtokenstart = -1;
5031         if (PL_madskills)
5032             PL_faketokens = 0;
5033 #endif
5034         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5035             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5036                 /* handle eval qq[#line 1 "foo"\n ...] */
5037                 CopLINE_dec(PL_curcop);
5038                 incline(s);
5039             }
5040             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5041                 s = SKIPSPACE0(s);
5042                 if (!PL_in_eval || PL_rsfp)
5043                     incline(s);
5044             }
5045             else {
5046                 d = s;
5047                 while (d < PL_bufend && *d != '\n')
5048                     d++;
5049                 if (d < PL_bufend)
5050                     d++;
5051                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5052                   Perl_croak(aTHX_ "panic: input overflow");
5053 #ifdef PERL_MAD
5054                 if (PL_madskills)
5055                     PL_thiswhite = newSVpvn(s, d - s);
5056 #endif
5057                 s = d;
5058                 incline(s);
5059             }
5060             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5061                 PL_bufptr = s;
5062                 PL_lex_state = LEX_FORMLINE;
5063                 return yylex();
5064             }
5065         }
5066         else {
5067 #ifdef PERL_MAD
5068             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5069                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5070                     PL_faketokens = 0;
5071                     s = SKIPSPACE0(s);
5072                     TOKEN(PEG); /* make sure any #! line is accessible */
5073                 }
5074                 s = SKIPSPACE0(s);
5075             }
5076             else {
5077 /*              if (PL_madskills && PL_lex_formbrack) { */
5078                     d = s;
5079                     while (d < PL_bufend && *d != '\n')
5080                         d++;
5081                     if (d < PL_bufend)
5082                         d++;
5083                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5084                       Perl_croak(aTHX_ "panic: input overflow");
5085                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5086                         if (!PL_thiswhite)
5087                             PL_thiswhite = newSVpvs("");
5088                         if (CopLINE(PL_curcop) == 1) {
5089                             sv_setpvs(PL_thiswhite, "");
5090                             PL_faketokens = 0;
5091                         }
5092                         sv_catpvn(PL_thiswhite, s, d - s);
5093                     }
5094                     s = d;
5095 /*              }
5096                 *s = '\0';
5097                 PL_bufend = s; */
5098             }
5099 #else
5100             *s = '\0';
5101             PL_bufend = s;
5102 #endif
5103         }
5104         goto retry;
5105     case '-':
5106         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5107             I32 ftst = 0;
5108             char tmp;
5109
5110             s++;
5111             PL_bufptr = s;
5112             tmp = *s++;
5113
5114             while (s < PL_bufend && SPACE_OR_TAB(*s))
5115                 s++;
5116
5117             if (strnEQ(s,"=>",2)) {
5118                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5119                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5120                 OPERATOR('-');          /* unary minus */
5121             }
5122             PL_last_uni = PL_oldbufptr;
5123             switch (tmp) {
5124             case 'r': ftst = OP_FTEREAD;        break;
5125             case 'w': ftst = OP_FTEWRITE;       break;
5126             case 'x': ftst = OP_FTEEXEC;        break;
5127             case 'o': ftst = OP_FTEOWNED;       break;
5128             case 'R': ftst = OP_FTRREAD;        break;
5129             case 'W': ftst = OP_FTRWRITE;       break;
5130             case 'X': ftst = OP_FTREXEC;        break;
5131             case 'O': ftst = OP_FTROWNED;       break;
5132             case 'e': ftst = OP_FTIS;           break;
5133             case 'z': ftst = OP_FTZERO;         break;
5134             case 's': ftst = OP_FTSIZE;         break;
5135             case 'f': ftst = OP_FTFILE;         break;
5136             case 'd': ftst = OP_FTDIR;          break;
5137             case 'l': ftst = OP_FTLINK;         break;
5138             case 'p': ftst = OP_FTPIPE;         break;
5139             case 'S': ftst = OP_FTSOCK;         break;
5140             case 'u': ftst = OP_FTSUID;         break;
5141             case 'g': ftst = OP_FTSGID;         break;
5142             case 'k': ftst = OP_FTSVTX;         break;
5143             case 'b': ftst = OP_FTBLK;          break;
5144             case 'c': ftst = OP_FTCHR;          break;
5145             case 't': ftst = OP_FTTTY;          break;
5146             case 'T': ftst = OP_FTTEXT;         break;
5147             case 'B': ftst = OP_FTBINARY;       break;
5148             case 'M': case 'A': case 'C':
5149                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5150                 switch (tmp) {
5151                 case 'M': ftst = OP_FTMTIME;    break;
5152                 case 'A': ftst = OP_FTATIME;    break;
5153                 case 'C': ftst = OP_FTCTIME;    break;
5154                 default:                        break;
5155                 }
5156                 break;
5157             default:
5158                 break;
5159             }
5160             if (ftst) {
5161                 PL_last_lop_op = (OPCODE)ftst;
5162                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5163                         "### Saw file test %c\n", (int)tmp);
5164                 } );
5165                 FTST(ftst);
5166             }
5167             else {
5168                 /* Assume it was a minus followed by a one-letter named
5169                  * subroutine call (or a -bareword), then. */
5170                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5171                         "### '-%c' looked like a file test but was not\n",
5172                         (int) tmp);
5173                 } );
5174                 s = --PL_bufptr;
5175             }
5176         }
5177         {
5178             const char tmp = *s++;
5179             if (*s == tmp) {
5180                 s++;
5181                 if (PL_expect == XOPERATOR)
5182                     TERM(POSTDEC);
5183                 else
5184                     OPERATOR(PREDEC);
5185             }
5186             else if (*s == '>') {
5187                 s++;
5188                 s = SKIPSPACE1(s);
5189                 if (isIDFIRST_lazy_if(s,UTF)) {
5190                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5191                     TOKEN(ARROW);
5192                 }
5193                 else if (*s == '$')
5194                     OPERATOR(ARROW);
5195                 else
5196                     TERM(ARROW);
5197             }
5198             if (PL_expect == XOPERATOR) {
5199                 if (*s == '=' && !PL_lex_allbrackets &&
5200                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5201                     s--;
5202                     TOKEN(0);
5203                 }
5204                 Aop(OP_SUBTRACT);
5205             }
5206             else {
5207                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5208                     check_uni();
5209                 OPERATOR('-');          /* unary minus */
5210             }
5211         }
5212
5213     case '+':
5214         {
5215             const char tmp = *s++;
5216             if (*s == tmp) {
5217                 s++;
5218                 if (PL_expect == XOPERATOR)
5219                     TERM(POSTINC);
5220                 else
5221                     OPERATOR(PREINC);
5222             }
5223             if (PL_expect == XOPERATOR) {
5224                 if (*s == '=' && !PL_lex_allbrackets &&
5225                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5226                     s--;
5227                     TOKEN(0);
5228                 }
5229                 Aop(OP_ADD);
5230             }
5231             else {
5232                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5233                     check_uni();
5234                 OPERATOR('+');
5235             }
5236         }
5237
5238     case '*':
5239         if (PL_expect != XOPERATOR) {
5240             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5241             PL_expect = XOPERATOR;
5242             force_ident(PL_tokenbuf, '*');
5243             if (!*PL_tokenbuf)
5244                 PREREF('*');
5245             TERM('*');
5246         }
5247         s++;
5248         if (*s == '*') {
5249             s++;
5250             if (*s == '=' && !PL_lex_allbrackets &&
5251                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5252                 s -= 2;
5253                 TOKEN(0);
5254             }
5255             PWop(OP_POW);
5256         }
5257         if (*s == '=' && !PL_lex_allbrackets &&
5258                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5259             s--;
5260             TOKEN(0);
5261         }
5262         Mop(OP_MULTIPLY);
5263
5264     case '%':
5265         if (PL_expect == XOPERATOR) {
5266             if (s[1] == '=' && !PL_lex_allbrackets &&
5267                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5268                 TOKEN(0);
5269             ++s;
5270             Mop(OP_MODULO);
5271         }
5272         PL_tokenbuf[0] = '%';
5273         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5274                 sizeof PL_tokenbuf - 1, FALSE);
5275         if (!PL_tokenbuf[1]) {
5276             PREREF('%');
5277         }
5278         PL_pending_ident = '%';
5279         TERM('%');
5280
5281     case '^':
5282         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5283                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5284             TOKEN(0);
5285         s++;
5286         BOop(OP_BIT_XOR);
5287     case '[':
5288         if (PL_lex_brackets > 100)
5289             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5290         PL_lex_brackstack[PL_lex_brackets++] = 0;
5291         PL_lex_allbrackets++;
5292         {
5293             const char tmp = *s++;
5294             OPERATOR(tmp);
5295         }
5296     case '~':
5297         if (s[1] == '~'
5298             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5299         {
5300             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5301                 TOKEN(0);
5302             s += 2;
5303             Eop(OP_SMARTMATCH);
5304         }
5305         s++;
5306         OPERATOR('~');
5307     case ',':
5308         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5309             TOKEN(0);
5310         s++;
5311         OPERATOR(',');
5312     case ':':
5313         if (s[1] == ':') {
5314             len = 0;
5315             goto just_a_word_zero_gv;
5316         }
5317         s++;
5318         switch (PL_expect) {
5319             OP *attrs;
5320 #ifdef PERL_MAD
5321             I32 stuffstart;
5322 #endif
5323         case XOPERATOR:
5324             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5325                 break;
5326             PL_bufptr = s;      /* update in case we back off */
5327             if (*s == '=') {
5328                 Perl_croak(aTHX_
5329                            "Use of := for an empty attribute list is not allowed");
5330             }
5331             goto grabattrs;
5332         case XATTRBLOCK:
5333             PL_expect = XBLOCK;
5334             goto grabattrs;
5335         case XATTRTERM:
5336             PL_expect = XTERMBLOCK;
5337          grabattrs:
5338 #ifdef PERL_MAD
5339             stuffstart = s - SvPVX(PL_linestr) - 1;
5340 #endif
5341             s = PEEKSPACE(s);
5342             attrs = NULL;
5343             while (isIDFIRST_lazy_if(s,UTF)) {
5344                 I32 tmp;
5345                 SV *sv;
5346                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5347                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5348                     if (tmp < 0) tmp = -tmp;
5349                     switch (tmp) {
5350                     case KEY_or:
5351                     case KEY_and:
5352                     case KEY_for:
5353                     case KEY_foreach:
5354                     case KEY_unless:
5355                     case KEY_if:
5356                     case KEY_while:
5357                     case KEY_until:
5358                         goto got_attrs;
5359                     default:
5360                         break;
5361                     }
5362                 }
5363                 sv = newSVpvn(s, len);
5364                 if (*d == '(') {
5365                     d = scan_str(d,TRUE,TRUE);
5366                     if (!d) {
5367                         /* MUST advance bufptr here to avoid bogus
5368                            "at end of line" context messages from yyerror().
5369                          */
5370                         PL_bufptr = s + len;
5371                         yyerror("Unterminated attribute parameter in attribute list");
5372                         if (attrs)
5373                             op_free(attrs);
5374                         sv_free(sv);
5375                         return REPORT(0);       /* EOF indicator */
5376                     }
5377                 }
5378                 if (PL_lex_stuff) {
5379                     sv_catsv(sv, PL_lex_stuff);
5380                     attrs = op_append_elem(OP_LIST, attrs,
5381                                         newSVOP(OP_CONST, 0, sv));
5382                     SvREFCNT_dec(PL_lex_stuff);
5383                     PL_lex_stuff = NULL;
5384                 }
5385                 else {
5386                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5387                         sv_free(sv);
5388                         if (PL_in_my == KEY_our) {
5389                             deprecate(":unique");
5390                         }
5391                         else
5392                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5393                     }
5394
5395                     /* NOTE: any CV attrs applied here need to be part of
5396                        the CVf_BUILTIN_ATTRS define in cv.h! */
5397                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5398                         sv_free(sv);
5399                         CvLVALUE_on(PL_compcv);
5400                     }
5401                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5402                         sv_free(sv);
5403                         deprecate(":locked");
5404                     }
5405                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5406                         sv_free(sv);
5407                         CvMETHOD_on(PL_compcv);
5408                     }
5409                     /* After we've set the flags, it could be argued that
5410                        we don't need to do the attributes.pm-based setting
5411                        process, and shouldn't bother appending recognized
5412                        flags.  To experiment with that, uncomment the
5413                        following "else".  (Note that's already been
5414                        uncommented.  That keeps the above-applied built-in
5415                        attributes from being intercepted (and possibly
5416                        rejected) by a package's attribute routines, but is
5417                        justified by the performance win for the common case
5418                        of applying only built-in attributes.) */
5419                     else
5420                         attrs = op_append_elem(OP_LIST, attrs,
5421                                             newSVOP(OP_CONST, 0,
5422                                                     sv));
5423                 }
5424                 s = PEEKSPACE(d);
5425                 if (*s == ':' && s[1] != ':')
5426                     s = PEEKSPACE(s+1);
5427                 else if (s == d)
5428                     break;      /* require real whitespace or :'s */
5429                 /* XXX losing whitespace on sequential attributes here */
5430             }
5431             {
5432                 const char tmp
5433                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5434                 if (*s != ';' && *s != '}' && *s != tmp
5435                     && (tmp != '=' || *s != ')')) {
5436                     const char q = ((*s == '\'') ? '"' : '\'');
5437                     /* If here for an expression, and parsed no attrs, back
5438                        off. */
5439                     if (tmp == '=' && !attrs) {
5440                         s = PL_bufptr;
5441                         break;
5442                     }
5443                     /* MUST advance bufptr here to avoid bogus "at end of line"
5444                        context messages from yyerror().
5445                     */
5446                     PL_bufptr = s;
5447                     yyerror( (const char *)
5448                              (*s
5449                               ? Perl_form(aTHX_ "Invalid separator character "
5450                                           "%c%c%c in attribute list", q, *s, q)
5451                               : "Unterminated attribute list" ) );
5452                     if (attrs)
5453                         op_free(attrs);
5454                     OPERATOR(':');
5455                 }
5456             }
5457         got_attrs:
5458             if (attrs) {
5459                 start_force(PL_curforce);
5460                 NEXTVAL_NEXTTOKE.opval = attrs;
5461                 CURMAD('_', PL_nextwhite);
5462                 force_next(THING);
5463             }
5464 #ifdef PERL_MAD
5465             if (PL_madskills) {
5466                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5467                                      (s - SvPVX(PL_linestr)) - stuffstart);
5468             }
5469 #endif
5470             TOKEN(COLONATTR);
5471         }
5472         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5473             s--;
5474             TOKEN(0);
5475         }
5476         PL_lex_allbrackets--;
5477         OPERATOR(':');
5478     case '(':
5479         s++;
5480         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5481             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5482         else
5483             PL_expect = XTERM;
5484         s = SKIPSPACE1(s);
5485         PL_lex_allbrackets++;
5486         TOKEN('(');
5487     case ';':
5488         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5489             TOKEN(0);
5490         CLINE;
5491         s++;
5492         OPERATOR(';');
5493     case ')':
5494         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5495             TOKEN(0);
5496         s++;
5497         PL_lex_allbrackets--;
5498         s = SKIPSPACE1(s);
5499         if (*s == '{')
5500             PREBLOCK(')');
5501         TERM(')');
5502     case ']':
5503         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5504             TOKEN(0);
5505         s++;
5506         if (PL_lex_brackets <= 0)
5507             yyerror("Unmatched right square bracket");
5508         else
5509             --PL_lex_brackets;
5510         PL_lex_allbrackets--;
5511         if (PL_lex_state == LEX_INTERPNORMAL) {
5512             if (PL_lex_brackets == 0) {
5513                 if (*s == '-' && s[1] == '>')
5514                     PL_lex_state = LEX_INTERPENDMAYBE;
5515                 else if (*s != '[' && *s != '{')
5516                     PL_lex_state = LEX_INTERPEND;
5517             }
5518         }
5519         TERM(']');
5520     case '{':
5521       leftbracket:
5522         s++;
5523         if (PL_lex_brackets > 100) {
5524             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5525         }
5526         switch (PL_expect) {
5527         case XTERM:
5528             if (PL_lex_formbrack) {
5529                 s--;
5530                 PRETERMBLOCK(DO);
5531             }
5532             if (PL_oldoldbufptr == PL_last_lop)
5533                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5534             else
5535                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5536             PL_lex_allbrackets++;
5537             OPERATOR(HASHBRACK);
5538         case XOPERATOR:
5539             while (s < PL_bufend && SPACE_OR_TAB(*s))
5540                 s++;
5541             d = s;
5542             PL_tokenbuf[0] = '\0';
5543             if (d < PL_bufend && *d == '-') {
5544                 PL_tokenbuf[0] = '-';
5545                 d++;
5546                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5547                     d++;
5548             }
5549             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5550                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5551                               FALSE, &len);
5552                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5553                     d++;
5554                 if (*d == '}') {
5555                     const char minus = (PL_tokenbuf[0] == '-');
5556                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5557                     if (minus)
5558                         force_next('-');
5559                 }
5560             }
5561             /* FALL THROUGH */
5562         case XATTRBLOCK:
5563         case XBLOCK:
5564             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5565             PL_lex_allbrackets++;
5566             PL_expect = XSTATE;
5567             break;
5568         case XATTRTERM:
5569         case XTERMBLOCK:
5570             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5571             PL_lex_allbrackets++;
5572             PL_expect = XSTATE;
5573             break;
5574         default: {
5575                 const char *t;
5576                 if (PL_oldoldbufptr == PL_last_lop)
5577                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5578                 else
5579                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5580                 PL_lex_allbrackets++;
5581                 s = SKIPSPACE1(s);
5582                 if (*s == '}') {
5583                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5584                         PL_expect = XTERM;
5585                         /* This hack is to get the ${} in the message. */
5586                         PL_bufptr = s+1;
5587                         yyerror("syntax error");
5588                         break;
5589                     }
5590                     OPERATOR(HASHBRACK);
5591                 }
5592                 /* This hack serves to disambiguate a pair of curlies
5593                  * as being a block or an anon hash.  Normally, expectation
5594                  * determines that, but in cases where we're not in a
5595                  * position to expect anything in particular (like inside
5596                  * eval"") we have to resolve the ambiguity.  This code
5597                  * covers the case where the first term in the curlies is a
5598                  * quoted string.  Most other cases need to be explicitly
5599                  * disambiguated by prepending a "+" before the opening
5600                  * curly in order to force resolution as an anon hash.
5601                  *
5602                  * XXX should probably propagate the outer expectation
5603                  * into eval"" to rely less on this hack, but that could
5604                  * potentially break current behavior of eval"".
5605                  * GSAR 97-07-21
5606                  */
5607                 t = s;
5608                 if (*s == '\'' || *s == '"' || *s == '`') {
5609                     /* common case: get past first string, handling escapes */
5610                     for (t++; t < PL_bufend && *t != *s;)
5611                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5612                             t++;
5613                     t++;
5614                 }
5615                 else if (*s == 'q') {
5616                     if (++t < PL_bufend
5617                         && (!isALNUM(*t)
5618                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5619                                 && !isALNUM(*t))))
5620                     {
5621                         /* skip q//-like construct */
5622                         const char *tmps;
5623                         char open, close, term;
5624                         I32 brackets = 1;
5625
5626                         while (t < PL_bufend && isSPACE(*t))
5627                             t++;
5628                         /* check for q => */
5629                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5630                             OPERATOR(HASHBRACK);
5631                         }
5632                         term = *t;
5633                         open = term;
5634                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5635                             term = tmps[5];
5636                         close = term;
5637                         if (open == close)
5638                             for (t++; t < PL_bufend; t++) {
5639                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5640                                     t++;
5641                                 else if (*t == open)
5642                                     break;
5643                             }
5644                         else {
5645                             for (t++; t < PL_bufend; t++) {
5646                                 if (*t == '\\' && t+1 < PL_bufend)
5647                                     t++;
5648                                 else if (*t == close && --brackets <= 0)
5649                                     break;
5650                                 else if (*t == open)
5651                                     brackets++;
5652                             }
5653                         }
5654                         t++;
5655                     }
5656                     else
5657                         /* skip plain q word */
5658                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5659                              t += UTF8SKIP(t);
5660                 }
5661                 else if (isALNUM_lazy_if(t,UTF)) {
5662                     t += UTF8SKIP(t);
5663                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5664                          t += UTF8SKIP(t);
5665                 }
5666                 while (t < PL_bufend && isSPACE(*t))
5667                     t++;
5668                 /* if comma follows first term, call it an anon hash */
5669                 /* XXX it could be a comma expression with loop modifiers */
5670                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5671                                    || (*t == '=' && t[1] == '>')))
5672                     OPERATOR(HASHBRACK);
5673                 if (PL_expect == XREF)
5674                     PL_expect = XTERM;
5675                 else {
5676                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5677                     PL_expect = XSTATE;
5678                 }
5679             }
5680             break;
5681         }
5682         pl_yylval.ival = CopLINE(PL_curcop);
5683         if (isSPACE(*s) || *s == '#')
5684             PL_copline = NOLINE;   /* invalidate current command line number */
5685         TOKEN('{');
5686     case '}':
5687         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5688             TOKEN(0);
5689       rightbracket:
5690         s++;
5691         if (PL_lex_brackets <= 0)
5692             yyerror("Unmatched right curly bracket");
5693         else
5694             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5695         PL_lex_allbrackets--;
5696         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5697             PL_lex_formbrack = 0;
5698         if (PL_lex_state == LEX_INTERPNORMAL) {
5699             if (PL_lex_brackets == 0) {
5700                 if (PL_expect & XFAKEBRACK) {
5701                     PL_expect &= XENUMMASK;
5702                     PL_lex_state = LEX_INTERPEND;
5703                     PL_bufptr = s;
5704 #if 0
5705                     if (PL_madskills) {
5706                         if (!PL_thiswhite)
5707                             PL_thiswhite = newSVpvs("");
5708                         sv_catpvs(PL_thiswhite,"}");
5709                     }
5710 #endif
5711                     return yylex();     /* ignore fake brackets */
5712                 }
5713                 if (*s == '-' && s[1] == '>')
5714                     PL_lex_state = LEX_INTERPENDMAYBE;
5715                 else if (*s != '[' && *s != '{')
5716                     PL_lex_state = LEX_INTERPEND;
5717             }
5718         }
5719         if (PL_expect & XFAKEBRACK) {
5720             PL_expect &= XENUMMASK;
5721             PL_bufptr = s;
5722             return yylex();             /* ignore fake brackets */
5723         }
5724         start_force(PL_curforce);
5725         if (PL_madskills) {
5726             curmad('X', newSVpvn(s-1,1));
5727             CURMAD('_', PL_thiswhite);
5728         }
5729         force_next('}');
5730 #ifdef PERL_MAD
5731         if (!PL_thistoken)
5732             PL_thistoken = newSVpvs("");
5733 #endif
5734         TOKEN(';');
5735     case '&':
5736         s++;
5737         if (*s++ == '&') {
5738             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5739                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5740                 s -= 2;
5741                 TOKEN(0);
5742             }
5743             AOPERATOR(ANDAND);
5744         }
5745         s--;
5746         if (PL_expect == XOPERATOR) {
5747             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5748                 && isIDFIRST_lazy_if(s,UTF))
5749             {
5750                 CopLINE_dec(PL_curcop);
5751                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5752                 CopLINE_inc(PL_curcop);
5753             }
5754             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5756                 s--;
5757                 TOKEN(0);
5758             }
5759             BAop(OP_BIT_AND);
5760         }
5761
5762         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5763         if (*PL_tokenbuf) {
5764             PL_expect = XOPERATOR;
5765             force_ident(PL_tokenbuf, '&');
5766         }
5767         else
5768             PREREF('&');
5769         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5770         TERM('&');
5771
5772     case '|':
5773         s++;
5774         if (*s++ == '|') {
5775             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5776                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5777                 s -= 2;
5778                 TOKEN(0);
5779             }
5780             AOPERATOR(OROR);
5781         }
5782         s--;
5783         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5784                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5785             s--;
5786             TOKEN(0);
5787         }
5788         BOop(OP_BIT_OR);
5789     case '=':
5790         s++;
5791         {
5792             const char tmp = *s++;
5793             if (tmp == '=') {
5794                 if (!PL_lex_allbrackets &&
5795                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5796                     s -= 2;
5797                     TOKEN(0);
5798                 }
5799                 Eop(OP_EQ);
5800             }
5801             if (tmp == '>') {
5802                 if (!PL_lex_allbrackets &&
5803                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5804                     s -= 2;
5805                     TOKEN(0);
5806                 }
5807                 OPERATOR(',');
5808             }
5809             if (tmp == '~')
5810                 PMop(OP_MATCH);
5811             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5812                 && strchr("+-*/%.^&|<",tmp))
5813                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5814                             "Reversed %c= operator",(int)tmp);
5815             s--;
5816             if (PL_expect == XSTATE && isALPHA(tmp) &&
5817                 (s == PL_linestart+1 || s[-2] == '\n') )
5818                 {
5819                     if (PL_in_eval && !PL_rsfp) {
5820                         d = PL_bufend;
5821                         while (s < d) {
5822                             if (*s++ == '\n') {
5823                                 incline(s);
5824                                 if (strnEQ(s,"=cut",4)) {
5825                                     s = strchr(s,'\n');
5826                                     if (s)
5827                                         s++;
5828                                     else
5829                                         s = d;
5830                                     incline(s);
5831                                     goto retry;
5832                                 }
5833                             }
5834                         }
5835                         goto retry;
5836                     }
5837 #ifdef PERL_MAD
5838                     if (PL_madskills) {
5839                         if (!PL_thiswhite)
5840                             PL_thiswhite = newSVpvs("");
5841                         sv_catpvn(PL_thiswhite, PL_linestart,
5842                                   PL_bufend - PL_linestart);
5843                     }
5844 #endif
5845                     s = PL_bufend;
5846                     PL_parser->in_pod = 1;
5847                     goto retry;
5848                 }
5849         }
5850         if (PL_lex_brackets < PL_lex_formbrack) {
5851             const char *t = s;
5852 #ifdef PERL_STRICT_CR
5853             while (SPACE_OR_TAB(*t))
5854 #else
5855             while (SPACE_OR_TAB(*t) || *t == '\r')
5856 #endif
5857                 t++;
5858             if (*t == '\n' || *t == '#') {
5859                 s--;
5860                 PL_expect = XBLOCK;
5861                 goto leftbracket;
5862             }
5863         }
5864         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5865             s--;
5866             TOKEN(0);
5867         }
5868         pl_yylval.ival = 0;
5869         OPERATOR(ASSIGNOP);
5870     case '!':
5871         s++;
5872         {
5873             const char tmp = *s++;
5874             if (tmp == '=') {
5875                 /* was this !=~ where !~ was meant?
5876                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5877
5878                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5879                     const char *t = s+1;
5880
5881                     while (t < PL_bufend && isSPACE(*t))
5882                         ++t;
5883
5884                     if (*t == '/' || *t == '?' ||
5885                         ((*t == 'm' || *t == 's' || *t == 'y')
5886                          && !isALNUM(t[1])) ||
5887                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5888                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5889                                     "!=~ should be !~");
5890                 }
5891                 if (!PL_lex_allbrackets &&
5892                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5893                     s -= 2;
5894                     TOKEN(0);
5895                 }
5896                 Eop(OP_NE);
5897             }
5898             if (tmp == '~')
5899                 PMop(OP_NOT);
5900         }
5901         s--;
5902         OPERATOR('!');
5903     case '<':
5904         if (PL_expect != XOPERATOR) {
5905             if (s[1] != '<' && !strchr(s,'>'))
5906                 check_uni();
5907             if (s[1] == '<')
5908                 s = scan_heredoc(s);
5909             else
5910                 s = scan_inputsymbol(s);
5911             TERM(sublex_start());
5912         }
5913         s++;
5914         {
5915             char tmp = *s++;
5916             if (tmp == '<') {
5917                 if (*s == '=' && !PL_lex_allbrackets &&
5918                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5919                     s -= 2;
5920                     TOKEN(0);
5921                 }
5922                 SHop(OP_LEFT_SHIFT);
5923             }
5924             if (tmp == '=') {
5925                 tmp = *s++;
5926                 if (tmp == '>') {
5927                     if (!PL_lex_allbrackets &&
5928                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5929                         s -= 3;
5930                         TOKEN(0);
5931                     }
5932                     Eop(OP_NCMP);
5933                 }
5934                 s--;
5935                 if (!PL_lex_allbrackets &&
5936                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5937                     s -= 2;
5938                     TOKEN(0);
5939                 }
5940                 Rop(OP_LE);
5941             }
5942         }
5943         s--;
5944         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5945             s--;
5946             TOKEN(0);
5947         }
5948         Rop(OP_LT);
5949     case '>':
5950         s++;
5951         {
5952             const char tmp = *s++;
5953             if (tmp == '>') {
5954                 if (*s == '=' && !PL_lex_allbrackets &&
5955                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5956                     s -= 2;
5957                     TOKEN(0);
5958                 }
5959                 SHop(OP_RIGHT_SHIFT);
5960             }
5961             else if (tmp == '=') {
5962                 if (!PL_lex_allbrackets &&
5963                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5964                     s -= 2;
5965                     TOKEN(0);
5966                 }
5967                 Rop(OP_GE);
5968             }
5969         }
5970         s--;
5971         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5972             s--;
5973             TOKEN(0);
5974         }
5975         Rop(OP_GT);
5976
5977     case '$':
5978         CLINE;
5979
5980         if (PL_expect == XOPERATOR) {
5981             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5982                 return deprecate_commaless_var_list();
5983             }
5984         }
5985
5986         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5987             PL_tokenbuf[0] = '@';
5988             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5989                            sizeof PL_tokenbuf - 1, FALSE);
5990             if (PL_expect == XOPERATOR)
5991                 no_op("Array length", s);
5992             if (!PL_tokenbuf[1])
5993                 PREREF(DOLSHARP);
5994             PL_expect = XOPERATOR;
5995             PL_pending_ident = '#';
5996             TOKEN(DOLSHARP);
5997         }
5998
5999         PL_tokenbuf[0] = '$';
6000         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6001                        sizeof PL_tokenbuf - 1, FALSE);
6002         if (PL_expect == XOPERATOR)
6003             no_op("Scalar", s);
6004         if (!PL_tokenbuf[1]) {
6005             if (s == PL_bufend)
6006                 yyerror("Final $ should be \\$ or $name");
6007             PREREF('$');
6008         }
6009
6010         /* This kludge not intended to be bulletproof. */
6011         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6012             pl_yylval.opval = newSVOP(OP_CONST, 0,
6013                                    newSViv(CopARYBASE_get(&PL_compiling)));
6014             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
6015             TERM(THING);
6016         }
6017
6018         d = s;
6019         {
6020             const char tmp = *s;
6021             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6022                 s = SKIPSPACE1(s);
6023
6024             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6025                 && intuit_more(s)) {
6026                 if (*s == '[') {
6027                     PL_tokenbuf[0] = '@';
6028                     if (ckWARN(WARN_SYNTAX)) {
6029                         char *t = s+1;
6030
6031                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6032                             t++;
6033                         if (*t++ == ',') {
6034                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6035                             while (t < PL_bufend && *t != ']')
6036                                 t++;
6037                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6038                                         "Multidimensional syntax %.*s not supported",
6039                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6040                         }
6041                     }
6042                 }
6043                 else if (*s == '{') {
6044                     char *t;
6045                     PL_tokenbuf[0] = '%';
6046                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6047                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6048                         {
6049                             char tmpbuf[sizeof PL_tokenbuf];
6050                             do {
6051                                 t++;
6052                             } while (isSPACE(*t));
6053                             if (isIDFIRST_lazy_if(t,UTF)) {
6054                                 STRLEN len;
6055                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6056                                               &len);
6057                                 while (isSPACE(*t))
6058                                     t++;
6059                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6060                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6061                                                 "You need to quote \"%s\"",
6062                                                 tmpbuf);
6063                             }
6064                         }
6065                 }
6066             }
6067
6068             PL_expect = XOPERATOR;
6069             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6070                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6071                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6072                     PL_expect = XOPERATOR;
6073                 else if (strchr("$@\"'`q", *s))
6074                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6075                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6076                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6077                 else if (isIDFIRST_lazy_if(s,UTF)) {
6078                     char tmpbuf[sizeof PL_tokenbuf];
6079                     int t2;
6080                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6081                     if ((t2 = keyword(tmpbuf, len, 0))) {
6082                         /* binary operators exclude handle interpretations */
6083                         switch (t2) {
6084                         case -KEY_x:
6085                         case -KEY_eq:
6086                         case -KEY_ne:
6087                         case -KEY_gt:
6088                         case -KEY_lt:
6089                         case -KEY_ge:
6090                         case -KEY_le:
6091                         case -KEY_cmp:
6092                             break;
6093                         default:
6094                             PL_expect = XTERM;  /* e.g. print $fh length() */
6095                             break;
6096                         }
6097                     }
6098                     else {
6099                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6100                     }
6101                 }
6102                 else if (isDIGIT(*s))
6103                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6104                 else if (*s == '.' && isDIGIT(s[1]))
6105                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6106                 else if ((*s == '?' || *s == '-' || *s == '+')
6107                          && !isSPACE(s[1]) && s[1] != '=')
6108                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6109                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6110                          && s[1] != '/')
6111                     PL_expect = XTERM;          /* e.g. print $fh /.../
6112                                                    XXX except DORDOR operator
6113                                                 */
6114                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6115                          && s[2] != '=')
6116                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6117             }
6118         }
6119         PL_pending_ident = '$';
6120         TOKEN('$');
6121
6122     case '@':
6123         if (PL_expect == XOPERATOR)
6124             no_op("Array", s);
6125         PL_tokenbuf[0] = '@';
6126         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6127         if (!PL_tokenbuf[1]) {
6128             PREREF('@');
6129         }
6130         if (PL_lex_state == LEX_NORMAL)
6131             s = SKIPSPACE1(s);
6132         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6133             if (*s == '{')
6134                 PL_tokenbuf[0] = '%';
6135
6136             /* Warn about @ where they meant $. */
6137             if (*s == '[' || *s == '{') {
6138                 if (ckWARN(WARN_SYNTAX)) {
6139                     const char *t = s + 1;
6140                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6141                         t++;
6142                     if (*t == '}' || *t == ']') {
6143                         t++;
6144                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6145                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6146                             "Scalar value %.*s better written as $%.*s",
6147                             (int)(t-PL_bufptr), PL_bufptr,
6148                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6149                     }
6150                 }
6151             }
6152         }
6153         PL_pending_ident = '@';
6154         TERM('@');
6155
6156      case '/':                  /* may be division, defined-or, or pattern */
6157         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6158             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6159                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6160                 TOKEN(0);
6161             s += 2;
6162             AOPERATOR(DORDOR);
6163         }
6164      case '?':                  /* may either be conditional or pattern */
6165         if (PL_expect == XOPERATOR) {
6166              char tmp = *s++;
6167              if(tmp == '?') {
6168                 if (!PL_lex_allbrackets &&
6169                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6170                     s--;
6171                     TOKEN(0);
6172                 }
6173                 PL_lex_allbrackets++;
6174                 OPERATOR('?');
6175              }
6176              else {
6177                  tmp = *s++;
6178                  if(tmp == '/') {
6179                      /* A // operator. */
6180                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6181                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6182                                             LEX_FAKEEOF_LOGIC)) {
6183                         s -= 2;
6184                         TOKEN(0);
6185                     }
6186                     AOPERATOR(DORDOR);
6187                  }
6188                  else {
6189                      s--;
6190                      if (*s == '=' && !PL_lex_allbrackets &&
6191                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6192                          s--;
6193                          TOKEN(0);
6194                      }
6195                      Mop(OP_DIVIDE);
6196                  }
6197              }
6198          }
6199          else {
6200              /* Disable warning on "study /blah/" */
6201              if (PL_oldoldbufptr == PL_last_uni
6202               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6203                   || memNE(PL_last_uni, "study", 5)
6204                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6205               ))
6206                  check_uni();
6207              if (*s == '?')
6208                  deprecate("?PATTERN? without explicit operator");
6209              s = scan_pat(s,OP_MATCH);
6210              TERM(sublex_start());
6211          }
6212
6213     case '.':
6214         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6215 #ifdef PERL_STRICT_CR
6216             && s[1] == '\n'
6217 #else
6218             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6219 #endif
6220             && (s == PL_linestart || s[-1] == '\n') )
6221         {
6222             PL_lex_formbrack = 0;
6223             PL_expect = XSTATE;
6224             goto rightbracket;
6225         }
6226         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6227             s += 3;
6228             OPERATOR(YADAYADA);
6229         }
6230         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6231             char tmp = *s++;
6232             if (*s == tmp) {
6233                 if (!PL_lex_allbrackets &&
6234                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6235                     s--;
6236                     TOKEN(0);
6237                 }
6238                 s++;
6239                 if (*s == tmp) {
6240                     s++;
6241                     pl_yylval.ival = OPf_SPECIAL;
6242                 }
6243                 else
6244                     pl_yylval.ival = 0;
6245                 OPERATOR(DOTDOT);
6246             }
6247             if (*s == '=' && !PL_lex_allbrackets &&
6248                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6249                 s--;
6250                 TOKEN(0);
6251             }
6252             Aop(OP_CONCAT);
6253         }
6254         /* FALL THROUGH */
6255     case '0': case '1': case '2': case '3': case '4':
6256     case '5': case '6': case '7': case '8': case '9':
6257         s = scan_num(s, &pl_yylval);
6258         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6259         if (PL_expect == XOPERATOR)
6260             no_op("Number",s);
6261         TERM(THING);
6262
6263     case '\'':
6264         s = scan_str(s,!!PL_madskills,FALSE);
6265         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6266         if (PL_expect == XOPERATOR) {
6267             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6268                 return deprecate_commaless_var_list();
6269             }
6270             else
6271                 no_op("String",s);
6272         }
6273         if (!s)
6274             missingterm(NULL);
6275         pl_yylval.ival = OP_CONST;
6276         TERM(sublex_start());
6277
6278     case '"':
6279         s = scan_str(s,!!PL_madskills,FALSE);
6280         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6281         if (PL_expect == XOPERATOR) {
6282             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6283                 return deprecate_commaless_var_list();
6284             }
6285             else
6286                 no_op("String",s);
6287         }
6288         if (!s)
6289             missingterm(NULL);
6290         pl_yylval.ival = OP_CONST;
6291         /* FIXME. I think that this can be const if char *d is replaced by
6292            more localised variables.  */
6293         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6294             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6295                 pl_yylval.ival = OP_STRINGIFY;
6296                 break;
6297             }
6298         }
6299         TERM(sublex_start());
6300
6301     case '`':
6302         s = scan_str(s,!!PL_madskills,FALSE);
6303         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6304         if (PL_expect == XOPERATOR)
6305             no_op("Backticks",s);
6306         if (!s)
6307             missingterm(NULL);
6308         readpipe_override();
6309         TERM(sublex_start());
6310
6311     case '\\':
6312         s++;
6313         if (PL_lex_inwhat && isDIGIT(*s))
6314             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6315                            *s, *s);
6316         if (PL_expect == XOPERATOR)
6317             no_op("Backslash",s);
6318         OPERATOR(REFGEN);
6319
6320     case 'v':
6321         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6322             char *start = s + 2;
6323             while (isDIGIT(*start) || *start == '_')
6324                 start++;
6325             if (*start == '.' && isDIGIT(start[1])) {
6326                 s = scan_num(s, &pl_yylval);
6327                 TERM(THING);
6328             }
6329             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6330             else if (!isALPHA(*start) && (PL_expect == XTERM
6331                         || PL_expect == XREF || PL_expect == XSTATE
6332                         || PL_expect == XTERMORDORDOR)) {
6333                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6334                 if (!gv) {
6335                     s = scan_num(s, &pl_yylval);
6336                     TERM(THING);
6337                 }
6338             }
6339         }
6340         goto keylookup;
6341     case 'x':
6342         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6343             s++;
6344             Mop(OP_REPEAT);
6345         }
6346         goto keylookup;
6347
6348     case '_':
6349     case 'a': case 'A':
6350     case 'b': case 'B':
6351     case 'c': case 'C':
6352     case 'd': case 'D':
6353     case 'e': case 'E':
6354     case 'f': case 'F':
6355     case 'g': case 'G':
6356     case 'h': case 'H':
6357     case 'i': case 'I':
6358     case 'j': case 'J':
6359     case 'k': case 'K':
6360     case 'l': case 'L':
6361     case 'm': case 'M':
6362     case 'n': case 'N':
6363     case 'o': case 'O':
6364     case 'p': case 'P':
6365     case 'q': case 'Q':
6366     case 'r': case 'R':
6367     case 's': case 'S':
6368     case 't': case 'T':
6369     case 'u': case 'U':
6370               case 'V':
6371     case 'w': case 'W':
6372               case 'X':
6373     case 'y': case 'Y':
6374     case 'z': case 'Z':
6375
6376       keylookup: {
6377         bool anydelim;
6378         I32 tmp;
6379
6380         orig_keyword = 0;
6381         gv = NULL;
6382         gvp = NULL;
6383
6384         PL_bufptr = s;
6385         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6386
6387         /* Some keywords can be followed by any delimiter, including ':' */
6388         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6389
6390         /* x::* is just a word, unless x is "CORE" */
6391         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6392             goto just_a_word;
6393
6394         d = s;
6395         while (d < PL_bufend && isSPACE(*d))
6396                 d++;    /* no comments skipped here, or s### is misparsed */
6397
6398         /* Is this a word before a => operator? */
6399         if (*d == '=' && d[1] == '>') {
6400             CLINE;
6401             pl_yylval.opval
6402                 = (OP*)newSVOP(OP_CONST, 0,
6403                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6404             pl_yylval.opval->op_private = OPpCONST_BARE;
6405             TERM(WORD);
6406         }
6407
6408         /* Check for plugged-in keyword */
6409         {
6410             OP *o;
6411             int result;
6412             char *saved_bufptr = PL_bufptr;
6413             PL_bufptr = s;
6414             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6415             s = PL_bufptr;
6416             if (result == KEYWORD_PLUGIN_DECLINE) {
6417                 /* not a plugged-in keyword */
6418                 PL_bufptr = saved_bufptr;
6419             } else if (result == KEYWORD_PLUGIN_STMT) {
6420                 pl_yylval.opval = o;
6421                 CLINE;
6422                 PL_expect = XSTATE;
6423                 return REPORT(PLUGSTMT);
6424             } else if (result == KEYWORD_PLUGIN_EXPR) {
6425                 pl_yylval.opval = o;
6426                 CLINE;
6427                 PL_expect = XOPERATOR;
6428                 return REPORT(PLUGEXPR);
6429             } else {
6430                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6431                                         PL_tokenbuf);
6432             }
6433         }
6434
6435         /* Check for built-in keyword */
6436         tmp = keyword(PL_tokenbuf, len, 0);
6437
6438         /* Is this a label? */
6439         if (!anydelim && PL_expect == XSTATE
6440               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6441             s = d + 1;
6442             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6443             CLINE;
6444             TOKEN(LABEL);
6445         }
6446
6447         if (tmp < 0) {                  /* second-class keyword? */
6448             GV *ogv = NULL;     /* override (winner) */
6449             GV *hgv = NULL;     /* hidden (loser) */
6450             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6451                 CV *cv;
6452                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6453                     (cv = GvCVu(gv)))
6454                 {
6455                     if (GvIMPORTED_CV(gv))
6456                         ogv = gv;
6457                     else if (! CvMETHOD(cv))
6458                         hgv = gv;
6459                 }
6460                 if (!ogv &&
6461                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6462                     (gv = *gvp) && isGV_with_GP(gv) &&
6463                     GvCVu(gv) && GvIMPORTED_CV(gv))
6464                 {
6465                     ogv = gv;
6466                 }
6467             }
6468             if (ogv) {
6469                 orig_keyword = tmp;
6470                 tmp = 0;                /* overridden by import or by GLOBAL */
6471             }
6472             else if (gv && !gvp
6473                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6474                      && GvCVu(gv))
6475             {
6476                 tmp = 0;                /* any sub overrides "weak" keyword */
6477             }
6478             else {                      /* no override */
6479                 tmp = -tmp;
6480                 if (tmp == KEY_dump) {
6481                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6482                                    "dump() better written as CORE::dump()");
6483                 }
6484                 gv = NULL;
6485                 gvp = 0;
6486                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6487                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6488                                    "Ambiguous call resolved as CORE::%s(), "
6489                                    "qualify as such or use &",
6490                                    GvENAME(hgv));
6491             }
6492         }
6493
6494       reserved_word:
6495         switch (tmp) {
6496
6497         default:                        /* not a keyword */
6498             /* Trade off - by using this evil construction we can pull the
6499                variable gv into the block labelled keylookup. If not, then
6500                we have to give it function scope so that the goto from the
6501                earlier ':' case doesn't bypass the initialisation.  */
6502             if (0) {
6503             just_a_word_zero_gv:
6504                 gv = NULL;
6505                 gvp = NULL;
6506                 orig_keyword = 0;
6507             }
6508           just_a_word: {
6509                 SV *sv;
6510                 int pkgname = 0;
6511                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6512                 OP *rv2cv_op;
6513                 CV *cv;
6514 #ifdef PERL_MAD
6515                 SV *nextPL_nextwhite = 0;
6516 #endif
6517
6518
6519                 /* Get the rest if it looks like a package qualifier */
6520
6521                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6522                     STRLEN morelen;
6523                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6524                                   TRUE, &morelen);
6525                     if (!morelen)
6526                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6527                                 *s == '\'' ? "'" : "::");
6528                     len += morelen;
6529                     pkgname = 1;
6530                 }
6531
6532                 if (PL_expect == XOPERATOR) {
6533                     if (PL_bufptr == PL_linestart) {
6534                         CopLINE_dec(PL_curcop);
6535                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6536                         CopLINE_inc(PL_curcop);
6537                     }
6538                     else
6539                         no_op("Bareword",s);
6540                 }
6541
6542                 /* Look for a subroutine with this name in current package,
6543                    unless name is "Foo::", in which case Foo is a bareword
6544                    (and a package name). */
6545
6546                 if (len > 2 && !PL_madskills &&
6547                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6548                 {
6549                     if (ckWARN(WARN_BAREWORD)
6550                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6551                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6552                             "Bareword \"%s\" refers to nonexistent package",
6553                              PL_tokenbuf);
6554                     len -= 2;
6555                     PL_tokenbuf[len] = '\0';
6556                     gv = NULL;
6557                     gvp = 0;
6558                 }
6559                 else {
6560                     if (!gv) {
6561                         /* Mustn't actually add anything to a symbol table.
6562                            But also don't want to "initialise" any placeholder
6563                            constants that might already be there into full
6564                            blown PVGVs with attached PVCV.  */
6565                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6566                                                GV_NOADD_NOINIT, SVt_PVCV);
6567                     }
6568                     len = 0;
6569                 }
6570
6571                 /* if we saw a global override before, get the right name */
6572
6573                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6574                     len ? len : strlen(PL_tokenbuf));
6575                 if (gvp) {
6576                     SV * const tmp_sv = sv;
6577                     sv = newSVpvs("CORE::GLOBAL::");
6578                     sv_catsv(sv, tmp_sv);
6579                     SvREFCNT_dec(tmp_sv);
6580                 }
6581
6582 #ifdef PERL_MAD
6583                 if (PL_madskills && !PL_thistoken) {
6584                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6585                     PL_thistoken = newSVpvn(start,s - start);
6586                     PL_realtokenstart = s - SvPVX(PL_linestr);
6587                 }
6588 #endif
6589
6590                 /* Presume this is going to be a bareword of some sort. */
6591                 CLINE;
6592                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6593                 pl_yylval.opval->op_private = OPpCONST_BARE;
6594
6595                 /* And if "Foo::", then that's what it certainly is. */
6596                 if (len)
6597                     goto safe_bareword;
6598
6599                 {
6600                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6601                     const_op->op_private = OPpCONST_BARE;
6602                     rv2cv_op = newCVREF(0, const_op);
6603                 }
6604                 cv = rv2cv_op_cv(rv2cv_op, 0);
6605
6606                 /* See if it's the indirect object for a list operator. */
6607
6608                 if (PL_oldoldbufptr &&
6609                     PL_oldoldbufptr < PL_bufptr &&
6610                     (PL_oldoldbufptr == PL_last_lop
6611                      || PL_oldoldbufptr == PL_last_uni) &&
6612                     /* NO SKIPSPACE BEFORE HERE! */
6613                     (PL_expect == XREF ||
6614                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6615                 {
6616                     bool immediate_paren = *s == '(';
6617
6618                     /* (Now we can afford to cross potential line boundary.) */
6619                     s = SKIPSPACE2(s,nextPL_nextwhite);
6620 #ifdef PERL_MAD
6621                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6622 #endif
6623
6624                     /* Two barewords in a row may indicate method call. */
6625
6626                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6627                         (tmp = intuit_method(s, gv, cv))) {
6628                         op_free(rv2cv_op);
6629                         if (tmp == METHOD && !PL_lex_allbrackets &&
6630                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6631                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6632                         return REPORT(tmp);
6633                     }
6634
6635                     /* If not a declared subroutine, it's an indirect object. */
6636                     /* (But it's an indir obj regardless for sort.) */
6637                     /* Also, if "_" follows a filetest operator, it's a bareword */
6638
6639                     if (
6640                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6641                          (!cv &&
6642                         (PL_last_lop_op != OP_MAPSTART &&
6643                          PL_last_lop_op != OP_GREPSTART))))
6644                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6645                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6646                        )
6647                     {
6648                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6649                         goto bareword;
6650                     }
6651                 }
6652
6653                 PL_expect = XOPERATOR;
6654 #ifdef PERL_MAD
6655                 if (isSPACE(*s))
6656                     s = SKIPSPACE2(s,nextPL_nextwhite);
6657                 PL_nextwhite = nextPL_nextwhite;
6658 #else
6659                 s = skipspace(s);
6660 #endif
6661
6662                 /* Is this a word before a => operator? */
6663                 if (*s == '=' && s[1] == '>' && !pkgname) {
6664                     op_free(rv2cv_op);
6665                     CLINE;
6666                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6667                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6668                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6669                     TERM(WORD);
6670                 }
6671
6672                 /* If followed by a paren, it's certainly a subroutine. */
6673                 if (*s == '(') {
6674                     CLINE;
6675                     if (cv) {
6676                         d = s + 1;
6677                         while (SPACE_OR_TAB(*d))
6678                             d++;
6679                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6680                             s = d + 1;
6681                             goto its_constant;
6682                         }
6683                     }
6684 #ifdef PERL_MAD
6685                     if (PL_madskills) {
6686                         PL_nextwhite = PL_thiswhite;
6687                         PL_thiswhite = 0;
6688                     }
6689                     start_force(PL_curforce);
6690 #endif
6691                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6692                     PL_expect = XOPERATOR;
6693 #ifdef PERL_MAD
6694                     if (PL_madskills) {
6695                         PL_nextwhite = nextPL_nextwhite;
6696                         curmad('X', PL_thistoken);
6697                         PL_thistoken = newSVpvs("");
6698                     }
6699 #endif
6700                     op_free(rv2cv_op);
6701                     force_next(WORD);
6702                     pl_yylval.ival = 0;
6703                     TOKEN('&');
6704                 }
6705
6706                 /* If followed by var or block, call it a method (unless sub) */
6707
6708                 if ((*s == '$' || *s == '{') && !cv) {
6709                     op_free(rv2cv_op);
6710                     PL_last_lop = PL_oldbufptr;
6711                     PL_last_lop_op = OP_METHOD;
6712                     if (!PL_lex_allbrackets &&
6713                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6714                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6715                     PREBLOCK(METHOD);
6716                 }
6717
6718                 /* If followed by a bareword, see if it looks like indir obj. */
6719
6720                 if (!orig_keyword
6721                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6722                         && (tmp = intuit_method(s, gv, cv))) {
6723                     op_free(rv2cv_op);
6724                     if (tmp == METHOD && !PL_lex_allbrackets &&
6725                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6726                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6727                     return REPORT(tmp);
6728                 }
6729
6730                 /* Not a method, so call it a subroutine (if defined) */
6731
6732                 if (cv) {
6733                     if (lastchar == '-')
6734                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6735                                          "Ambiguous use of -%s resolved as -&%s()",
6736                                          PL_tokenbuf, PL_tokenbuf);
6737                     /* Check for a constant sub */
6738                     if ((sv = cv_const_sv(cv))) {
6739                   its_constant:
6740                         op_free(rv2cv_op);
6741                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6742                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6743                         pl_yylval.opval->op_private = 0;
6744                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6745                         TOKEN(WORD);
6746                     }
6747
6748                     op_free(pl_yylval.opval);
6749                     pl_yylval.opval = rv2cv_op;
6750                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6751                     PL_last_lop = PL_oldbufptr;
6752                     PL_last_lop_op = OP_ENTERSUB;
6753                     /* Is there a prototype? */
6754                     if (
6755 #ifdef PERL_MAD
6756                         cv &&
6757 #endif
6758                         SvPOK(cv))
6759                     {
6760                         STRLEN protolen;
6761                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6762                         if (!protolen)
6763                             TERM(FUNC0SUB);
6764                         while (*proto == ';')
6765                             proto++;
6766                         if (
6767                             (
6768                                 (
6769                                     *proto == '$' || *proto == '_'
6770                                  || *proto == '*' || *proto == '+'
6771                                 )
6772                              && proto[1] == '\0'
6773                             )
6774                          || (
6775                              *proto == '\\' && proto[1] && proto[2] == '\0'
6776                             )
6777                         )
6778                             OPERATOR(UNIOPSUB);
6779                         if (*proto == '\\' && proto[1] == '[') {
6780                             const char *p = proto + 2;
6781                             while(*p && *p != ']')
6782                                 ++p;
6783                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6784                         }
6785                         if (*proto == '&' && *s == '{') {
6786                             if (PL_curstash)
6787                                 sv_setpvs(PL_subname, "__ANON__");
6788                             else
6789                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6790                             if (!PL_lex_allbrackets &&
6791                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6792                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6793                             PREBLOCK(LSTOPSUB);
6794                         }
6795                     }
6796 #ifdef PERL_MAD
6797                     {
6798                         if (PL_madskills) {
6799                             PL_nextwhite = PL_thiswhite;
6800                             PL_thiswhite = 0;
6801                         }
6802                         start_force(PL_curforce);
6803                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6804                         PL_expect = XTERM;
6805                         if (PL_madskills) {
6806                             PL_nextwhite = nextPL_nextwhite;
6807                             curmad('X', PL_thistoken);
6808                             PL_thistoken = newSVpvs("");
6809                         }
6810                         force_next(WORD);
6811                         if (!PL_lex_allbrackets &&
6812                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6813                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6814                         TOKEN(NOAMP);
6815                     }
6816                 }
6817
6818                 /* Guess harder when madskills require "best effort". */
6819                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6820                     int probable_sub = 0;
6821                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6822                         probable_sub = 1;
6823                     else if (isALPHA(*s)) {
6824                         char tmpbuf[1024];
6825                         STRLEN tmplen;
6826                         d = s;
6827                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6828                         if (!keyword(tmpbuf, tmplen, 0))
6829                             probable_sub = 1;
6830                         else {
6831                             while (d < PL_bufend && isSPACE(*d))
6832                                 d++;
6833                             if (*d == '=' && d[1] == '>')
6834                                 probable_sub = 1;
6835                         }
6836                     }
6837                     if (probable_sub) {
6838                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6839                         op_free(pl_yylval.opval);
6840                         pl_yylval.opval = rv2cv_op;
6841                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6842                         PL_last_lop = PL_oldbufptr;
6843                         PL_last_lop_op = OP_ENTERSUB;
6844                         PL_nextwhite = PL_thiswhite;
6845                         PL_thiswhite = 0;
6846                         start_force(PL_curforce);
6847                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6848                         PL_expect = XTERM;
6849                         PL_nextwhite = nextPL_nextwhite;
6850                         curmad('X', PL_thistoken);
6851                         PL_thistoken = newSVpvs("");
6852                         force_next(WORD);
6853                         if (!PL_lex_allbrackets &&
6854                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6855                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6856                         TOKEN(NOAMP);
6857                     }
6858 #else
6859                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6860                     PL_expect = XTERM;
6861                     force_next(WORD);
6862                     if (!PL_lex_allbrackets &&
6863                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6864                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6865                     TOKEN(NOAMP);
6866 #endif
6867                 }
6868
6869                 /* Call it a bare word */
6870
6871                 if (PL_hints & HINT_STRICT_SUBS)
6872                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6873                 else {
6874                 bareword:
6875                     /* after "print" and similar functions (corresponding to
6876                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6877                      * a filehandle should be subject to "strict subs".
6878                      * Likewise for the optional indirect-object argument to system
6879                      * or exec, which can't be a bareword */
6880                     if ((PL_last_lop_op == OP_PRINT
6881                             || PL_last_lop_op == OP_PRTF
6882                             || PL_last_lop_op == OP_SAY
6883                             || PL_last_lop_op == OP_SYSTEM
6884                             || PL_last_lop_op == OP_EXEC)
6885                             && (PL_hints & HINT_STRICT_SUBS))
6886                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6887                     if (lastchar != '-') {
6888                         if (ckWARN(WARN_RESERVED)) {
6889                             d = PL_tokenbuf;
6890                             while (isLOWER(*d))
6891                                 d++;
6892                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6893                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6894                                        PL_tokenbuf);
6895                         }
6896                     }
6897                 }
6898                 op_free(rv2cv_op);
6899
6900             safe_bareword:
6901                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6902                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6903                                      "Operator or semicolon missing before %c%s",
6904                                      lastchar, PL_tokenbuf);
6905                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6906                                      "Ambiguous use of %c resolved as operator %c",
6907                                      lastchar, lastchar);
6908                 }
6909                 TOKEN(WORD);
6910             }
6911
6912         case KEY___FILE__:
6913             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6914                                         newSVpv(CopFILE(PL_curcop),0));
6915             TERM(THING);
6916
6917         case KEY___LINE__:
6918             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6919                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6920             TERM(THING);
6921
6922         case KEY___PACKAGE__:
6923             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6924                                         (PL_curstash
6925                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6926                                          : &PL_sv_undef));
6927             TERM(THING);
6928
6929         case KEY___DATA__:
6930         case KEY___END__: {
6931             GV *gv;
6932             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6933                 const char *pname = "main";
6934                 if (PL_tokenbuf[2] == 'D')
6935                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6936                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6937                                 SVt_PVIO);
6938                 GvMULTI_on(gv);
6939                 if (!GvIO(gv))
6940                     GvIOp(gv) = newIO();
6941                 IoIFP(GvIOp(gv)) = PL_rsfp;
6942 #if defined(HAS_FCNTL) && defined(F_SETFD)
6943                 {
6944                     const int fd = PerlIO_fileno(PL_rsfp);
6945                     fcntl(fd,F_SETFD,fd >= 3);
6946                 }
6947 #endif
6948                 /* Mark this internal pseudo-handle as clean */
6949                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6950                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6951                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6952                 else
6953                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6954 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6955                 /* if the script was opened in binmode, we need to revert
6956                  * it to text mode for compatibility; but only iff it has CRs
6957                  * XXX this is a questionable hack at best. */
6958                 if (PL_bufend-PL_bufptr > 2
6959                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6960                 {
6961                     Off_t loc = 0;
6962                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6963                         loc = PerlIO_tell(PL_rsfp);
6964                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6965                     }
6966 #ifdef NETWARE
6967                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6968 #else
6969                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6970 #endif  /* NETWARE */
6971 #ifdef PERLIO_IS_STDIO /* really? */
6972 #  if defined(__BORLANDC__)
6973                         /* XXX see note in do_binmode() */
6974                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6975 #  endif
6976 #endif
6977                         if (loc > 0)
6978                             PerlIO_seek(PL_rsfp, loc, 0);
6979                     }
6980                 }
6981 #endif
6982 #ifdef PERLIO_LAYERS
6983                 if (!IN_BYTES) {
6984                     if (UTF)
6985                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6986                     else if (PL_encoding) {
6987                         SV *name;
6988                         dSP;
6989                         ENTER;
6990                         SAVETMPS;
6991                         PUSHMARK(sp);
6992                         EXTEND(SP, 1);
6993                         XPUSHs(PL_encoding);
6994                         PUTBACK;
6995                         call_method("name", G_SCALAR);
6996                         SPAGAIN;
6997                         name = POPs;
6998                         PUTBACK;
6999                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7000                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7001                                                       SVfARG(name)));
7002                         FREETMPS;
7003                         LEAVE;
7004                     }
7005                 }
7006 #endif
7007 #ifdef PERL_MAD
7008                 if (PL_madskills) {
7009                     if (PL_realtokenstart >= 0) {
7010                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7011                         if (!PL_endwhite)
7012                             PL_endwhite = newSVpvs("");
7013                         sv_catsv(PL_endwhite, PL_thiswhite);
7014                         PL_thiswhite = 0;
7015                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7016                         PL_realtokenstart = -1;
7017                     }
7018                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7019                            != NULL) ;
7020                 }
7021 #endif
7022                 PL_rsfp = NULL;
7023             }
7024             goto fake_eof;
7025         }
7026
7027         case KEY_AUTOLOAD:
7028         case KEY_DESTROY:
7029         case KEY_BEGIN:
7030         case KEY_UNITCHECK:
7031         case KEY_CHECK:
7032         case KEY_INIT:
7033         case KEY_END:
7034             if (PL_expect == XSTATE) {
7035                 s = PL_bufptr;
7036                 goto really_sub;
7037             }
7038             goto just_a_word;
7039
7040         case KEY_CORE:
7041             if (*s == ':' && s[1] == ':') {
7042                 s += 2;
7043                 d = s;
7044                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7045                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
7046                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7047                 if (tmp < 0)
7048                     tmp = -tmp;
7049                 else if (tmp == KEY_require || tmp == KEY_do)
7050                     /* that's a way to remember we saw "CORE::" */
7051                     orig_keyword = tmp;
7052                 goto reserved_word;
7053             }
7054             goto just_a_word;
7055
7056         case KEY_abs:
7057             UNI(OP_ABS);
7058
7059         case KEY_alarm:
7060             UNI(OP_ALARM);
7061
7062         case KEY_accept:
7063             LOP(OP_ACCEPT,XTERM);
7064
7065         case KEY_and:
7066             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7067                 return REPORT(0);
7068             OPERATOR(ANDOP);
7069
7070         case KEY_atan2:
7071             LOP(OP_ATAN2,XTERM);
7072
7073         case KEY_bind:
7074             LOP(OP_BIND,XTERM);
7075
7076         case KEY_binmode:
7077             LOP(OP_BINMODE,XTERM);
7078
7079         case KEY_bless:
7080             LOP(OP_BLESS,XTERM);
7081
7082         case KEY_break:
7083             FUN0(OP_BREAK);
7084
7085         case KEY_chop:
7086             UNI(OP_CHOP);
7087
7088         case KEY_continue:
7089             /* When 'use switch' is in effect, continue has a dual
7090                life as a control operator. */
7091             {
7092                 if (!FEATURE_IS_ENABLED("switch"))
7093                     PREBLOCK(CONTINUE);
7094                 else {
7095                     /* We have to disambiguate the two senses of
7096                       "continue". If the next token is a '{' then
7097                       treat it as the start of a continue block;
7098                       otherwise treat it as a control operator.
7099                      */
7100                     s = skipspace(s);
7101                     if (*s == '{')
7102             PREBLOCK(CONTINUE);
7103                     else
7104                         FUN0(OP_CONTINUE);
7105                 }
7106             }
7107
7108         case KEY_chdir:
7109             /* may use HOME */
7110             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7111             UNI(OP_CHDIR);
7112
7113         case KEY_close:
7114             UNI(OP_CLOSE);
7115
7116         case KEY_closedir:
7117             UNI(OP_CLOSEDIR);
7118
7119         case KEY_cmp:
7120             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7121                 return REPORT(0);
7122             Eop(OP_SCMP);
7123
7124         case KEY_caller:
7125             UNI(OP_CALLER);
7126
7127         case KEY_crypt:
7128 #ifdef FCRYPT
7129             if (!PL_cryptseen) {
7130                 PL_cryptseen = TRUE;
7131                 init_des();
7132             }
7133 #endif
7134             LOP(OP_CRYPT,XTERM);
7135
7136         case KEY_chmod:
7137             LOP(OP_CHMOD,XTERM);
7138
7139         case KEY_chown:
7140             LOP(OP_CHOWN,XTERM);
7141
7142         case KEY_connect:
7143             LOP(OP_CONNECT,XTERM);
7144
7145         case KEY_chr:
7146             UNI(OP_CHR);
7147
7148         case KEY_cos:
7149             UNI(OP_COS);
7150
7151         case KEY_chroot:
7152             UNI(OP_CHROOT);
7153
7154         case KEY_default:
7155             PREBLOCK(DEFAULT);
7156
7157         case KEY_do:
7158             s = SKIPSPACE1(s);
7159             if (*s == '{')
7160                 PRETERMBLOCK(DO);
7161             if (*s != '\'')
7162                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7163             if (orig_keyword == KEY_do) {
7164                 orig_keyword = 0;
7165                 pl_yylval.ival = 1;
7166             }
7167             else
7168                 pl_yylval.ival = 0;
7169             OPERATOR(DO);
7170
7171         case KEY_die:
7172             PL_hints |= HINT_BLOCK_SCOPE;
7173             LOP(OP_DIE,XTERM);
7174
7175         case KEY_defined:
7176             UNI(OP_DEFINED);
7177
7178         case KEY_delete:
7179             UNI(OP_DELETE);
7180
7181         case KEY_dbmopen:
7182             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7183                               STR_WITH_LEN("NDBM_File::"),
7184                               STR_WITH_LEN("DB_File::"),
7185                               STR_WITH_LEN("GDBM_File::"),
7186                               STR_WITH_LEN("SDBM_File::"),
7187                               STR_WITH_LEN("ODBM_File::"),
7188                               NULL);
7189             LOP(OP_DBMOPEN,XTERM);
7190
7191         case KEY_dbmclose:
7192             UNI(OP_DBMCLOSE);
7193
7194         case KEY_dump:
7195             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7196             LOOPX(OP_DUMP);
7197
7198         case KEY_else:
7199             PREBLOCK(ELSE);
7200
7201         case KEY_elsif:
7202             pl_yylval.ival = CopLINE(PL_curcop);
7203             OPERATOR(ELSIF);
7204
7205         case KEY_eq:
7206             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7207                 return REPORT(0);
7208             Eop(OP_SEQ);
7209
7210         case KEY_exists:
7211             UNI(OP_EXISTS);
7212
7213         case KEY_exit:
7214             if (PL_madskills)
7215                 UNI(OP_INT);
7216             UNI(OP_EXIT);
7217
7218         case KEY_eval:
7219             s = SKIPSPACE1(s);
7220             if (*s == '{') { /* block eval */
7221                 PL_expect = XTERMBLOCK;
7222                 UNIBRACK(OP_ENTERTRY);
7223             }
7224             else { /* string eval */
7225                 PL_expect = XTERM;
7226                 UNIBRACK(OP_ENTEREVAL);
7227             }
7228
7229         case KEY_eof:
7230             UNI(OP_EOF);
7231
7232         case KEY_exp:
7233             UNI(OP_EXP);
7234
7235         case KEY_each:
7236             UNI(OP_EACH);
7237
7238         case KEY_exec:
7239             LOP(OP_EXEC,XREF);
7240
7241         case KEY_endhostent:
7242             FUN0(OP_EHOSTENT);
7243
7244         case KEY_endnetent:
7245             FUN0(OP_ENETENT);
7246
7247         case KEY_endservent:
7248             FUN0(OP_ESERVENT);
7249
7250         case KEY_endprotoent:
7251             FUN0(OP_EPROTOENT);
7252
7253         case KEY_endpwent:
7254             FUN0(OP_EPWENT);
7255
7256         case KEY_endgrent:
7257             FUN0(OP_EGRENT);
7258
7259         case KEY_for:
7260         case KEY_foreach:
7261             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7262                 return REPORT(0);
7263             pl_yylval.ival = CopLINE(PL_curcop);
7264             s = SKIPSPACE1(s);
7265             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7266                 char *p = s;
7267 #ifdef PERL_MAD
7268                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7269 #endif
7270
7271                 if ((PL_bufend - p) >= 3 &&
7272                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7273                     p += 2;
7274                 else if ((PL_bufend - p) >= 4 &&
7275                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7276                     p += 3;
7277                 p = PEEKSPACE(p);
7278                 if (isIDFIRST_lazy_if(p,UTF)) {
7279                     p = scan_ident(p, PL_bufend,
7280                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7281                     p = PEEKSPACE(p);
7282                 }
7283                 if (*p != '$')
7284                     Perl_croak(aTHX_ "Missing $ on loop variable");
7285 #ifdef PERL_MAD
7286                 s = SvPVX(PL_linestr) + soff;
7287 #endif
7288             }
7289             OPERATOR(FOR);
7290
7291         case KEY_formline:
7292             LOP(OP_FORMLINE,XTERM);
7293
7294         case KEY_fork:
7295             FUN0(OP_FORK);
7296
7297         case KEY_fcntl:
7298             LOP(OP_FCNTL,XTERM);
7299
7300         case KEY_fileno:
7301             UNI(OP_FILENO);
7302
7303         case KEY_flock:
7304             LOP(OP_FLOCK,XTERM);
7305
7306         case KEY_gt:
7307             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7308                 return REPORT(0);
7309             Rop(OP_SGT);
7310
7311         case KEY_ge:
7312             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7313                 return REPORT(0);
7314             Rop(OP_SGE);
7315
7316         case KEY_grep:
7317             LOP(OP_GREPSTART, XREF);
7318
7319         case KEY_goto:
7320             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7321             LOOPX(OP_GOTO);
7322
7323         case KEY_gmtime:
7324             UNI(OP_GMTIME);
7325
7326         case KEY_getc:
7327             UNIDOR(OP_GETC);
7328
7329         case KEY_getppid:
7330             FUN0(OP_GETPPID);
7331
7332         case KEY_getpgrp:
7333             UNI(OP_GETPGRP);
7334
7335         case KEY_getpriority:
7336             LOP(OP_GETPRIORITY,XTERM);
7337
7338         case KEY_getprotobyname:
7339             UNI(OP_GPBYNAME);
7340
7341         case KEY_getprotobynumber:
7342             LOP(OP_GPBYNUMBER,XTERM);
7343
7344         case KEY_getprotoent:
7345             FUN0(OP_GPROTOENT);
7346
7347         case KEY_getpwent:
7348             FUN0(OP_GPWENT);
7349
7350         case KEY_getpwnam:
7351             UNI(OP_GPWNAM);
7352
7353         case KEY_getpwuid:
7354             UNI(OP_GPWUID);
7355
7356         case KEY_getpeername:
7357             UNI(OP_GETPEERNAME);
7358
7359         case KEY_gethostbyname:
7360             UNI(OP_GHBYNAME);
7361
7362         case KEY_gethostbyaddr:
7363             LOP(OP_GHBYADDR,XTERM);
7364
7365         case KEY_gethostent:
7366             FUN0(OP_GHOSTENT);
7367
7368         case KEY_getnetbyname:
7369             UNI(OP_GNBYNAME);
7370
7371         case KEY_getnetbyaddr:
7372             LOP(OP_GNBYADDR,XTERM);
7373
7374         case KEY_getnetent:
7375             FUN0(OP_GNETENT);
7376
7377         case KEY_getservbyname:
7378             LOP(OP_GSBYNAME,XTERM);
7379
7380         case KEY_getservbyport:
7381             LOP(OP_GSBYPORT,XTERM);
7382
7383         case KEY_getservent:
7384             FUN0(OP_GSERVENT);
7385
7386         case KEY_getsockname:
7387             UNI(OP_GETSOCKNAME);
7388
7389         case KEY_getsockopt:
7390             LOP(OP_GSOCKOPT,XTERM);
7391
7392         case KEY_getgrent:
7393             FUN0(OP_GGRENT);
7394
7395         case KEY_getgrnam:
7396             UNI(OP_GGRNAM);
7397
7398         case KEY_getgrgid:
7399             UNI(OP_GGRGID);
7400
7401         case KEY_getlogin:
7402             FUN0(OP_GETLOGIN);
7403
7404         case KEY_given:
7405             pl_yylval.ival = CopLINE(PL_curcop);
7406             OPERATOR(GIVEN);
7407
7408         case KEY_glob:
7409             LOP(OP_GLOB,XTERM);
7410
7411         case KEY_hex:
7412             UNI(OP_HEX);
7413
7414         case KEY_if:
7415             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7416                 return REPORT(0);
7417             pl_yylval.ival = CopLINE(PL_curcop);
7418             OPERATOR(IF);
7419
7420         case KEY_index:
7421             LOP(OP_INDEX,XTERM);
7422
7423         case KEY_int:
7424             UNI(OP_INT);
7425
7426         case KEY_ioctl:
7427             LOP(OP_IOCTL,XTERM);
7428
7429         case KEY_join:
7430             LOP(OP_JOIN,XTERM);
7431
7432         case KEY_keys:
7433             UNI(OP_KEYS);
7434
7435         case KEY_kill:
7436             LOP(OP_KILL,XTERM);
7437
7438         case KEY_last:
7439             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7440             LOOPX(OP_LAST);
7441
7442         case KEY_lc:
7443             UNI(OP_LC);
7444
7445         case KEY_lcfirst:
7446             UNI(OP_LCFIRST);
7447
7448         case KEY_local:
7449             pl_yylval.ival = 0;
7450             OPERATOR(LOCAL);
7451
7452         case KEY_length:
7453             UNI(OP_LENGTH);
7454
7455         case KEY_lt:
7456             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7457                 return REPORT(0);
7458             Rop(OP_SLT);
7459
7460         case KEY_le:
7461             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7462                 return REPORT(0);
7463             Rop(OP_SLE);
7464
7465         case KEY_localtime:
7466             UNI(OP_LOCALTIME);
7467
7468         case KEY_log:
7469             UNI(OP_LOG);
7470
7471         case KEY_link:
7472             LOP(OP_LINK,XTERM);
7473
7474         case KEY_listen:
7475             LOP(OP_LISTEN,XTERM);
7476
7477         case KEY_lock:
7478             UNI(OP_LOCK);
7479
7480         case KEY_lstat:
7481             UNI(OP_LSTAT);
7482
7483         case KEY_m:
7484             s = scan_pat(s,OP_MATCH);
7485             TERM(sublex_start());
7486
7487         case KEY_map:
7488             LOP(OP_MAPSTART, XREF);
7489
7490         case KEY_mkdir:
7491             LOP(OP_MKDIR,XTERM);
7492
7493         case KEY_msgctl:
7494             LOP(OP_MSGCTL,XTERM);
7495
7496         case KEY_msgget:
7497             LOP(OP_MSGGET,XTERM);
7498
7499         case KEY_msgrcv:
7500             LOP(OP_MSGRCV,XTERM);
7501
7502         case KEY_msgsnd:
7503             LOP(OP_MSGSND,XTERM);
7504
7505         case KEY_our:
7506         case KEY_my:
7507         case KEY_state:
7508             PL_in_my = (U16)tmp;
7509             s = SKIPSPACE1(s);
7510             if (isIDFIRST_lazy_if(s,UTF)) {
7511 #ifdef PERL_MAD
7512                 char* start = s;
7513 #endif
7514                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7515                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7516                     goto really_sub;
7517                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7518                 if (!PL_in_my_stash) {
7519                     char tmpbuf[1024];
7520                     PL_bufptr = s;
7521                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7522                     yyerror(tmpbuf);
7523                 }
7524 #ifdef PERL_MAD
7525                 if (PL_madskills) {     /* just add type to declarator token */
7526                     sv_catsv(PL_thistoken, PL_nextwhite);
7527                     PL_nextwhite = 0;
7528                     sv_catpvn(PL_thistoken, start, s - start);
7529                 }
7530 #endif
7531             }
7532             pl_yylval.ival = 1;
7533             OPERATOR(MY);
7534
7535         case KEY_next:
7536             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7537             LOOPX(OP_NEXT);
7538
7539         case KEY_ne:
7540             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7541                 return REPORT(0);
7542             Eop(OP_SNE);
7543
7544         case KEY_no:
7545             s = tokenize_use(0, s);
7546             OPERATOR(USE);
7547
7548         case KEY_not:
7549             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7550                 FUN1(OP_NOT);
7551             else {
7552                 if (!PL_lex_allbrackets &&
7553                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7554                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7555                 OPERATOR(NOTOP);
7556             }
7557
7558         case KEY_open:
7559             s = SKIPSPACE1(s);
7560             if (isIDFIRST_lazy_if(s,UTF)) {
7561                 const char *t;
7562                 for (d = s; isALNUM_lazy_if(d,UTF);)
7563                     d++;
7564                 for (t=d; isSPACE(*t);)
7565                     t++;
7566                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7567                     /* [perl #16184] */
7568                     && !(t[0] == '=' && t[1] == '>')
7569                 ) {
7570                     int parms_len = (int)(d-s);
7571                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7572                            "Precedence problem: open %.*s should be open(%.*s)",
7573                             parms_len, s, parms_len, s);
7574                 }
7575             }
7576             LOP(OP_OPEN,XTERM);
7577
7578         case KEY_or:
7579             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7580                 return REPORT(0);
7581             pl_yylval.ival = OP_OR;
7582             OPERATOR(OROP);
7583
7584         case KEY_ord:
7585             UNI(OP_ORD);
7586
7587         case KEY_oct:
7588             UNI(OP_OCT);
7589
7590         case KEY_opendir:
7591             LOP(OP_OPEN_DIR,XTERM);
7592
7593         case KEY_print:
7594             checkcomma(s,PL_tokenbuf,"filehandle");
7595             LOP(OP_PRINT,XREF);
7596
7597         case KEY_printf:
7598             checkcomma(s,PL_tokenbuf,"filehandle");
7599             LOP(OP_PRTF,XREF);
7600
7601         case KEY_prototype:
7602             UNI(OP_PROTOTYPE);
7603
7604         case KEY_push:
7605             LOP(OP_PUSH,XTERM);
7606
7607         case KEY_pop:
7608             UNIDOR(OP_POP);
7609
7610         case KEY_pos:
7611             UNIDOR(OP_POS);
7612
7613         case KEY_pack:
7614             LOP(OP_PACK,XTERM);
7615
7616         case KEY_package:
7617             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7618             s = SKIPSPACE1(s);
7619             s = force_strict_version(s);
7620             PL_lex_expect = XBLOCK;
7621             OPERATOR(PACKAGE);
7622
7623         case KEY_pipe:
7624             LOP(OP_PIPE_OP,XTERM);
7625
7626         case KEY_q:
7627             s = scan_str(s,!!PL_madskills,FALSE);
7628             if (!s)
7629                 missingterm(NULL);
7630             pl_yylval.ival = OP_CONST;
7631             TERM(sublex_start());
7632
7633         case KEY_quotemeta:
7634             UNI(OP_QUOTEMETA);
7635
7636         case KEY_qw: {
7637             OP *words = NULL;
7638             s = scan_str(s,!!PL_madskills,FALSE);
7639             if (!s)
7640                 missingterm(NULL);
7641             PL_expect = XOPERATOR;
7642             if (SvCUR(PL_lex_stuff)) {
7643                 int warned = 0;
7644                 d = SvPV_force(PL_lex_stuff, len);
7645                 while (len) {
7646                     for (; isSPACE(*d) && len; --len, ++d)
7647                         /**/;
7648                     if (len) {
7649                         SV *sv;
7650                         const char *b = d;
7651                         if (!warned && ckWARN(WARN_QW)) {
7652                             for (; !isSPACE(*d) && len; --len, ++d) {
7653                                 if (*d == ',') {
7654                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7655                                         "Possible attempt to separate words with commas");
7656                                     ++warned;
7657                                 }
7658                                 else if (*d == '#') {
7659                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7660                                         "Possible attempt to put comments in qw() list");
7661                                     ++warned;
7662                                 }
7663                             }
7664                         }
7665                         else {
7666                             for (; !isSPACE(*d) && len; --len, ++d)
7667                                 /**/;
7668                         }
7669                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7670                         words = op_append_elem(OP_LIST, words,
7671                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7672                     }
7673                 }
7674             }
7675             if (!words)
7676                 words = newNULLLIST();
7677             if (PL_lex_stuff) {
7678                 SvREFCNT_dec(PL_lex_stuff);
7679                 PL_lex_stuff = NULL;
7680             }
7681             PL_expect = XOPERATOR;
7682             pl_yylval.opval = sawparens(words);
7683             TOKEN(QWLIST);
7684         }
7685
7686         case KEY_qq:
7687             s = scan_str(s,!!PL_madskills,FALSE);
7688             if (!s)
7689                 missingterm(NULL);
7690             pl_yylval.ival = OP_STRINGIFY;
7691             if (SvIVX(PL_lex_stuff) == '\'')
7692                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7693             TERM(sublex_start());
7694
7695         case KEY_qr:
7696             s = scan_pat(s,OP_QR);
7697             TERM(sublex_start());
7698
7699         case KEY_qx:
7700             s = scan_str(s,!!PL_madskills,FALSE);
7701             if (!s)
7702                 missingterm(NULL);
7703             readpipe_override();
7704             TERM(sublex_start());
7705
7706         case KEY_return:
7707             OLDLOP(OP_RETURN);
7708
7709         case KEY_require:
7710             s = SKIPSPACE1(s);
7711             if (isDIGIT(*s)) {
7712                 s = force_version(s, FALSE);
7713             }
7714             else if (*s != 'v' || !isDIGIT(s[1])
7715                     || (s = force_version(s, TRUE), *s == 'v'))
7716             {
7717                 *PL_tokenbuf = '\0';
7718                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7719                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7720                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7721                 else if (*s == '<')
7722                     yyerror("<> should be quotes");
7723             }
7724             if (orig_keyword == KEY_require) {
7725                 orig_keyword = 0;
7726                 pl_yylval.ival = 1;
7727             }
7728             else
7729                 pl_yylval.ival = 0;
7730             PL_expect = XTERM;
7731             PL_bufptr = s;
7732             PL_last_uni = PL_oldbufptr;
7733             PL_last_lop_op = OP_REQUIRE;
7734             s = skipspace(s);
7735             return REPORT( (int)REQUIRE );
7736
7737         case KEY_reset:
7738             UNI(OP_RESET);
7739
7740         case KEY_redo:
7741             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7742             LOOPX(OP_REDO);
7743
7744         case KEY_rename:
7745             LOP(OP_RENAME,XTERM);
7746
7747         case KEY_rand:
7748             UNI(OP_RAND);
7749
7750         case KEY_rmdir:
7751             UNI(OP_RMDIR);
7752
7753         case KEY_rindex:
7754             LOP(OP_RINDEX,XTERM);
7755
7756         case KEY_read:
7757             LOP(OP_READ,XTERM);
7758
7759         case KEY_readdir:
7760             UNI(OP_READDIR);
7761
7762         case KEY_readline:
7763             UNIDOR(OP_READLINE);
7764
7765         case KEY_readpipe:
7766             UNIDOR(OP_BACKTICK);
7767
7768         case KEY_rewinddir:
7769             UNI(OP_REWINDDIR);
7770
7771         case KEY_recv:
7772             LOP(OP_RECV,XTERM);
7773
7774         case KEY_reverse:
7775             LOP(OP_REVERSE,XTERM);
7776
7777         case KEY_readlink:
7778             UNIDOR(OP_READLINK);
7779
7780         case KEY_ref:
7781             UNI(OP_REF);
7782
7783         case KEY_s:
7784             s = scan_subst(s);
7785             if (pl_yylval.opval)
7786                 TERM(sublex_start());
7787             else
7788                 TOKEN(1);       /* force error */
7789
7790         case KEY_say:
7791             checkcomma(s,PL_tokenbuf,"filehandle");
7792             LOP(OP_SAY,XREF);
7793
7794         case KEY_chomp:
7795             UNI(OP_CHOMP);
7796
7797         case KEY_scalar:
7798             UNI(OP_SCALAR);
7799
7800         case KEY_select:
7801             LOP(OP_SELECT,XTERM);
7802
7803         case KEY_seek:
7804             LOP(OP_SEEK,XTERM);
7805
7806         case KEY_semctl:
7807             LOP(OP_SEMCTL,XTERM);
7808
7809         case KEY_semget:
7810             LOP(OP_SEMGET,XTERM);
7811
7812         case KEY_semop:
7813             LOP(OP_SEMOP,XTERM);
7814
7815         case KEY_send:
7816             LOP(OP_SEND,XTERM);
7817
7818         case KEY_setpgrp:
7819             LOP(OP_SETPGRP,XTERM);
7820
7821         case KEY_setpriority:
7822             LOP(OP_SETPRIORITY,XTERM);
7823
7824         case KEY_sethostent:
7825             UNI(OP_SHOSTENT);
7826
7827         case KEY_setnetent:
7828             UNI(OP_SNETENT);
7829
7830         case KEY_setservent:
7831             UNI(OP_SSERVENT);
7832
7833         case KEY_setprotoent:
7834             UNI(OP_SPROTOENT);
7835
7836         case KEY_setpwent:
7837             FUN0(OP_SPWENT);
7838
7839         case KEY_setgrent:
7840             FUN0(OP_SGRENT);
7841
7842         case KEY_seekdir:
7843             LOP(OP_SEEKDIR,XTERM);
7844
7845         case KEY_setsockopt:
7846             LOP(OP_SSOCKOPT,XTERM);
7847
7848         case KEY_shift:
7849             UNIDOR(OP_SHIFT);
7850
7851         case KEY_shmctl:
7852             LOP(OP_SHMCTL,XTERM);
7853
7854         case KEY_shmget:
7855             LOP(OP_SHMGET,XTERM);
7856
7857         case KEY_shmread:
7858             LOP(OP_SHMREAD,XTERM);
7859
7860         case KEY_shmwrite:
7861             LOP(OP_SHMWRITE,XTERM);
7862
7863         case KEY_shutdown:
7864             LOP(OP_SHUTDOWN,XTERM);
7865
7866         case KEY_sin:
7867             UNI(OP_SIN);
7868
7869         case KEY_sleep:
7870             UNI(OP_SLEEP);
7871
7872         case KEY_socket:
7873             LOP(OP_SOCKET,XTERM);
7874
7875         case KEY_socketpair:
7876             LOP(OP_SOCKPAIR,XTERM);
7877
7878         case KEY_sort:
7879             checkcomma(s,PL_tokenbuf,"subroutine name");
7880             s = SKIPSPACE1(s);
7881             if (*s == ';' || *s == ')')         /* probably a close */
7882                 Perl_croak(aTHX_ "sort is now a reserved word");
7883             PL_expect = XTERM;
7884             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7885             LOP(OP_SORT,XREF);
7886
7887         case KEY_split:
7888             LOP(OP_SPLIT,XTERM);
7889
7890         case KEY_sprintf:
7891             LOP(OP_SPRINTF,XTERM);
7892
7893         case KEY_splice:
7894             LOP(OP_SPLICE,XTERM);
7895
7896         case KEY_sqrt:
7897             UNI(OP_SQRT);
7898
7899         case KEY_srand:
7900             UNI(OP_SRAND);
7901
7902         case KEY_stat:
7903             UNI(OP_STAT);
7904
7905         case KEY_study:
7906             UNI(OP_STUDY);
7907
7908         case KEY_substr:
7909             LOP(OP_SUBSTR,XTERM);
7910
7911         case KEY_format:
7912         case KEY_sub:
7913           really_sub:
7914             {
7915                 char tmpbuf[sizeof PL_tokenbuf];
7916                 SSize_t tboffset = 0;
7917                 expectation attrful;
7918                 bool have_name, have_proto;
7919                 const int key = tmp;
7920
7921 #ifdef PERL_MAD
7922                 SV *tmpwhite = 0;
7923
7924                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7925                 SV *subtoken = newSVpvn(tstart, s - tstart);
7926                 PL_thistoken = 0;
7927
7928                 d = s;
7929                 s = SKIPSPACE2(s,tmpwhite);
7930 #else
7931                 s = skipspace(s);
7932 #endif
7933
7934                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7935                     (*s == ':' && s[1] == ':'))
7936                 {
7937 #ifdef PERL_MAD
7938                     SV *nametoke = NULL;
7939 #endif
7940
7941                     PL_expect = XBLOCK;
7942                     attrful = XATTRBLOCK;
7943                     /* remember buffer pos'n for later force_word */
7944                     tboffset = s - PL_oldbufptr;
7945                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7946 #ifdef PERL_MAD
7947                     if (PL_madskills)
7948                         nametoke = newSVpvn(s, d - s);
7949 #endif
7950                     if (memchr(tmpbuf, ':', len))
7951                         sv_setpvn(PL_subname, tmpbuf, len);
7952                     else {
7953                         sv_setsv(PL_subname,PL_curstname);
7954                         sv_catpvs(PL_subname,"::");
7955                         sv_catpvn(PL_subname,tmpbuf,len);
7956                     }
7957                     have_name = TRUE;
7958
7959 #ifdef PERL_MAD
7960
7961                     start_force(0);
7962                     CURMAD('X', nametoke);
7963                     CURMAD('_', tmpwhite);
7964                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7965                                       FALSE, TRUE, TRUE);
7966
7967                     s = SKIPSPACE2(d,tmpwhite);
7968 #else
7969                     s = skipspace(d);
7970 #endif
7971                 }
7972                 else {
7973                     if (key == KEY_my)
7974                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7975                     PL_expect = XTERMBLOCK;
7976                     attrful = XATTRTERM;
7977                     sv_setpvs(PL_subname,"?");
7978                     have_name = FALSE;
7979                 }
7980
7981                 if (key == KEY_format) {
7982                     if (*s == '=')
7983                         PL_lex_formbrack = PL_lex_brackets + 1;
7984 #ifdef PERL_MAD
7985                     PL_thistoken = subtoken;
7986                     s = d;
7987 #else
7988                     if (have_name)
7989                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7990                                           FALSE, TRUE, TRUE);
7991 #endif
7992                     OPERATOR(FORMAT);
7993                 }
7994
7995                 /* Look for a prototype */
7996                 if (*s == '(') {
7997                     char *p;
7998                     bool bad_proto = FALSE;
7999                     bool in_brackets = FALSE;
8000                     char greedy_proto = ' ';
8001                     bool proto_after_greedy_proto = FALSE;
8002                     bool must_be_last = FALSE;
8003                     bool underscore = FALSE;
8004                     bool seen_underscore = FALSE;
8005                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8006
8007                     s = scan_str(s,!!PL_madskills,FALSE);
8008                     if (!s)
8009                         Perl_croak(aTHX_ "Prototype not terminated");
8010                     /* strip spaces and check for bad characters */
8011                     d = SvPVX(PL_lex_stuff);
8012                     tmp = 0;
8013                     for (p = d; *p; ++p) {
8014                         if (!isSPACE(*p)) {
8015                             d[tmp++] = *p;
8016
8017                             if (warnillegalproto) {
8018                                 if (must_be_last)
8019                                     proto_after_greedy_proto = TRUE;
8020                                 if (!strchr("$@%*;[]&\\_+", *p)) {
8021                                     bad_proto = TRUE;
8022                                 }
8023                                 else {
8024                                     if ( underscore ) {
8025                                         if ( *p != ';' )
8026                                             bad_proto = TRUE;
8027                                         underscore = FALSE;
8028                                     }
8029                                     if ( *p == '[' ) {
8030                                         in_brackets = TRUE;
8031                                     }
8032                                     else if ( *p == ']' ) {
8033                                         in_brackets = FALSE;
8034                                     }
8035                                     else if ( (*p == '@' || *p == '%') &&
8036                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8037                                          !in_brackets ) {
8038                                         must_be_last = TRUE;
8039                                         greedy_proto = *p;
8040                                     }
8041                                     else if ( *p == '_' ) {
8042                                         underscore = seen_underscore = TRUE;
8043                                     }
8044                                 }
8045                             }
8046                         }
8047                     }
8048                     d[tmp] = '\0';
8049                     if (proto_after_greedy_proto)
8050                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8051                                     "Prototype after '%c' for %"SVf" : %s",
8052                                     greedy_proto, SVfARG(PL_subname), d);
8053                     if (bad_proto)
8054                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8055                                     "Illegal character %sin prototype for %"SVf" : %s",
8056                                     seen_underscore ? "after '_' " : "",
8057                                     SVfARG(PL_subname), d);
8058                     SvCUR_set(PL_lex_stuff, tmp);
8059                     have_proto = TRUE;
8060
8061 #ifdef PERL_MAD
8062                     start_force(0);
8063                     CURMAD('q', PL_thisopen);
8064                     CURMAD('_', tmpwhite);
8065                     CURMAD('=', PL_thisstuff);
8066                     CURMAD('Q', PL_thisclose);
8067                     NEXTVAL_NEXTTOKE.opval =
8068                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8069                     PL_lex_stuff = NULL;
8070                     force_next(THING);
8071
8072                     s = SKIPSPACE2(s,tmpwhite);
8073 #else
8074                     s = skipspace(s);
8075 #endif
8076                 }
8077                 else
8078                     have_proto = FALSE;
8079
8080                 if (*s == ':' && s[1] != ':')
8081                     PL_expect = attrful;
8082                 else if (*s != '{' && key == KEY_sub) {
8083                     if (!have_name)
8084                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8085                     else if (*s != ';' && *s != '}')
8086                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8087                 }
8088
8089 #ifdef PERL_MAD
8090                 start_force(0);
8091                 if (tmpwhite) {
8092                     if (PL_madskills)
8093                         curmad('^', newSVpvs(""));
8094                     CURMAD('_', tmpwhite);
8095                 }
8096                 force_next(0);
8097
8098                 PL_thistoken = subtoken;
8099 #else
8100                 if (have_proto) {
8101                     NEXTVAL_NEXTTOKE.opval =
8102                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8103                     PL_lex_stuff = NULL;
8104                     force_next(THING);
8105                 }
8106 #endif
8107                 if (!have_name) {
8108                     if (PL_curstash)
8109                         sv_setpvs(PL_subname, "__ANON__");
8110                     else
8111                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8112                     TOKEN(ANONSUB);
8113                 }
8114 #ifndef PERL_MAD
8115                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8116                                   FALSE, TRUE, TRUE);
8117 #endif
8118                 if (key == KEY_my)
8119                     TOKEN(MYSUB);
8120                 TOKEN(SUB);
8121             }
8122
8123         case KEY_system:
8124             LOP(OP_SYSTEM,XREF);
8125
8126         case KEY_symlink:
8127             LOP(OP_SYMLINK,XTERM);
8128
8129         case KEY_syscall:
8130             LOP(OP_SYSCALL,XTERM);
8131
8132         case KEY_sysopen:
8133             LOP(OP_SYSOPEN,XTERM);
8134
8135         case KEY_sysseek:
8136             LOP(OP_SYSSEEK,XTERM);
8137
8138         case KEY_sysread:
8139             LOP(OP_SYSREAD,XTERM);
8140
8141         case KEY_syswrite:
8142             LOP(OP_SYSWRITE,XTERM);
8143
8144         case KEY_tr:
8145             s = scan_trans(s);
8146             TERM(sublex_start());
8147
8148         case KEY_tell:
8149             UNI(OP_TELL);
8150
8151         case KEY_telldir:
8152             UNI(OP_TELLDIR);
8153
8154         case KEY_tie:
8155             LOP(OP_TIE,XTERM);
8156
8157         case KEY_tied:
8158             UNI(OP_TIED);
8159
8160         case KEY_time:
8161             FUN0(OP_TIME);
8162
8163         case KEY_times:
8164             FUN0(OP_TMS);
8165
8166         case KEY_truncate:
8167             LOP(OP_TRUNCATE,XTERM);
8168
8169         case KEY_uc:
8170             UNI(OP_UC);
8171
8172         case KEY_ucfirst:
8173             UNI(OP_UCFIRST);
8174
8175         case KEY_untie:
8176             UNI(OP_UNTIE);
8177
8178         case KEY_until:
8179             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8180                 return REPORT(0);
8181             pl_yylval.ival = CopLINE(PL_curcop);
8182             OPERATOR(UNTIL);
8183
8184         case KEY_unless:
8185             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186                 return REPORT(0);
8187             pl_yylval.ival = CopLINE(PL_curcop);
8188             OPERATOR(UNLESS);
8189
8190         case KEY_unlink:
8191             LOP(OP_UNLINK,XTERM);
8192
8193         case KEY_undef:
8194             UNIDOR(OP_UNDEF);
8195
8196         case KEY_unpack:
8197             LOP(OP_UNPACK,XTERM);
8198
8199         case KEY_utime:
8200             LOP(OP_UTIME,XTERM);
8201
8202         case KEY_umask:
8203             UNIDOR(OP_UMASK);
8204
8205         case KEY_unshift:
8206             LOP(OP_UNSHIFT,XTERM);
8207
8208         case KEY_use:
8209             s = tokenize_use(1, s);
8210             OPERATOR(USE);
8211
8212         case KEY_values:
8213             UNI(OP_VALUES);
8214
8215         case KEY_vec:
8216             LOP(OP_VEC,XTERM);
8217
8218         case KEY_when:
8219             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8220                 return REPORT(0);
8221             pl_yylval.ival = CopLINE(PL_curcop);
8222             OPERATOR(WHEN);
8223
8224         case KEY_while:
8225             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8226                 return REPORT(0);
8227             pl_yylval.ival = CopLINE(PL_curcop);
8228             OPERATOR(WHILE);
8229
8230         case KEY_warn:
8231             PL_hints |= HINT_BLOCK_SCOPE;
8232             LOP(OP_WARN,XTERM);
8233
8234         case KEY_wait:
8235             FUN0(OP_WAIT);
8236
8237         case KEY_waitpid:
8238             LOP(OP_WAITPID,XTERM);
8239
8240         case KEY_wantarray:
8241             FUN0(OP_WANTARRAY);
8242
8243         case KEY_write:
8244 #ifdef EBCDIC
8245         {
8246             char ctl_l[2];
8247             ctl_l[0] = toCTRL('L');
8248             ctl_l[1] = '\0';
8249             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8250         }
8251 #else
8252             /* Make sure $^L is defined */
8253             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8254 #endif
8255             UNI(OP_ENTERWRITE);
8256
8257         case KEY_x:
8258             if (PL_expect == XOPERATOR) {
8259                 if (*s == '=' && !PL_lex_allbrackets &&
8260                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8261                     return REPORT(0);
8262                 Mop(OP_REPEAT);
8263             }
8264             check_uni();
8265             goto just_a_word;
8266
8267         case KEY_xor:
8268             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8269                 return REPORT(0);
8270             pl_yylval.ival = OP_XOR;
8271             OPERATOR(OROP);
8272
8273         case KEY_y:
8274             s = scan_trans(s);
8275             TERM(sublex_start());
8276         }
8277     }}
8278 }
8279 #ifdef __SC__
8280 #pragma segment Main
8281 #endif
8282
8283 static int
8284 S_pending_ident(pTHX)
8285 {
8286     dVAR;
8287     register char *d;
8288     PADOFFSET tmp = 0;
8289     /* pit holds the identifier we read and pending_ident is reset */
8290     char pit = PL_pending_ident;
8291     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8292     /* All routes through this function want to know if there is a colon.  */
8293     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8294     PL_pending_ident = 0;
8295
8296     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8297     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8298           "### Pending identifier '%s'\n", PL_tokenbuf); });
8299
8300     /* if we're in a my(), we can't allow dynamics here.
8301        $foo'bar has already been turned into $foo::bar, so
8302        just check for colons.
8303
8304        if it's a legal name, the OP is a PADANY.
8305     */
8306     if (PL_in_my) {
8307         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8308             if (has_colon)
8309                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8310                                   "variable %s in \"our\"",
8311                                   PL_tokenbuf));
8312             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8313         }
8314         else {
8315             if (has_colon)
8316                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8317                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8318
8319             pl_yylval.opval = newOP(OP_PADANY, 0);
8320             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8321             return PRIVATEREF;
8322         }
8323     }
8324
8325     /*
8326        build the ops for accesses to a my() variable.
8327
8328        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8329        then used in a comparison.  This catches most, but not
8330        all cases.  For instance, it catches
8331            sort { my($a); $a <=> $b }
8332        but not
8333            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8334        (although why you'd do that is anyone's guess).
8335     */
8336
8337     if (!has_colon) {
8338         if (!PL_in_my)
8339             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8340         if (tmp != NOT_IN_PAD) {
8341             /* might be an "our" variable" */
8342             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8343                 /* build ops for a bareword */
8344                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8345                 HEK * const stashname = HvNAME_HEK(stash);
8346                 SV *  const sym = newSVhek(stashname);
8347                 sv_catpvs(sym, "::");
8348                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8349                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8350                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8351                 gv_fetchsv(sym,
8352                     (PL_in_eval
8353                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8354                         : GV_ADDMULTI
8355                     ),
8356                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8357                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8358                      : SVt_PVHV));
8359                 return WORD;
8360             }
8361
8362             /* if it's a sort block and they're naming $a or $b */
8363             if (PL_last_lop_op == OP_SORT &&
8364                 PL_tokenbuf[0] == '$' &&
8365                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8366                 && !PL_tokenbuf[2])
8367             {
8368                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8369                      d < PL_bufend && *d != '\n';
8370                      d++)
8371                 {
8372                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8373                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8374                               PL_tokenbuf);
8375                     }
8376                 }
8377             }
8378
8379             pl_yylval.opval = newOP(OP_PADANY, 0);
8380             pl_yylval.opval->op_targ = tmp;
8381             return PRIVATEREF;
8382         }
8383     }
8384
8385     /*
8386        Whine if they've said @foo in a doublequoted string,
8387        and @foo isn't a variable we can find in the symbol
8388        table.
8389     */
8390     if (ckWARN(WARN_AMBIGUOUS) &&
8391         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8392         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8393                                          SVt_PVAV);
8394         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8395                 /* DO NOT warn for @- and @+ */
8396                 && !( PL_tokenbuf[2] == '\0' &&
8397                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8398            )
8399         {
8400             /* Downgraded from fatal to warning 20000522 mjd */
8401             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8402                         "Possible unintended interpolation of %s in string",
8403                         PL_tokenbuf);
8404         }
8405     }
8406
8407     /* build ops for a bareword */
8408     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8409                                                       tokenbuf_len - 1));
8410     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8411     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8412                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8413                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8414                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8415                       : SVt_PVHV));
8416     return WORD;
8417 }
8418
8419 STATIC void
8420 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8421 {
8422     dVAR;
8423
8424     PERL_ARGS_ASSERT_CHECKCOMMA;
8425
8426     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8427         if (ckWARN(WARN_SYNTAX)) {
8428             int level = 1;
8429             const char *w;
8430             for (w = s+2; *w && level; w++) {
8431                 if (*w == '(')
8432                     ++level;
8433                 else if (*w == ')')
8434                     --level;
8435             }
8436             while (isSPACE(*w))
8437                 ++w;
8438             /* the list of chars below is for end of statements or
8439              * block / parens, boolean operators (&&, ||, //) and branch
8440              * constructs (or, and, if, until, unless, while, err, for).
8441              * Not a very solid hack... */
8442             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8443                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8444                             "%s (...) interpreted as function",name);
8445         }
8446     }
8447     while (s < PL_bufend && isSPACE(*s))
8448         s++;
8449     if (*s == '(')
8450         s++;
8451     while (s < PL_bufend && isSPACE(*s))
8452         s++;
8453     if (isIDFIRST_lazy_if(s,UTF)) {
8454         const char * const w = s++;
8455         while (isALNUM_lazy_if(s,UTF))
8456             s++;
8457         while (s < PL_bufend && isSPACE(*s))
8458             s++;
8459         if (*s == ',') {
8460             GV* gv;
8461             if (keyword(w, s - w, 0))
8462                 return;
8463
8464             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8465             if (gv && GvCVu(gv))
8466                 return;
8467             Perl_croak(aTHX_ "No comma allowed after %s", what);
8468         }
8469     }
8470 }
8471
8472 /* Either returns sv, or mortalizes sv and returns a new SV*.
8473    Best used as sv=new_constant(..., sv, ...).
8474    If s, pv are NULL, calls subroutine with one argument,
8475    and type is used with error messages only. */
8476
8477 STATIC SV *
8478 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8479                SV *sv, SV *pv, const char *type, STRLEN typelen)
8480 {
8481     dVAR; dSP;
8482     HV * const table = GvHV(PL_hintgv);          /* ^H */
8483     SV *res;
8484     SV **cvp;
8485     SV *cv, *typesv;
8486     const char *why1 = "", *why2 = "", *why3 = "";
8487
8488     PERL_ARGS_ASSERT_NEW_CONSTANT;
8489
8490     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8491         SV *msg;
8492
8493         why2 = (const char *)
8494             (strEQ(key,"charnames")
8495              ? "(possibly a missing \"use charnames ...\")"
8496              : "");
8497         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8498                             (type ? type: "undef"), why2);
8499
8500         /* This is convoluted and evil ("goto considered harmful")
8501          * but I do not understand the intricacies of all the different
8502          * failure modes of %^H in here.  The goal here is to make
8503          * the most probable error message user-friendly. --jhi */
8504
8505         goto msgdone;
8506
8507     report:
8508         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8509                             (type ? type: "undef"), why1, why2, why3);
8510     msgdone:
8511         yyerror(SvPVX_const(msg));
8512         SvREFCNT_dec(msg);
8513         return sv;
8514     }
8515
8516     /* charnames doesn't work well if there have been errors found */
8517     if (PL_error_count > 0 && strEQ(key,"charnames"))
8518         return &PL_sv_undef;
8519
8520     cvp = hv_fetch(table, key, keylen, FALSE);
8521     if (!cvp || !SvOK(*cvp)) {
8522         why1 = "$^H{";
8523         why2 = key;
8524         why3 = "} is not defined";
8525         goto report;
8526     }
8527     sv_2mortal(sv);                     /* Parent created it permanently */
8528     cv = *cvp;
8529     if (!pv && s)
8530         pv = newSVpvn_flags(s, len, SVs_TEMP);
8531     if (type && pv)
8532         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8533     else
8534         typesv = &PL_sv_undef;
8535
8536     PUSHSTACKi(PERLSI_OVERLOAD);
8537     ENTER ;
8538     SAVETMPS;
8539
8540     PUSHMARK(SP) ;
8541     EXTEND(sp, 3);
8542     if (pv)
8543         PUSHs(pv);
8544     PUSHs(sv);
8545     if (pv)
8546         PUSHs(typesv);
8547     PUTBACK;
8548     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8549
8550     SPAGAIN ;
8551
8552     /* Check the eval first */
8553     if (!PL_in_eval && SvTRUE(ERRSV)) {
8554         sv_catpvs(ERRSV, "Propagated");
8555         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8556         (void)POPs;
8557         res = SvREFCNT_inc_simple(sv);
8558     }
8559     else {
8560         res = POPs;
8561         SvREFCNT_inc_simple_void(res);
8562     }
8563
8564     PUTBACK ;
8565     FREETMPS ;
8566     LEAVE ;
8567     POPSTACK;
8568
8569     if (!SvOK(res)) {
8570         why1 = "Call to &{$^H{";
8571         why2 = key;
8572         why3 = "}} did not return a defined value";
8573         sv = res;
8574         goto report;
8575     }
8576
8577     return res;
8578 }
8579
8580 /* Returns a NUL terminated string, with the length of the string written to
8581    *slp
8582    */
8583 STATIC char *
8584 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8585 {
8586     dVAR;
8587     register char *d = dest;
8588     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8589
8590     PERL_ARGS_ASSERT_SCAN_WORD;
8591
8592     for (;;) {
8593         if (d >= e)
8594             Perl_croak(aTHX_ ident_too_long);
8595         if (isALNUM(*s))        /* UTF handled below */
8596             *d++ = *s++;
8597         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8598             *d++ = ':';
8599             *d++ = ':';
8600             s++;
8601         }
8602         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8603             *d++ = *s++;
8604             *d++ = *s++;
8605         }
8606         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8607             char *t = s + UTF8SKIP(s);
8608             size_t len;
8609             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8610                 t += UTF8SKIP(t);
8611             len = t - s;
8612             if (d + len > e)
8613                 Perl_croak(aTHX_ ident_too_long);
8614             Copy(s, d, len, char);
8615             d += len;
8616             s = t;
8617         }
8618         else {
8619             *d = '\0';
8620             *slp = d - dest;
8621             return s;
8622         }
8623     }
8624 }
8625
8626 STATIC char *
8627 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8628 {
8629     dVAR;
8630     char *bracket = NULL;
8631     char funny = *s++;
8632     register char *d = dest;
8633     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8634
8635     PERL_ARGS_ASSERT_SCAN_IDENT;
8636
8637     if (isSPACE(*s))
8638         s = PEEKSPACE(s);
8639     if (isDIGIT(*s)) {
8640         while (isDIGIT(*s)) {
8641             if (d >= e)
8642                 Perl_croak(aTHX_ ident_too_long);
8643             *d++ = *s++;
8644         }
8645     }
8646     else {
8647         for (;;) {
8648             if (d >= e)
8649                 Perl_croak(aTHX_ ident_too_long);
8650             if (isALNUM(*s))    /* UTF handled below */
8651                 *d++ = *s++;
8652             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8653                 *d++ = ':';
8654                 *d++ = ':';
8655                 s++;
8656             }
8657             else if (*s == ':' && s[1] == ':') {
8658                 *d++ = *s++;
8659                 *d++ = *s++;
8660             }
8661             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8662                 char *t = s + UTF8SKIP(s);
8663                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8664                     t += UTF8SKIP(t);
8665                 if (d + (t - s) > e)
8666                     Perl_croak(aTHX_ ident_too_long);
8667                 Copy(s, d, t - s, char);
8668                 d += t - s;
8669                 s = t;
8670             }
8671             else
8672                 break;
8673         }
8674     }
8675     *d = '\0';
8676     d = dest;
8677     if (*d) {
8678         if (PL_lex_state != LEX_NORMAL)
8679             PL_lex_state = LEX_INTERPENDMAYBE;
8680         return s;
8681     }
8682     if (*s == '$' && s[1] &&
8683         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8684     {
8685         return s;
8686     }
8687     if (*s == '{') {
8688         bracket = s;
8689         s++;
8690     }
8691     else if (ck_uni)
8692         check_uni();
8693     if (s < send)
8694         *d = *s++;
8695     d[1] = '\0';
8696     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8697         *d = toCTRL(*s);
8698         s++;
8699     }
8700     if (bracket) {
8701         if (isSPACE(s[-1])) {
8702             while (s < send) {
8703                 const char ch = *s++;
8704                 if (!SPACE_OR_TAB(ch)) {
8705                     *d = ch;
8706                     break;
8707                 }
8708             }
8709         }
8710         if (isIDFIRST_lazy_if(d,UTF)) {
8711             d++;
8712             if (UTF) {
8713                 char *end = s;
8714                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8715                     end += UTF8SKIP(end);
8716                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8717                         end += UTF8SKIP(end);
8718                 }
8719                 Copy(s, d, end - s, char);
8720                 d += end - s;
8721                 s = end;
8722             }
8723             else {
8724                 while ((isALNUM(*s) || *s == ':') && d < e)
8725                     *d++ = *s++;
8726                 if (d >= e)
8727                     Perl_croak(aTHX_ ident_too_long);
8728             }
8729             *d = '\0';
8730             while (s < send && SPACE_OR_TAB(*s))
8731                 s++;
8732             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8733                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8734                     const char * const brack =
8735                         (const char *)
8736                         ((*s == '[') ? "[...]" : "{...}");
8737    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8738                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8739                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8740                         funny, dest, brack, funny, dest, brack);
8741                 }
8742                 bracket++;
8743                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8744                 PL_lex_allbrackets++;
8745                 return s;
8746             }
8747         }
8748         /* Handle extended ${^Foo} variables
8749          * 1999-02-27 mjd-perl-patch@plover.com */
8750         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8751                  && isALNUM(*s))
8752         {
8753             d++;
8754             while (isALNUM(*s) && d < e) {
8755                 *d++ = *s++;
8756             }
8757             if (d >= e)
8758                 Perl_croak(aTHX_ ident_too_long);
8759             *d = '\0';
8760         }
8761         if (*s == '}') {
8762             s++;
8763             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8764                 PL_lex_state = LEX_INTERPEND;
8765                 PL_expect = XREF;
8766             }
8767             if (PL_lex_state == LEX_NORMAL) {
8768                 if (ckWARN(WARN_AMBIGUOUS) &&
8769                     (keyword(dest, d - dest, 0)
8770                      || get_cvn_flags(dest, d - dest, 0)))
8771                 {
8772                     if (funny == '#')
8773                         funny = '@';
8774                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8775                         "Ambiguous use of %c{%s} resolved to %c%s",
8776                         funny, dest, funny, dest);
8777                 }
8778             }
8779         }
8780         else {
8781             s = bracket;                /* let the parser handle it */
8782             *dest = '\0';
8783         }
8784     }
8785     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8786         PL_lex_state = LEX_INTERPEND;
8787     return s;
8788 }
8789
8790 static bool
8791 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8792
8793     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8794      * the parse starting at 's', based on the subset that are valid in this
8795      * context input to this routine in 'valid_flags'. Advances s.  Returns
8796      * TRUE if the input was a valid flag, so the next char may be as well;
8797      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8798      * current regex.  This routine will set it to any charset modifier found.
8799      * The caller shouldn't change it.  This way, another charset modifier
8800      * encountered in the parse can be detected as an error, as we have decided
8801      * allow only one */
8802
8803     const char c = **s;
8804
8805     if (! strchr(valid_flags, c)) {
8806         if (isALNUM(c)) {
8807             goto deprecate;
8808         }
8809         return FALSE;
8810     }
8811
8812     switch (c) {
8813
8814         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8815         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8816         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8817         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8818         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8819         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8820         case LOCALE_PAT_MOD:
8821
8822             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8823              * can't be regex modifiers.
8824              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8825              * much as easily done.  s///lei, for example, has to mean regex
8826              * modifiers if it's not an error (as does any word character
8827              * following the 'e').  Otherwise, we resolve to the backwards-
8828              * compatible, but less likely 's/// le ...', i.e. as meaning
8829              * less-than-or-equal.  The reason it's not likely is that s//
8830              * returns a number for code in the field (/r returns a string, but
8831              * that wasn't added until the 5.13 series), and so '<=' should be
8832              * used for comparing, not 'le'. */
8833             if (*((*s) + 1) == 't') {
8834                 goto deprecate;
8835             }
8836             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8837
8838                 /* 'e' is valid only for substitutes, s///e.  If it is not
8839                  * valid in the current context, then 'm//le' must mean the
8840                  * comparison operator, so use the regular deprecation message.
8841                  */
8842                 if (! strchr(valid_flags, 'e')) {
8843                     goto deprecate;
8844                 }
8845                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8846                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8847                 return FALSE;
8848             }
8849             if (*charset) {
8850                 goto multiple_charsets;
8851             }
8852             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8853             *charset = c;
8854             break;
8855         case UNICODE_PAT_MOD:
8856             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8857              * 'n' means they can't be regex modifiers */
8858             if (*((*s) + 1) == 'n') {
8859                 goto deprecate;
8860             }
8861             if (*charset) {
8862                 goto multiple_charsets;
8863             }
8864             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8865             *charset = c;
8866             break;
8867         case ASCII_RESTRICT_PAT_MOD:
8868             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8869              * can't be regex modifiers */
8870             if (*((*s) + 1) == 'n') {
8871                 goto deprecate;
8872             }
8873             if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
8874                 /* Doubled modifier implies more restricted */
8875                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8876                 (*s)++;
8877             }
8878             else {
8879                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8880             }
8881             if (*charset) { /* Do this after the increment of *s in /aa, so
8882                                the return advances the ptr correctly */
8883                 goto multiple_charsets;
8884             }
8885             *charset = c;
8886             break;
8887         case DEPENDS_PAT_MOD:
8888             if (*charset) {
8889                 goto multiple_charsets;
8890             }
8891             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8892             *charset = c;
8893             break;
8894     }
8895
8896     (*s)++;
8897     return TRUE;
8898
8899     deprecate:
8900         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8901             "Having no space between pattern and following word is deprecated");
8902         return FALSE;
8903
8904     multiple_charsets:
8905         if (*charset != c) {
8906             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8907         }
8908         else {
8909             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8910         }
8911
8912         /* Pretend that it worked, so will continue processing before dieing */
8913         (*s)++;
8914         return TRUE;
8915 }
8916
8917 STATIC char *
8918 S_scan_pat(pTHX_ char *start, I32 type)
8919 {
8920     dVAR;
8921     PMOP *pm;
8922     char *s = scan_str(start,!!PL_madskills,FALSE);
8923     const char * const valid_flags =
8924         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8925     char charset = '\0';    /* character set modifier */
8926 #ifdef PERL_MAD
8927     char *modstart;
8928 #endif
8929
8930     PERL_ARGS_ASSERT_SCAN_PAT;
8931
8932     if (!s) {
8933         const char * const delimiter = skipspace(start);
8934         Perl_croak(aTHX_
8935                    (const char *)
8936                    (*delimiter == '?'
8937                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8938                     : "Search pattern not terminated" ));
8939     }
8940
8941     pm = (PMOP*)newPMOP(type, 0);
8942     if (PL_multi_open == '?') {
8943         /* This is the only point in the code that sets PMf_ONCE:  */
8944         pm->op_pmflags |= PMf_ONCE;
8945
8946         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8947            allows us to restrict the list needed by reset to just the ??
8948            matches.  */
8949         assert(type != OP_TRANS);
8950         if (PL_curstash) {
8951             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8952             U32 elements;
8953             if (!mg) {
8954                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8955                                  0);
8956             }
8957             elements = mg->mg_len / sizeof(PMOP**);
8958             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8959             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8960             mg->mg_len = elements * sizeof(PMOP**);
8961             PmopSTASH_set(pm,PL_curstash);
8962         }
8963     }
8964 #ifdef PERL_MAD
8965     modstart = s;
8966 #endif
8967     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8968 #ifdef PERL_MAD
8969     if (PL_madskills && modstart != s) {
8970         SV* tmptoken = newSVpvn(modstart, s - modstart);
8971         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8972     }
8973 #endif
8974     /* issue a warning if /c is specified,but /g is not */
8975     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8976     {
8977         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8978                        "Use of /c modifier is meaningless without /g" );
8979     }
8980
8981     PL_lex_op = (OP*)pm;
8982     pl_yylval.ival = OP_MATCH;
8983     return s;
8984 }
8985
8986 STATIC char *
8987 S_scan_subst(pTHX_ char *start)
8988 {
8989     dVAR;
8990     char *s;
8991     register PMOP *pm;
8992     I32 first_start;
8993     I32 es = 0;
8994     char charset = '\0';    /* character set modifier */
8995 #ifdef PERL_MAD
8996     char *modstart;
8997 #endif
8998
8999     PERL_ARGS_ASSERT_SCAN_SUBST;
9000
9001     pl_yylval.ival = OP_NULL;
9002
9003     s = scan_str(start,!!PL_madskills,FALSE);
9004
9005     if (!s)
9006         Perl_croak(aTHX_ "Substitution pattern not terminated");
9007
9008     if (s[-1] == PL_multi_open)
9009         s--;
9010 #ifdef PERL_MAD
9011     if (PL_madskills) {
9012         CURMAD('q', PL_thisopen);
9013         CURMAD('_', PL_thiswhite);
9014         CURMAD('E', PL_thisstuff);
9015         CURMAD('Q', PL_thisclose);
9016         PL_realtokenstart = s - SvPVX(PL_linestr);
9017     }
9018 #endif
9019
9020     first_start = PL_multi_start;
9021     s = scan_str(s,!!PL_madskills,FALSE);
9022     if (!s) {
9023         if (PL_lex_stuff) {
9024             SvREFCNT_dec(PL_lex_stuff);
9025             PL_lex_stuff = NULL;
9026         }
9027         Perl_croak(aTHX_ "Substitution replacement not terminated");
9028     }
9029     PL_multi_start = first_start;       /* so whole substitution is taken together */
9030
9031     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9032
9033 #ifdef PERL_MAD
9034     if (PL_madskills) {
9035         CURMAD('z', PL_thisopen);
9036         CURMAD('R', PL_thisstuff);
9037         CURMAD('Z', PL_thisclose);
9038     }
9039     modstart = s;
9040 #endif
9041
9042     while (*s) {
9043         if (*s == EXEC_PAT_MOD) {
9044             s++;
9045             es++;
9046         }
9047         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9048         {
9049             break;
9050         }
9051     }
9052
9053 #ifdef PERL_MAD
9054     if (PL_madskills) {
9055         if (modstart != s)
9056             curmad('m', newSVpvn(modstart, s - modstart));
9057         append_madprops(PL_thismad, (OP*)pm, 0);
9058         PL_thismad = 0;
9059     }
9060 #endif
9061     if ((pm->op_pmflags & PMf_CONTINUE)) {
9062         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9063     }
9064
9065     if (es) {
9066         SV * const repl = newSVpvs("");
9067
9068         PL_sublex_info.super_bufptr = s;
9069         PL_sublex_info.super_bufend = PL_bufend;
9070         PL_multi_end = 0;
9071         pm->op_pmflags |= PMf_EVAL;
9072         while (es-- > 0) {
9073             if (es)
9074                 sv_catpvs(repl, "eval ");
9075             else
9076                 sv_catpvs(repl, "do ");
9077         }
9078         sv_catpvs(repl, "{");
9079         sv_catsv(repl, PL_lex_repl);
9080         if (strchr(SvPVX(PL_lex_repl), '#'))
9081             sv_catpvs(repl, "\n");
9082         sv_catpvs(repl, "}");
9083         SvEVALED_on(repl);
9084         SvREFCNT_dec(PL_lex_repl);
9085         PL_lex_repl = repl;
9086     }
9087
9088     PL_lex_op = (OP*)pm;
9089     pl_yylval.ival = OP_SUBST;
9090     return s;
9091 }
9092
9093 STATIC char *
9094 S_scan_trans(pTHX_ char *start)
9095 {
9096     dVAR;
9097     register char* s;
9098     OP *o;
9099     short *tbl;
9100     U8 squash;
9101     U8 del;
9102     U8 complement;
9103     bool nondestruct = 0;
9104 #ifdef PERL_MAD
9105     char *modstart;
9106 #endif
9107
9108     PERL_ARGS_ASSERT_SCAN_TRANS;
9109
9110     pl_yylval.ival = OP_NULL;
9111
9112     s = scan_str(start,!!PL_madskills,FALSE);
9113     if (!s)
9114         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9115
9116     if (s[-1] == PL_multi_open)
9117         s--;
9118 #ifdef PERL_MAD
9119     if (PL_madskills) {
9120         CURMAD('q', PL_thisopen);
9121         CURMAD('_', PL_thiswhite);
9122         CURMAD('E', PL_thisstuff);
9123         CURMAD('Q', PL_thisclose);
9124         PL_realtokenstart = s - SvPVX(PL_linestr);
9125     }
9126 #endif
9127
9128     s = scan_str(s,!!PL_madskills,FALSE);
9129     if (!s) {
9130         if (PL_lex_stuff) {
9131             SvREFCNT_dec(PL_lex_stuff);
9132             PL_lex_stuff = NULL;
9133         }
9134         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9135     }
9136     if (PL_madskills) {
9137         CURMAD('z', PL_thisopen);
9138         CURMAD('R', PL_thisstuff);
9139         CURMAD('Z', PL_thisclose);
9140     }
9141
9142     complement = del = squash = 0;
9143 #ifdef PERL_MAD
9144     modstart = s;
9145 #endif
9146     while (1) {
9147         switch (*s) {
9148         case 'c':
9149             complement = OPpTRANS_COMPLEMENT;
9150             break;
9151         case 'd':
9152             del = OPpTRANS_DELETE;
9153             break;
9154         case 's':
9155             squash = OPpTRANS_SQUASH;
9156             break;
9157         case 'r':
9158             nondestruct = 1;
9159             break;
9160         default:
9161             goto no_more;
9162         }
9163         s++;
9164     }
9165   no_more:
9166
9167     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9168     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9169     o->op_private &= ~OPpTRANS_ALL;
9170     o->op_private |= del|squash|complement|
9171       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9172       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9173
9174     PL_lex_op = o;
9175     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9176
9177 #ifdef PERL_MAD
9178     if (PL_madskills) {
9179         if (modstart != s)
9180             curmad('m', newSVpvn(modstart, s - modstart));
9181         append_madprops(PL_thismad, o, 0);
9182         PL_thismad = 0;
9183     }
9184 #endif
9185
9186     return s;
9187 }
9188
9189 STATIC char *
9190 S_scan_heredoc(pTHX_ register char *s)
9191 {
9192     dVAR;
9193     SV *herewas;
9194     I32 op_type = OP_SCALAR;
9195     I32 len;
9196     SV *tmpstr;
9197     char term;
9198     const char *found_newline;
9199     register char *d;
9200     register char *e;
9201     char *peek;
9202     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9203 #ifdef PERL_MAD
9204     I32 stuffstart = s - SvPVX(PL_linestr);
9205     char *tstart;
9206
9207     PL_realtokenstart = -1;
9208 #endif
9209
9210     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9211
9212     s += 2;
9213     d = PL_tokenbuf;
9214     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9215     if (!outer)
9216         *d++ = '\n';
9217     peek = s;
9218     while (SPACE_OR_TAB(*peek))
9219         peek++;
9220     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9221         s = peek;
9222         term = *s++;
9223         s = delimcpy(d, e, s, PL_bufend, term, &len);
9224         d += len;
9225         if (s < PL_bufend)
9226             s++;
9227     }
9228     else {
9229         if (*s == '\\')
9230             s++, term = '\'';
9231         else
9232             term = '"';
9233         if (!isALNUM_lazy_if(s,UTF))
9234             deprecate("bare << to mean <<\"\"");
9235         for (; isALNUM_lazy_if(s,UTF); s++) {
9236             if (d < e)
9237                 *d++ = *s;
9238         }
9239     }
9240     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9241         Perl_croak(aTHX_ "Delimiter for here document is too long");
9242     *d++ = '\n';
9243     *d = '\0';
9244     len = d - PL_tokenbuf;
9245
9246 #ifdef PERL_MAD
9247     if (PL_madskills) {
9248         tstart = PL_tokenbuf + !outer;
9249         PL_thisclose = newSVpvn(tstart, len - !outer);
9250         tstart = SvPVX(PL_linestr) + stuffstart;
9251         PL_thisopen = newSVpvn(tstart, s - tstart);
9252         stuffstart = s - SvPVX(PL_linestr);
9253     }
9254 #endif
9255 #ifndef PERL_STRICT_CR
9256     d = strchr(s, '\r');
9257     if (d) {
9258         char * const olds = s;
9259         s = d;
9260         while (s < PL_bufend) {
9261             if (*s == '\r') {
9262                 *d++ = '\n';
9263                 if (*++s == '\n')
9264                     s++;
9265             }
9266             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9267                 *d++ = *s++;
9268                 s++;
9269             }
9270             else
9271                 *d++ = *s++;
9272         }
9273         *d = '\0';
9274         PL_bufend = d;
9275         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9276         s = olds;
9277     }
9278 #endif
9279 #ifdef PERL_MAD
9280     found_newline = 0;
9281 #endif
9282     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9283         herewas = newSVpvn(s,PL_bufend-s);
9284     }
9285     else {
9286 #ifdef PERL_MAD
9287         herewas = newSVpvn(s-1,found_newline-s+1);
9288 #else
9289         s--;
9290         herewas = newSVpvn(s,found_newline-s);
9291 #endif
9292     }
9293 #ifdef PERL_MAD
9294     if (PL_madskills) {
9295         tstart = SvPVX(PL_linestr) + stuffstart;
9296         if (PL_thisstuff)
9297             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9298         else
9299             PL_thisstuff = newSVpvn(tstart, s - tstart);
9300     }
9301 #endif
9302     s += SvCUR(herewas);
9303
9304 #ifdef PERL_MAD
9305     stuffstart = s - SvPVX(PL_linestr);
9306
9307     if (found_newline)
9308         s--;
9309 #endif
9310
9311     tmpstr = newSV_type(SVt_PVIV);
9312     SvGROW(tmpstr, 80);
9313     if (term == '\'') {
9314         op_type = OP_CONST;
9315         SvIV_set(tmpstr, -1);
9316     }
9317     else if (term == '`') {
9318         op_type = OP_BACKTICK;
9319         SvIV_set(tmpstr, '\\');
9320     }
9321
9322     CLINE;
9323     PL_multi_start = CopLINE(PL_curcop);
9324     PL_multi_open = PL_multi_close = '<';
9325     term = *PL_tokenbuf;
9326     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9327         char * const bufptr = PL_sublex_info.super_bufptr;
9328         char * const bufend = PL_sublex_info.super_bufend;
9329         char * const olds = s - SvCUR(herewas);
9330         s = strchr(bufptr, '\n');
9331         if (!s)
9332             s = bufend;
9333         d = s;
9334         while (s < bufend &&
9335           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9336             if (*s++ == '\n')
9337                 CopLINE_inc(PL_curcop);
9338         }
9339         if (s >= bufend) {
9340             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9341             missingterm(PL_tokenbuf);
9342         }
9343         sv_setpvn(herewas,bufptr,d-bufptr+1);
9344         sv_setpvn(tmpstr,d+1,s-d);
9345         s += len - 1;
9346         sv_catpvn(herewas,s,bufend-s);
9347         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9348
9349         s = olds;
9350         goto retval;
9351     }
9352     else if (!outer) {
9353         d = s;
9354         while (s < PL_bufend &&
9355           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9356             if (*s++ == '\n')
9357                 CopLINE_inc(PL_curcop);
9358         }
9359         if (s >= PL_bufend) {
9360             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9361             missingterm(PL_tokenbuf);
9362         }
9363         sv_setpvn(tmpstr,d+1,s-d);
9364 #ifdef PERL_MAD
9365         if (PL_madskills) {
9366             if (PL_thisstuff)
9367                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9368             else
9369                 PL_thisstuff = newSVpvn(d + 1, s - d);
9370             stuffstart = s - SvPVX(PL_linestr);
9371         }
9372 #endif
9373         s += len - 1;
9374         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9375
9376         sv_catpvn(herewas,s,PL_bufend-s);
9377         sv_setsv(PL_linestr,herewas);
9378         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9379         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9380         PL_last_lop = PL_last_uni = NULL;
9381     }
9382     else
9383         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9384     while (s >= PL_bufend) {    /* multiple line string? */
9385 #ifdef PERL_MAD
9386         if (PL_madskills) {
9387             tstart = SvPVX(PL_linestr) + stuffstart;
9388             if (PL_thisstuff)
9389                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9390             else
9391                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9392         }
9393 #endif
9394         PL_bufptr = s;
9395         CopLINE_inc(PL_curcop);
9396         if (!outer || !lex_next_chunk(0)) {
9397             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9398             missingterm(PL_tokenbuf);
9399         }
9400         CopLINE_dec(PL_curcop);
9401         s = PL_bufptr;
9402 #ifdef PERL_MAD
9403         stuffstart = s - SvPVX(PL_linestr);
9404 #endif
9405         CopLINE_inc(PL_curcop);
9406         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9407         PL_last_lop = PL_last_uni = NULL;
9408 #ifndef PERL_STRICT_CR
9409         if (PL_bufend - PL_linestart >= 2) {
9410             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9411                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9412             {
9413                 PL_bufend[-2] = '\n';
9414                 PL_bufend--;
9415                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9416             }
9417             else if (PL_bufend[-1] == '\r')
9418                 PL_bufend[-1] = '\n';
9419         }
9420         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9421             PL_bufend[-1] = '\n';
9422 #endif
9423         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9424             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9425             *(SvPVX(PL_linestr) + off ) = ' ';
9426             sv_catsv(PL_linestr,herewas);
9427             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9428             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9429         }
9430         else {
9431             s = PL_bufend;
9432             sv_catsv(tmpstr,PL_linestr);
9433         }
9434     }
9435     s++;
9436 retval:
9437     PL_multi_end = CopLINE(PL_curcop);
9438     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9439         SvPV_shrink_to_cur(tmpstr);
9440     }
9441     SvREFCNT_dec(herewas);
9442     if (!IN_BYTES) {
9443         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9444             SvUTF8_on(tmpstr);
9445         else if (PL_encoding)
9446             sv_recode_to_utf8(tmpstr, PL_encoding);
9447     }
9448     PL_lex_stuff = tmpstr;
9449     pl_yylval.ival = op_type;
9450     return s;
9451 }
9452
9453 /* scan_inputsymbol
9454    takes: current position in input buffer
9455    returns: new position in input buffer
9456    side-effects: pl_yylval and lex_op are set.
9457
9458    This code handles:
9459
9460    <>           read from ARGV
9461    <FH>         read from filehandle
9462    <pkg::FH>    read from package qualified filehandle
9463    <pkg'FH>     read from package qualified filehandle
9464    <$fh>        read from filehandle in $fh
9465    <*.h>        filename glob
9466
9467 */
9468
9469 STATIC char *
9470 S_scan_inputsymbol(pTHX_ char *start)
9471 {
9472     dVAR;
9473     register char *s = start;           /* current position in buffer */
9474     char *end;
9475     I32 len;
9476     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9477     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9478
9479     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9480
9481     end = strchr(s, '\n');
9482     if (!end)
9483         end = PL_bufend;
9484     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9485
9486     /* die if we didn't have space for the contents of the <>,
9487        or if it didn't end, or if we see a newline
9488     */
9489
9490     if (len >= (I32)sizeof PL_tokenbuf)
9491         Perl_croak(aTHX_ "Excessively long <> operator");
9492     if (s >= end)
9493         Perl_croak(aTHX_ "Unterminated <> operator");
9494
9495     s++;
9496
9497     /* check for <$fh>
9498        Remember, only scalar variables are interpreted as filehandles by
9499        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9500        treated as a glob() call.
9501        This code makes use of the fact that except for the $ at the front,
9502        a scalar variable and a filehandle look the same.
9503     */
9504     if (*d == '$' && d[1]) d++;
9505
9506     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9507     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9508         d++;
9509
9510     /* If we've tried to read what we allow filehandles to look like, and
9511        there's still text left, then it must be a glob() and not a getline.
9512        Use scan_str to pull out the stuff between the <> and treat it
9513        as nothing more than a string.
9514     */
9515
9516     if (d - PL_tokenbuf != len) {
9517         pl_yylval.ival = OP_GLOB;
9518         s = scan_str(start,!!PL_madskills,FALSE);
9519         if (!s)
9520            Perl_croak(aTHX_ "Glob not terminated");
9521         return s;
9522     }
9523     else {
9524         bool readline_overriden = FALSE;
9525         GV *gv_readline;
9526         GV **gvp;
9527         /* we're in a filehandle read situation */
9528         d = PL_tokenbuf;
9529
9530         /* turn <> into <ARGV> */
9531         if (!len)
9532             Copy("ARGV",d,5,char);
9533
9534         /* Check whether readline() is overriden */
9535         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9536         if ((gv_readline
9537                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9538                 ||
9539                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9540                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9541                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9542             readline_overriden = TRUE;
9543
9544         /* if <$fh>, create the ops to turn the variable into a
9545            filehandle
9546         */
9547         if (*d == '$') {
9548             /* try to find it in the pad for this block, otherwise find
9549                add symbol table ops
9550             */
9551             const PADOFFSET tmp = pad_findmy(d, len, 0);
9552             if (tmp != NOT_IN_PAD) {
9553                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9554                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9555                     HEK * const stashname = HvNAME_HEK(stash);
9556                     SV * const sym = sv_2mortal(newSVhek(stashname));
9557                     sv_catpvs(sym, "::");
9558                     sv_catpv(sym, d+1);
9559                     d = SvPVX(sym);
9560                     goto intro_sym;
9561                 }
9562                 else {
9563                     OP * const o = newOP(OP_PADSV, 0);
9564                     o->op_targ = tmp;
9565                     PL_lex_op = readline_overriden
9566                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9567                                 op_append_elem(OP_LIST, o,
9568                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9569                         : (OP*)newUNOP(OP_READLINE, 0, o);
9570                 }
9571             }
9572             else {
9573                 GV *gv;
9574                 ++d;
9575 intro_sym:
9576                 gv = gv_fetchpv(d,
9577                                 (PL_in_eval
9578                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9579                                  : GV_ADDMULTI),
9580                                 SVt_PV);
9581                 PL_lex_op = readline_overriden
9582                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9583                             op_append_elem(OP_LIST,
9584                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9585                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9586                     : (OP*)newUNOP(OP_READLINE, 0,
9587                             newUNOP(OP_RV2SV, 0,
9588                                 newGVOP(OP_GV, 0, gv)));
9589             }
9590             if (!readline_overriden)
9591                 PL_lex_op->op_flags |= OPf_SPECIAL;
9592             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9593             pl_yylval.ival = OP_NULL;
9594         }
9595
9596         /* If it's none of the above, it must be a literal filehandle
9597            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9598         else {
9599             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9600             PL_lex_op = readline_overriden
9601                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9602                         op_append_elem(OP_LIST,
9603                             newGVOP(OP_GV, 0, gv),
9604                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9605                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9606             pl_yylval.ival = OP_NULL;
9607         }
9608     }
9609
9610     return s;
9611 }
9612
9613
9614 /* scan_str
9615    takes: start position in buffer
9616           keep_quoted preserve \ on the embedded delimiter(s)
9617           keep_delims preserve the delimiters around the string
9618    returns: position to continue reading from buffer
9619    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9620         updates the read buffer.
9621
9622    This subroutine pulls a string out of the input.  It is called for:
9623         q               single quotes           q(literal text)
9624         '               single quotes           'literal text'
9625         qq              double quotes           qq(interpolate $here please)
9626         "               double quotes           "interpolate $here please"
9627         qx              backticks               qx(/bin/ls -l)
9628         `               backticks               `/bin/ls -l`
9629         qw              quote words             @EXPORT_OK = qw( func() $spam )
9630         m//             regexp match            m/this/
9631         s///            regexp substitute       s/this/that/
9632         tr///           string transliterate    tr/this/that/
9633         y///            string transliterate    y/this/that/
9634         ($*@)           sub prototypes          sub foo ($)
9635         (stuff)         sub attr parameters     sub foo : attr(stuff)
9636         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9637
9638    In most of these cases (all but <>, patterns and transliterate)
9639    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9640    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9641    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9642    calls scan_str().
9643
9644    It skips whitespace before the string starts, and treats the first
9645    character as the delimiter.  If the delimiter is one of ([{< then
9646    the corresponding "close" character )]}> is used as the closing
9647    delimiter.  It allows quoting of delimiters, and if the string has
9648    balanced delimiters ([{<>}]) it allows nesting.
9649
9650    On success, the SV with the resulting string is put into lex_stuff or,
9651    if that is already non-NULL, into lex_repl. The second case occurs only
9652    when parsing the RHS of the special constructs s/// and tr/// (y///).
9653    For convenience, the terminating delimiter character is stuffed into
9654    SvIVX of the SV.
9655 */
9656
9657 STATIC char *
9658 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9659 {
9660     dVAR;
9661     SV *sv;                             /* scalar value: string */
9662     const char *tmps;                   /* temp string, used for delimiter matching */
9663     register char *s = start;           /* current position in the buffer */
9664     register char term;                 /* terminating character */
9665     register char *to;                  /* current position in the sv's data */
9666     I32 brackets = 1;                   /* bracket nesting level */
9667     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9668     I32 termcode;                       /* terminating char. code */
9669     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9670     STRLEN termlen;                     /* length of terminating string */
9671     int last_off = 0;                   /* last position for nesting bracket */
9672 #ifdef PERL_MAD
9673     int stuffstart;
9674     char *tstart;
9675 #endif
9676
9677     PERL_ARGS_ASSERT_SCAN_STR;
9678
9679     /* skip space before the delimiter */
9680     if (isSPACE(*s)) {
9681         s = PEEKSPACE(s);
9682     }
9683
9684 #ifdef PERL_MAD
9685     if (PL_realtokenstart >= 0) {
9686         stuffstart = PL_realtokenstart;
9687         PL_realtokenstart = -1;
9688     }
9689     else
9690         stuffstart = start - SvPVX(PL_linestr);
9691 #endif
9692     /* mark where we are, in case we need to report errors */
9693     CLINE;
9694
9695     /* after skipping whitespace, the next character is the terminator */
9696     term = *s;
9697     if (!UTF) {
9698         termcode = termstr[0] = term;
9699         termlen = 1;
9700     }
9701     else {
9702         termcode = utf8_to_uvchr((U8*)s, &termlen);
9703         Copy(s, termstr, termlen, U8);
9704         if (!UTF8_IS_INVARIANT(term))
9705             has_utf8 = TRUE;
9706     }
9707
9708     /* mark where we are */
9709     PL_multi_start = CopLINE(PL_curcop);
9710     PL_multi_open = term;
9711
9712     /* find corresponding closing delimiter */
9713     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9714         termcode = termstr[0] = term = tmps[5];
9715
9716     PL_multi_close = term;
9717
9718     /* create a new SV to hold the contents.  79 is the SV's initial length.
9719        What a random number. */
9720     sv = newSV_type(SVt_PVIV);
9721     SvGROW(sv, 80);
9722     SvIV_set(sv, termcode);
9723     (void)SvPOK_only(sv);               /* validate pointer */
9724
9725     /* move past delimiter and try to read a complete string */
9726     if (keep_delims)
9727         sv_catpvn(sv, s, termlen);
9728     s += termlen;
9729 #ifdef PERL_MAD
9730     tstart = SvPVX(PL_linestr) + stuffstart;
9731     if (!PL_thisopen && !keep_delims) {
9732         PL_thisopen = newSVpvn(tstart, s - tstart);
9733         stuffstart = s - SvPVX(PL_linestr);
9734     }
9735 #endif
9736     for (;;) {
9737         if (PL_encoding && !UTF) {
9738             bool cont = TRUE;
9739
9740             while (cont) {
9741                 int offset = s - SvPVX_const(PL_linestr);
9742                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9743                                            &offset, (char*)termstr, termlen);
9744                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9745                 char * const svlast = SvEND(sv) - 1;
9746
9747                 for (; s < ns; s++) {
9748                     if (*s == '\n' && !PL_rsfp)
9749                         CopLINE_inc(PL_curcop);
9750                 }
9751                 if (!found)
9752                     goto read_more_line;
9753                 else {
9754                     /* handle quoted delimiters */
9755                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9756                         const char *t;
9757                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9758                             t--;
9759                         if ((svlast-1 - t) % 2) {
9760                             if (!keep_quoted) {
9761                                 *(svlast-1) = term;
9762                                 *svlast = '\0';
9763                                 SvCUR_set(sv, SvCUR(sv) - 1);
9764                             }
9765                             continue;
9766                         }
9767                     }
9768                     if (PL_multi_open == PL_multi_close) {
9769                         cont = FALSE;
9770                     }
9771                     else {
9772                         const char *t;
9773                         char *w;
9774                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9775                             /* At here, all closes are "was quoted" one,
9776                                so we don't check PL_multi_close. */
9777                             if (*t == '\\') {
9778                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9779                                     t++;
9780                                 else
9781                                     *w++ = *t++;
9782                             }
9783                             else if (*t == PL_multi_open)
9784                                 brackets++;
9785
9786                             *w = *t;
9787                         }
9788                         if (w < t) {
9789                             *w++ = term;
9790                             *w = '\0';
9791                             SvCUR_set(sv, w - SvPVX_const(sv));
9792                         }
9793                         last_off = w - SvPVX(sv);
9794                         if (--brackets <= 0)
9795                             cont = FALSE;
9796                     }
9797                 }
9798             }
9799             if (!keep_delims) {
9800                 SvCUR_set(sv, SvCUR(sv) - 1);
9801                 *SvEND(sv) = '\0';
9802             }
9803             break;
9804         }
9805
9806         /* extend sv if need be */
9807         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9808         /* set 'to' to the next character in the sv's string */
9809         to = SvPVX(sv)+SvCUR(sv);
9810
9811         /* if open delimiter is the close delimiter read unbridle */
9812         if (PL_multi_open == PL_multi_close) {
9813             for (; s < PL_bufend; s++,to++) {
9814                 /* embedded newlines increment the current line number */
9815                 if (*s == '\n' && !PL_rsfp)
9816                     CopLINE_inc(PL_curcop);
9817                 /* handle quoted delimiters */
9818                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9819                     if (!keep_quoted && s[1] == term)
9820                         s++;
9821                 /* any other quotes are simply copied straight through */
9822                     else
9823                         *to++ = *s++;
9824                 }
9825                 /* terminate when run out of buffer (the for() condition), or
9826                    have found the terminator */
9827                 else if (*s == term) {
9828                     if (termlen == 1)
9829                         break;
9830                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9831                         break;
9832                 }
9833                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9834                     has_utf8 = TRUE;
9835                 *to = *s;
9836             }
9837         }
9838
9839         /* if the terminator isn't the same as the start character (e.g.,
9840            matched brackets), we have to allow more in the quoting, and
9841            be prepared for nested brackets.
9842         */
9843         else {
9844             /* read until we run out of string, or we find the terminator */
9845             for (; s < PL_bufend; s++,to++) {
9846                 /* embedded newlines increment the line count */
9847                 if (*s == '\n' && !PL_rsfp)
9848                     CopLINE_inc(PL_curcop);
9849                 /* backslashes can escape the open or closing characters */
9850                 if (*s == '\\' && s+1 < PL_bufend) {
9851                     if (!keep_quoted &&
9852                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9853                         s++;
9854                     else
9855                         *to++ = *s++;
9856                 }
9857                 /* allow nested opens and closes */
9858                 else if (*s == PL_multi_close && --brackets <= 0)
9859                     break;
9860                 else if (*s == PL_multi_open)
9861                     brackets++;
9862                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9863                     has_utf8 = TRUE;
9864                 *to = *s;
9865             }
9866         }
9867         /* terminate the copied string and update the sv's end-of-string */
9868         *to = '\0';
9869         SvCUR_set(sv, to - SvPVX_const(sv));
9870
9871         /*
9872          * this next chunk reads more into the buffer if we're not done yet
9873          */
9874
9875         if (s < PL_bufend)
9876             break;              /* handle case where we are done yet :-) */
9877
9878 #ifndef PERL_STRICT_CR
9879         if (to - SvPVX_const(sv) >= 2) {
9880             if ((to[-2] == '\r' && to[-1] == '\n') ||
9881                 (to[-2] == '\n' && to[-1] == '\r'))
9882             {
9883                 to[-2] = '\n';
9884                 to--;
9885                 SvCUR_set(sv, to - SvPVX_const(sv));
9886             }
9887             else if (to[-1] == '\r')
9888                 to[-1] = '\n';
9889         }
9890         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9891             to[-1] = '\n';
9892 #endif
9893
9894      read_more_line:
9895         /* if we're out of file, or a read fails, bail and reset the current
9896            line marker so we can report where the unterminated string began
9897         */
9898 #ifdef PERL_MAD
9899         if (PL_madskills) {
9900             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9901             if (PL_thisstuff)
9902                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9903             else
9904                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9905         }
9906 #endif
9907         CopLINE_inc(PL_curcop);
9908         PL_bufptr = PL_bufend;
9909         if (!lex_next_chunk(0)) {
9910             sv_free(sv);
9911             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9912             return NULL;
9913         }
9914         s = PL_bufptr;
9915 #ifdef PERL_MAD
9916         stuffstart = 0;
9917 #endif
9918     }
9919
9920     /* at this point, we have successfully read the delimited string */
9921
9922     if (!PL_encoding || UTF) {
9923 #ifdef PERL_MAD
9924         if (PL_madskills) {
9925             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9926             const int len = s - tstart;
9927             if (PL_thisstuff)
9928                 sv_catpvn(PL_thisstuff, tstart, len);
9929             else
9930                 PL_thisstuff = newSVpvn(tstart, len);
9931             if (!PL_thisclose && !keep_delims)
9932                 PL_thisclose = newSVpvn(s,termlen);
9933         }
9934 #endif
9935
9936         if (keep_delims)
9937             sv_catpvn(sv, s, termlen);
9938         s += termlen;
9939     }
9940 #ifdef PERL_MAD
9941     else {
9942         if (PL_madskills) {
9943             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9944             const int len = s - tstart - termlen;
9945             if (PL_thisstuff)
9946                 sv_catpvn(PL_thisstuff, tstart, len);
9947             else
9948                 PL_thisstuff = newSVpvn(tstart, len);
9949             if (!PL_thisclose && !keep_delims)
9950                 PL_thisclose = newSVpvn(s - termlen,termlen);
9951         }
9952     }
9953 #endif
9954     if (has_utf8 || PL_encoding)
9955         SvUTF8_on(sv);
9956
9957     PL_multi_end = CopLINE(PL_curcop);
9958
9959     /* if we allocated too much space, give some back */
9960     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9961         SvLEN_set(sv, SvCUR(sv) + 1);
9962         SvPV_renew(sv, SvLEN(sv));
9963     }
9964
9965     /* decide whether this is the first or second quoted string we've read
9966        for this op
9967     */
9968
9969     if (PL_lex_stuff)
9970         PL_lex_repl = sv;
9971     else
9972         PL_lex_stuff = sv;
9973     return s;
9974 }
9975
9976 /*
9977   scan_num
9978   takes: pointer to position in buffer
9979   returns: pointer to new position in buffer
9980   side-effects: builds ops for the constant in pl_yylval.op
9981
9982   Read a number in any of the formats that Perl accepts:
9983
9984   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9985   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9986   0b[01](_?[01])*
9987   0[0-7](_?[0-7])*
9988   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9989
9990   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9991   thing it reads.
9992
9993   If it reads a number without a decimal point or an exponent, it will
9994   try converting the number to an integer and see if it can do so
9995   without loss of precision.
9996 */
9997
9998 char *
9999 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10000 {
10001     dVAR;
10002     register const char *s = start;     /* current position in buffer */
10003     register char *d;                   /* destination in temp buffer */
10004     register char *e;                   /* end of temp buffer */
10005     NV nv;                              /* number read, as a double */
10006     SV *sv = NULL;                      /* place to put the converted number */
10007     bool floatit;                       /* boolean: int or float? */
10008     const char *lastub = NULL;          /* position of last underbar */
10009     static char const number_too_long[] = "Number too long";
10010
10011     PERL_ARGS_ASSERT_SCAN_NUM;
10012
10013     /* We use the first character to decide what type of number this is */
10014
10015     switch (*s) {
10016     default:
10017       Perl_croak(aTHX_ "panic: scan_num");
10018
10019     /* if it starts with a 0, it could be an octal number, a decimal in
10020        0.13 disguise, or a hexadecimal number, or a binary number. */
10021     case '0':
10022         {
10023           /* variables:
10024              u          holds the "number so far"
10025              shift      the power of 2 of the base
10026                         (hex == 4, octal == 3, binary == 1)
10027              overflowed was the number more than we can hold?
10028
10029              Shift is used when we add a digit.  It also serves as an "are
10030              we in octal/hex/binary?" indicator to disallow hex characters
10031              when in octal mode.
10032            */
10033             NV n = 0.0;
10034             UV u = 0;
10035             I32 shift;
10036             bool overflowed = FALSE;
10037             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10038             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10039             static const char* const bases[5] =
10040               { "", "binary", "", "octal", "hexadecimal" };
10041             static const char* const Bases[5] =
10042               { "", "Binary", "", "Octal", "Hexadecimal" };
10043             static const char* const maxima[5] =
10044               { "",
10045                 "0b11111111111111111111111111111111",
10046                 "",
10047                 "037777777777",
10048                 "0xffffffff" };
10049             const char *base, *Base, *max;
10050
10051             /* check for hex */
10052             if (s[1] == 'x' || s[1] == 'X') {
10053                 shift = 4;
10054                 s += 2;
10055                 just_zero = FALSE;
10056             } else if (s[1] == 'b' || s[1] == 'B') {
10057                 shift = 1;
10058                 s += 2;
10059                 just_zero = FALSE;
10060             }
10061             /* check for a decimal in disguise */
10062             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10063                 goto decimal;
10064             /* so it must be octal */
10065             else {
10066                 shift = 3;
10067                 s++;
10068             }
10069
10070             if (*s == '_') {
10071                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10072                                "Misplaced _ in number");
10073                lastub = s++;
10074             }
10075
10076             base = bases[shift];
10077             Base = Bases[shift];
10078             max  = maxima[shift];
10079
10080             /* read the rest of the number */
10081             for (;;) {
10082                 /* x is used in the overflow test,
10083                    b is the digit we're adding on. */
10084                 UV x, b;
10085
10086                 switch (*s) {
10087
10088                 /* if we don't mention it, we're done */
10089                 default:
10090                     goto out;
10091
10092                 /* _ are ignored -- but warned about if consecutive */
10093                 case '_':
10094                     if (lastub && s == lastub + 1)
10095                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10096                                        "Misplaced _ in number");
10097                     lastub = s++;
10098                     break;
10099
10100                 /* 8 and 9 are not octal */
10101                 case '8': case '9':
10102                     if (shift == 3)
10103                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10104                     /* FALL THROUGH */
10105
10106                 /* octal digits */
10107                 case '2': case '3': case '4':
10108                 case '5': case '6': case '7':
10109                     if (shift == 1)
10110                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10111                     /* FALL THROUGH */
10112
10113                 case '0': case '1':
10114                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10115                     goto digit;
10116
10117                 /* hex digits */
10118                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10119                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10120                     /* make sure they said 0x */
10121                     if (shift != 4)
10122                         goto out;
10123                     b = (*s++ & 7) + 9;
10124
10125                     /* Prepare to put the digit we have onto the end
10126                        of the number so far.  We check for overflows.
10127                     */
10128
10129                   digit:
10130                     just_zero = FALSE;
10131                     if (!overflowed) {
10132                         x = u << shift; /* make room for the digit */
10133
10134                         if ((x >> shift) != u
10135                             && !(PL_hints & HINT_NEW_BINARY)) {
10136                             overflowed = TRUE;
10137                             n = (NV) u;
10138                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10139                                              "Integer overflow in %s number",
10140                                              base);
10141                         } else
10142                             u = x | b;          /* add the digit to the end */
10143                     }
10144                     if (overflowed) {
10145                         n *= nvshift[shift];
10146                         /* If an NV has not enough bits in its
10147                          * mantissa to represent an UV this summing of
10148                          * small low-order numbers is a waste of time
10149                          * (because the NV cannot preserve the
10150                          * low-order bits anyway): we could just
10151                          * remember when did we overflow and in the
10152                          * end just multiply n by the right
10153                          * amount. */
10154                         n += (NV) b;
10155                     }
10156                     break;
10157                 }
10158             }
10159
10160           /* if we get here, we had success: make a scalar value from
10161              the number.
10162           */
10163           out:
10164
10165             /* final misplaced underbar check */
10166             if (s[-1] == '_') {
10167                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10168             }
10169
10170             if (overflowed) {
10171                 if (n > 4294967295.0)
10172                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10173                                    "%s number > %s non-portable",
10174                                    Base, max);
10175                 sv = newSVnv(n);
10176             }
10177             else {
10178 #if UVSIZE > 4
10179                 if (u > 0xffffffff)
10180                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10181                                    "%s number > %s non-portable",
10182                                    Base, max);
10183 #endif
10184                 sv = newSVuv(u);
10185             }
10186             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10187                 sv = new_constant(start, s - start, "integer",
10188                                   sv, NULL, NULL, 0);
10189             else if (PL_hints & HINT_NEW_BINARY)
10190                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10191         }
10192         break;
10193
10194     /*
10195       handle decimal numbers.
10196       we're also sent here when we read a 0 as the first digit
10197     */
10198     case '1': case '2': case '3': case '4': case '5':
10199     case '6': case '7': case '8': case '9': case '.':
10200       decimal:
10201         d = PL_tokenbuf;
10202         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10203         floatit = FALSE;
10204
10205         /* read next group of digits and _ and copy into d */
10206         while (isDIGIT(*s) || *s == '_') {
10207             /* skip underscores, checking for misplaced ones
10208                if -w is on
10209             */
10210             if (*s == '_') {
10211                 if (lastub && s == lastub + 1)
10212                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10213                                    "Misplaced _ in number");
10214                 lastub = s++;
10215             }
10216             else {
10217                 /* check for end of fixed-length buffer */
10218                 if (d >= e)
10219                     Perl_croak(aTHX_ number_too_long);
10220                 /* if we're ok, copy the character */
10221                 *d++ = *s++;
10222             }
10223         }
10224
10225         /* final misplaced underbar check */
10226         if (lastub && s == lastub + 1) {
10227             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10228         }
10229
10230         /* read a decimal portion if there is one.  avoid
10231            3..5 being interpreted as the number 3. followed
10232            by .5
10233         */
10234         if (*s == '.' && s[1] != '.') {
10235             floatit = TRUE;
10236             *d++ = *s++;
10237
10238             if (*s == '_') {
10239                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10240                                "Misplaced _ in number");
10241                 lastub = s;
10242             }
10243
10244             /* copy, ignoring underbars, until we run out of digits.
10245             */
10246             for (; isDIGIT(*s) || *s == '_'; s++) {
10247                 /* fixed length buffer check */
10248                 if (d >= e)
10249                     Perl_croak(aTHX_ number_too_long);
10250                 if (*s == '_') {
10251                    if (lastub && s == lastub + 1)
10252                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10253                                       "Misplaced _ in number");
10254                    lastub = s;
10255                 }
10256                 else
10257                     *d++ = *s;
10258             }
10259             /* fractional part ending in underbar? */
10260             if (s[-1] == '_') {
10261                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10262                                "Misplaced _ in number");
10263             }
10264             if (*s == '.' && isDIGIT(s[1])) {
10265                 /* oops, it's really a v-string, but without the "v" */
10266                 s = start;
10267                 goto vstring;
10268             }
10269         }
10270
10271         /* read exponent part, if present */
10272         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10273             floatit = TRUE;
10274             s++;
10275
10276             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10277             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10278
10279             /* stray preinitial _ */
10280             if (*s == '_') {
10281                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10282                                "Misplaced _ in number");
10283                 lastub = s++;
10284             }
10285
10286             /* allow positive or negative exponent */
10287             if (*s == '+' || *s == '-')
10288                 *d++ = *s++;
10289
10290             /* stray initial _ */
10291             if (*s == '_') {
10292                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10293                                "Misplaced _ in number");
10294                 lastub = s++;
10295             }
10296
10297             /* read digits of exponent */
10298             while (isDIGIT(*s) || *s == '_') {
10299                 if (isDIGIT(*s)) {
10300                     if (d >= e)
10301                         Perl_croak(aTHX_ number_too_long);
10302                     *d++ = *s++;
10303                 }
10304                 else {
10305                    if (((lastub && s == lastub + 1) ||
10306                         (!isDIGIT(s[1]) && s[1] != '_')))
10307                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10308                                       "Misplaced _ in number");
10309                    lastub = s++;
10310                 }
10311             }
10312         }
10313
10314
10315         /*
10316            We try to do an integer conversion first if no characters
10317            indicating "float" have been found.
10318          */
10319
10320         if (!floatit) {
10321             UV uv;
10322             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10323
10324             if (flags == IS_NUMBER_IN_UV) {
10325               if (uv <= IV_MAX)
10326                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10327               else
10328                 sv = newSVuv(uv);
10329             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10330               if (uv <= (UV) IV_MIN)
10331                 sv = newSViv(-(IV)uv);
10332               else
10333                 floatit = TRUE;
10334             } else
10335               floatit = TRUE;
10336         }
10337         if (floatit) {
10338             /* terminate the string */
10339             *d = '\0';
10340             nv = Atof(PL_tokenbuf);
10341             sv = newSVnv(nv);
10342         }
10343
10344         if ( floatit
10345              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10346             const char *const key = floatit ? "float" : "integer";
10347             const STRLEN keylen = floatit ? 5 : 7;
10348             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10349                                 key, keylen, sv, NULL, NULL, 0);
10350         }
10351         break;
10352
10353     /* if it starts with a v, it could be a v-string */
10354     case 'v':
10355 vstring:
10356                 sv = newSV(5); /* preallocate storage space */
10357                 s = scan_vstring(s, PL_bufend, sv);
10358         break;
10359     }
10360
10361     /* make the op for the constant and return */
10362
10363     if (sv)
10364         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10365     else
10366         lvalp->opval = NULL;
10367
10368     return (char *)s;
10369 }
10370
10371 STATIC char *
10372 S_scan_formline(pTHX_ register char *s)
10373 {
10374     dVAR;
10375     register char *eol;
10376     register char *t;
10377     SV * const stuff = newSVpvs("");
10378     bool needargs = FALSE;
10379     bool eofmt = FALSE;
10380 #ifdef PERL_MAD
10381     char *tokenstart = s;
10382     SV* savewhite = NULL;
10383
10384     if (PL_madskills) {
10385         savewhite = PL_thiswhite;
10386         PL_thiswhite = 0;
10387     }
10388 #endif
10389
10390     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10391
10392     while (!needargs) {
10393         if (*s == '.') {
10394             t = s+1;
10395 #ifdef PERL_STRICT_CR
10396             while (SPACE_OR_TAB(*t))
10397                 t++;
10398 #else
10399             while (SPACE_OR_TAB(*t) || *t == '\r')
10400                 t++;
10401 #endif
10402             if (*t == '\n' || t == PL_bufend) {
10403                 eofmt = TRUE;
10404                 break;
10405             }
10406         }
10407         if (PL_in_eval && !PL_rsfp) {
10408             eol = (char *) memchr(s,'\n',PL_bufend-s);
10409             if (!eol++)
10410                 eol = PL_bufend;
10411         }
10412         else
10413             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10414         if (*s != '#') {
10415             for (t = s; t < eol; t++) {
10416                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10417                     needargs = FALSE;
10418                     goto enough;        /* ~~ must be first line in formline */
10419                 }
10420                 if (*t == '@' || *t == '^')
10421                     needargs = TRUE;
10422             }
10423             if (eol > s) {
10424                 sv_catpvn(stuff, s, eol-s);
10425 #ifndef PERL_STRICT_CR
10426                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10427                     char *end = SvPVX(stuff) + SvCUR(stuff);
10428                     end[-2] = '\n';
10429                     end[-1] = '\0';
10430                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10431                 }
10432 #endif
10433             }
10434             else
10435               break;
10436         }
10437         s = (char*)eol;
10438         if (PL_rsfp) {
10439             bool got_some;
10440 #ifdef PERL_MAD
10441             if (PL_madskills) {
10442                 if (PL_thistoken)
10443                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10444                 else
10445                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10446             }
10447 #endif
10448             PL_bufptr = PL_bufend;
10449             CopLINE_inc(PL_curcop);
10450             got_some = lex_next_chunk(0);
10451             CopLINE_dec(PL_curcop);
10452             s = PL_bufptr;
10453 #ifdef PERL_MAD
10454             tokenstart = PL_bufptr;
10455 #endif
10456             if (!got_some)
10457                 break;
10458         }
10459         incline(s);
10460     }
10461   enough:
10462     if (SvCUR(stuff)) {
10463         PL_expect = XTERM;
10464         if (needargs) {
10465             PL_lex_state = LEX_NORMAL;
10466             start_force(PL_curforce);
10467             NEXTVAL_NEXTTOKE.ival = 0;
10468             force_next(',');
10469         }
10470         else
10471             PL_lex_state = LEX_FORMLINE;
10472         if (!IN_BYTES) {
10473             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10474                 SvUTF8_on(stuff);
10475             else if (PL_encoding)
10476                 sv_recode_to_utf8(stuff, PL_encoding);
10477         }
10478         start_force(PL_curforce);
10479         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10480         force_next(THING);
10481         start_force(PL_curforce);
10482         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10483         force_next(LSTOP);
10484     }
10485     else {
10486         SvREFCNT_dec(stuff);
10487         if (eofmt)
10488             PL_lex_formbrack = 0;
10489         PL_bufptr = s;
10490     }
10491 #ifdef PERL_MAD
10492     if (PL_madskills) {
10493         if (PL_thistoken)
10494             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10495         else
10496             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10497         PL_thiswhite = savewhite;
10498     }
10499 #endif
10500     return s;
10501 }
10502
10503 I32
10504 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10505 {
10506     dVAR;
10507     const I32 oldsavestack_ix = PL_savestack_ix;
10508     CV* const outsidecv = PL_compcv;
10509
10510     if (PL_compcv) {
10511         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10512     }
10513     SAVEI32(PL_subline);
10514     save_item(PL_subname);
10515     SAVESPTR(PL_compcv);
10516
10517     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10518     CvFLAGS(PL_compcv) |= flags;
10519
10520     PL_subline = CopLINE(PL_curcop);
10521     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10522     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10523     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10524
10525     return oldsavestack_ix;
10526 }
10527
10528 #ifdef __SC__
10529 #pragma segment Perl_yylex
10530 #endif
10531 static int
10532 S_yywarn(pTHX_ const char *const s)
10533 {
10534     dVAR;
10535
10536     PERL_ARGS_ASSERT_YYWARN;
10537
10538     PL_in_eval |= EVAL_WARNONLY;
10539     yyerror(s);
10540     PL_in_eval &= ~EVAL_WARNONLY;
10541     return 0;
10542 }
10543
10544 int
10545 Perl_yyerror(pTHX_ const char *const s)
10546 {
10547     dVAR;
10548     const char *where = NULL;
10549     const char *context = NULL;
10550     int contlen = -1;
10551     SV *msg;
10552     int yychar  = PL_parser->yychar;
10553
10554     PERL_ARGS_ASSERT_YYERROR;
10555
10556     if (!yychar || (yychar == ';' && !PL_rsfp))
10557         where = "at EOF";
10558     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10559       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10560       PL_oldbufptr != PL_bufptr) {
10561         /*
10562                 Only for NetWare:
10563                 The code below is removed for NetWare because it abends/crashes on NetWare
10564                 when the script has error such as not having the closing quotes like:
10565                     if ($var eq "value)
10566                 Checking of white spaces is anyway done in NetWare code.
10567         */
10568 #ifndef NETWARE
10569         while (isSPACE(*PL_oldoldbufptr))
10570             PL_oldoldbufptr++;
10571 #endif
10572         context = PL_oldoldbufptr;
10573         contlen = PL_bufptr - PL_oldoldbufptr;
10574     }
10575     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10576       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10577         /*
10578                 Only for NetWare:
10579                 The code below is removed for NetWare because it abends/crashes on NetWare
10580                 when the script has error such as not having the closing quotes like:
10581                     if ($var eq "value)
10582                 Checking of white spaces is anyway done in NetWare code.
10583         */
10584 #ifndef NETWARE
10585         while (isSPACE(*PL_oldbufptr))
10586             PL_oldbufptr++;
10587 #endif
10588         context = PL_oldbufptr;
10589         contlen = PL_bufptr - PL_oldbufptr;
10590     }
10591     else if (yychar > 255)
10592         where = "next token ???";
10593     else if (yychar == -2) { /* YYEMPTY */
10594         if (PL_lex_state == LEX_NORMAL ||
10595            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10596             where = "at end of line";
10597         else if (PL_lex_inpat)
10598             where = "within pattern";
10599         else
10600             where = "within string";
10601     }
10602     else {
10603         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10604         if (yychar < 32)
10605             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10606         else if (isPRINT_LC(yychar)) {
10607             const char string = yychar;
10608             sv_catpvn(where_sv, &string, 1);
10609         }
10610         else
10611             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10612         where = SvPVX_const(where_sv);
10613     }
10614     msg = sv_2mortal(newSVpv(s, 0));
10615     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10616         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10617     if (context)
10618         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10619     else
10620         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10621     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10622         Perl_sv_catpvf(aTHX_ msg,
10623         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10624                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10625         PL_multi_end = 0;
10626     }
10627     if (PL_in_eval & EVAL_WARNONLY) {
10628         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10629     }
10630     else
10631         qerror(msg);
10632     if (PL_error_count >= 10) {
10633         if (PL_in_eval && SvCUR(ERRSV))
10634             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10635                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10636         else
10637             Perl_croak(aTHX_ "%s has too many errors.\n",
10638             OutCopFILE(PL_curcop));
10639     }
10640     PL_in_my = 0;
10641     PL_in_my_stash = NULL;
10642     return 0;
10643 }
10644 #ifdef __SC__
10645 #pragma segment Main
10646 #endif
10647
10648 STATIC char*
10649 S_swallow_bom(pTHX_ U8 *s)
10650 {
10651     dVAR;
10652     const STRLEN slen = SvCUR(PL_linestr);
10653
10654     PERL_ARGS_ASSERT_SWALLOW_BOM;
10655
10656     switch (s[0]) {
10657     case 0xFF:
10658         if (s[1] == 0xFE) {
10659             /* UTF-16 little-endian? (or UTF-32LE?) */
10660             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10661                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10662 #ifndef PERL_NO_UTF16_FILTER
10663             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10664             s += 2;
10665             if (PL_bufend > (char*)s) {
10666                 s = add_utf16_textfilter(s, TRUE);
10667             }
10668 #else
10669             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10670 #endif
10671         }
10672         break;
10673     case 0xFE:
10674         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10675 #ifndef PERL_NO_UTF16_FILTER
10676             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10677             s += 2;
10678             if (PL_bufend > (char *)s) {
10679                 s = add_utf16_textfilter(s, FALSE);
10680             }
10681 #else
10682             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10683 #endif
10684         }
10685         break;
10686     case 0xEF:
10687         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10688             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10689             s += 3;                      /* UTF-8 */
10690         }
10691         break;
10692     case 0:
10693         if (slen > 3) {
10694              if (s[1] == 0) {
10695                   if (s[2] == 0xFE && s[3] == 0xFF) {
10696                        /* UTF-32 big-endian */
10697                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10698                   }
10699              }
10700              else if (s[2] == 0 && s[3] != 0) {
10701                   /* Leading bytes
10702                    * 00 xx 00 xx
10703                    * are a good indicator of UTF-16BE. */
10704 #ifndef PERL_NO_UTF16_FILTER
10705                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10706                   s = add_utf16_textfilter(s, FALSE);
10707 #else
10708                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10709 #endif
10710              }
10711         }
10712 #ifdef EBCDIC
10713     case 0xDD:
10714         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10715             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10716             s += 4;                      /* UTF-8 */
10717         }
10718         break;
10719 #endif
10720
10721     default:
10722          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10723                   /* Leading bytes
10724                    * xx 00 xx 00
10725                    * are a good indicator of UTF-16LE. */
10726 #ifndef PERL_NO_UTF16_FILTER
10727               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10728               s = add_utf16_textfilter(s, TRUE);
10729 #else
10730               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10731 #endif
10732          }
10733     }
10734     return (char*)s;
10735 }
10736
10737
10738 #ifndef PERL_NO_UTF16_FILTER
10739 static I32
10740 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10741 {
10742     dVAR;
10743     SV *const filter = FILTER_DATA(idx);
10744     /* We re-use this each time round, throwing the contents away before we
10745        return.  */
10746     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10747     SV *const utf8_buffer = filter;
10748     IV status = IoPAGE(filter);
10749     const bool reverse = cBOOL(IoLINES(filter));
10750     I32 retval;
10751
10752     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10753
10754     /* As we're automatically added, at the lowest level, and hence only called
10755        from this file, we can be sure that we're not called in block mode. Hence
10756        don't bother writing code to deal with block mode.  */
10757     if (maxlen) {
10758         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10759     }
10760     if (status < 0) {
10761         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10762     }
10763     DEBUG_P(PerlIO_printf(Perl_debug_log,
10764                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10765                           FPTR2DPTR(void *, S_utf16_textfilter),
10766                           reverse ? 'l' : 'b', idx, maxlen, status,
10767                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10768
10769     while (1) {
10770         STRLEN chars;
10771         STRLEN have;
10772         I32 newlen;
10773         U8 *end;
10774         /* First, look in our buffer of existing UTF-8 data:  */
10775         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10776
10777         if (nl) {
10778             ++nl;
10779         } else if (status == 0) {
10780             /* EOF */
10781             IoPAGE(filter) = 0;
10782             nl = SvEND(utf8_buffer);
10783         }
10784         if (nl) {
10785             STRLEN got = nl - SvPVX(utf8_buffer);
10786             /* Did we have anything to append?  */
10787             retval = got != 0;
10788             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10789             /* Everything else in this code works just fine if SVp_POK isn't
10790                set.  This, however, needs it, and we need it to work, else
10791                we loop infinitely because the buffer is never consumed.  */
10792             sv_chop(utf8_buffer, nl);
10793             break;
10794         }
10795
10796         /* OK, not a complete line there, so need to read some more UTF-16.
10797            Read an extra octect if the buffer currently has an odd number. */
10798         while (1) {
10799             if (status <= 0)
10800                 break;
10801             if (SvCUR(utf16_buffer) >= 2) {
10802                 /* Location of the high octet of the last complete code point.
10803                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10804                    *coupled* with all the benefits of partial reads and
10805                    endianness.  */
10806                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10807                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10808
10809                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10810                     break;
10811                 }
10812
10813                 /* We have the first half of a surrogate. Read more.  */
10814                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10815             }
10816
10817             status = FILTER_READ(idx + 1, utf16_buffer,
10818                                  160 + (SvCUR(utf16_buffer) & 1));
10819             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10820             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10821             if (status < 0) {
10822                 /* Error */
10823                 IoPAGE(filter) = status;
10824                 return status;
10825             }
10826         }
10827
10828         chars = SvCUR(utf16_buffer) >> 1;
10829         have = SvCUR(utf8_buffer);
10830         SvGROW(utf8_buffer, have + chars * 3 + 1);
10831
10832         if (reverse) {
10833             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10834                                          (U8*)SvPVX_const(utf8_buffer) + have,
10835                                          chars * 2, &newlen);
10836         } else {
10837             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10838                                 (U8*)SvPVX_const(utf8_buffer) + have,
10839                                 chars * 2, &newlen);
10840         }
10841         SvCUR_set(utf8_buffer, have + newlen);
10842         *end = '\0';
10843
10844         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10845            it's private to us, and utf16_to_utf8{,reversed} take a
10846            (pointer,length) pair, rather than a NUL-terminated string.  */
10847         if(SvCUR(utf16_buffer) & 1) {
10848             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10849             SvCUR_set(utf16_buffer, 1);
10850         } else {
10851             SvCUR_set(utf16_buffer, 0);
10852         }
10853     }
10854     DEBUG_P(PerlIO_printf(Perl_debug_log,
10855                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10856                           status,
10857                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10858     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10859     return retval;
10860 }
10861
10862 static U8 *
10863 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10864 {
10865     SV *filter = filter_add(S_utf16_textfilter, NULL);
10866
10867     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10868
10869     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10870     sv_setpvs(filter, "");
10871     IoLINES(filter) = reversed;
10872     IoPAGE(filter) = 1; /* Not EOF */
10873
10874     /* Sadly, we have to return a valid pointer, come what may, so we have to
10875        ignore any error return from this.  */
10876     SvCUR_set(PL_linestr, 0);
10877     if (FILTER_READ(0, PL_linestr, 0)) {
10878         SvUTF8_on(PL_linestr);
10879     } else {
10880         SvUTF8_on(PL_linestr);
10881     }
10882     PL_bufend = SvEND(PL_linestr);
10883     return (U8*)SvPVX(PL_linestr);
10884 }
10885 #endif
10886
10887 /*
10888 Returns a pointer to the next character after the parsed
10889 vstring, as well as updating the passed in sv.
10890
10891 Function must be called like
10892
10893         sv = newSV(5);
10894         s = scan_vstring(s,e,sv);
10895
10896 where s and e are the start and end of the string.
10897 The sv should already be large enough to store the vstring
10898 passed in, for performance reasons.
10899
10900 */
10901
10902 char *
10903 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10904 {
10905     dVAR;
10906     const char *pos = s;
10907     const char *start = s;
10908
10909     PERL_ARGS_ASSERT_SCAN_VSTRING;
10910
10911     if (*pos == 'v') pos++;  /* get past 'v' */
10912     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10913         pos++;
10914     if ( *pos != '.') {
10915         /* this may not be a v-string if followed by => */
10916         const char *next = pos;
10917         while (next < e && isSPACE(*next))
10918             ++next;
10919         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10920             /* return string not v-string */
10921             sv_setpvn(sv,(char *)s,pos-s);
10922             return (char *)pos;
10923         }
10924     }
10925
10926     if (!isALPHA(*pos)) {
10927         U8 tmpbuf[UTF8_MAXBYTES+1];
10928
10929         if (*s == 'v')
10930             s++;  /* get past 'v' */
10931
10932         sv_setpvs(sv, "");
10933
10934         for (;;) {
10935             /* this is atoi() that tolerates underscores */
10936             U8 *tmpend;
10937             UV rev = 0;
10938             const char *end = pos;
10939             UV mult = 1;
10940             while (--end >= s) {
10941                 if (*end != '_') {
10942                     const UV orev = rev;
10943                     rev += (*end - '0') * mult;
10944                     mult *= 10;
10945                     if (orev > rev)
10946                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10947                                          "Integer overflow in decimal number");
10948                 }
10949             }
10950 #ifdef EBCDIC
10951             if (rev > 0x7FFFFFFF)
10952                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10953 #endif
10954             /* Append native character for the rev point */
10955             tmpend = uvchr_to_utf8(tmpbuf, rev);
10956             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10957             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10958                  SvUTF8_on(sv);
10959             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10960                  s = ++pos;
10961             else {
10962                  s = pos;
10963                  break;
10964             }
10965             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10966                  pos++;
10967         }
10968         SvPOK_on(sv);
10969         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10970         SvRMAGICAL_on(sv);
10971     }
10972     return (char *)s;
10973 }
10974
10975 int
10976 Perl_keyword_plugin_standard(pTHX_
10977         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10978 {
10979     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10980     PERL_UNUSED_CONTEXT;
10981     PERL_UNUSED_ARG(keyword_ptr);
10982     PERL_UNUSED_ARG(keyword_len);
10983     PERL_UNUSED_ARG(op_ptr);
10984     return KEYWORD_PLUGIN_DECLINE;
10985 }
10986
10987 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10988 static void
10989 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
10990 {
10991     SAVEI32(PL_lex_brackets);
10992     if (PL_lex_brackets > 100)
10993         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10994     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
10995     SAVEI32(PL_lex_allbrackets);
10996     PL_lex_allbrackets = 0;
10997     SAVEI8(PL_lex_fakeeof);
10998     PL_lex_fakeeof = (U8)fakeeof;
10999     if(yyparse(gramtype) && !PL_parser->error_count)
11000         qerror(Perl_mess(aTHX_ "Parse error"));
11001 }
11002
11003 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11004 static OP *
11005 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11006 {
11007     OP *o;
11008     ENTER;
11009     SAVEVPTR(PL_eval_root);
11010     PL_eval_root = NULL;
11011     parse_recdescent(gramtype, fakeeof);
11012     o = PL_eval_root;
11013     LEAVE;
11014     return o;
11015 }
11016
11017 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11018 static OP *
11019 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11020 {
11021     OP *exprop;
11022     if (flags & ~PARSE_OPTIONAL)
11023         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11024     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11025     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11026         if (!PL_parser->error_count)
11027             qerror(Perl_mess(aTHX_ "Parse error"));
11028         exprop = newOP(OP_NULL, 0);
11029     }
11030     return exprop;
11031 }
11032
11033 /*
11034 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11035
11036 Parse a Perl arithmetic expression.  This may contain operators of precedence
11037 down to the bit shift operators.  The expression must be followed (and thus
11038 terminated) either by a comparison or lower-precedence operator or by
11039 something that would normally terminate an expression such as semicolon.
11040 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11041 otherwise it is mandatory.  It is up to the caller to ensure that the
11042 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11043 the source of the code to be parsed and the lexical context for the
11044 expression.
11045
11046 The op tree representing the expression is returned.  If an optional
11047 expression is absent, a null pointer is returned, otherwise the pointer
11048 will be non-null.
11049
11050 If an error occurs in parsing or compilation, in most cases a valid op
11051 tree is returned anyway.  The error is reflected in the parser state,
11052 normally resulting in a single exception at the top level of parsing
11053 which covers all the compilation errors that occurred.  Some compilation
11054 errors, however, will throw an exception immediately.
11055
11056 =cut
11057 */
11058
11059 OP *
11060 Perl_parse_arithexpr(pTHX_ U32 flags)
11061 {
11062     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11063 }
11064
11065 /*
11066 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11067
11068 Parse a Perl term expression.  This may contain operators of precedence
11069 down to the assignment operators.  The expression must be followed (and thus
11070 terminated) either by a comma or lower-precedence operator or by
11071 something that would normally terminate an expression such as semicolon.
11072 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11073 otherwise it is mandatory.  It is up to the caller to ensure that the
11074 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11075 the source of the code to be parsed and the lexical context for the
11076 expression.
11077
11078 The op tree representing the expression is returned.  If an optional
11079 expression is absent, a null pointer is returned, otherwise the pointer
11080 will be non-null.
11081
11082 If an error occurs in parsing or compilation, in most cases a valid op
11083 tree is returned anyway.  The error is reflected in the parser state,
11084 normally resulting in a single exception at the top level of parsing
11085 which covers all the compilation errors that occurred.  Some compilation
11086 errors, however, will throw an exception immediately.
11087
11088 =cut
11089 */
11090
11091 OP *
11092 Perl_parse_termexpr(pTHX_ U32 flags)
11093 {
11094     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11095 }
11096
11097 /*
11098 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11099
11100 Parse a Perl list expression.  This may contain operators of precedence
11101 down to the comma operator.  The expression must be followed (and thus
11102 terminated) either by a low-precedence logic operator such as C<or> or by
11103 something that would normally terminate an expression such as semicolon.
11104 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11105 otherwise it is mandatory.  It is up to the caller to ensure that the
11106 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11107 the source of the code to be parsed and the lexical context for the
11108 expression.
11109
11110 The op tree representing the expression is returned.  If an optional
11111 expression is absent, a null pointer is returned, otherwise the pointer
11112 will be non-null.
11113
11114 If an error occurs in parsing or compilation, in most cases a valid op
11115 tree is returned anyway.  The error is reflected in the parser state,
11116 normally resulting in a single exception at the top level of parsing
11117 which covers all the compilation errors that occurred.  Some compilation
11118 errors, however, will throw an exception immediately.
11119
11120 =cut
11121 */
11122
11123 OP *
11124 Perl_parse_listexpr(pTHX_ U32 flags)
11125 {
11126     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11127 }
11128
11129 /*
11130 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11131
11132 Parse a single complete Perl expression.  This allows the full
11133 expression grammar, including the lowest-precedence operators such
11134 as C<or>.  The expression must be followed (and thus terminated) by a
11135 token that an expression would normally be terminated by: end-of-file,
11136 closing bracketing punctuation, semicolon, or one of the keywords that
11137 signals a postfix expression-statement modifier.  If I<flags> includes
11138 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11139 mandatory.  It is up to the caller to ensure that the dynamic parser
11140 state (L</PL_parser> et al) is correctly set to reflect the source of
11141 the code to be parsed and the lexical context for the expression.
11142
11143 The op tree representing the expression is returned.  If an optional
11144 expression is absent, a null pointer is returned, otherwise the pointer
11145 will be non-null.
11146
11147 If an error occurs in parsing or compilation, in most cases a valid op
11148 tree is returned anyway.  The error is reflected in the parser state,
11149 normally resulting in a single exception at the top level of parsing
11150 which covers all the compilation errors that occurred.  Some compilation
11151 errors, however, will throw an exception immediately.
11152
11153 =cut
11154 */
11155
11156 OP *
11157 Perl_parse_fullexpr(pTHX_ U32 flags)
11158 {
11159     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11160 }
11161
11162 /*
11163 =for apidoc Amx|OP *|parse_block|U32 flags
11164
11165 Parse a single complete Perl code block.  This consists of an opening
11166 brace, a sequence of statements, and a closing brace.  The block
11167 constitutes a lexical scope, so C<my> variables and various compile-time
11168 effects can be contained within it.  It is up to the caller to ensure
11169 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11170 reflect the source of the code to be parsed and the lexical context for
11171 the statement.
11172
11173 The op tree representing the code block is returned.  This is always a
11174 real op, never a null pointer.  It will normally be a C<lineseq> list,
11175 including C<nextstate> or equivalent ops.  No ops to construct any kind
11176 of runtime scope are included by virtue of it being a block.
11177
11178 If an error occurs in parsing or compilation, in most cases a valid op
11179 tree (most likely null) is returned anyway.  The error is reflected in
11180 the parser state, normally resulting in a single exception at the top
11181 level of parsing which covers all the compilation errors that occurred.
11182 Some compilation errors, however, will throw an exception immediately.
11183
11184 The I<flags> parameter is reserved for future use, and must always
11185 be zero.
11186
11187 =cut
11188 */
11189
11190 OP *
11191 Perl_parse_block(pTHX_ U32 flags)
11192 {
11193     if (flags)
11194         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11195     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11196 }
11197
11198 /*
11199 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11200
11201 Parse a single unadorned Perl statement.  This may be a normal imperative
11202 statement or a declaration that has compile-time effect.  It does not
11203 include any label or other affixture.  It is up to the caller to ensure
11204 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11205 reflect the source of the code to be parsed and the lexical context for
11206 the statement.
11207
11208 The op tree representing the statement is returned.  This may be a
11209 null pointer if the statement is null, for example if it was actually
11210 a subroutine definition (which has compile-time side effects).  If not
11211 null, it will be ops directly implementing the statement, suitable to
11212 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11213 equivalent op (except for those embedded in a scope contained entirely
11214 within the statement).
11215
11216 If an error occurs in parsing or compilation, in most cases a valid op
11217 tree (most likely null) is returned anyway.  The error is reflected in
11218 the parser state, normally resulting in a single exception at the top
11219 level of parsing which covers all the compilation errors that occurred.
11220 Some compilation errors, however, will throw an exception immediately.
11221
11222 The I<flags> parameter is reserved for future use, and must always
11223 be zero.
11224
11225 =cut
11226 */
11227
11228 OP *
11229 Perl_parse_barestmt(pTHX_ U32 flags)
11230 {
11231     if (flags)
11232         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11233     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11234 }
11235
11236 /*
11237 =for apidoc Amx|SV *|parse_label|U32 flags
11238
11239 Parse a single label, possibly optional, of the type that may prefix a
11240 Perl statement.  It is up to the caller to ensure that the dynamic parser
11241 state (L</PL_parser> et al) is correctly set to reflect the source of
11242 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11243 label is optional, otherwise it is mandatory.
11244
11245 The name of the label is returned in the form of a fresh scalar.  If an
11246 optional label is absent, a null pointer is returned.
11247
11248 If an error occurs in parsing, which can only occur if the label is
11249 mandatory, a valid label is returned anyway.  The error is reflected in
11250 the parser state, normally resulting in a single exception at the top
11251 level of parsing which covers all the compilation errors that occurred.
11252
11253 =cut
11254 */
11255
11256 SV *
11257 Perl_parse_label(pTHX_ U32 flags)
11258 {
11259     if (flags & ~PARSE_OPTIONAL)
11260         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11261     if (PL_lex_state == LEX_KNOWNEXT) {
11262         PL_parser->yychar = yylex();
11263         if (PL_parser->yychar == LABEL) {
11264             char *lpv = pl_yylval.pval;
11265             STRLEN llen = strlen(lpv);
11266             SV *lsv;
11267             PL_parser->yychar = YYEMPTY;
11268             lsv = newSV_type(SVt_PV);
11269             SvPV_set(lsv, lpv);
11270             SvCUR_set(lsv, llen);
11271             SvLEN_set(lsv, llen+1);
11272             SvPOK_on(lsv);
11273             return lsv;
11274         } else {
11275             yyunlex();
11276             goto no_label;
11277         }
11278     } else {
11279         char *s, *t;
11280         U8 c;
11281         STRLEN wlen, bufptr_pos;
11282         lex_read_space(0);
11283         t = s = PL_bufptr;
11284         c = (U8)*s;
11285         if (!isIDFIRST_A(c))
11286             goto no_label;
11287         do {
11288             c = (U8)*++t;
11289         } while(isWORDCHAR_A(c));
11290         wlen = t - s;
11291         if (word_takes_any_delimeter(s, wlen))
11292             goto no_label;
11293         bufptr_pos = s - SvPVX(PL_linestr);
11294         PL_bufptr = t;
11295         lex_read_space(LEX_KEEP_PREVIOUS);
11296         t = PL_bufptr;
11297         s = SvPVX(PL_linestr) + bufptr_pos;
11298         if (t[0] == ':' && t[1] != ':') {
11299             PL_oldoldbufptr = PL_oldbufptr;
11300             PL_oldbufptr = s;
11301             PL_bufptr = t+1;
11302             return newSVpvn(s, wlen);
11303         } else {
11304             PL_bufptr = s;
11305             no_label:
11306             if (flags & PARSE_OPTIONAL) {
11307                 return NULL;
11308             } else {
11309                 qerror(Perl_mess(aTHX_ "Parse error"));
11310                 return newSVpvs("x");
11311             }
11312         }
11313     }
11314 }
11315
11316 /*
11317 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11318
11319 Parse a single complete Perl statement.  This may be a normal imperative
11320 statement or a declaration that has compile-time effect, and may include
11321 optional labels.  It is up to the caller to ensure that the dynamic
11322 parser state (L</PL_parser> et al) is correctly set to reflect the source
11323 of the code to be parsed and the lexical context for the statement.
11324
11325 The op tree representing the statement is returned.  This may be a
11326 null pointer if the statement is null, for example if it was actually
11327 a subroutine definition (which has compile-time side effects).  If not
11328 null, it will be the result of a L</newSTATEOP> call, normally including
11329 a C<nextstate> or equivalent op.
11330
11331 If an error occurs in parsing or compilation, in most cases a valid op
11332 tree (most likely null) is returned anyway.  The error is reflected in
11333 the parser state, normally resulting in a single exception at the top
11334 level of parsing which covers all the compilation errors that occurred.
11335 Some compilation errors, however, will throw an exception immediately.
11336
11337 The I<flags> parameter is reserved for future use, and must always
11338 be zero.
11339
11340 =cut
11341 */
11342
11343 OP *
11344 Perl_parse_fullstmt(pTHX_ U32 flags)
11345 {
11346     if (flags)
11347         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11348     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11349 }
11350
11351 /*
11352 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11353
11354 Parse a sequence of zero or more Perl statements.  These may be normal
11355 imperative statements, including optional labels, or declarations
11356 that have compile-time effect, or any mixture thereof.  The statement
11357 sequence ends when a closing brace or end-of-file is encountered in a
11358 place where a new statement could have validly started.  It is up to
11359 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11360 is correctly set to reflect the source of the code to be parsed and the
11361 lexical context for the statements.
11362
11363 The op tree representing the statement sequence is returned.  This may
11364 be a null pointer if the statements were all null, for example if there
11365 were no statements or if there were only subroutine definitions (which
11366 have compile-time side effects).  If not null, it will be a C<lineseq>
11367 list, normally including C<nextstate> or equivalent ops.
11368
11369 If an error occurs in parsing or compilation, in most cases a valid op
11370 tree is returned anyway.  The error is reflected in the parser state,
11371 normally resulting in a single exception at the top level of parsing
11372 which covers all the compilation errors that occurred.  Some compilation
11373 errors, however, will throw an exception immediately.
11374
11375 The I<flags> parameter is reserved for future use, and must always
11376 be zero.
11377
11378 =cut
11379 */
11380
11381 OP *
11382 Perl_parse_stmtseq(pTHX_ U32 flags)
11383 {
11384     OP *stmtseqop;
11385     I32 c;
11386     if (flags)
11387         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11388     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11389     c = lex_peek_unichar(0);
11390     if (c != -1 && c != /*{*/'}')
11391         qerror(Perl_mess(aTHX_ "Parse error"));
11392     return stmtseqop;
11393 }
11394
11395 void
11396 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11397 {
11398     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11399     deprecate("qw(...) as parentheses");
11400     force_next((4<<24)|')');
11401     if (qwlist->op_type == OP_STUB) {
11402         op_free(qwlist);
11403     }
11404     else {
11405         start_force(PL_curforce);
11406         NEXTVAL_NEXTTOKE.opval = qwlist;
11407         force_next(THING);
11408     }
11409     force_next((2<<24)|'(');
11410 }
11411
11412 /*
11413  * Local variables:
11414  * c-indentation-style: bsd
11415  * c-basic-offset: 4
11416  * indent-tabs-mode: t
11417  * End:
11418  *
11419  * ex: set ts=8 sts=4 sw=4 noet:
11420  */