toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmnU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "invlist_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g, h)  \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char* const ident_too_long = "Identifier too long";
  97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
 151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
 152    can also return it.
 153
 154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
 155    other token values are 258 or higher (see perly.h), so -1 should be
 156    a safe value here.
 157 */
 158 #define YYL_RETRY (-1)
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #include "keywords.h"
 177
 178 /* CLINE is a macro that ensures PL_copline has a sane value */
 179
 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 181
 182 /*
 183  * Convenience functions to return different tokens and prime the
 184  * lexer for the next token.  They all take an argument.
 185  *
 186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 187  * OPERATOR     : generic operator
 188  * AOPERATOR    : assignment operator
 189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 191  * PREREF       : *EXPR where EXPR is not a simple identifier
 192  * TERM         : expression term
 193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 194  * LOOPX        : loop exiting command (goto, last, dump, etc)
 195  * FTST         : file test operator
 196  * FUN0         : zero-argument function
 197  * FUN0OP       : zero-argument function, with its op created in this file
 198  * FUN1         : not used, except for not, which isn't a UNIOP
 199  * BOop         : bitwise or or xor
 200  * BAop         : bitwise and
 201  * BCop         : bitwise complement
 202  * SHop         : shift operator
 203  * PWop         : power operator
 204  * PMop         : pattern-matching operator
 205  * Aop          : addition-level operator
 206  * AopNOASSIGN  : addition-level operator that is never part of .=
 207  * Mop          : multiplication-level operator
 208  * ChEop        : chaining equality-testing operator
 209  * NCEop        : non-chaining comparison operator at equality precedence
 210  * ChRop        : chaining relational operator <= != gt
 211  * NCRop        : non-chaining relational operator isa
 212  *
 213  * Also see LOP and lop() below.
 214  */
 215
 216 #ifdef DEBUGGING /* Serve -DT. */
 217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 218 #else
 219 #   define REPORT(retval) (retval)
 220 #endif
 221
 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 231                          pl_yylval.ival=f, \
 232                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 233                          REPORT((int)LOOPEX))
 234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 241                        REPORT('~')
 242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
 252
 253 /* This bit of chicanery makes a unary function followed by
 254  * a parenthesis into a function with one argument, highest precedence.
 255  * The UNIDOR macro is for unary functions that can be followed by the //
 256  * operator (such as C<shift // 0>).
 257  */
 258 #define UNI3(f,x,have_x) { \
 259         pl_yylval.ival = f; \
 260         if (have_x) PL_expect = x; \
 261         PL_bufptr = s; \
 262         PL_last_uni = PL_oldbufptr; \
 263         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 264         if (*s == '(') \
 265             return REPORT( (int)FUNC1 ); \
 266         s = skipspace(s); \
 267         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 268         }
 269 #define UNI(f)    UNI3(f,XTERM,1)
 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 271 #define UNIPROTO(f,optional) { \
 272         if (optional) PL_last_uni = PL_oldbufptr; \
 273         OPERATOR(f); \
 274         }
 275
 276 #define UNIBRACK(f) UNI3(f,0,0)
 277
 278 /* grandfather return to old style */
 279 #define OLDLOP(f) \
 280         do { \
 281             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 282                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 283             pl_yylval.ival = (f); \
 284             PL_expect = XTERM; \
 285             PL_bufptr = s; \
 286             return (int)LSTOP; \
 287         } while(0)
 288
 289 #define COPLINE_INC_WITH_HERELINES                  \
 290     STMT_START {                                     \
 291         CopLINE_inc(PL_curcop);                       \
 292         if (PL_parser->herelines)                      \
 293             CopLINE(PL_curcop) += PL_parser->herelines, \
 294             PL_parser->herelines = 0;                    \
 295     } STMT_END
 296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 297  * is no sublex_push to follow. */
 298 #define COPLINE_SET_FROM_MULTI_END            \
 299     STMT_START {                               \
 300         CopLINE_set(PL_curcop, PL_multi_end);   \
 301         if (PL_multi_end != PL_multi_start)      \
 302             PL_parser->herelines = 0;             \
 303     } STMT_END
 304
 305
 306 /* A file-local structure for passing around information about subroutines and
 307  * related definable words */
 308 struct code {
 309     SV *sv;
 310     CV *cv;
 311     GV *gv, **gvp;
 312     OP *rv2cv_op;
 313     PADOFFSET off;
 314     bool lex;
 315 };
 316
 317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
 318
 319 #ifdef DEBUGGING
 320
 321 /* how to interpret the pl_yylval associated with the token */
 322 enum token_type {
 323     TOKENTYPE_NONE,
 324     TOKENTYPE_IVAL,
 325     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 326     TOKENTYPE_PVAL,
 327     TOKENTYPE_OPVAL
 328 };
 329
 330 static struct debug_tokens {
 331     const int token;
 332     enum token_type type;
 333     const char *name;
 334 } const debug_tokens[] =
 335 {
 336     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 337     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 338     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 339     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 340     { ANON_SIGSUB,      TOKENTYPE_IVAL,         "ANON_SIGSUB" },
 341     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 342     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 343     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 344     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 345     { CHEQOP,           TOKENTYPE_OPNUM,        "CHEQOP" },
 346     { CHRELOP,          TOKENTYPE_OPNUM,        "CHRELOP" },
 347     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 348     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 349     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 350     { DO,               TOKENTYPE_NONE,         "DO" },
 351     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 352     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 353     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 354     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 355     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 356     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 357     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 358     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 359     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 360     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 361     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 362     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 363     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 364     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 365     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 366     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 367     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 368     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 369     { IF,               TOKENTYPE_IVAL,         "IF" },
 370     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 371     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 372     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 373     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 374     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 375     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 376     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 377     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 378     { MY,               TOKENTYPE_IVAL,         "MY" },
 379     { NCEQOP,           TOKENTYPE_OPNUM,        "NCEQOP" },
 380     { NCRELOP,          TOKENTYPE_OPNUM,        "NCRELOP" },
 381     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 382     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 383     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 384     { OROR,             TOKENTYPE_NONE,         "OROR" },
 385     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 386     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 387     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 388     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 389     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 390     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 391     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 392     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 393     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 394     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 395     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 396     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 397     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 398     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 399     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 400     { SIGSUB,           TOKENTYPE_NONE,         "SIGSUB" },
 401     { SUB,              TOKENTYPE_NONE,         "SUB" },
 402     { SUBLEXEND,        TOKENTYPE_NONE,         "SUBLEXEND" },
 403     { SUBLEXSTART,      TOKENTYPE_NONE,         "SUBLEXSTART" },
 404     { THING,            TOKENTYPE_OPVAL,        "THING" },
 405     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 406     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 407     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 408     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 409     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 410     { USE,              TOKENTYPE_IVAL,         "USE" },
 411     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 412     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 413     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 414     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 415     { 0,                TOKENTYPE_NONE,         NULL }
 416 };
 417
 418 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 419
 420 STATIC int
 421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 422 {
 423     PERL_ARGS_ASSERT_TOKEREPORT;
 424
 425     if (DEBUG_T_TEST) {
 426         const char *name = NULL;
 427         enum token_type type = TOKENTYPE_NONE;
 428         const struct debug_tokens *p;
 429         SV* const report = newSVpvs("<== ");
 430
 431         for (p = debug_tokens; p->token; p++) {
 432             if (p->token == (int)rv) {
 433                 name = p->name;
 434                 type = p->type;
 435                 break;
 436             }
 437         }
 438         if (name)
 439             Perl_sv_catpv(aTHX_ report, name);
 440         else if (isGRAPH(rv))
 441         {
 442             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 443             if ((char)rv == 'p')
 444                 sv_catpvs(report, " (pending identifier)");
 445         }
 446         else if (!rv)
 447             sv_catpvs(report, "EOF");
 448         else
 449             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 450         switch (type) {
 451         case TOKENTYPE_NONE:
 452             break;
 453         case TOKENTYPE_IVAL:
 454             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 455             break;
 456         case TOKENTYPE_OPNUM:
 457             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 458                                     PL_op_name[lvalp->ival]);
 459             break;
 460         case TOKENTYPE_PVAL:
 461             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 462             break;
 463         case TOKENTYPE_OPVAL:
 464             if (lvalp->opval) {
 465                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 466                                     PL_op_name[lvalp->opval->op_type]);
 467                 if (lvalp->opval->op_type == OP_CONST) {
 468                     Perl_sv_catpvf(aTHX_ report, " %s",
 469                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 470                 }
 471
 472             }
 473             else
 474                 sv_catpvs(report, "(opval=null)");
 475             break;
 476         }
 477         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 478     };
 479     return (int)rv;
 480 }
 481
 482
 483 /* print the buffer with suitable escapes */
 484
 485 STATIC void
 486 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 487 {
 488     SV* const tmp = newSVpvs("");
 489
 490     PERL_ARGS_ASSERT_PRINTBUF;
 491
 492     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 493     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 494     GCC_DIAG_RESTORE_STMT;
 495     SvREFCNT_dec(tmp);
 496 }
 497
 498 #endif
 499
 500 /*
 501  * S_ao
 502  *
 503  * This subroutine looks for an '=' next to the operator that has just been
 504  * parsed and turns it into an ASSIGNOP if it finds one.
 505  */
 506
 507 STATIC int
 508 S_ao(pTHX_ int toketype)
 509 {
 510     if (*PL_bufptr == '=') {
 511         PL_bufptr++;
 512         if (toketype == ANDAND)
 513             pl_yylval.ival = OP_ANDASSIGN;
 514         else if (toketype == OROR)
 515             pl_yylval.ival = OP_ORASSIGN;
 516         else if (toketype == DORDOR)
 517             pl_yylval.ival = OP_DORASSIGN;
 518         toketype = ASSIGNOP;
 519     }
 520     return REPORT(toketype);
 521 }
 522
 523 /*
 524  * S_no_op
 525  * When Perl expects an operator and finds something else, no_op
 526  * prints the warning.  It always prints "<something> found where
 527  * operator expected.  It prints "Missing semicolon on previous line?"
 528  * if the surprise occurs at the start of the line.  "do you need to
 529  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 530  * where the compiler doesn't know if foo is a method call or a function.
 531  * It prints "Missing operator before end of line" if there's nothing
 532  * after the missing operator, or "... before <...>" if there is something
 533  * after the missing operator.
 534  *
 535  * PL_bufptr is expected to point to the start of the thing that was found,
 536  * and s after the next token or partial token.
 537  */
 538
 539 STATIC void
 540 S_no_op(pTHX_ const char *const what, char *s)
 541 {
 542     char * const oldbp = PL_bufptr;
 543     const bool is_first = (PL_oldbufptr == PL_linestart);
 544
 545     PERL_ARGS_ASSERT_NO_OP;
 546
 547     if (!s)
 548         s = oldbp;
 549     else
 550         PL_bufptr = s;
 551     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 552     if (ckWARN_d(WARN_SYNTAX)) {
 553         if (is_first)
 554             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 555                     "\t(Missing semicolon on previous line?)\n");
 556         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 557                                                            PL_bufend,
 558                                                            UTF))
 559         {
 560             const char *t;
 561             for (t = PL_oldoldbufptr;
 562                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 563                  t += UTF ? UTF8SKIP(t) : 1)
 564             {
 565                 NOOP;
 566             }
 567             if (t < PL_bufptr && isSPACE(*t))
 568                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 569                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 570                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 571         }
 572         else {
 573             assert(s >= oldbp);
 574             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 575                     "\t(Missing operator before %" UTF8f "?)\n",
 576                      UTF8fARG(UTF, s - oldbp, oldbp));
 577         }
 578     }
 579     PL_bufptr = oldbp;
 580 }
 581
 582 /*
 583  * S_missingterm
 584  * Complain about missing quote/regexp/heredoc terminator.
 585  * If it's called with NULL then it cauterizes the line buffer.
 586  * If we're in a delimited string and the delimiter is a control
 587  * character, it's reformatted into a two-char sequence like ^C.
 588  * This is fatal.
 589  */
 590
 591 STATIC void
 592 S_missingterm(pTHX_ char *s, STRLEN len)
 593 {
 594     char tmpbuf[UTF8_MAXBYTES + 1];
 595     char q;
 596     bool uni = FALSE;
 597     SV *sv;
 598     if (s) {
 599         char * const nl = (char *) my_memrchr(s, '\n', len);
 600         if (nl) {
 601             *nl = '\0';
 602             len = nl - s;
 603         }
 604         uni = UTF;
 605     }
 606     else if (PL_multi_close < 32) {
 607         *tmpbuf = '^';
 608         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 609         tmpbuf[2] = '\0';
 610         s = tmpbuf;
 611         len = 2;
 612     }
 613     else {
 614         if (LIKELY(PL_multi_close < 256)) {
 615             *tmpbuf = (char)PL_multi_close;
 616             tmpbuf[1] = '\0';
 617             len = 1;
 618         }
 619         else {
 620             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 621             *end = '\0';
 622             len = end - tmpbuf;
 623             uni = TRUE;
 624         }
 625         s = tmpbuf;
 626     }
 627     q = memchr(s, '"', len) ? '\'' : '"';
 628     sv = sv_2mortal(newSVpvn(s, len));
 629     if (uni)
 630         SvUTF8_on(sv);
 631     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 632                      " anywhere before EOF", q, SVfARG(sv), q);
 633 }
 634
 635 #include "feature.h"
 636
 637 /*
 638  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 639  * utf16-to-utf8-reversed.
 640  */
 641
 642 #ifdef PERL_CR_FILTER
 643 static void
 644 strip_return(SV *sv)
 645 {
 646     const char *s = SvPVX_const(sv);
 647     const char * const e = s + SvCUR(sv);
 648
 649     PERL_ARGS_ASSERT_STRIP_RETURN;
 650
 651     /* outer loop optimized to do nothing if there are no CR-LFs */
 652     while (s < e) {
 653         if (*s++ == '\r' && *s == '\n') {
 654             /* hit a CR-LF, need to copy the rest */
 655             char *d = s - 1;
 656             *d++ = *s++;
 657             while (s < e) {
 658                 if (*s == '\r' && s[1] == '\n')
 659                     s++;
 660                 *d++ = *s++;
 661             }
 662             SvCUR(sv) -= s - d;
 663             return;
 664         }
 665     }
 666 }
 667
 668 STATIC I32
 669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 670 {
 671     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 672     if (count > 0 && !maxlen)
 673         strip_return(sv);
 674     return count;
 675 }
 676 #endif
 677
 678 /*
 679 =for apidoc lex_start
 680
 681 Creates and initialises a new lexer/parser state object, supplying
 682 a context in which to lex and parse from a new source of Perl code.
 683 A pointer to the new state object is placed in L</PL_parser>.  An entry
 684 is made on the save stack so that upon unwinding, the new state object
 685 will be destroyed and the former value of L</PL_parser> will be restored.
 686 Nothing else need be done to clean up the parsing context.
 687
 688 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 689 non-null, provides a string (in SV form) containing code to be parsed.
 690 A copy of the string is made, so subsequent modification of C<line>
 691 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 692 from which code will be read to be parsed.  If both are non-null, the
 693 code in C<line> comes first and must consist of complete lines of input,
 694 and C<rsfp> supplies the remainder of the source.
 695
 696 The C<flags> parameter is reserved for future use.  Currently it is only
 697 used by perl internally, so extensions should always pass zero.
 698
 699 =cut
 700 */
 701
 702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 703    can share filters with the current parser.
 704    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 705    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 706    destruction. This is used to handle the case of defaulting to reading the
 707    script from the standard input because no filename was given on the command
 708    line (without getting confused by situation where STDIN has been closed, so
 709    the script handle is opened on fd 0)  */
 710
 711 void
 712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 713 {
 714     const char *s = NULL;
 715     yy_parser *parser, *oparser;
 716
 717     if (flags && flags & ~LEX_START_FLAGS)
 718         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 719
 720     /* create and initialise a parser */
 721
 722     Newxz(parser, 1, yy_parser);
 723     parser->old_parser = oparser = PL_parser;
 724     PL_parser = parser;
 725
 726     parser->stack = NULL;
 727     parser->stack_max1 = NULL;
 728     parser->ps = NULL;
 729
 730     /* on scope exit, free this parser and restore any outer one */
 731     SAVEPARSER(parser);
 732     parser->saved_curcop = PL_curcop;
 733
 734     /* initialise lexer state */
 735
 736     parser->nexttoke = 0;
 737     parser->error_count = oparser ? oparser->error_count : 0;
 738     parser->copline = parser->preambling = NOLINE;
 739     parser->lex_state = LEX_NORMAL;
 740     parser->expect = XSTATE;
 741     parser->rsfp = rsfp;
 742     parser->recheck_utf8_validity = TRUE;
 743     parser->rsfp_filters =
 744       !(flags & LEX_START_SAME_FILTER) || !oparser
 745         ? NULL
 746         : MUTABLE_AV(SvREFCNT_inc(
 747             oparser->rsfp_filters
 748              ? oparser->rsfp_filters
 749              : (oparser->rsfp_filters = newAV())
 750           ));
 751
 752     Newx(parser->lex_brackstack, 120, char);
 753     Newx(parser->lex_casestack, 12, char);
 754     *parser->lex_casestack = '\0';
 755     Newxz(parser->lex_shared, 1, LEXSHARED);
 756
 757     if (line) {
 758         STRLEN len;
 759         const U8* first_bad_char_loc;
 760
 761         s = SvPV_const(line, len);
 762
 763         if (   SvUTF8(line)
 764             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 765                                              SvCUR(line),
 766                                              &first_bad_char_loc)))
 767         {
 768             _force_out_malformed_utf8_message(first_bad_char_loc,
 769                                               (U8 *) s + SvCUR(line),
 770                                               0,
 771                                               1 /* 1 means die */ );
 772             NOT_REACHED; /* NOTREACHED */
 773         }
 774
 775         parser->linestr = flags & LEX_START_COPIED
 776                             ? SvREFCNT_inc_simple_NN(line)
 777                             : newSVpvn_flags(s, len, SvUTF8(line));
 778         if (!rsfp)
 779             sv_catpvs(parser->linestr, "\n;");
 780     } else {
 781         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 782     }
 783
 784     parser->oldoldbufptr =
 785         parser->oldbufptr =
 786         parser->bufptr =
 787         parser->linestart = SvPVX(parser->linestr);
 788     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 789     parser->last_lop = parser->last_uni = NULL;
 790
 791     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 792                                                         |LEX_DONT_CLOSE_RSFP));
 793     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 794                                                         |LEX_DONT_CLOSE_RSFP));
 795
 796     parser->in_pod = parser->filtered = 0;
 797 }
 798
 799
 800 /* delete a parser object */
 801
 802 void
 803 Perl_parser_free(pTHX_  const yy_parser *parser)
 804 {
 805     PERL_ARGS_ASSERT_PARSER_FREE;
 806
 807     PL_curcop = parser->saved_curcop;
 808     SvREFCNT_dec(parser->linestr);
 809
 810     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 811         PerlIO_clearerr(parser->rsfp);
 812     else if (parser->rsfp && (!parser->old_parser
 813           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 814         PerlIO_close(parser->rsfp);
 815     SvREFCNT_dec(parser->rsfp_filters);
 816     SvREFCNT_dec(parser->lex_stuff);
 817     SvREFCNT_dec(parser->lex_sub_repl);
 818
 819     Safefree(parser->lex_brackstack);
 820     Safefree(parser->lex_casestack);
 821     Safefree(parser->lex_shared);
 822     PL_parser = parser->old_parser;
 823     Safefree(parser);
 824 }
 825
 826 void
 827 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 828 {
 829     I32 nexttoke = parser->nexttoke;
 830     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 831     while (nexttoke--) {
 832         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 833          && parser->nextval[nexttoke].opval
 834          && parser->nextval[nexttoke].opval->op_slabbed
 835          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 836             op_free(parser->nextval[nexttoke].opval);
 837             parser->nextval[nexttoke].opval = NULL;
 838         }
 839     }
 840 }
 841
 842
 843 /*
 844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
 845
 846 Buffer scalar containing the chunk currently under consideration of the
 847 text currently being lexed.  This is always a plain string scalar (for
 848 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 849 normal scalar means; instead refer to the buffer directly by the pointer
 850 variables described below.
 851
 852 The lexer maintains various C<char*> pointers to things in the
 853 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 854 reallocated, all of these pointers must be updated.  Don't attempt to
 855 do this manually, but rather use L</lex_grow_linestr> if you need to
 856 reallocate the buffer.
 857
 858 The content of the text chunk in the buffer is commonly exactly one
 859 complete line of input, up to and including a newline terminator,
 860 but there are situations where it is otherwise.  The octets of the
 861 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 862 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 863 flag on this scalar, which may disagree with it.
 864
 865 For direct examination of the buffer, the variable
 866 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 868 of these pointers is usually preferable to examination of the scalar
 869 through normal scalar means.
 870
 871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
 872
 873 Direct pointer to the end of the chunk of text currently being lexed, the
 874 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 875 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 876 always located at the end of the buffer, and does not count as part of
 877 the buffer's contents.
 878
 879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
 880
 881 Points to the current position of lexing inside the lexer buffer.
 882 Characters around this point may be freely examined, within
 883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 884 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 886
 887 Lexing code (whether in the Perl core or not) moves this pointer past
 888 the characters that it consumes.  It is also expected to perform some
 889 bookkeeping whenever a newline character is consumed.  This movement
 890 can be more conveniently performed by the function L</lex_read_to>,
 891 which handles newlines appropriately.
 892
 893 Interpretation of the buffer's octets can be abstracted out by
 894 using the slightly higher-level functions L</lex_peek_unichar> and
 895 L</lex_read_unichar>.
 896
 897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
 898
 899 Points to the start of the current line inside the lexer buffer.
 900 This is useful for indicating at which column an error occurred, and
 901 not much else.  This must be updated by any lexing code that consumes
 902 a newline; the function L</lex_read_to> handles this detail.
 903
 904 =cut
 905 */
 906
 907 /*
 908 =for apidoc lex_bufutf8
 909
 910 Indicates whether the octets in the lexer buffer
 911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 912 of Unicode characters.  If not, they should be interpreted as Latin-1
 913 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 914
 915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 916 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 917 encoding.
 918
 919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 920 is significant, but not the whole story regarding the input character
 921 encoding.  Normally, when a file is being read, the scalar contains octets
 922 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 923 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 924 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 926 is in effect.  This logic may change in the future; use this function
 927 instead of implementing the logic yourself.
 928
 929 =cut
 930 */
 931
 932 bool
 933 Perl_lex_bufutf8(pTHX)
 934 {
 935     return UTF;
 936 }
 937
 938 /*
 939 =for apidoc lex_grow_linestr
 940
 941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 942 at least C<len> octets (including terminating C<NUL>).  Returns a
 943 pointer to the reallocated buffer.  This is necessary before making
 944 any direct modification of the buffer that would increase its length.
 945 L</lex_stuff_pvn> provides a more convenient way to insert text into
 946 the buffer.
 947
 948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 949 this function updates all of the lexer's variables that point directly
 950 into the buffer.
 951
 952 =cut
 953 */
 954
 955 char *
 956 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 957 {
 958     SV *linestr;
 959     char *buf;
 960     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 961     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 962     bool current;
 963
 964     linestr = PL_parser->linestr;
 965     buf = SvPVX(linestr);
 966     if (len <= SvLEN(linestr))
 967         return buf;
 968
 969     /* Is the lex_shared linestr SV the same as the current linestr SV?
 970      * Only in this case does re_eval_start need adjusting, since it
 971      * points within lex_shared->ls_linestr's buffer */
 972     current = (   !PL_parser->lex_shared->ls_linestr
 973                || linestr == PL_parser->lex_shared->ls_linestr);
 974
 975     bufend_pos = PL_parser->bufend - buf;
 976     bufptr_pos = PL_parser->bufptr - buf;
 977     oldbufptr_pos = PL_parser->oldbufptr - buf;
 978     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 979     linestart_pos = PL_parser->linestart - buf;
 980     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 981     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 982     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 983                             PL_parser->lex_shared->re_eval_start - buf : 0;
 984
 985     buf = sv_grow(linestr, len);
 986
 987     PL_parser->bufend = buf + bufend_pos;
 988     PL_parser->bufptr = buf + bufptr_pos;
 989     PL_parser->oldbufptr = buf + oldbufptr_pos;
 990     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 991     PL_parser->linestart = buf + linestart_pos;
 992     if (PL_parser->last_uni)
 993         PL_parser->last_uni = buf + last_uni_pos;
 994     if (PL_parser->last_lop)
 995         PL_parser->last_lop = buf + last_lop_pos;
 996     if (current && PL_parser->lex_shared->re_eval_start)
 997         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 998     return buf;
 999 }
1000
1001 /*
1002 =for apidoc lex_stuff_pvn
1003
1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1006 reallocating the buffer if necessary.  This means that lexing code that
1007 runs later will see the characters as if they had appeared in the input.
1008 It is not recommended to do this as part of normal parsing, and most
1009 uses of this facility run the risk of the inserted characters being
1010 interpreted in an unintended manner.
1011
1012 The string to be inserted is represented by C<len> octets starting
1013 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1015 The characters are recoded for the lexer buffer, according to how the
1016 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1018 function is more convenient.
1019
1020 =for apidoc Amnh||LEX_STUFF_UTF8
1021
1022 =cut
1023 */
1024
1025 void
1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1027 {
1028     char *bufptr;
1029     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1030     if (flags & ~(LEX_STUFF_UTF8))
1031         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1032     if (UTF) {
1033         if (flags & LEX_STUFF_UTF8) {
1034             goto plain_copy;
1035         } else {
1036             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1037                                                        (U8 *) pv + len);
1038             const char *p, *e = pv+len;;
1039             if (!highhalf)
1040                 goto plain_copy;
1041             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1042             bufptr = PL_parser->bufptr;
1043             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1044             SvCUR_set(PL_parser->linestr,
1045                 SvCUR(PL_parser->linestr) + len+highhalf);
1046             PL_parser->bufend += len+highhalf;
1047             for (p = pv; p != e; p++) {
1048                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1049             }
1050         }
1051     } else {
1052         if (flags & LEX_STUFF_UTF8) {
1053             STRLEN highhalf = 0;
1054             const char *p, *e = pv+len;
1055             for (p = pv; p != e; p++) {
1056                 U8 c = (U8)*p;
1057                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1058                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1059                                 "non-Latin-1 character into Latin-1 input");
1060                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1061                     p++;
1062                     highhalf++;
1063                 } else assert(UTF8_IS_INVARIANT(c));
1064             }
1065             if (!highhalf)
1066                 goto plain_copy;
1067             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1068             bufptr = PL_parser->bufptr;
1069             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1070             SvCUR_set(PL_parser->linestr,
1071                 SvCUR(PL_parser->linestr) + len-highhalf);
1072             PL_parser->bufend += len-highhalf;
1073             p = pv;
1074             while (p < e) {
1075                 if (UTF8_IS_INVARIANT(*p)) {
1076                     *bufptr++ = *p;
1077                     p++;
1078                 }
1079                 else {
1080                     assert(p < e -1 );
1081                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1082                     p += 2;
1083                 }
1084             }
1085         } else {
1086           plain_copy:
1087             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1088             bufptr = PL_parser->bufptr;
1089             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1090             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1091             PL_parser->bufend += len;
1092             Copy(pv, bufptr, len, char);
1093         }
1094     }
1095 }
1096
1097 /*
1098 =for apidoc lex_stuff_pv
1099
1100 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1101 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1102 reallocating the buffer if necessary.  This means that lexing code that
1103 runs later will see the characters as if they had appeared in the input.
1104 It is not recommended to do this as part of normal parsing, and most
1105 uses of this facility run the risk of the inserted characters being
1106 interpreted in an unintended manner.
1107
1108 The string to be inserted is represented by octets starting at C<pv>
1109 and continuing to the first nul.  These octets are interpreted as either
1110 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1111 in C<flags>.  The characters are recoded for the lexer buffer, according
1112 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1113 If it is not convenient to nul-terminate a string to be inserted, the
1114 L</lex_stuff_pvn> function is more appropriate.
1115
1116 =cut
1117 */
1118
1119 void
1120 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1121 {
1122     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1123     lex_stuff_pvn(pv, strlen(pv), flags);
1124 }
1125
1126 /*
1127 =for apidoc lex_stuff_sv
1128
1129 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1130 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1131 reallocating the buffer if necessary.  This means that lexing code that
1132 runs later will see the characters as if they had appeared in the input.
1133 It is not recommended to do this as part of normal parsing, and most
1134 uses of this facility run the risk of the inserted characters being
1135 interpreted in an unintended manner.
1136
1137 The string to be inserted is the string value of C<sv>.  The characters
1138 are recoded for the lexer buffer, according to how the buffer is currently
1139 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1140 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1141 need to construct a scalar.
1142
1143 =cut
1144 */
1145
1146 void
1147 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1148 {
1149     char *pv;
1150     STRLEN len;
1151     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1152     if (flags)
1153         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1154     pv = SvPV(sv, len);
1155     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1156 }
1157
1158 /*
1159 =for apidoc lex_unstuff
1160
1161 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1162 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1163 This hides the discarded text from any lexing code that runs later,
1164 as if the text had never appeared.
1165
1166 This is not the normal way to consume lexed text.  For that, use
1167 L</lex_read_to>.
1168
1169 =cut
1170 */
1171
1172 void
1173 Perl_lex_unstuff(pTHX_ char *ptr)
1174 {
1175     char *buf, *bufend;
1176     STRLEN unstuff_len;
1177     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1178     buf = PL_parser->bufptr;
1179     if (ptr < buf)
1180         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1181     if (ptr == buf)
1182         return;
1183     bufend = PL_parser->bufend;
1184     if (ptr > bufend)
1185         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1186     unstuff_len = ptr - buf;
1187     Move(ptr, buf, bufend+1-ptr, char);
1188     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1189     PL_parser->bufend = bufend - unstuff_len;
1190 }
1191
1192 /*
1193 =for apidoc lex_read_to
1194
1195 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1196 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1197 performing the correct bookkeeping whenever a newline character is passed.
1198 This is the normal way to consume lexed text.
1199
1200 Interpretation of the buffer's octets can be abstracted out by
1201 using the slightly higher-level functions L</lex_peek_unichar> and
1202 L</lex_read_unichar>.
1203
1204 =cut
1205 */
1206
1207 void
1208 Perl_lex_read_to(pTHX_ char *ptr)
1209 {
1210     char *s;
1211     PERL_ARGS_ASSERT_LEX_READ_TO;
1212     s = PL_parser->bufptr;
1213     if (ptr < s || ptr > PL_parser->bufend)
1214         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1215     for (; s != ptr; s++)
1216         if (*s == '\n') {
1217             COPLINE_INC_WITH_HERELINES;
1218             PL_parser->linestart = s+1;
1219         }
1220     PL_parser->bufptr = ptr;
1221 }
1222
1223 /*
1224 =for apidoc lex_discard_to
1225
1226 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1227 up to C<ptr>.  The remaining content of the buffer will be moved, and
1228 all pointers into the buffer updated appropriately.  C<ptr> must not
1229 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1230 it is not permitted to discard text that has yet to be lexed.
1231
1232 Normally it is not necessarily to do this directly, because it suffices to
1233 use the implicit discarding behaviour of L</lex_next_chunk> and things
1234 based on it.  However, if a token stretches across multiple lines,
1235 and the lexing code has kept multiple lines of text in the buffer for
1236 that purpose, then after completion of the token it would be wise to
1237 explicitly discard the now-unneeded earlier lines, to avoid future
1238 multi-line tokens growing the buffer without bound.
1239
1240 =cut
1241 */
1242
1243 void
1244 Perl_lex_discard_to(pTHX_ char *ptr)
1245 {
1246     char *buf;
1247     STRLEN discard_len;
1248     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1249     buf = SvPVX(PL_parser->linestr);
1250     if (ptr < buf)
1251         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1252     if (ptr == buf)
1253         return;
1254     if (ptr > PL_parser->bufptr)
1255         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256     discard_len = ptr - buf;
1257     if (PL_parser->oldbufptr < ptr)
1258         PL_parser->oldbufptr = ptr;
1259     if (PL_parser->oldoldbufptr < ptr)
1260         PL_parser->oldoldbufptr = ptr;
1261     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1262         PL_parser->last_uni = NULL;
1263     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1264         PL_parser->last_lop = NULL;
1265     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1266     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1267     PL_parser->bufend -= discard_len;
1268     PL_parser->bufptr -= discard_len;
1269     PL_parser->oldbufptr -= discard_len;
1270     PL_parser->oldoldbufptr -= discard_len;
1271     if (PL_parser->last_uni)
1272         PL_parser->last_uni -= discard_len;
1273     if (PL_parser->last_lop)
1274         PL_parser->last_lop -= discard_len;
1275 }
1276
1277 void
1278 Perl_notify_parser_that_changed_to_utf8(pTHX)
1279 {
1280     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1281      * off to on.  At compile time, this has the effect of entering a 'use
1282      * utf8' section.  This means that any input was not previously checked for
1283      * UTF-8 (because it was off), but now we do need to check it, or our
1284      * assumptions about the input being sane could be wrong, and we could
1285      * segfault.  This routine just sets a flag so that the next time we look
1286      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1287      * proper phase, there may not be a parser object, but if there is, setting
1288      * the flag is harmless */
1289
1290     if (PL_parser) {
1291         PL_parser->recheck_utf8_validity = TRUE;
1292     }
1293 }
1294
1295 /*
1296 =for apidoc lex_next_chunk
1297
1298 Reads in the next chunk of text to be lexed, appending it to
1299 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1300 looked to the end of the current chunk and wants to know more.  It is
1301 usual, but not necessary, for lexing to have consumed the entirety of
1302 the current chunk at this time.
1303
1304 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1305 chunk (i.e., the current chunk has been entirely consumed), normally the
1306 current chunk will be discarded at the same time that the new chunk is
1307 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1308 will not be discarded.  If the current chunk has not been entirely
1309 consumed, then it will not be discarded regardless of the flag.
1310
1311 Returns true if some new text was added to the buffer, or false if the
1312 buffer has reached the end of the input text.
1313
1314 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1315
1316 =cut
1317 */
1318
1319 #define LEX_FAKE_EOF 0x80000000
1320 #define LEX_NO_TERM  0x40000000 /* here-doc */
1321
1322 bool
1323 Perl_lex_next_chunk(pTHX_ U32 flags)
1324 {
1325     SV *linestr;
1326     char *buf;
1327     STRLEN old_bufend_pos, new_bufend_pos;
1328     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1329     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1330     bool got_some_for_debugger = 0;
1331     bool got_some;
1332
1333     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1334         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1335     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1336         return FALSE;
1337     linestr = PL_parser->linestr;
1338     buf = SvPVX(linestr);
1339     if (!(flags & LEX_KEEP_PREVIOUS)
1340           && PL_parser->bufptr == PL_parser->bufend)
1341     {
1342         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1343         linestart_pos = 0;
1344         if (PL_parser->last_uni != PL_parser->bufend)
1345             PL_parser->last_uni = NULL;
1346         if (PL_parser->last_lop != PL_parser->bufend)
1347             PL_parser->last_lop = NULL;
1348         last_uni_pos = last_lop_pos = 0;
1349         *buf = 0;
1350         SvCUR_set(linestr, 0);
1351     } else {
1352         old_bufend_pos = PL_parser->bufend - buf;
1353         bufptr_pos = PL_parser->bufptr - buf;
1354         oldbufptr_pos = PL_parser->oldbufptr - buf;
1355         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1356         linestart_pos = PL_parser->linestart - buf;
1357         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1358         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1359     }
1360     if (flags & LEX_FAKE_EOF) {
1361         goto eof;
1362     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1363         got_some = 0;
1364     } else if (filter_gets(linestr, old_bufend_pos)) {
1365         got_some = 1;
1366         got_some_for_debugger = 1;
1367     } else if (flags & LEX_NO_TERM) {
1368         got_some = 0;
1369     } else {
1370         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1371             SvPVCLEAR(linestr);
1372         eof:
1373         /* End of real input.  Close filehandle (unless it was STDIN),
1374          * then add implicit termination.
1375          */
1376         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1377             PerlIO_clearerr(PL_parser->rsfp);
1378         else if (PL_parser->rsfp)
1379             (void)PerlIO_close(PL_parser->rsfp);
1380         PL_parser->rsfp = NULL;
1381         PL_parser->in_pod = PL_parser->filtered = 0;
1382         if (!PL_in_eval && PL_minus_p) {
1383             sv_catpvs(linestr,
1384                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1385             PL_minus_n = PL_minus_p = 0;
1386         } else if (!PL_in_eval && PL_minus_n) {
1387             sv_catpvs(linestr, /*{*/";}");
1388             PL_minus_n = 0;
1389         } else
1390             sv_catpvs(linestr, ";");
1391         got_some = 1;
1392     }
1393     buf = SvPVX(linestr);
1394     new_bufend_pos = SvCUR(linestr);
1395     PL_parser->bufend = buf + new_bufend_pos;
1396     PL_parser->bufptr = buf + bufptr_pos;
1397
1398     if (UTF) {
1399         const U8* first_bad_char_loc;
1400         if (UNLIKELY(! is_utf8_string_loc(
1401                             (U8 *) PL_parser->bufptr,
1402                                    PL_parser->bufend - PL_parser->bufptr,
1403                                    &first_bad_char_loc)))
1404         {
1405             _force_out_malformed_utf8_message(first_bad_char_loc,
1406                                               (U8 *) PL_parser->bufend,
1407                                               0,
1408                                               1 /* 1 means die */ );
1409             NOT_REACHED; /* NOTREACHED */
1410         }
1411     }
1412
1413     PL_parser->oldbufptr = buf + oldbufptr_pos;
1414     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1415     PL_parser->linestart = buf + linestart_pos;
1416     if (PL_parser->last_uni)
1417         PL_parser->last_uni = buf + last_uni_pos;
1418     if (PL_parser->last_lop)
1419         PL_parser->last_lop = buf + last_lop_pos;
1420     if (PL_parser->preambling != NOLINE) {
1421         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1422         PL_parser->preambling = NOLINE;
1423     }
1424     if (   got_some_for_debugger
1425         && PERLDB_LINE_OR_SAVESRC
1426         && PL_curstash != PL_debstash)
1427     {
1428         /* debugger active and we're not compiling the debugger code,
1429          * so store the line into the debugger's array of lines
1430          */
1431         update_debugger_info(NULL, buf+old_bufend_pos,
1432             new_bufend_pos-old_bufend_pos);
1433     }
1434     return got_some;
1435 }
1436
1437 /*
1438 =for apidoc lex_peek_unichar
1439
1440 Looks ahead one (Unicode) character in the text currently being lexed.
1441 Returns the codepoint (unsigned integer value) of the next character,
1442 or -1 if lexing has reached the end of the input text.  To consume the
1443 peeked character, use L</lex_read_unichar>.
1444
1445 If the next character is in (or extends into) the next chunk of input
1446 text, the next chunk will be read in.  Normally the current chunk will be
1447 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1448 bit set, then the current chunk will not be discarded.
1449
1450 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1451 is encountered, an exception is generated.
1452
1453 =cut
1454 */
1455
1456 I32
1457 Perl_lex_peek_unichar(pTHX_ U32 flags)
1458 {
1459     char *s, *bufend;
1460     if (flags & ~(LEX_KEEP_PREVIOUS))
1461         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1462     s = PL_parser->bufptr;
1463     bufend = PL_parser->bufend;
1464     if (UTF) {
1465         U8 head;
1466         I32 unichar;
1467         STRLEN len, retlen;
1468         if (s == bufend) {
1469             if (!lex_next_chunk(flags))
1470                 return -1;
1471             s = PL_parser->bufptr;
1472             bufend = PL_parser->bufend;
1473         }
1474         head = (U8)*s;
1475         if (UTF8_IS_INVARIANT(head))
1476             return head;
1477         if (UTF8_IS_START(head)) {
1478             len = UTF8SKIP(&head);
1479             while ((STRLEN)(bufend-s) < len) {
1480                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1481                     break;
1482                 s = PL_parser->bufptr;
1483                 bufend = PL_parser->bufend;
1484             }
1485         }
1486         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1487         if (retlen == (STRLEN)-1) {
1488             _force_out_malformed_utf8_message((U8 *) s,
1489                                               (U8 *) bufend,
1490                                               0,
1491                                               1 /* 1 means die */ );
1492             NOT_REACHED; /* NOTREACHED */
1493         }
1494         return unichar;
1495     } else {
1496         if (s == bufend) {
1497             if (!lex_next_chunk(flags))
1498                 return -1;
1499             s = PL_parser->bufptr;
1500         }
1501         return (U8)*s;
1502     }
1503 }
1504
1505 /*
1506 =for apidoc lex_read_unichar
1507
1508 Reads the next (Unicode) character in the text currently being lexed.
1509 Returns the codepoint (unsigned integer value) of the character read,
1510 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1511 if lexing has reached the end of the input text.  To non-destructively
1512 examine the next character, use L</lex_peek_unichar> instead.
1513
1514 If the next character is in (or extends into) the next chunk of input
1515 text, the next chunk will be read in.  Normally the current chunk will be
1516 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1517 bit set, then the current chunk will not be discarded.
1518
1519 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1520 is encountered, an exception is generated.
1521
1522 =cut
1523 */
1524
1525 I32
1526 Perl_lex_read_unichar(pTHX_ U32 flags)
1527 {
1528     I32 c;
1529     if (flags & ~(LEX_KEEP_PREVIOUS))
1530         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1531     c = lex_peek_unichar(flags);
1532     if (c != -1) {
1533         if (c == '\n')
1534             COPLINE_INC_WITH_HERELINES;
1535         if (UTF)
1536             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1537         else
1538             ++(PL_parser->bufptr);
1539     }
1540     return c;
1541 }
1542
1543 /*
1544 =for apidoc lex_read_space
1545
1546 Reads optional spaces, in Perl style, in the text currently being
1547 lexed.  The spaces may include ordinary whitespace characters and
1548 Perl-style comments.  C<#line> directives are processed if encountered.
1549 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1550 at a non-space character (or the end of the input text).
1551
1552 If spaces extend into the next chunk of input text, the next chunk will
1553 be read in.  Normally the current chunk will be discarded at the same
1554 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1555 chunk will not be discarded.
1556
1557 =cut
1558 */
1559
1560 #define LEX_NO_INCLINE    0x40000000
1561 #define LEX_NO_NEXT_CHUNK 0x80000000
1562
1563 void
1564 Perl_lex_read_space(pTHX_ U32 flags)
1565 {
1566     char *s, *bufend;
1567     const bool can_incline = !(flags & LEX_NO_INCLINE);
1568     bool need_incline = 0;
1569     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1570         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1571     s = PL_parser->bufptr;
1572     bufend = PL_parser->bufend;
1573     while (1) {
1574         char c = *s;
1575         if (c == '#') {
1576             do {
1577                 c = *++s;
1578             } while (!(c == '\n' || (c == 0 && s == bufend)));
1579         } else if (c == '\n') {
1580             s++;
1581             if (can_incline) {
1582                 PL_parser->linestart = s;
1583                 if (s == bufend)
1584                     need_incline = 1;
1585                 else
1586                     incline(s, bufend);
1587             }
1588         } else if (isSPACE(c)) {
1589             s++;
1590         } else if (c == 0 && s == bufend) {
1591             bool got_more;
1592             line_t l;
1593             if (flags & LEX_NO_NEXT_CHUNK)
1594                 break;
1595             PL_parser->bufptr = s;
1596             l = CopLINE(PL_curcop);
1597             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1598             got_more = lex_next_chunk(flags);
1599             CopLINE_set(PL_curcop, l);
1600             s = PL_parser->bufptr;
1601             bufend = PL_parser->bufend;
1602             if (!got_more)
1603                 break;
1604             if (can_incline && need_incline && PL_parser->rsfp) {
1605                 incline(s, bufend);
1606                 need_incline = 0;
1607             }
1608         } else if (!c) {
1609             s++;
1610         } else {
1611             break;
1612         }
1613     }
1614     PL_parser->bufptr = s;
1615 }
1616
1617 /*
1618
1619 =for apidoc validate_proto
1620
1621 This function performs syntax checking on a prototype, C<proto>.
1622 If C<warn> is true, any illegal characters or mismatched brackets
1623 will trigger illegalproto warnings, declaring that they were
1624 detected in the prototype for C<name>.
1625
1626 The return value is C<true> if this is a valid prototype, and
1627 C<false> if it is not, regardless of whether C<warn> was C<true> or
1628 C<false>.
1629
1630 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1631
1632 =cut
1633
1634  */
1635
1636 bool
1637 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1638 {
1639     STRLEN len, origlen;
1640     char *p;
1641     bool bad_proto = FALSE;
1642     bool in_brackets = FALSE;
1643     bool after_slash = FALSE;
1644     char greedy_proto = ' ';
1645     bool proto_after_greedy_proto = FALSE;
1646     bool must_be_last = FALSE;
1647     bool underscore = FALSE;
1648     bool bad_proto_after_underscore = FALSE;
1649
1650     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1651
1652     if (!proto)
1653         return TRUE;
1654
1655     p = SvPV(proto, len);
1656     origlen = len;
1657     for (; len--; p++) {
1658         if (!isSPACE(*p)) {
1659             if (must_be_last)
1660                 proto_after_greedy_proto = TRUE;
1661             if (underscore) {
1662                 if (!memCHRs(";@%", *p))
1663                     bad_proto_after_underscore = TRUE;
1664                 underscore = FALSE;
1665             }
1666             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1667                 bad_proto = TRUE;
1668             }
1669             else {
1670                 if (*p == '[')
1671                     in_brackets = TRUE;
1672                 else if (*p == ']')
1673                     in_brackets = FALSE;
1674                 else if ((*p == '@' || *p == '%')
1675                          && !after_slash
1676                          && !in_brackets )
1677                 {
1678                     must_be_last = TRUE;
1679                     greedy_proto = *p;
1680                 }
1681                 else if (*p == '_')
1682                     underscore = TRUE;
1683             }
1684             if (*p == '\\')
1685                 after_slash = TRUE;
1686             else
1687                 after_slash = FALSE;
1688         }
1689     }
1690
1691     if (warn) {
1692         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1693         p -= origlen;
1694         p = SvUTF8(proto)
1695             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1696                              origlen, UNI_DISPLAY_ISPRINT)
1697             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1698
1699         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1700             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1701             sv_catpvs(name2, "::");
1702             sv_catsv(name2, (SV *)name);
1703             name = name2;
1704         }
1705
1706         if (proto_after_greedy_proto)
1707             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1708                         "Prototype after '%c' for %" SVf " : %s",
1709                         greedy_proto, SVfARG(name), p);
1710         if (in_brackets)
1711             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1712                         "Missing ']' in prototype for %" SVf " : %s",
1713                         SVfARG(name), p);
1714         if (bad_proto)
1715             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1716                         "Illegal character in prototype for %" SVf " : %s",
1717                         SVfARG(name), p);
1718         if (bad_proto_after_underscore)
1719             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1720                         "Illegal character after '_' in prototype for %" SVf " : %s",
1721                         SVfARG(name), p);
1722     }
1723
1724     return (! (proto_after_greedy_proto || bad_proto) );
1725 }
1726
1727 /*
1728  * S_incline
1729  * This subroutine has nothing to do with tilting, whether at windmills
1730  * or pinball tables.  Its name is short for "increment line".  It
1731  * increments the current line number in CopLINE(PL_curcop) and checks
1732  * to see whether the line starts with a comment of the form
1733  *    # line 500 "foo.pm"
1734  * If so, it sets the current line number and file to the values in the comment.
1735  */
1736
1737 STATIC void
1738 S_incline(pTHX_ const char *s, const char *end)
1739 {
1740     const char *t;
1741     const char *n;
1742     const char *e;
1743     line_t line_num;
1744     UV uv;
1745
1746     PERL_ARGS_ASSERT_INCLINE;
1747
1748     assert(end >= s);
1749
1750     COPLINE_INC_WITH_HERELINES;
1751     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1752      && s+1 == PL_bufend && *s == ';') {
1753         /* fake newline in string eval */
1754         CopLINE_dec(PL_curcop);
1755         return;
1756     }
1757     if (*s++ != '#')
1758         return;
1759     while (SPACE_OR_TAB(*s))
1760         s++;
1761     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1762         s += sizeof("line") - 1;
1763     else
1764         return;
1765     if (SPACE_OR_TAB(*s))
1766         s++;
1767     else
1768         return;
1769     while (SPACE_OR_TAB(*s))
1770         s++;
1771     if (!isDIGIT(*s))
1772         return;
1773
1774     n = s;
1775     while (isDIGIT(*s))
1776         s++;
1777     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1778         return;
1779     while (SPACE_OR_TAB(*s))
1780         s++;
1781     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1782         s++;
1783         e = t + 1;
1784     }
1785     else {
1786         t = s;
1787         while (*t && !isSPACE(*t))
1788             t++;
1789         e = t;
1790     }
1791     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1792         e++;
1793     if (*e != '\n' && *e != '\0')
1794         return;         /* false alarm */
1795
1796     if (!grok_atoUV(n, &uv, &e))
1797         return;
1798     line_num = ((line_t)uv) - 1;
1799
1800     if (t - s > 0) {
1801         const STRLEN len = t - s;
1802
1803         if (!PL_rsfp && !PL_parser->filtered) {
1804             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1805              * to *{"::_<newfilename"} */
1806             /* However, the long form of evals is only turned on by the
1807                debugger - usually they're "(eval %lu)" */
1808             GV * const cfgv = CopFILEGV(PL_curcop);
1809             if (cfgv) {
1810                 char smallbuf[128];
1811                 STRLEN tmplen2 = len;
1812                 char *tmpbuf2;
1813                 GV *gv2;
1814
1815                 if (tmplen2 + 2 <= sizeof smallbuf)
1816                     tmpbuf2 = smallbuf;
1817                 else
1818                     Newx(tmpbuf2, tmplen2 + 2, char);
1819
1820                 tmpbuf2[0] = '_';
1821                 tmpbuf2[1] = '<';
1822
1823                 memcpy(tmpbuf2 + 2, s, tmplen2);
1824                 tmplen2 += 2;
1825
1826                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1827                 if (!isGV(gv2)) {
1828                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1829                     /* adjust ${"::_<newfilename"} to store the new file name */
1830                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1831                     /* The line number may differ. If that is the case,
1832                        alias the saved lines that are in the array.
1833                        Otherwise alias the whole array. */
1834                     if (CopLINE(PL_curcop) == line_num) {
1835                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1836                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1837                     }
1838                     else if (GvAV(cfgv)) {
1839                         AV * const av = GvAV(cfgv);
1840                         const line_t start = CopLINE(PL_curcop)+1;
1841                         SSize_t items = AvFILLp(av) - start;
1842                         if (items > 0) {
1843                             AV * const av2 = GvAVn(gv2);
1844                             SV **svp = AvARRAY(av) + start;
1845                             Size_t l = line_num+1;
1846                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1847                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1848                         }
1849                     }
1850                 }
1851
1852                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1853             }
1854         }
1855         CopFILE_free(PL_curcop);
1856         CopFILE_setn(PL_curcop, s, len);
1857     }
1858     CopLINE_set(PL_curcop, line_num);
1859 }
1860
1861 STATIC void
1862 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1863 {
1864     AV *av = CopFILEAVx(PL_curcop);
1865     if (av) {
1866         SV * sv;
1867         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1868         else {
1869             sv = *av_fetch(av, 0, 1);
1870             SvUPGRADE(sv, SVt_PVMG);
1871         }
1872         if (!SvPOK(sv)) SvPVCLEAR(sv);
1873         if (orig_sv)
1874             sv_catsv(sv, orig_sv);
1875         else
1876             sv_catpvn(sv, buf, len);
1877         if (!SvIOK(sv)) {
1878             (void)SvIOK_on(sv);
1879             SvIV_set(sv, 0);
1880         }
1881         if (PL_parser->preambling == NOLINE)
1882             av_store(av, CopLINE(PL_curcop), sv);
1883     }
1884 }
1885
1886 /*
1887  * skipspace
1888  * Called to gobble the appropriate amount and type of whitespace.
1889  * Skips comments as well.
1890  * Returns the next character after the whitespace that is skipped.
1891  *
1892  * peekspace
1893  * Same thing, but look ahead without incrementing line numbers or
1894  * adjusting PL_linestart.
1895  */
1896
1897 #define skipspace(s) skipspace_flags(s, 0)
1898 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1899
1900 char *
1901 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1902 {
1903     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1904     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1905         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1906             s++;
1907     } else {
1908         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1909         PL_bufptr = s;
1910         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1911                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1912                     LEX_NO_NEXT_CHUNK : 0));
1913         s = PL_bufptr;
1914         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1915         if (PL_linestart > PL_bufptr)
1916             PL_bufptr = PL_linestart;
1917         return s;
1918     }
1919     return s;
1920 }
1921
1922 /*
1923  * S_check_uni
1924  * Check the unary operators to ensure there's no ambiguity in how they're
1925  * used.  An ambiguous piece of code would be:
1926  *     rand + 5
1927  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1928  * the +5 is its argument.
1929  */
1930
1931 STATIC void
1932 S_check_uni(pTHX)
1933 {
1934     const char *s;
1935
1936     if (PL_oldoldbufptr != PL_last_uni)
1937         return;
1938     while (isSPACE(*PL_last_uni))
1939         PL_last_uni++;
1940     s = PL_last_uni;
1941     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1942         s += UTF ? UTF8SKIP(s) : 1;
1943     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1944         return;
1945
1946     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1947                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1948                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1949 }
1950
1951 /*
1952  * LOP : macro to build a list operator.  Its behaviour has been replaced
1953  * with a subroutine, S_lop() for which LOP is just another name.
1954  */
1955
1956 #define LOP(f,x) return lop(f,x,s)
1957
1958 /*
1959  * S_lop
1960  * Build a list operator (or something that might be one).  The rules:
1961  *  - if we have a next token, then it's a list operator (no parens) for
1962  *    which the next token has already been parsed; e.g.,
1963  *       sort foo @args
1964  *       sort foo (@args)
1965  *  - if the next thing is an opening paren, then it's a function
1966  *  - else it's a list operator
1967  */
1968
1969 STATIC I32
1970 S_lop(pTHX_ I32 f, U8 x, char *s)
1971 {
1972     PERL_ARGS_ASSERT_LOP;
1973
1974     pl_yylval.ival = f;
1975     CLINE;
1976     PL_bufptr = s;
1977     PL_last_lop = PL_oldbufptr;
1978     PL_last_lop_op = (OPCODE)f;
1979     if (PL_nexttoke)
1980         goto lstop;
1981     PL_expect = x;
1982     if (*s == '(')
1983         return REPORT(FUNC);
1984     s = skipspace(s);
1985     if (*s == '(')
1986         return REPORT(FUNC);
1987     else {
1988         lstop:
1989         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1990             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1991         return REPORT(LSTOP);
1992     }
1993 }
1994
1995 /*
1996  * S_force_next
1997  * When the lexer realizes it knows the next token (for instance,
1998  * it is reordering tokens for the parser) then it can call S_force_next
1999  * to know what token to return the next time the lexer is called.  Caller
2000  * will need to set PL_nextval[] and possibly PL_expect to ensure
2001  * the lexer handles the token correctly.
2002  */
2003
2004 STATIC void
2005 S_force_next(pTHX_ I32 type)
2006 {
2007 #ifdef DEBUGGING
2008     if (DEBUG_T_TEST) {
2009         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2010         tokereport(type, &NEXTVAL_NEXTTOKE);
2011     }
2012 #endif
2013     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2014     PL_nexttype[PL_nexttoke] = type;
2015     PL_nexttoke++;
2016 }
2017
2018 /*
2019  * S_postderef
2020  *
2021  * This subroutine handles postfix deref syntax after the arrow has already
2022  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2023  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2024  * only the first, leaving yylex to find the next.
2025  */
2026
2027 static int
2028 S_postderef(pTHX_ int const funny, char const next)
2029 {
2030     assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
2031     if (next == '*') {
2032         PL_expect = XOPERATOR;
2033         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2034             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2035             PL_lex_state = LEX_INTERPEND;
2036             if ('@' == funny)
2037                 force_next(POSTJOIN);
2038         }
2039         force_next(next);
2040         PL_bufptr+=2;
2041     }
2042     else {
2043         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2044          && !PL_lex_brackets)
2045             PL_lex_dojoin = 2;
2046         PL_expect = XOPERATOR;
2047         PL_bufptr++;
2048     }
2049     return funny;
2050 }
2051
2052 void
2053 Perl_yyunlex(pTHX)
2054 {
2055     int yyc = PL_parser->yychar;
2056     if (yyc != YYEMPTY) {
2057         if (yyc) {
2058             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2059             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2060                 PL_lex_allbrackets--;
2061                 PL_lex_brackets--;
2062                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2063             } else if (yyc == '('/*)*/) {
2064                 PL_lex_allbrackets--;
2065                 yyc |= (2<<24);
2066             }
2067             force_next(yyc);
2068         }
2069         PL_parser->yychar = YYEMPTY;
2070     }
2071 }
2072
2073 STATIC SV *
2074 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2075 {
2076     SV * const sv = newSVpvn_utf8(start, len,
2077                     ! IN_BYTES
2078                   &&  UTF
2079                   &&  len != 0
2080                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2081     return sv;
2082 }
2083
2084 /*
2085  * S_force_word
2086  * When the lexer knows the next thing is a word (for instance, it has
2087  * just seen -> and it knows that the next char is a word char, then
2088  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2089  * lookahead.
2090  *
2091  * Arguments:
2092  *   char *start : buffer position (must be within PL_linestr)
2093  *   int token   : PL_next* will be this type of bare word
2094  *                 (e.g., METHOD,BAREWORD)
2095  *   int check_keyword : if true, Perl checks to make sure the word isn't
2096  *       a keyword (do this if the word is a label, e.g. goto FOO)
2097  *   int allow_pack : if true, : characters will also be allowed (require,
2098  *       use, etc. do this)
2099  */
2100
2101 STATIC char *
2102 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2103 {
2104     char *s;
2105     STRLEN len;
2106
2107     PERL_ARGS_ASSERT_FORCE_WORD;
2108
2109     start = skipspace(start);
2110     s = start;
2111     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2112         || (allow_pack && *s == ':' && s[1] == ':') )
2113     {
2114         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2115         if (check_keyword) {
2116           char *s2 = PL_tokenbuf;
2117           STRLEN len2 = len;
2118           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2119             s2 += sizeof("CORE::") - 1;
2120             len2 -= sizeof("CORE::") - 1;
2121           }
2122           if (keyword(s2, len2, 0))
2123             return start;
2124         }
2125         if (token == METHOD) {
2126             s = skipspace(s);
2127             if (*s == '(')
2128                 PL_expect = XTERM;
2129             else {
2130                 PL_expect = XOPERATOR;
2131             }
2132         }
2133         NEXTVAL_NEXTTOKE.opval
2134             = newSVOP(OP_CONST,0,
2135                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2136         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2137         force_next(token);
2138     }
2139     return s;
2140 }
2141
2142 /*
2143  * S_force_ident
2144  * Called when the lexer wants $foo *foo &foo etc, but the program
2145  * text only contains the "foo" portion.  The first argument is a pointer
2146  * to the "foo", and the second argument is the type symbol to prefix.
2147  * Forces the next token to be a "BAREWORD".
2148  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2149  */
2150
2151 STATIC void
2152 S_force_ident(pTHX_ const char *s, int kind)
2153 {
2154     PERL_ARGS_ASSERT_FORCE_IDENT;
2155
2156     if (s[0]) {
2157         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2158         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2159                                                                 UTF ? SVf_UTF8 : 0));
2160         NEXTVAL_NEXTTOKE.opval = o;
2161         force_next(BAREWORD);
2162         if (kind) {
2163             o->op_private = OPpCONST_ENTERED;
2164             /* XXX see note in pp_entereval() for why we forgo typo
2165                warnings if the symbol must be introduced in an eval.
2166                GSAR 96-10-12 */
2167             gv_fetchpvn_flags(s, len,
2168                               (PL_in_eval ? GV_ADDMULTI
2169                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2170                               kind == '$' ? SVt_PV :
2171                               kind == '@' ? SVt_PVAV :
2172                               kind == '%' ? SVt_PVHV :
2173                               SVt_PVGV
2174                               );
2175         }
2176     }
2177 }
2178
2179 static void
2180 S_force_ident_maybe_lex(pTHX_ char pit)
2181 {
2182     NEXTVAL_NEXTTOKE.ival = pit;
2183     force_next('p');
2184 }
2185
2186 NV
2187 Perl_str_to_version(pTHX_ SV *sv)
2188 {
2189     NV retval = 0.0;
2190     NV nshift = 1.0;
2191     STRLEN len;
2192     const char *start = SvPV_const(sv,len);
2193     const char * const end = start + len;
2194     const bool utf = cBOOL(SvUTF8(sv));
2195
2196     PERL_ARGS_ASSERT_STR_TO_VERSION;
2197
2198     while (start < end) {
2199         STRLEN skip;
2200         UV n;
2201         if (utf)
2202             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2203         else {
2204             n = *(U8*)start;
2205             skip = 1;
2206         }
2207         retval += ((NV)n)/nshift;
2208         start += skip;
2209         nshift *= 1000;
2210     }
2211     return retval;
2212 }
2213
2214 /*
2215  * S_force_version
2216  * Forces the next token to be a version number.
2217  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2218  * and if "guessing" is TRUE, then no new token is created (and the caller
2219  * must use an alternative parsing method).
2220  */
2221
2222 STATIC char *
2223 S_force_version(pTHX_ char *s, int guessing)
2224 {
2225     OP *version = NULL;
2226     char *d;
2227
2228     PERL_ARGS_ASSERT_FORCE_VERSION;
2229
2230     s = skipspace(s);
2231
2232     d = s;
2233     if (*d == 'v')
2234         d++;
2235     if (isDIGIT(*d)) {
2236         while (isDIGIT(*d) || *d == '_' || *d == '.')
2237             d++;
2238         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2239             SV *ver;
2240             s = scan_num(s, &pl_yylval);
2241             version = pl_yylval.opval;
2242             ver = cSVOPx(version)->op_sv;
2243             if (SvPOK(ver) && !SvNIOK(ver)) {
2244                 SvUPGRADE(ver, SVt_PVNV);
2245                 SvNV_set(ver, str_to_version(ver));
2246                 SvNOK_on(ver);          /* hint that it is a version */
2247             }
2248         }
2249         else if (guessing) {
2250             return s;
2251         }
2252     }
2253
2254     /* NOTE: The parser sees the package name and the VERSION swapped */
2255     NEXTVAL_NEXTTOKE.opval = version;
2256     force_next(BAREWORD);
2257
2258     return s;
2259 }
2260
2261 /*
2262  * S_force_strict_version
2263  * Forces the next token to be a version number using strict syntax rules.
2264  */
2265
2266 STATIC char *
2267 S_force_strict_version(pTHX_ char *s)
2268 {
2269     OP *version = NULL;
2270     const char *errstr = NULL;
2271
2272     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2273
2274     while (isSPACE(*s)) /* leading whitespace */
2275         s++;
2276
2277     if (is_STRICT_VERSION(s,&errstr)) {
2278         SV *ver = newSV(0);
2279         s = (char *)scan_version(s, ver, 0);
2280         version = newSVOP(OP_CONST, 0, ver);
2281     }
2282     else if ((*s != ';' && *s != '{' && *s != '}' )
2283              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2284     {
2285         PL_bufptr = s;
2286         if (errstr)
2287             yyerror(errstr); /* version required */
2288         return s;
2289     }
2290
2291     /* NOTE: The parser sees the package name and the VERSION swapped */
2292     NEXTVAL_NEXTTOKE.opval = version;
2293     force_next(BAREWORD);
2294
2295     return s;
2296 }
2297
2298 /*
2299  * S_tokeq
2300  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2301  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2302  * unchanged, and a new SV containing the modified input is returned.
2303  */
2304
2305 STATIC SV *
2306 S_tokeq(pTHX_ SV *sv)
2307 {
2308     char *s;
2309     char *send;
2310     char *d;
2311     SV *pv = sv;
2312
2313     PERL_ARGS_ASSERT_TOKEQ;
2314
2315     assert (SvPOK(sv));
2316     assert (SvLEN(sv));
2317     assert (!SvIsCOW(sv));
2318     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2319         goto finish;
2320     s = SvPVX(sv);
2321     send = SvEND(sv);
2322     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2323     while (s < send && !(*s == '\\' && s[1] == '\\'))
2324         s++;
2325     if (s == send)
2326         goto finish;
2327     d = s;
2328     if ( PL_hints & HINT_NEW_STRING ) {
2329         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2330                             SVs_TEMP | SvUTF8(sv));
2331     }
2332     while (s < send) {
2333         if (*s == '\\') {
2334             if (s + 1 < send && (s[1] == '\\'))
2335                 s++;            /* all that, just for this */
2336         }
2337         *d++ = *s++;
2338     }
2339     *d = '\0';
2340     SvCUR_set(sv, d - SvPVX_const(sv));
2341   finish:
2342     if ( PL_hints & HINT_NEW_STRING )
2343        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2344     return sv;
2345 }
2346
2347 /*
2348  * Now come three functions related to double-quote context,
2349  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2350  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2351  * interact with PL_lex_state, and create fake ( ... ) argument lists
2352  * to handle functions and concatenation.
2353  * For example,
2354  *   "foo\lbar"
2355  * is tokenised as
2356  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2357  */
2358
2359 /*
2360  * S_sublex_start
2361  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2362  *
2363  * Pattern matching will set PL_lex_op to the pattern-matching op to
2364  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2365  *
2366  * OP_CONST is easy--just make the new op and return.
2367  *
2368  * Everything else becomes a FUNC.
2369  *
2370  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2371  * had an OP_CONST.  This just sets us up for a
2372  * call to S_sublex_push().
2373  */
2374
2375 STATIC I32
2376 S_sublex_start(pTHX)
2377 {
2378     const I32 op_type = pl_yylval.ival;
2379
2380     if (op_type == OP_NULL) {
2381         pl_yylval.opval = PL_lex_op;
2382         PL_lex_op = NULL;
2383         return THING;
2384     }
2385     if (op_type == OP_CONST) {
2386         SV *sv = PL_lex_stuff;
2387         PL_lex_stuff = NULL;
2388         sv = tokeq(sv);
2389
2390         if (SvTYPE(sv) == SVt_PVIV) {
2391             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2392             STRLEN len;
2393             const char * const p = SvPV_const(sv, len);
2394             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2395             SvREFCNT_dec(sv);
2396             sv = nsv;
2397         }
2398         pl_yylval.opval = newSVOP(op_type, 0, sv);
2399         return THING;
2400     }
2401
2402     PL_parser->lex_super_state = PL_lex_state;
2403     PL_parser->lex_sub_inwhat = (U16)op_type;
2404     PL_parser->lex_sub_op = PL_lex_op;
2405     PL_parser->sub_no_recover = FALSE;
2406     PL_parser->sub_error_count = PL_error_count;
2407     PL_lex_state = LEX_INTERPPUSH;
2408
2409     PL_expect = XTERM;
2410     if (PL_lex_op) {
2411         pl_yylval.opval = PL_lex_op;
2412         PL_lex_op = NULL;
2413         return PMFUNC;
2414     }
2415     else
2416         return FUNC;
2417 }
2418
2419 /*
2420  * S_sublex_push
2421  * Create a new scope to save the lexing state.  The scope will be
2422  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2423  * to the uc, lc, etc. found before.
2424  * Sets PL_lex_state to LEX_INTERPCONCAT.
2425  */
2426
2427 STATIC I32
2428 S_sublex_push(pTHX)
2429 {
2430     LEXSHARED *shared;
2431     const bool is_heredoc = PL_multi_close == '<';
2432     ENTER;
2433
2434     PL_lex_state = PL_parser->lex_super_state;
2435     SAVEI8(PL_lex_dojoin);
2436     SAVEI32(PL_lex_brackets);
2437     SAVEI32(PL_lex_allbrackets);
2438     SAVEI32(PL_lex_formbrack);
2439     SAVEI8(PL_lex_fakeeof);
2440     SAVEI32(PL_lex_casemods);
2441     SAVEI32(PL_lex_starts);
2442     SAVEI8(PL_lex_state);
2443     SAVESPTR(PL_lex_repl);
2444     SAVEVPTR(PL_lex_inpat);
2445     SAVEI16(PL_lex_inwhat);
2446     if (is_heredoc)
2447     {
2448         SAVECOPLINE(PL_curcop);
2449         SAVEI32(PL_multi_end);
2450         SAVEI32(PL_parser->herelines);
2451         PL_parser->herelines = 0;
2452     }
2453     SAVEIV(PL_multi_close);
2454     SAVEPPTR(PL_bufptr);
2455     SAVEPPTR(PL_bufend);
2456     SAVEPPTR(PL_oldbufptr);
2457     SAVEPPTR(PL_oldoldbufptr);
2458     SAVEPPTR(PL_last_lop);
2459     SAVEPPTR(PL_last_uni);
2460     SAVEPPTR(PL_linestart);
2461     SAVESPTR(PL_linestr);
2462     SAVEGENERICPV(PL_lex_brackstack);
2463     SAVEGENERICPV(PL_lex_casestack);
2464     SAVEGENERICPV(PL_parser->lex_shared);
2465     SAVEBOOL(PL_parser->lex_re_reparsing);
2466     SAVEI32(PL_copline);
2467
2468     /* The here-doc parser needs to be able to peek into outer lexing
2469        scopes to find the body of the here-doc.  So we put PL_linestr and
2470        PL_bufptr into lex_shared, to ‘share’ those values.
2471      */
2472     PL_parser->lex_shared->ls_linestr = PL_linestr;
2473     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2474
2475     PL_linestr = PL_lex_stuff;
2476     PL_lex_repl = PL_parser->lex_sub_repl;
2477     PL_lex_stuff = NULL;
2478     PL_parser->lex_sub_repl = NULL;
2479
2480     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2481        set for an inner quote-like operator and then an error causes scope-
2482        popping.  We must not have a PL_lex_stuff value left dangling, as
2483        that breaks assumptions elsewhere.  See bug #123617.  */
2484     SAVEGENERICSV(PL_lex_stuff);
2485     SAVEGENERICSV(PL_parser->lex_sub_repl);
2486
2487     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2488         = SvPVX(PL_linestr);
2489     PL_bufend += SvCUR(PL_linestr);
2490     PL_last_lop = PL_last_uni = NULL;
2491     SAVEFREESV(PL_linestr);
2492     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2493
2494     PL_lex_dojoin = FALSE;
2495     PL_lex_brackets = PL_lex_formbrack = 0;
2496     PL_lex_allbrackets = 0;
2497     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2498     Newx(PL_lex_brackstack, 120, char);
2499     Newx(PL_lex_casestack, 12, char);
2500     PL_lex_casemods = 0;
2501     *PL_lex_casestack = '\0';
2502     PL_lex_starts = 0;
2503     PL_lex_state = LEX_INTERPCONCAT;
2504     if (is_heredoc)
2505         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2506     PL_copline = NOLINE;
2507
2508     Newxz(shared, 1, LEXSHARED);
2509     shared->ls_prev = PL_parser->lex_shared;
2510     PL_parser->lex_shared = shared;
2511
2512     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2513     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2514     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2515         PL_lex_inpat = PL_parser->lex_sub_op;
2516     else
2517         PL_lex_inpat = NULL;
2518
2519     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2520     PL_in_eval &= ~EVAL_RE_REPARSING;
2521
2522     return SUBLEXSTART;
2523 }
2524
2525 /*
2526  * S_sublex_done
2527  * Restores lexer state after a S_sublex_push.
2528  */
2529
2530 STATIC I32
2531 S_sublex_done(pTHX)
2532 {
2533     if (!PL_lex_starts++) {
2534         SV * const sv = newSVpvs("");
2535         if (SvUTF8(PL_linestr))
2536             SvUTF8_on(sv);
2537         PL_expect = XOPERATOR;
2538         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2539         return THING;
2540     }
2541
2542     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2543         PL_lex_state = LEX_INTERPCASEMOD;
2544         return yylex();
2545     }
2546
2547     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2548     assert(PL_lex_inwhat != OP_TRANSR);
2549     if (PL_lex_repl) {
2550         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2551         PL_linestr = PL_lex_repl;
2552         PL_lex_inpat = 0;
2553         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2554         PL_bufend += SvCUR(PL_linestr);
2555         PL_last_lop = PL_last_uni = NULL;
2556         PL_lex_dojoin = FALSE;
2557         PL_lex_brackets = 0;
2558         PL_lex_allbrackets = 0;
2559         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2560         PL_lex_casemods = 0;
2561         *PL_lex_casestack = '\0';
2562         PL_lex_starts = 0;
2563         if (SvEVALED(PL_lex_repl)) {
2564             PL_lex_state = LEX_INTERPNORMAL;
2565             PL_lex_starts++;
2566             /*  we don't clear PL_lex_repl here, so that we can check later
2567                 whether this is an evalled subst; that means we rely on the
2568                 logic to ensure sublex_done() is called again only via the
2569                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2570         }
2571         else {
2572             PL_lex_state = LEX_INTERPCONCAT;
2573             PL_lex_repl = NULL;
2574         }
2575         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2576             CopLINE(PL_curcop) +=
2577                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2578                  + PL_parser->herelines;
2579             PL_parser->herelines = 0;
2580         }
2581         return '/';
2582     }
2583     else {
2584         const line_t l = CopLINE(PL_curcop);
2585         LEAVE;
2586         if (PL_parser->sub_error_count != PL_error_count) {
2587             if (PL_parser->sub_no_recover) {
2588                 yyquit();
2589                 NOT_REACHED;
2590             }
2591         }
2592         if (PL_multi_close == '<')
2593             PL_parser->herelines += l - PL_multi_end;
2594         PL_bufend = SvPVX(PL_linestr);
2595         PL_bufend += SvCUR(PL_linestr);
2596         PL_expect = XOPERATOR;
2597         return SUBLEXEND;
2598     }
2599 }
2600
2601 HV *
2602 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2603                           const STRLEN context_len, const char ** error_msg)
2604 {
2605     /* Load the official _charnames module if not already there.  The
2606      * parameters are just to give info for any error messages generated:
2607      *  char_name   a name to look up which is the reason for loading this
2608      *  context     'char_name' in the context in the input in which it appears
2609      *  context_len how many bytes 'context' occupies
2610      *  error_msg   *error_msg will be set to any error
2611      *
2612      *  Returns the ^H table if success; otherwise NULL */
2613
2614     unsigned int i;
2615     HV * table;
2616     SV **cvp;
2617     SV * res;
2618
2619     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2620
2621     /* This loop is executed 1 1/2 times.  On the first time through, if it
2622      * isn't already loaded, try loading it, and iterate just once to see if it
2623      * worked.  */
2624     for (i = 0; i < 2; i++) {
2625         table = GvHV(PL_hintgv);                 /* ^H */
2626
2627         if (    table
2628             && (PL_hints & HINT_LOCALIZE_HH)
2629             && (cvp = hv_fetchs(table, "charnames", FALSE))
2630             &&  SvOK(*cvp))
2631         {
2632             return table;   /* Quit if already loaded */
2633         }
2634
2635         if (i == 0) {
2636             Perl_load_module(aTHX_
2637                 0,
2638                 newSVpvs("_charnames"),
2639
2640                 /* version parameter; no need to specify it, as if we get too early
2641                 * a version, will fail anyway, not being able to find 'charnames'
2642                 * */
2643                 NULL,
2644                 newSVpvs(":full"),
2645                 newSVpvs(":short"),
2646                 NULL);
2647         }
2648     }
2649
2650     /* Here, it failed; new_constant will give appropriate error messages */
2651     *error_msg = NULL;
2652     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2653                         context, context_len, error_msg);
2654     SvREFCNT_dec(res);
2655
2656     return NULL;
2657 }
2658
2659 STATIC SV*
2660 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2661 {
2662     /* This justs wraps get_and_check_backslash_N_name() to output any error
2663      * message it returns. */
2664
2665     const char * error_msg = NULL;
2666     SV * result;
2667
2668     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2669
2670     /* charnames doesn't work well if there have been errors found */
2671     if (PL_error_count > 0) {
2672         return NULL;
2673     }
2674
2675     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2676
2677     if (error_msg) {
2678         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2679     }
2680
2681     return result;
2682 }
2683
2684 SV*
2685 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2686                                           const char* const e,
2687                                           const bool is_utf8,
2688                                           const char ** error_msg)
2689 {
2690     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2691      * interior, hence to the "}".  Finds what the name resolves to, returning
2692      * an SV* containing it; NULL if no valid one found.
2693      *
2694      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2695      * doesn't have to be. */
2696
2697     SV* char_name;
2698     SV* res;
2699     HV * table;
2700     SV **cvp;
2701     SV *cv;
2702     SV *rv;
2703     HV *stash;
2704
2705     /* Points to the beginning of the \N{... so that any messages include the
2706      * context of what's failing*/
2707     const char* context = s - 3;
2708     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2709
2710
2711     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2712
2713     assert(e >= s);
2714     assert(s > (char *) 3);
2715
2716     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2717
2718     if (!SvCUR(char_name)) {
2719         SvREFCNT_dec_NN(char_name);
2720         /* diag_listed_as: Unknown charname '%s' */
2721         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2722         return NULL;
2723     }
2724
2725     /* Autoload the charnames module */
2726
2727     table = load_charnames(char_name, context, context_len, error_msg);
2728     if (table == NULL) {
2729         return NULL;
2730     }
2731
2732     *error_msg = NULL;
2733     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2734                         context, context_len, error_msg);
2735     if (*error_msg) {
2736         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2737
2738         SvREFCNT_dec(res);
2739         return NULL;
2740     }
2741
2742     /* See if the charnames handler is the Perl core's, and if so, we can skip
2743      * the validation needed for a user-supplied one, as Perl's does its own
2744      * validation. */
2745     cvp = hv_fetchs(table, "charnames", FALSE);
2746     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2747         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2748     {
2749         const char * const name = HvNAME(stash);
2750          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2751            return res;
2752        }
2753     }
2754
2755     /* Here, it isn't Perl's charname handler.  We can't rely on a
2756      * user-supplied handler to validate the input name.  For non-ut8 input,
2757      * look to see that the first character is legal.  Then loop through the
2758      * rest checking that each is a continuation */
2759
2760     /* This code makes the reasonable assumption that the only Latin1-range
2761      * characters that begin a character name alias are alphabetic, otherwise
2762      * would have to create a isCHARNAME_BEGIN macro */
2763
2764     if (! is_utf8) {
2765         if (! isALPHAU(*s)) {
2766             goto bad_charname;
2767         }
2768         s++;
2769         while (s < e) {
2770             if (! isCHARNAME_CONT(*s)) {
2771                 goto bad_charname;
2772             }
2773             if (*s == ' ' && *(s-1) == ' ') {
2774                 goto multi_spaces;
2775             }
2776             s++;
2777         }
2778     }
2779     else {
2780         /* Similarly for utf8.  For invariants can check directly; for other
2781          * Latin1, can calculate their code point and check; otherwise  use an
2782          * inversion list */
2783         if (UTF8_IS_INVARIANT(*s)) {
2784             if (! isALPHAU(*s)) {
2785                 goto bad_charname;
2786             }
2787             s++;
2788         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2789             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2790                 goto bad_charname;
2791             }
2792             s += 2;
2793         }
2794         else {
2795             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2796                                        utf8_to_uvchr_buf((U8 *) s,
2797                                                          (U8 *) e,
2798                                                          NULL)))
2799             {
2800                 goto bad_charname;
2801             }
2802             s += UTF8SKIP(s);
2803         }
2804
2805         while (s < e) {
2806             if (UTF8_IS_INVARIANT(*s)) {
2807                 if (! isCHARNAME_CONT(*s)) {
2808                     goto bad_charname;
2809                 }
2810                 if (*s == ' ' && *(s-1) == ' ') {
2811                     goto multi_spaces;
2812                 }
2813                 s++;
2814             }
2815             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2816                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2817                 {
2818                     goto bad_charname;
2819                 }
2820                 s += 2;
2821             }
2822             else {
2823                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2824                                            utf8_to_uvchr_buf((U8 *) s,
2825                                                              (U8 *) e,
2826                                                              NULL)))
2827                 {
2828                     goto bad_charname;
2829                 }
2830                 s += UTF8SKIP(s);
2831             }
2832         }
2833     }
2834     if (*(s-1) == ' ') {
2835         /* diag_listed_as: charnames alias definitions may not contain
2836                            trailing white-space; marked by <-- HERE in %s
2837          */
2838         *error_msg = Perl_form(aTHX_
2839             "charnames alias definitions may not contain trailing "
2840             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2841             (int)(s - context + 1), context,
2842             (int)(e - s + 1), s + 1);
2843         return NULL;
2844     }
2845
2846     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2847         const U8* first_bad_char_loc;
2848         STRLEN len;
2849         const char* const str = SvPV_const(res, len);
2850         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2851                                           &first_bad_char_loc)))
2852         {
2853             _force_out_malformed_utf8_message(first_bad_char_loc,
2854                                               (U8 *) PL_parser->bufend,
2855                                               0,
2856                                               0 /* 0 means don't die */ );
2857             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2858                                immediately after '%s' */
2859             *error_msg = Perl_form(aTHX_
2860                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2861                  (int) context_len, context,
2862                  (int) ((char *) first_bad_char_loc - str), str);
2863             return NULL;
2864         }
2865     }
2866
2867     return res;
2868
2869   bad_charname: {
2870
2871         /* The final %.*s makes sure that should the trailing NUL be missing
2872          * that this print won't run off the end of the string */
2873         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2874                            in \N{%s} */
2875         *error_msg = Perl_form(aTHX_
2876             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2877             (int)(s - context + 1), context,
2878             (int)(e - s + 1), s + 1);
2879         return NULL;
2880     }
2881
2882   multi_spaces:
2883         /* diag_listed_as: charnames alias definitions may not contain a
2884                            sequence of multiple spaces; marked by <-- HERE
2885                            in %s */
2886         *error_msg = Perl_form(aTHX_
2887             "charnames alias definitions may not contain a sequence of "
2888             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2889             (int)(s - context + 1), context,
2890             (int)(e - s + 1), s + 1);
2891         return NULL;
2892 }
2893
2894 /*
2895   scan_const
2896
2897   Extracts the next constant part of a pattern, double-quoted string,
2898   or transliteration.  This is terrifying code.
2899
2900   For example, in parsing the double-quoted string "ab\x63$d", it would
2901   stop at the '$' and return an OP_CONST containing 'abc'.
2902
2903   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2904   processing a pattern (PL_lex_inpat is true), a transliteration
2905   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2906
2907   Returns a pointer to the character scanned up to. If this is
2908   advanced from the start pointer supplied (i.e. if anything was
2909   successfully parsed), will leave an OP_CONST for the substring scanned
2910   in pl_yylval. Caller must intuit reason for not parsing further
2911   by looking at the next characters herself.
2912
2913   In patterns:
2914     expand:
2915       \N{FOO}  => \N{U+hex_for_character_FOO}
2916       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2917
2918     pass through:
2919         all other \-char, including \N and \N{ apart from \N{ABC}
2920
2921     stops on:
2922         @ and $ where it appears to be a var, but not for $ as tail anchor
2923         \l \L \u \U \Q \E
2924         (?{  or  (??{
2925
2926   In transliterations:
2927     characters are VERY literal, except for - not at the start or end
2928     of the string, which indicates a range.  However some backslash sequences
2929     are recognized: \r, \n, and the like
2930                     \007 \o{}, \x{}, \N{}
2931     If all elements in the transliteration are below 256,
2932     scan_const expands the range to the full set of intermediate
2933     characters. If the range is in utf8, the hyphen is replaced with
2934     a certain range mark which will be handled by pmtrans() in op.c.
2935
2936   In double-quoted strings:
2937     backslashes:
2938       all those recognized in transliterations
2939       deprecated backrefs: \1 (in substitution replacements)
2940       case and quoting: \U \Q \E
2941     stops on @ and $
2942
2943   scan_const does *not* construct ops to handle interpolated strings.
2944   It stops processing as soon as it finds an embedded $ or @ variable
2945   and leaves it to the caller to work out what's going on.
2946
2947   embedded arrays (whether in pattern or not) could be:
2948       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2949
2950   $ in double-quoted strings must be the symbol of an embedded scalar.
2951
2952   $ in pattern could be $foo or could be tail anchor.  Assumption:
2953   it's a tail anchor if $ is the last thing in the string, or if it's
2954   followed by one of "()| \r\n\t"
2955
2956   \1 (backreferences) are turned into $1 in substitutions
2957
2958   The structure of the code is
2959       while (there's a character to process) {
2960           handle transliteration ranges
2961           skip regexp comments /(?#comment)/ and codes /(?{code})/
2962           skip #-initiated comments in //x patterns
2963           check for embedded arrays
2964           check for embedded scalars
2965           if (backslash) {
2966               deprecate \1 in substitution replacements
2967               handle string-changing backslashes \l \U \Q \E, etc.
2968               switch (what was escaped) {
2969                   handle \- in a transliteration (becomes a literal -)
2970                   if a pattern and not \N{, go treat as regular character
2971                   handle \132 (octal characters)
2972                   handle \x15 and \x{1234} (hex characters)
2973                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2974                   handle \cV (control characters)
2975                   handle printf-style backslashes (\f, \r, \n, etc)
2976               } (end switch)
2977               continue
2978           } (end if backslash)
2979           handle regular character
2980     } (end while character to read)
2981
2982 */
2983
2984 STATIC char *
2985 S_scan_const(pTHX_ char *start)
2986 {
2987     char *send = PL_bufend;             /* end of the constant */
2988     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2989                                            on sizing. */
2990     char *s = start;                    /* start of the constant */
2991     char *d = SvPVX(sv);                /* destination for copies */
2992     bool dorange = FALSE;               /* are we in a translit range? */
2993     bool didrange = FALSE;              /* did we just finish a range? */
2994     bool in_charclass = FALSE;          /* within /[...]/ */
2995     bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
2996                                            UTF8?  But, this can show as true
2997                                            when the source isn't utf8, as for
2998                                            example when it is entirely composed
2999                                            of hex constants */
3000     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3001     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3002                                            number of characters found so far
3003                                            that will expand (into 2 bytes)
3004                                            should we have to convert to
3005                                            UTF-8) */
3006     SV *res;                            /* result from charnames */
3007     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3008                                    high-end character is temporarily placed */
3009
3010     /* Does something require special handling in tr/// ?  This avoids extra
3011      * work in a less likely case.  As such, khw didn't feel it was worth
3012      * adding any branches to the more mainline code to handle this, which
3013      * means that this doesn't get set in some circumstances when things like
3014      * \x{100} get expanded out.  As a result there needs to be extra testing
3015      * done in the tr code */
3016     bool has_above_latin1 = FALSE;
3017
3018     /* Note on sizing:  The scanned constant is placed into sv, which is
3019      * initialized by newSV() assuming one byte of output for every byte of
3020      * input.  This routine expects newSV() to allocate an extra byte for a
3021      * trailing NUL, which this routine will append if it gets to the end of
3022      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3023      * CAPITAL LETTER A}), or more output than input if the constant ends up
3024      * recoded to utf8, but each time a construct is found that might increase
3025      * the needed size, SvGROW() is called.  Its size parameter each time is
3026      * based on the best guess estimate at the time, namely the length used so
3027      * far, plus the length the current construct will occupy, plus room for
3028      * the trailing NUL, plus one byte for every input byte still unscanned */
3029
3030     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3031                        before set */
3032 #ifdef EBCDIC
3033     int backslash_N = 0;            /* ? was the character from \N{} */
3034     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3035                                        platform-specific like \x65 */
3036 #endif
3037
3038     PERL_ARGS_ASSERT_SCAN_CONST;
3039
3040     assert(PL_lex_inwhat != OP_TRANSR);
3041
3042     /* Protect sv from errors and fatal warnings. */
3043     ENTER_with_name("scan_const");
3044     SAVEFREESV(sv);
3045
3046     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3047      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3048      * valid */
3049     assert(*send == '\0');
3050
3051     while (s < send
3052            || dorange   /* Handle tr/// range at right edge of input */
3053     ) {
3054
3055         /* get transliterations out of the way (they're most literal) */
3056         if (PL_lex_inwhat == OP_TRANS) {
3057
3058             /* But there isn't any special handling necessary unless there is a
3059              * range, so for most cases we just drop down and handle the value
3060              * as any other.  There are two exceptions.
3061              *
3062              * 1.  A hyphen indicates that we are actually going to have a
3063              *     range.  In this case, skip the '-', set a flag, then drop
3064              *     down to handle what should be the end range value.
3065              * 2.  After we've handled that value, the next time through, that
3066              *     flag is set and we fix up the range.
3067              *
3068              * Ranges entirely within Latin1 are expanded out entirely, in
3069              * order to make the transliteration a simple table look-up.
3070              * Ranges that extend above Latin1 have to be done differently, so
3071              * there is no advantage to expanding them here, so they are
3072              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3073              * a byte that can't occur in legal UTF-8, and hence can signify a
3074              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3075              * the range is expressed as Unicode, the Latin1 portion is
3076              * expanded out even if the range extends above Latin1.  This is
3077              * because each code point in it has to be processed here
3078              * individually to get its native translation */
3079
3080             if (! dorange) {
3081
3082                 /* Here, we don't think we're in a range.  If the new character
3083                  * is not a hyphen; or if it is a hyphen, but it's too close to
3084                  * either edge to indicate a range, or if we haven't output any
3085                  * characters yet then it's a regular character. */
3086                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3087                 {
3088
3089                     /* A regular character.  Process like any other, but first
3090                      * clear any flags */
3091                     didrange = FALSE;
3092                     dorange = FALSE;
3093 #ifdef EBCDIC
3094                     non_portable_endpoint = 0;
3095                     backslash_N = 0;
3096 #endif
3097                     /* The tests here for being above Latin1 and similar ones
3098                      * in the following 'else' suffice to find all such
3099                      * occurences in the constant, except those added by a
3100                      * backslash escape sequence, like \x{100}.  Mostly, those
3101                      * set 'has_above_latin1' as appropriate */
3102                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3103                         has_above_latin1 = TRUE;
3104                     }
3105
3106                     /* Drops down to generic code to process current byte */
3107                 }
3108                 else {  /* Is a '-' in the context where it means a range */
3109                     if (didrange) { /* Something like y/A-C-Z// */
3110                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3111                                          " operator");
3112                     }
3113
3114                     dorange = TRUE;
3115
3116                     s++;    /* Skip past the hyphen */
3117
3118                     /* d now points to where the end-range character will be
3119                      * placed.  Drop down to get that character.  We'll finish
3120                      * processing the range the next time through the loop */
3121
3122                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3123                         has_above_latin1 = TRUE;
3124                     }
3125
3126                     /* Drops down to generic code to process current byte */
3127                 }
3128             }  /* End of not a range */
3129             else {
3130                 /* Here we have parsed a range.  Now must handle it.  At this
3131                  * point:
3132                  * 'sv' is a SV* that contains the output string we are
3133                  *      constructing.  The final two characters in that string
3134                  *      are the range start and range end, in order.
3135                  * 'd'  points to just beyond the range end in the 'sv' string,
3136                  *      where we would next place something
3137                  */
3138                 char * max_ptr;
3139                 char * min_ptr;
3140                 IV range_min;
3141                 IV range_max;   /* last character in range */
3142                 STRLEN grow;
3143                 Size_t offset_to_min = 0;
3144                 Size_t extras = 0;
3145 #ifdef EBCDIC
3146                 bool convert_unicode;
3147                 IV real_range_max = 0;
3148 #endif
3149                 /* Get the code point values of the range ends. */
3150                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3151                 offset_to_max = max_ptr - SvPVX_const(sv);
3152                 if (d_is_utf8) {
3153                     /* We know the utf8 is valid, because we just constructed
3154                      * it ourselves in previous loop iterations */
3155                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3156                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3157                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3158
3159                     /* This compensates for not all code setting
3160                      * 'has_above_latin1', so that we don't skip stuff that
3161                      * should be executed */
3162                     if (range_max > 255) {
3163                         has_above_latin1 = TRUE;
3164                     }
3165                 }
3166                 else {
3167                     min_ptr = max_ptr - 1;
3168                     range_min = * (U8*) min_ptr;
3169                     range_max = * (U8*) max_ptr;
3170                 }
3171
3172                 /* If the range is just a single code point, like tr/a-a/.../,
3173                  * that code point is already in the output, twice.  We can
3174                  * just back up over the second instance and avoid all the rest
3175                  * of the work.  But if it is a variant character, it's been
3176                  * counted twice, so decrement.  (This unlikely scenario is
3177                  * special cased, like the one for a range of 2 code points
3178                  * below, only because the main-line code below needs a range
3179                  * of 3 or more to work without special casing.  Might as well
3180                  * get it out of the way now.) */
3181                 if (UNLIKELY(range_max == range_min)) {
3182                     d = max_ptr;
3183                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3184                         utf8_variant_count--;
3185                     }
3186                     goto range_done;
3187                 }
3188
3189 #ifdef EBCDIC
3190                 /* On EBCDIC platforms, we may have to deal with portable
3191                  * ranges.  These happen if at least one range endpoint is a
3192                  * Unicode value (\N{...}), or if the range is a subset of
3193                  * [A-Z] or [a-z], and both ends are literal characters,
3194                  * like 'A', and not like \x{C1} */
3195                 convert_unicode =
3196                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3197                                                        hence portable range */
3198                     || (     ! non_portable_endpoint
3199                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3200                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3201                 if (convert_unicode) {
3202
3203                     /* Special handling is needed for these portable ranges.
3204                      * They are defined to be in Unicode terms, which includes
3205                      * all the Unicode code points between the end points.
3206                      * Convert to Unicode to get the Unicode range.  Later we
3207                      * will convert each code point in the range back to
3208                      * native.  */
3209                     range_min = NATIVE_TO_UNI(range_min);
3210                     range_max = NATIVE_TO_UNI(range_max);
3211                 }
3212 #endif
3213
3214                 if (range_min > range_max) {
3215 #ifdef EBCDIC
3216                     if (convert_unicode) {
3217                         /* Need to convert back to native for meaningful
3218                          * messages for this platform */
3219                         range_min = UNI_TO_NATIVE(range_min);
3220                         range_max = UNI_TO_NATIVE(range_max);
3221                     }
3222 #endif
3223                     /* Use the characters themselves for the error message if
3224                      * ASCII printables; otherwise some visible representation
3225                      * of them */
3226                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3227                         Perl_croak(aTHX_
3228                          "Invalid range \"%c-%c\" in transliteration operator",
3229                          (char)range_min, (char)range_max);
3230                     }
3231 #ifdef EBCDIC
3232                     else if (convert_unicode) {
3233         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3234                         Perl_croak(aTHX_
3235                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3236                            UVXf "}\" in transliteration operator",
3237                            range_min, range_max);
3238                     }
3239 #endif
3240                     else {
3241         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3242                         Perl_croak(aTHX_
3243                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3244                            " in transliteration operator",
3245                            range_min, range_max);
3246                     }
3247                 }
3248
3249                 /* If the range is exactly two code points long, they are
3250                  * already both in the output */
3251                 if (UNLIKELY(range_min + 1 == range_max)) {
3252                     goto range_done;
3253                 }
3254
3255                 /* Here the range contains at least 3 code points */
3256
3257                 if (d_is_utf8) {
3258
3259                     /* If everything in the transliteration is below 256, we
3260                      * can avoid special handling later.  A translation table
3261                      * for each of those bytes is created by op.c.  So we
3262                      * expand out all ranges to their constituent code points.
3263                      * But if we've encountered something above 255, the
3264                      * expanding won't help, so skip doing that.  But if it's
3265                      * EBCDIC, we may have to look at each character below 256
3266                      * if we have to convert to/from Unicode values */
3267                     if (   has_above_latin1
3268 #ifdef EBCDIC
3269                         && (range_min > 255 || ! convert_unicode)
3270 #endif
3271                     ) {
3272                         const STRLEN off = d - SvPVX(sv);
3273                         const STRLEN extra = 1 + (send - s) + 1;
3274                         char *e;
3275
3276                         /* Move the high character one byte to the right; then
3277                          * insert between it and the range begin, an illegal
3278                          * byte which serves to indicate this is a range (using
3279                          * a '-' would be ambiguous). */
3280
3281                         if (off + extra > SvLEN(sv)) {
3282                             d = off + SvGROW(sv, off + extra);
3283                             max_ptr = d - off + offset_to_max;
3284                         }
3285
3286                         e = d++;
3287                         while (e-- > max_ptr) {
3288                             *(e + 1) = *e;
3289                         }
3290                         *(e + 1) = (char) RANGE_INDICATOR;
3291                         goto range_done;
3292                     }
3293
3294                     /* Here, we're going to expand out the range.  For EBCDIC
3295                      * the range can extend above 255 (not so in ASCII), so
3296                      * for EBCDIC, split it into the parts above and below
3297                      * 255/256 */
3298 #ifdef EBCDIC
3299                     if (range_max > 255) {
3300                         real_range_max = range_max;
3301                         range_max = 255;
3302                     }
3303 #endif
3304                 }
3305
3306                 /* Here we need to expand out the string to contain each
3307                  * character in the range.  Grow the output to handle this.
3308                  * For non-UTF8, we need a byte for each code point in the
3309                  * range, minus the three that we've already allocated for: the
3310                  * hyphen, the min, and the max.  For UTF-8, we need this
3311                  * plus an extra byte for each code point that occupies two
3312                  * bytes (is variant) when in UTF-8 (except we've already
3313                  * allocated for the end points, including if they are
3314                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3315                  * platforms, it's easy to calculate a precise number.  To
3316                  * start, we count the variants in the range, which we need
3317                  * elsewhere in this function anyway.  (For the case where it
3318                  * isn't easy to calculate, 'extras' has been initialized to 0,
3319                  * and the calculation is done in a loop further down.) */
3320 #ifdef EBCDIC
3321                 if (convert_unicode)
3322 #endif
3323                 {
3324                     /* This is executed unconditionally on ASCII, and for
3325                      * Unicode ranges on EBCDIC.  Under these conditions, all
3326                      * code points above a certain value are variant; and none
3327                      * under that value are.  We just need to find out how much
3328                      * of the range is above that value.  We don't count the
3329                      * end points here, as they will already have been counted
3330                      * as they were parsed. */
3331                     if (range_min >= UTF_CONTINUATION_MARK) {
3332
3333                         /* The whole range is made up of variants */
3334                         extras = (range_max - 1) - (range_min + 1) + 1;
3335                     }
3336                     else if (range_max >= UTF_CONTINUATION_MARK) {
3337
3338                         /* Only the higher portion of the range is variants */
3339                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3340                     }
3341
3342                     utf8_variant_count += extras;
3343                 }
3344
3345                 /* The base growth is the number of code points in the range,
3346                  * not including the endpoints, which have already been sized
3347                  * for (and output).  We don't subtract for the hyphen, as it
3348                  * has been parsed but not output, and the SvGROW below is
3349                  * based only on what's been output plus what's left to parse.
3350                  * */
3351                 grow = (range_max - 1) - (range_min + 1) + 1;
3352
3353                 if (d_is_utf8) {
3354 #ifdef EBCDIC
3355                     /* In some cases in EBCDIC, we haven't yet calculated a
3356                      * precise amount needed for the UTF-8 variants.  Just
3357                      * assume the worst case, that everything will expand by a
3358                      * byte */
3359                     if (! convert_unicode) {
3360                         grow *= 2;
3361                     }
3362                     else
3363 #endif
3364                     {
3365                         /* Otherwise we know exactly how many variants there
3366                          * are in the range. */
3367                         grow += extras;
3368                     }
3369                 }
3370
3371                 /* Grow, but position the output to overwrite the range min end
3372                  * point, because in some cases we overwrite that */
3373                 SvCUR_set(sv, d - SvPVX_const(sv));
3374                 offset_to_min = min_ptr - SvPVX_const(sv);
3375
3376                 /* See Note on sizing above. */
3377                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3378                                              + (send - s)
3379                                              + grow
3380                                              + 1 /* Trailing NUL */ );
3381
3382                 /* Now, we can expand out the range. */
3383 #ifdef EBCDIC
3384                 if (convert_unicode) {
3385                     SSize_t i;
3386
3387                     /* Recall that the min and max are now in Unicode terms, so
3388                      * we have to convert each character to its native
3389                      * equivalent */
3390                     if (d_is_utf8) {
3391                         for (i = range_min; i <= range_max; i++) {
3392                             append_utf8_from_native_byte(
3393                                                     LATIN1_TO_NATIVE((U8) i),
3394                                                     (U8 **) &d);
3395                         }
3396                     }
3397                     else {
3398                         for (i = range_min; i <= range_max; i++) {
3399                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3400                         }
3401                     }
3402                 }
3403                 else
3404 #endif
3405                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3406                 {
3407                     /* Here, no conversions are necessary, which means that the
3408                      * first character in the range is already in 'd' and
3409                      * valid, so we can skip overwriting it */
3410                     if (d_is_utf8) {
3411                         SSize_t i;
3412                         d += UTF8SKIP(d);
3413                         for (i = range_min + 1; i <= range_max; i++) {
3414                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3415                         }
3416                     }
3417                     else {
3418                         SSize_t i;
3419                         d++;
3420                         assert(range_min + 1 <= range_max);
3421                         for (i = range_min + 1; i < range_max; i++) {
3422 #ifdef EBCDIC
3423                             /* In this case on EBCDIC, we haven't calculated
3424                              * the variants.  Do it here, as we go along */
3425                             if (! UVCHR_IS_INVARIANT(i)) {
3426                                 utf8_variant_count++;
3427                             }
3428 #endif
3429                             *d++ = (char)i;
3430                         }
3431
3432                         /* The range_max is done outside the loop so as to
3433                          * avoid having to special case not incrementing
3434                          * 'utf8_variant_count' on EBCDIC (it's already been
3435                          * counted when originally parsed) */
3436                         *d++ = (char) range_max;
3437                     }
3438                 }
3439
3440 #ifdef EBCDIC
3441                 /* If the original range extended above 255, add in that
3442                  * portion. */
3443                 if (real_range_max) {
3444                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3445                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3446                     if (real_range_max > 0x100) {
3447                         if (real_range_max > 0x101) {
3448                             *d++ = (char) RANGE_INDICATOR;
3449                         }
3450                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3451                     }
3452                 }
3453 #endif
3454
3455               range_done:
3456                 /* mark the range as done, and continue */
3457                 didrange = TRUE;
3458                 dorange = FALSE;
3459 #ifdef EBCDIC
3460                 non_portable_endpoint = 0;
3461                 backslash_N = 0;
3462 #endif
3463                 continue;
3464             } /* End of is a range */
3465         } /* End of transliteration.  Joins main code after these else's */
3466         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3467             char *s1 = s-1;
3468             int esc = 0;
3469             while (s1 >= start && *s1-- == '\\')
3470                 esc = !esc;
3471             if (!esc)
3472                 in_charclass = TRUE;
3473         }
3474         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3475             char *s1 = s-1;
3476             int esc = 0;
3477             while (s1 >= start && *s1-- == '\\')
3478                 esc = !esc;
3479             if (!esc)
3480                 in_charclass = FALSE;
3481         }
3482             /* skip for regexp comments /(?#comment)/, except for the last
3483              * char, which will be done separately.  Stop on (?{..}) and
3484              * friends */
3485         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3486             if (s[2] == '#') {
3487                 if (s_is_utf8) {
3488                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3489
3490                     while (s + len < send && *s != ')') {
3491                         Copy(s, d, len, U8);
3492                         d += len;
3493                         s += len;
3494                         len = UTF8_SAFE_SKIP(s, send);
3495                     }
3496                 }
3497                 else while (s+1 < send && *s != ')') {
3498                     *d++ = *s++;
3499                 }
3500             }
3501             else if (!PL_lex_casemods
3502                      && (    s[2] == '{' /* This should match regcomp.c */
3503                          || (s[2] == '?' && s[3] == '{')))
3504             {
3505                 break;
3506             }
3507         }
3508             /* likewise skip #-initiated comments in //x patterns */
3509         else if (*s == '#'
3510                  && PL_lex_inpat
3511                  && !in_charclass
3512                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3513         {
3514             while (s < send && *s != '\n')
3515                 *d++ = *s++;
3516         }
3517             /* no further processing of single-quoted regex */
3518         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3519             goto default_action;
3520
3521             /* check for embedded arrays
3522              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3523              */
3524         else if (*s == '@' && s[1]) {
3525             if (UTF
3526                ? isIDFIRST_utf8_safe(s+1, send)
3527                : isWORDCHAR_A(s[1]))
3528             {
3529                 break;
3530             }
3531             if (memCHRs(":'{$", s[1]))
3532                 break;
3533             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3534                 break; /* in regexp, neither @+ nor @- are interpolated */
3535         }
3536             /* check for embedded scalars.  only stop if we're sure it's a
3537              * variable.  */
3538         else if (*s == '$') {
3539             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3540                 break;
3541             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3542                 if (s[1] == '\\') {
3543                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3544                                    "Possible unintended interpolation of $\\ in regex");
3545                 }
3546                 break;          /* in regexp, $ might be tail anchor */
3547             }
3548         }
3549
3550         /* End of else if chain - OP_TRANS rejoin rest */
3551
3552         if (UNLIKELY(s >= send)) {
3553             assert(s == send);
3554             break;
3555         }
3556
3557         /* backslashes */
3558         if (*s == '\\' && s+1 < send) {
3559             char* e;    /* Can be used for ending '}', etc. */
3560
3561             s++;
3562
3563             /* warn on \1 - \9 in substitution replacements, but note that \11
3564              * is an octal; and \19 is \1 followed by '9' */
3565             if (PL_lex_inwhat == OP_SUBST
3566                 && !PL_lex_inpat
3567                 && isDIGIT(*s)
3568                 && *s != '0'
3569                 && !isDIGIT(s[1]))
3570             {
3571                 /* diag_listed_as: \%d better written as $%d */
3572                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3573                 *--s = '$';
3574                 break;
3575             }
3576
3577             /* string-change backslash escapes */
3578             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3579                 --s;
3580                 break;
3581             }
3582             /* In a pattern, process \N, but skip any other backslash escapes.
3583              * This is because we don't want to translate an escape sequence
3584              * into a meta symbol and have the regex compiler use the meta
3585              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3586              * in spite of this, we do have to process \N here while the proper
3587              * charnames handler is in scope.  See bugs #56444 and #62056.
3588              *
3589              * There is a complication because \N in a pattern may also stand
3590              * for 'match a non-nl', and not mean a charname, in which case its
3591              * processing should be deferred to the regex compiler.  To be a
3592              * charname it must be followed immediately by a '{', and not look
3593              * like \N followed by a curly quantifier, i.e., not something like
3594              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3595              * quantifier */
3596             else if (PL_lex_inpat
3597                     && (*s != 'N'
3598                         || s[1] != '{'
3599                         || regcurly(s + 1)))
3600             {
3601                 *d++ = '\\';
3602                 goto default_action;
3603             }
3604
3605             switch (*s) {
3606             default:
3607                 {
3608                     if ((isALPHANUMERIC(*s)))
3609                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3610                                        "Unrecognized escape \\%c passed through",
3611                                        *s);
3612                     /* default action is to copy the quoted character */
3613                     goto default_action;
3614                 }
3615
3616             /* eg. \132 indicates the octal constant 0132 */
3617             case '0': case '1': case '2': case '3':
3618             case '4': case '5': case '6': case '7':
3619                 {
3620                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3621                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3622                     STRLEN len = 3;
3623                     uv = grok_oct(s, &len, &flags, NULL);
3624                     s += len;
3625                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3626                         && s < send
3627                         && isDIGIT(*s)  /* like \08, \178 */
3628                         && ckWARN(WARN_MISC))
3629                     {
3630                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3631                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3632                     }
3633                 }
3634                 goto NUM_ESCAPE_INSERT;
3635
3636             /* eg. \o{24} indicates the octal constant \024 */
3637             case 'o':
3638                 {
3639                     const char* error;
3640
3641                     if (! grok_bslash_o(&s, send,
3642                                                &uv, &error,
3643                                                NULL,
3644                                                FALSE, /* Not strict */
3645                                                FALSE, /* No illegal cp's */
3646                                                UTF))
3647                     {
3648                         yyerror(error);
3649                         uv = 0; /* drop through to ensure range ends are set */
3650                     }
3651                     goto NUM_ESCAPE_INSERT;
3652                 }
3653
3654             /* eg. \x24 indicates the hex constant 0x24 */
3655             case 'x':
3656                 {
3657                     const char* error;
3658
3659                     if (! grok_bslash_x(&s, send,
3660                                                &uv, &error,
3661                                                NULL,
3662                                                FALSE, /* Not strict */
3663                                                FALSE, /* No illegal cp's */
3664                                                UTF))
3665                     {
3666                         yyerror(error);
3667                         uv = 0; /* drop through to ensure range ends are set */
3668                     }
3669                 }
3670
3671               NUM_ESCAPE_INSERT:
3672                 /* Insert oct or hex escaped character. */
3673
3674                 /* Here uv is the ordinal of the next character being added */
3675                 if (UVCHR_IS_INVARIANT(uv)) {
3676                     *d++ = (char) uv;
3677                 }
3678                 else {
3679                     if (!d_is_utf8 && uv > 255) {
3680
3681                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3682                          * If we've only seen invariants so far, all we have to
3683                          * do is turn on the flag */
3684                         if (utf8_variant_count == 0) {
3685                             SvUTF8_on(sv);
3686                         }
3687                         else {
3688                             SvCUR_set(sv, d - SvPVX_const(sv));
3689                             SvPOK_on(sv);
3690                             *d = '\0';
3691
3692                             sv_utf8_upgrade_flags_grow(
3693                                            sv,
3694                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3695
3696                                            /* Since we're having to grow here,
3697                                             * make sure we have enough room for
3698                                             * this escape and a NUL, so the
3699                                             * code immediately below won't have
3700                                             * to actually grow again */
3701                                           UVCHR_SKIP(uv)
3702                                         + (STRLEN)(send - s) + 1);
3703                             d = SvPVX(sv) + SvCUR(sv);
3704                         }
3705
3706                         has_above_latin1 = TRUE;
3707                         d_is_utf8 = TRUE;
3708                     }
3709
3710                     if (! d_is_utf8) {
3711                         *d++ = (char)uv;
3712                         utf8_variant_count++;
3713                     }
3714                     else {
3715                        /* Usually, there will already be enough room in 'sv'
3716                         * since such escapes are likely longer than any UTF-8
3717                         * sequence they can end up as.  This isn't the case on
3718                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3719                         * UTF-8 for it contains 14.  And, we have to allow for
3720                         * a trailing NUL.  It probably can't happen on ASCII
3721                         * platforms, but be safe.  See Note on sizing above. */
3722                         const STRLEN needed = d - SvPVX(sv)
3723                                             + UVCHR_SKIP(uv)
3724                                             + (send - s)
3725                                             + 1;
3726                         if (UNLIKELY(needed > SvLEN(sv))) {
3727                             SvCUR_set(sv, d - SvPVX_const(sv));
3728                             d = SvCUR(sv) + SvGROW(sv, needed);
3729                         }
3730
3731                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3732                                                    (ckWARN(WARN_PORTABLE))
3733                                                    ? UNICODE_WARN_PERL_EXTENDED
3734                                                    : 0);
3735                     }
3736                 }
3737 #ifdef EBCDIC
3738                 non_portable_endpoint++;
3739 #endif
3740                 continue;
3741
3742             case 'N':
3743                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3744                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3745                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3746                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3747                  * convenience all three forms are referred to as "named
3748                  * characters" below.
3749                  *
3750                  * For patterns, \N also can mean to match a non-newline.  Code
3751                  * before this 'switch' statement should already have handled
3752                  * this situation, and hence this code only has to deal with
3753                  * the named character cases.
3754                  *
3755                  * For non-patterns, the named characters are converted to
3756                  * their string equivalents.  In patterns, named characters are
3757                  * not converted to their ultimate forms for the same reasons
3758                  * that other escapes aren't (mainly that the ultimate
3759                  * character could be considered a meta-symbol by the regex
3760                  * compiler).  Instead, they are converted to the \N{U+...}
3761                  * form to get the value from the charnames that is in effect
3762                  * right now, while preserving the fact that it was a named
3763                  * character, so that the regex compiler knows this.
3764                  *
3765                  * The structure of this section of code (besides checking for
3766                  * errors and upgrading to utf8) is:
3767                  *    If the named character is of the form \N{U+...}, pass it
3768                  *      through if a pattern; otherwise convert the code point
3769                  *      to utf8
3770                  *    Otherwise must be some \N{NAME}: convert to
3771                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3772                  *
3773                  * Transliteration is an exception.  The conversion to utf8 is
3774                  * only done if the code point requires it to be representable.
3775                  *
3776                  * Here, 's' points to the 'N'; the test below is guaranteed to
3777                  * succeed if we are being called on a pattern, as we already
3778                  * know from a test above that the next character is a '{'.  A
3779                  * non-pattern \N must mean 'named character', which requires
3780                  * braces */
3781                 s++;
3782                 if (*s != '{') {
3783                     yyerror("Missing braces on \\N{}");
3784                     *d++ = '\0';
3785                     continue;
3786                 }
3787                 s++;
3788
3789                 /* If there is no matching '}', it is an error. */
3790                 if (! (e = (char *) memchr(s, '}', send - s))) {
3791                     if (! PL_lex_inpat) {
3792                         yyerror("Missing right brace on \\N{}");
3793                     } else {
3794                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3795                     }
3796                     yyquit(); /* Have exhausted the input. */
3797                 }
3798
3799                 /* Here it looks like a named character */
3800
3801                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3802                     s += 2;         /* Skip to next char after the 'U+' */
3803                     if (PL_lex_inpat) {
3804
3805                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3806                         /* Check the syntax.  */
3807                         const char *orig_s;
3808                         orig_s = s - 5;
3809                         if (!isXDIGIT(*s)) {
3810                           bad_NU:
3811                             yyerror(
3812                                 "Invalid hexadecimal number in \\N{U+...}"
3813                             );
3814                             s = e + 1;
3815                             *d++ = '\0';
3816                             continue;
3817                         }
3818                         while (++s < e) {
3819                             if (isXDIGIT(*s))
3820                                 continue;
3821                             else if ((*s == '.' || *s == '_')
3822                                   && isXDIGIT(s[1]))
3823                                 continue;
3824                             goto bad_NU;
3825                         }
3826
3827                         /* Pass everything through unchanged.
3828                          * +1 is for the '}' */
3829                         Copy(orig_s, d, e - orig_s + 1, char);
3830                         d += e - orig_s + 1;
3831                     }
3832                     else {  /* Not a pattern: convert the hex to string */
3833                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3834                                   | PERL_SCAN_SILENT_ILLDIGIT
3835                                   | PERL_SCAN_SILENT_OVERFLOW
3836                                   | PERL_SCAN_DISALLOW_PREFIX;
3837                         STRLEN len = e - s;
3838
3839                         uv = grok_hex(s, &len, &flags, NULL);
3840                         if (len == 0 || (len != (STRLEN)(e - s)))
3841                             goto bad_NU;
3842
3843                         if (    uv > MAX_LEGAL_CP
3844                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3845                         {
3846                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3847                             uv = 0; /* drop through to ensure range ends are
3848                                        set */
3849                         }
3850
3851                          /* For non-tr///, if the destination is not in utf8,
3852                           * unconditionally recode it to be so.  This is
3853                           * because \N{} implies Unicode semantics, and scalars
3854                           * have to be in utf8 to guarantee those semantics.
3855                           * tr/// doesn't care about Unicode rules, so no need
3856                           * there to upgrade to UTF-8 for small enough code
3857                           * points */
3858                         if (! d_is_utf8 && (   uv > 0xFF
3859                                            || PL_lex_inwhat != OP_TRANS))
3860                         {
3861                             /* See Note on sizing above.  */
3862                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3863
3864                             SvCUR_set(sv, d - SvPVX_const(sv));
3865                             SvPOK_on(sv);
3866                             *d = '\0';
3867
3868                             if (utf8_variant_count == 0) {
3869                                 SvUTF8_on(sv);
3870                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3871                             }
3872                             else {
3873                                 sv_utf8_upgrade_flags_grow(
3874                                                sv,
3875                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3876                                                extra);
3877                                 d = SvPVX(sv) + SvCUR(sv);
3878                             }
3879
3880                             d_is_utf8 = TRUE;
3881                             has_above_latin1 = TRUE;
3882                         }
3883
3884                         /* Add the (Unicode) code point to the output. */
3885                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3886                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3887                         }
3888                         else {
3889                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3890                                                    (ckWARN(WARN_PORTABLE))
3891                                                    ? UNICODE_WARN_PERL_EXTENDED
3892                                                    : 0);
3893                         }
3894                     }
3895                 }
3896                 else /* Here is \N{NAME} but not \N{U+...}. */
3897                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3898                 {   /* Failed.  We should die eventually, but for now use a NUL
3899                        to keep parsing */
3900                     *d++ = '\0';
3901                 }
3902                 else {  /* Successfully evaluated the name */
3903                     STRLEN len;
3904                     const char *str = SvPV_const(res, len);
3905                     if (PL_lex_inpat) {
3906
3907                         if (! len) { /* The name resolved to an empty string */
3908                             const char empty_N[] = "\\N{_}";
3909                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
3910                             d += sizeof(empty_N) - 1;
3911                         }
3912                         else {
3913                             /* In order to not lose information for the regex
3914                             * compiler, pass the result in the specially made
3915                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3916                             * the code points in hex of each character
3917                             * returned by charnames */
3918
3919                             const char *str_end = str + len;
3920                             const STRLEN off = d - SvPVX_const(sv);
3921
3922                             if (! SvUTF8(res)) {
3923                                 /* For the non-UTF-8 case, we can determine the
3924                                  * exact length needed without having to parse
3925                                  * through the string.  Each character takes up
3926                                  * 2 hex digits plus either a trailing dot or
3927                                  * the "}" */
3928                                 const char initial_text[] = "\\N{U+";
3929                                 const STRLEN initial_len = sizeof(initial_text)
3930                                                            - 1;
3931                                 d = off + SvGROW(sv, off
3932                                                     + 3 * len
3933
3934                                                     /* +1 for trailing NUL */
3935                                                     + initial_len + 1
3936
3937                                                     + (STRLEN)(send - e));
3938                                 Copy(initial_text, d, initial_len, char);
3939                                 d += initial_len;
3940                                 while (str < str_end) {
3941                                     char hex_string[4];
3942                                     int len =
3943                                         my_snprintf(hex_string,
3944                                                   sizeof(hex_string),
3945                                                   "%02X.",
3946
3947                                                   /* The regex compiler is
3948                                                    * expecting Unicode, not
3949                                                    * native */
3950                                                   NATIVE_TO_LATIN1(*str));
3951                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3952                                                            sizeof(hex_string));
3953                                     Copy(hex_string, d, 3, char);
3954                                     d += 3;
3955                                     str++;
3956                                 }
3957                                 d--;    /* Below, we will overwrite the final
3958                                            dot with a right brace */
3959                             }
3960                             else {
3961                                 STRLEN char_length; /* cur char's byte length */
3962
3963                                 /* and the number of bytes after this is
3964                                  * translated into hex digits */
3965                                 STRLEN output_length;
3966
3967                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3968                                  * for max('U+', '.'); and 1 for NUL */
3969                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3970
3971                                 /* Get the first character of the result. */
3972                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3973                                                         len,
3974                                                         &char_length,
3975                                                         UTF8_ALLOW_ANYUV);
3976                                 /* Convert first code point to Unicode hex,
3977                                  * including the boiler plate before it. */
3978                                 output_length =
3979                                     my_snprintf(hex_string, sizeof(hex_string),
3980                                              "\\N{U+%X",
3981                                              (unsigned int) NATIVE_TO_UNI(uv));
3982
3983                                 /* Make sure there is enough space to hold it */
3984                                 d = off + SvGROW(sv, off
3985                                                     + output_length
3986                                                     + (STRLEN)(send - e)
3987                                                     + 2);       /* '}' + NUL */
3988                                 /* And output it */
3989                                 Copy(hex_string, d, output_length, char);
3990                                 d += output_length;
3991
3992                                 /* For each subsequent character, append dot and
3993                                 * its Unicode code point in hex */
3994                                 while ((str += char_length) < str_end) {
3995                                     const STRLEN off = d - SvPVX_const(sv);
3996                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3997                                                             str_end - str,
3998                                                             &char_length,
3999                                                             UTF8_ALLOW_ANYUV);
4000                                     output_length =
4001                                         my_snprintf(hex_string,
4002                                              sizeof(hex_string),
4003                                              ".%X",
4004                                              (unsigned int) NATIVE_TO_UNI(uv));
4005
4006                                     d = off + SvGROW(sv, off
4007                                                         + output_length
4008                                                         + (STRLEN)(send - e)
4009                                                         + 2);   /* '}' +  NUL */
4010                                     Copy(hex_string, d, output_length, char);
4011                                     d += output_length;
4012                                 }
4013                             }
4014
4015                             *d++ = '}'; /* Done.  Add the trailing brace */
4016                         }
4017                     }
4018                     else { /* Here, not in a pattern.  Convert the name to a
4019                             * string. */
4020
4021                         if (PL_lex_inwhat == OP_TRANS) {
4022                             str = SvPV_const(res, len);
4023                             if (len > ((SvUTF8(res))
4024                                        ? UTF8SKIP(str)
4025                                        : 1U))
4026                             {
4027                                 yyerror(Perl_form(aTHX_
4028                                     "%.*s must not be a named sequence"
4029                                     " in transliteration operator",
4030                                         /*  +1 to include the "}" */
4031                                     (int) (e + 1 - start), start));
4032                                 *d++ = '\0';
4033                                 goto end_backslash_N;
4034                             }
4035
4036                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4037                                 has_above_latin1 = TRUE;
4038                             }
4039
4040                         }
4041                         else if (! SvUTF8(res)) {
4042                             /* Make sure \N{} return is UTF-8.  This is because
4043                              * \N{} implies Unicode semantics, and scalars have
4044                              * to be in utf8 to guarantee those semantics; but
4045                              * not needed in tr/// */
4046                             sv_utf8_upgrade_flags(res, 0);
4047                             str = SvPV_const(res, len);
4048                         }
4049
4050                          /* Upgrade destination to be utf8 if this new
4051                           * component is */
4052                         if (! d_is_utf8 && SvUTF8(res)) {
4053                             /* See Note on sizing above.  */
4054                             const STRLEN extra = len + (send - s) + 1;
4055
4056                             SvCUR_set(sv, d - SvPVX_const(sv));
4057                             SvPOK_on(sv);
4058                             *d = '\0';
4059
4060                             if (utf8_variant_count == 0) {
4061                                 SvUTF8_on(sv);
4062                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4063                             }
4064                             else {
4065                                 sv_utf8_upgrade_flags_grow(sv,
4066                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4067                                                 extra);
4068                                 d = SvPVX(sv) + SvCUR(sv);
4069                             }
4070                             d_is_utf8 = TRUE;
4071                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4072
4073                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4074                              * set correctly here). */
4075                             const STRLEN extra = len + (send - e) + 1;
4076                             const STRLEN off = d - SvPVX_const(sv);
4077                             d = off + SvGROW(sv, off + extra);
4078                         }
4079                         Copy(str, d, len, char);
4080                         d += len;
4081                     }
4082
4083                     SvREFCNT_dec(res);
4084
4085                 } /* End \N{NAME} */
4086
4087               end_backslash_N:
4088 #ifdef EBCDIC
4089                 backslash_N++; /* \N{} is defined to be Unicode */
4090 #endif
4091                 s = e + 1;  /* Point to just after the '}' */
4092                 continue;
4093
4094             /* \c is a control character */
4095             case 'c':
4096                 s++;
4097                 if (s < send) {
4098                     const char * message;
4099
4100                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4101                         yyerror(message);
4102                         yyquit();   /* Have always immediately croaked on
4103                                        errors in this */
4104                     }
4105                     d++;
4106                 }
4107                 else {
4108                     yyerror("Missing control char name in \\c");
4109                     yyquit();   /* Are at end of input, no sense continuing */
4110                 }
4111 #ifdef EBCDIC
4112                 non_portable_endpoint++;
4113 #endif
4114                 break;
4115
4116             /* printf-style backslashes, formfeeds, newlines, etc */
4117             case 'b':
4118                 *d++ = '\b';
4119                 break;
4120             case 'n':
4121                 *d++ = '\n';
4122                 break;
4123             case 'r':
4124                 *d++ = '\r';
4125                 break;
4126             case 'f':
4127                 *d++ = '\f';
4128                 break;
4129             case 't':
4130                 *d++ = '\t';
4131                 break;
4132             case 'e':
4133                 *d++ = ESC_NATIVE;
4134                 break;
4135             case 'a':
4136                 *d++ = '\a';
4137                 break;
4138             } /* end switch */
4139
4140             s++;
4141             continue;
4142         } /* end if (backslash) */
4143
4144     default_action:
4145         /* Just copy the input to the output, though we may have to convert
4146          * to/from UTF-8.
4147          *
4148          * If the input has the same representation in UTF-8 as not, it will be
4149          * a single byte, and we don't care about UTF8ness; just copy the byte */
4150         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4151             *d++ = *s++;
4152         }
4153         else if (! s_is_utf8 && ! d_is_utf8) {
4154             /* If neither source nor output is UTF-8, is also a single byte,
4155              * just copy it; but this byte counts should we later have to
4156              * convert to UTF-8 */
4157             *d++ = *s++;
4158             utf8_variant_count++;
4159         }
4160         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4161             const STRLEN len = UTF8SKIP(s);
4162
4163             /* We expect the source to have already been checked for
4164              * malformedness */
4165             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4166
4167             Copy(s, d, len, U8);
4168             d += len;
4169             s += len;
4170         }
4171         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4172             STRLEN need = send - s + 1; /* See Note on sizing above. */
4173
4174             SvCUR_set(sv, d - SvPVX_const(sv));
4175             SvPOK_on(sv);
4176             *d = '\0';
4177
4178             if (utf8_variant_count == 0) {
4179                 SvUTF8_on(sv);
4180                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4181             }
4182             else {
4183                 sv_utf8_upgrade_flags_grow(sv,
4184                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4185                                            need);
4186                 d = SvPVX(sv) + SvCUR(sv);
4187             }
4188             d_is_utf8 = TRUE;
4189             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4190         }
4191         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4192                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4193                    the input byte since we haven't incremented 's' yet. See
4194                    Note on sizing above. */
4195             const STRLEN off = d - SvPVX(sv);
4196             const STRLEN extra = 2 + (send - s - 1) + 1;
4197             if (off + extra > SvLEN(sv)) {
4198                 d = off + SvGROW(sv, off + extra);
4199             }
4200             *d++ = UTF8_EIGHT_BIT_HI(*s);
4201             *d++ = UTF8_EIGHT_BIT_LO(*s);
4202             s++;
4203         }
4204     } /* while loop to process each character */
4205
4206     {
4207         const STRLEN off = d - SvPVX(sv);
4208
4209         /* See if room for the terminating NUL */
4210         if (UNLIKELY(off >= SvLEN(sv))) {
4211
4212 #ifndef DEBUGGING
4213
4214             if (off > SvLEN(sv))
4215 #endif
4216                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4217                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4218
4219             /* Whew!  Here we don't have room for the terminating NUL, but
4220              * everything else so far has fit.  It's not too late to grow
4221              * to fit the NUL and continue on.  But it is a bug, as the code
4222              * above was supposed to have made room for this, so under
4223              * DEBUGGING builds, we panic anyway.  */
4224             d = off + SvGROW(sv, off + 1);
4225         }
4226     }
4227
4228     /* terminate the string and set up the sv */
4229     *d = '\0';
4230     SvCUR_set(sv, d - SvPVX_const(sv));
4231
4232     SvPOK_on(sv);
4233     if (d_is_utf8) {
4234         SvUTF8_on(sv);
4235     }
4236
4237     /* shrink the sv if we allocated more than we used */
4238     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4239         SvPV_shrink_to_cur(sv);
4240     }
4241
4242     /* return the substring (via pl_yylval) only if we parsed anything */
4243     if (s > start) {
4244         char *s2 = start;
4245         for (; s2 < s; s2++) {
4246             if (*s2 == '\n')
4247                 COPLINE_INC_WITH_HERELINES;
4248         }
4249         SvREFCNT_inc_simple_void_NN(sv);
4250         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4251             && ! PL_parser->lex_re_reparsing)
4252         {
4253             const char *const key = PL_lex_inpat ? "qr" : "q";
4254             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4255             const char *type;
4256             STRLEN typelen;
4257
4258             if (PL_lex_inwhat == OP_TRANS) {
4259                 type = "tr";
4260                 typelen = 2;
4261             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4262                 type = "s";
4263                 typelen = 1;
4264             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4265                 type = "q";
4266                 typelen = 1;
4267             } else {
4268                 type = "qq";
4269                 typelen = 2;
4270             }
4271
4272             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4273                                 type, typelen, NULL);
4274         }
4275         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4276     }
4277     LEAVE_with_name("scan_const");
4278     return s;
4279 }
4280
4281 /* S_intuit_more
4282  * Returns TRUE if there's more to the expression (e.g., a subscript),
4283  * FALSE otherwise.
4284  *
4285  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4286  *
4287  * ->[ and ->{ return TRUE
4288  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4289  * { and [ outside a pattern are always subscripts, so return TRUE
4290  * if we're outside a pattern and it's not { or [, then return FALSE
4291  * if we're in a pattern and the first char is a {
4292  *   {4,5} (any digits around the comma) returns FALSE
4293  * if we're in a pattern and the first char is a [
4294  *   [] returns FALSE
4295  *   [SOMETHING] has a funky algorithm to decide whether it's a
4296  *      character class or not.  It has to deal with things like
4297  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4298  * anything else returns TRUE
4299  */
4300
4301 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4302
4303 STATIC int
4304 S_intuit_more(pTHX_ char *s, char *e)
4305 {
4306     PERL_ARGS_ASSERT_INTUIT_MORE;
4307
4308     if (PL_lex_brackets)
4309         return TRUE;
4310     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4311         return TRUE;
4312     if (*s == '-' && s[1] == '>'
4313      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4314      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4315         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4316         return TRUE;
4317     if (*s != '{' && *s != '[')
4318         return FALSE;
4319     PL_parser->sub_no_recover = TRUE;
4320     if (!PL_lex_inpat)
4321         return TRUE;
4322
4323     /* In a pattern, so maybe we have {n,m}. */
4324     if (*s == '{') {
4325         if (regcurly(s)) {
4326             return FALSE;
4327         }
4328         return TRUE;
4329     }
4330
4331     /* On the other hand, maybe we have a character class */
4332
4333     s++;
4334     if (*s == ']' || *s == '^')
4335         return FALSE;
4336     else {
4337         /* this is terrifying, and it works */
4338         int weight;
4339         char seen[256];
4340         const char * const send = (char *) memchr(s, ']', e - s);
4341         unsigned char un_char, last_un_char;
4342         char tmpbuf[sizeof PL_tokenbuf * 4];
4343
4344         if (!send)              /* has to be an expression */
4345             return TRUE;
4346         weight = 2;             /* let's weigh the evidence */
4347
4348         if (*s == '$')
4349             weight -= 3;
4350         else if (isDIGIT(*s)) {
4351             if (s[1] != ']') {
4352                 if (isDIGIT(s[1]) && s[2] == ']')
4353                     weight -= 10;
4354             }
4355             else
4356                 weight -= 100;
4357         }
4358         Zero(seen,256,char);
4359         un_char = 255;
4360         for (; s < send; s++) {
4361             last_un_char = un_char;
4362             un_char = (unsigned char)*s;
4363             switch (*s) {
4364             case '@':
4365             case '&':
4366             case '$':
4367                 weight -= seen[un_char] * 10;
4368                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4369                     int len;
4370                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4371                     len = (int)strlen(tmpbuf);
4372                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4373                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4374                         weight -= 100;
4375                     else
4376                         weight -= 10;
4377                 }
4378                 else if (*s == '$'
4379                          && s[1]
4380                          && memCHRs("[#!%*<>()-=",s[1]))
4381                 {
4382                     if (/*{*/ memCHRs("])} =",s[2]))
4383                         weight -= 10;
4384                     else
4385                         weight -= 1;
4386                 }
4387                 break;
4388             case '\\':
4389                 un_char = 254;
4390                 if (s[1]) {
4391                     if (memCHRs("wds]",s[1]))
4392                         weight += 100;
4393                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4394                         weight += 1;
4395                     else if (memCHRs("rnftbxcav",s[1]))
4396                         weight += 40;
4397                     else if (isDIGIT(s[1])) {
4398                         weight += 40;
4399                         while (s[1] && isDIGIT(s[1]))
4400                             s++;
4401                     }
4402                 }
4403                 else
4404                     weight += 100;
4405                 break;
4406             case '-':
4407                 if (s[1] == '\\')
4408                     weight += 50;
4409                 if (memCHRs("aA01! ",last_un_char))
4410                     weight += 30;
4411                 if (memCHRs("zZ79~",s[1]))
4412                     weight += 30;
4413                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4414                     weight -= 5;        /* cope with negative subscript */
4415                 break;
4416             default:
4417                 if (!isWORDCHAR(last_un_char)
4418                     && !(last_un_char == '$' || last_un_char == '@'
4419                          || last_un_char == '&')
4420                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4421                     char *d = s;
4422                     while (isALPHA(*s))
4423                         s++;
4424                     if (keyword(d, s - d, 0))
4425                         weight -= 150;
4426                 }
4427                 if (un_char == last_un_char + 1)
4428                     weight += 5;
4429                 weight -= seen[un_char];
4430                 break;
4431             }
4432             seen[un_char]++;
4433         }
4434         if (weight >= 0)        /* probably a character class */
4435             return FALSE;
4436     }
4437
4438     return TRUE;
4439 }
4440
4441 /*
4442  * S_intuit_method
4443  *
4444  * Does all the checking to disambiguate
4445  *   foo bar
4446  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4447  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4448  *
4449  * First argument is the stuff after the first token, e.g. "bar".
4450  *
4451  * Not a method if foo is a filehandle.
4452  * Not a method if foo is a subroutine prototyped to take a filehandle.
4453  * Not a method if it's really "Foo $bar"
4454  * Method if it's "foo $bar"
4455  * Not a method if it's really "print foo $bar"
4456  * Method if it's really "foo package::" (interpreted as package->foo)
4457  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4458  * Not a method if bar is a filehandle or package, but is quoted with
4459  *   =>
4460  */
4461
4462 STATIC int
4463 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4464 {
4465     char *s = start + (*start == '$');
4466     char tmpbuf[sizeof PL_tokenbuf];
4467     STRLEN len;
4468     GV* indirgv;
4469         /* Mustn't actually add anything to a symbol table.
4470            But also don't want to "initialise" any placeholder
4471            constants that might already be there into full
4472            blown PVGVs with attached PVCV.  */
4473     GV * const gv =
4474         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4475
4476     PERL_ARGS_ASSERT_INTUIT_METHOD;
4477
4478     if (!FEATURE_INDIRECT_IS_ENABLED)
4479         return 0;
4480
4481     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4482             return 0;
4483     if (cv && SvPOK(cv)) {
4484         const char *proto = CvPROTO(cv);
4485         if (proto) {
4486             while (*proto && (isSPACE(*proto) || *proto == ';'))
4487                 proto++;
4488             if (*proto == '*')
4489                 return 0;
4490         }
4491     }
4492
4493     if (*start == '$') {
4494         SSize_t start_off = start - SvPVX(PL_linestr);
4495         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4496             || isUPPER(*PL_tokenbuf))
4497             return 0;
4498         /* this could be $# */
4499         if (isSPACE(*s))
4500             s = skipspace(s);
4501         PL_bufptr = SvPVX(PL_linestr) + start_off;
4502         PL_expect = XREF;
4503         return *s == '(' ? FUNCMETH : METHOD;
4504     }
4505
4506     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4507     /* start is the beginning of the possible filehandle/object,
4508      * and s is the end of it
4509      * tmpbuf is a copy of it (but with single quotes as double colons)
4510      */
4511
4512     if (!keyword(tmpbuf, len, 0)) {
4513         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4514             len -= 2;
4515             tmpbuf[len] = '\0';
4516             goto bare_package;
4517         }
4518         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4519                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4520                                     SVt_PVCV);
4521         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4522          && (!isGV(indirgv) || GvCVu(indirgv)))
4523             return 0;
4524         /* filehandle or package name makes it a method */
4525         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4526             s = skipspace(s);
4527             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4528                 return 0;       /* no assumptions -- "=>" quotes bareword */
4529       bare_package:
4530             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4531                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4532             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4533             PL_expect = XTERM;
4534             force_next(BAREWORD);
4535             PL_bufptr = s;
4536             return *s == '(' ? FUNCMETH : METHOD;
4537         }
4538     }
4539     return 0;
4540 }
4541
4542 /* Encoded script support. filter_add() effectively inserts a
4543  * 'pre-processing' function into the current source input stream.
4544  * Note that the filter function only applies to the current source file
4545  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4546  *
4547  * The datasv parameter (which may be NULL) can be used to pass
4548  * private data to this instance of the filter. The filter function
4549  * can recover the SV using the FILTER_DATA macro and use it to
4550  * store private buffers and state information.
4551  *
4552  * The supplied datasv parameter is upgraded to a PVIO type
4553  * and the IoDIRP/IoANY field is used to store the function pointer,
4554  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4555  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4556  * private use must be set using malloc'd pointers.
4557  */
4558
4559 SV *
4560 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4561 {
4562     if (!funcp)
4563         return NULL;
4564
4565     if (!PL_parser)
4566         return NULL;
4567
4568     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4569         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4570
4571     if (!PL_rsfp_filters)
4572         PL_rsfp_filters = newAV();
4573     if (!datasv)
4574         datasv = newSV(0);
4575     SvUPGRADE(datasv, SVt_PVIO);
4576     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4577     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4578     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4579                           FPTR2DPTR(void *, IoANY(datasv)),
4580                           SvPV_nolen(datasv)));
4581     av_unshift(PL_rsfp_filters, 1);
4582     av_store(PL_rsfp_filters, 0, datasv) ;
4583     if (
4584         !PL_parser->filtered
4585      && PL_parser->lex_flags & LEX_EVALBYTES
4586      && PL_bufptr < PL_bufend
4587     ) {
4588         const char *s = PL_bufptr;
4589         while (s < PL_bufend) {
4590             if (*s == '\n') {
4591                 SV *linestr = PL_parser->linestr;
4592                 char *buf = SvPVX(linestr);
4593                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4594                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4595                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4596                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4597                 STRLEN const last_uni_pos =
4598                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4599                 STRLEN const last_lop_pos =
4600                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4601                 av_push(PL_rsfp_filters, linestr);
4602                 PL_parser->linestr =
4603                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4604                 buf = SvPVX(PL_parser->linestr);
4605                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4606                 PL_parser->bufptr = buf + bufptr_pos;
4607                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4608                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4609                 PL_parser->linestart = buf + linestart_pos;
4610                 if (PL_parser->last_uni)
4611                     PL_parser->last_uni = buf + last_uni_pos;
4612                 if (PL_parser->last_lop)
4613                     PL_parser->last_lop = buf + last_lop_pos;
4614                 SvLEN_set(linestr, SvCUR(linestr));
4615                 SvCUR_set(linestr, s - SvPVX(linestr));
4616                 PL_parser->filtered = 1;
4617                 break;
4618             }
4619             s++;
4620         }
4621     }
4622     return(datasv);
4623 }
4624
4625
4626 /* Delete most recently added instance of this filter function. */
4627 void
4628 Perl_filter_del(pTHX_ filter_t funcp)
4629 {
4630     SV *datasv;
4631
4632     PERL_ARGS_ASSERT_FILTER_DEL;
4633
4634 #ifdef DEBUGGING
4635     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4636                           FPTR2DPTR(void*, funcp)));
4637 #endif
4638     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4639         return;
4640     /* if filter is on top of stack (usual case) just pop it off */
4641     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4642     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4643         sv_free(av_pop(PL_rsfp_filters));
4644
4645         return;
4646     }
4647     /* we need to search for the correct entry and clear it     */
4648     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4649 }
4650
4651
4652 /* Invoke the idxth filter function for the current rsfp.        */
4653 /* maxlen 0 = read one text line */
4654 I32
4655 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4656 {
4657     filter_t funcp;
4658     I32 ret;
4659     SV *datasv = NULL;
4660     /* This API is bad. It should have been using unsigned int for maxlen.
4661        Not sure if we want to change the API, but if not we should sanity
4662        check the value here.  */
4663     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4664
4665     PERL_ARGS_ASSERT_FILTER_READ;
4666
4667     if (!PL_parser || !PL_rsfp_filters)
4668         return -1;
4669     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4670         /* Provide a default input filter to make life easy.    */
4671         /* Note that we append to the line. This is handy.      */
4672         DEBUG_P(PerlIO_printf(Perl_debug_log,
4673                               "filter_read %d: from rsfp\n", idx));
4674         if (correct_length) {
4675             /* Want a block */
4676             int len ;
4677             const int old_len = SvCUR(buf_sv);
4678
4679             /* ensure buf_sv is large enough */
4680             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4681             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4682                                    correct_length)) <= 0) {
4683                 if (PerlIO_error(PL_rsfp))
4684                     return -1;          /* error */
4685                 else
4686                     return 0 ;          /* end of file */
4687             }
4688             SvCUR_set(buf_sv, old_len + len) ;
4689             SvPVX(buf_sv)[old_len + len] = '\0';
4690         } else {
4691             /* Want a line */
4692             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4693                 if (PerlIO_error(PL_rsfp))
4694                     return -1;          /* error */
4695                 else
4696                     return 0 ;          /* end of file */
4697             }
4698         }
4699         return SvCUR(buf_sv);
4700     }
4701     /* Skip this filter slot if filter has been deleted */
4702     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4703         DEBUG_P(PerlIO_printf(Perl_debug_log,
4704                               "filter_read %d: skipped (filter deleted)\n",
4705                               idx));
4706         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4707     }
4708     if (SvTYPE(datasv) != SVt_PVIO) {
4709         if (correct_length) {
4710             /* Want a block */
4711             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4712             if (!remainder) return 0; /* eof */
4713             if (correct_length > remainder) correct_length = remainder;
4714             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4715             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4716         } else {
4717             /* Want a line */
4718             const char *s = SvEND(datasv);
4719             const char *send = SvPVX(datasv) + SvLEN(datasv);
4720             while (s < send) {
4721                 if (*s == '\n') {
4722                     s++;
4723                     break;
4724                 }
4725                 s++;
4726             }
4727             if (s == send) return 0; /* eof */
4728             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4729             SvCUR_set(datasv, s-SvPVX(datasv));
4730         }
4731         return SvCUR(buf_sv);
4732     }
4733     /* Get function pointer hidden within datasv        */
4734     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4735     DEBUG_P(PerlIO_printf(Perl_debug_log,
4736                           "filter_read %d: via function %p (%s)\n",
4737                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4738     /* Call function. The function is expected to       */
4739     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4740     /* Return: <0:error, =0:eof, >0:not eof             */
4741     ENTER;
4742     save_scalar(PL_errgv);
4743     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4744     LEAVE;
4745     return ret;
4746 }
4747
4748 STATIC char *
4749 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4750 {
4751     PERL_ARGS_ASSERT_FILTER_GETS;
4752
4753 #ifdef PERL_CR_FILTER
4754     if (!PL_rsfp_filters) {
4755         filter_add(S_cr_textfilter,NULL);
4756     }
4757 #endif
4758     if (PL_rsfp_filters) {
4759         if (!append)
4760             SvCUR_set(sv, 0);   /* start with empty line        */
4761         if (FILTER_READ(0, sv, 0) > 0)
4762             return ( SvPVX(sv) ) ;
4763         else
4764             return NULL ;
4765     }
4766     else
4767         return (sv_gets(sv, PL_rsfp, append));
4768 }
4769
4770 STATIC HV *
4771 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4772 {
4773     GV *gv;
4774
4775     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4776
4777     if (memEQs(pkgname, len, "__PACKAGE__"))
4778         return PL_curstash;
4779
4780     if (len > 2
4781         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4782         && (gv = gv_fetchpvn_flags(pkgname,
4783                                    len,
4784                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4785     {
4786         return GvHV(gv);                        /* Foo:: */
4787     }
4788
4789     /* use constant CLASS => 'MyClass' */
4790     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4791     if (gv && GvCV(gv)) {
4792         SV * const sv = cv_const_sv(GvCV(gv));
4793         if (sv)
4794             return gv_stashsv(sv, 0);
4795     }
4796
4797     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4798 }
4799
4800
4801 STATIC char *
4802 S_tokenize_use(pTHX_ int is_use, char *s) {
4803     PERL_ARGS_ASSERT_TOKENIZE_USE;
4804
4805     if (PL_expect != XSTATE)
4806         /* diag_listed_as: "use" not allowed in expression */
4807         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4808                     is_use ? "use" : "no"));
4809     PL_expect = XTERM;
4810     s = skipspace(s);
4811     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4812         s = force_version(s, TRUE);
4813         if (*s == ';' || *s == '}'
4814                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4815             NEXTVAL_NEXTTOKE.opval = NULL;
4816             force_next(BAREWORD);
4817         }
4818         else if (*s == 'v') {
4819             s = force_word(s,BAREWORD,FALSE,TRUE);
4820             s = force_version(s, FALSE);
4821         }
4822     }
4823     else {
4824         s = force_word(s,BAREWORD,FALSE,TRUE);
4825         s = force_version(s, FALSE);
4826     }
4827     pl_yylval.ival = is_use;
4828     return s;
4829 }
4830 #ifdef DEBUGGING
4831     static const char* const exp_name[] =
4832         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4833           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4834           "SIGVAR", "TERMORDORDOR"
4835         };
4836 #endif
4837
4838 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4839 STATIC bool
4840 S_word_takes_any_delimiter(char *p, STRLEN len)
4841 {
4842     return (len == 1 && memCHRs("msyq", p[0]))
4843             || (len == 2
4844                 && ((p[0] == 't' && p[1] == 'r')
4845                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4846 }
4847
4848 static void
4849 S_check_scalar_slice(pTHX_ char *s)
4850 {
4851     s++;
4852     while (SPACE_OR_TAB(*s)) s++;
4853     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4854                                                              PL_bufend,
4855                                                              UTF))
4856     {
4857         return;
4858     }
4859     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4860            || (*s && memCHRs(" \t$#+-'\"", *s)))
4861     {
4862         s += UTF ? UTF8SKIP(s) : 1;
4863     }
4864     if (*s == '}' || *s == ']')
4865         pl_yylval.ival = OPpSLICEWARNING;
4866 }
4867
4868 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4869 static void
4870 S_lex_token_boundary(pTHX)
4871 {
4872     PL_oldoldbufptr = PL_oldbufptr;
4873     PL_oldbufptr = PL_bufptr;
4874 }
4875
4876 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4877 static char *
4878 S_vcs_conflict_marker(pTHX_ char *s)
4879 {
4880     lex_token_boundary();
4881     PL_bufptr = s;
4882     yyerror("Version control conflict marker");
4883     while (s < PL_bufend && *s != '\n')
4884         s++;
4885     return s;
4886 }
4887
4888 static int
4889 yyl_sigvar(pTHX_ char *s)
4890 {
4891     /* we expect the sigil and optional var name part of a
4892      * signature element here. Since a '$' is not necessarily
4893      * followed by a var name, handle it specially here; the general
4894      * yylex code would otherwise try to interpret whatever follows
4895      * as a var; e.g. ($, ...) would be seen as the var '$,'
4896      */
4897
4898     U8 sigil;
4899
4900     s = skipspace(s);
4901     sigil = *s++;
4902     PL_bufptr = s; /* for error reporting */
4903     switch (sigil) {
4904     case '$':
4905     case '@':
4906     case '%':
4907         /* spot stuff that looks like an prototype */
4908         if (memCHRs("$:@%&*;\\[]", *s)) {
4909             yyerror("Illegal character following sigil in a subroutine signature");
4910             break;
4911         }
4912         /* '$#' is banned, while '$ # comment' isn't */
4913         if (*s == '#') {
4914             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4915             break;
4916         }
4917         s = skipspace(s);
4918         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4919             char *dest = PL_tokenbuf + 1;
4920             /* read var name, including sigil, into PL_tokenbuf */
4921             PL_tokenbuf[0] = sigil;
4922             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4923                 0, cBOOL(UTF), FALSE, FALSE);
4924             *dest = '\0';
4925             assert(PL_tokenbuf[1]); /* we have a variable name */
4926         }
4927         else {
4928             *PL_tokenbuf = 0;
4929             PL_in_my = 0;
4930         }
4931
4932         s = skipspace(s);
4933         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4934          * as the ASSIGNOP, and exclude other tokens that start with =
4935          */
4936         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4937             /* save now to report with the same context as we did when
4938              * all ASSIGNOPS were accepted */
4939             PL_oldbufptr = s;
4940
4941             ++s;
4942             NEXTVAL_NEXTTOKE.ival = 0;
4943             force_next(ASSIGNOP);
4944             PL_expect = XTERM;
4945         }
4946         else if (*s == ',' || *s == ')') {
4947             PL_expect = XOPERATOR;
4948         }
4949         else {
4950             /* make sure the context shows the unexpected character and
4951              * hopefully a bit more */
4952             if (*s) ++s;
4953             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4954                 s++;
4955             PL_bufptr = s; /* for error reporting */
4956             yyerror("Illegal operator following parameter in a subroutine signature");
4957             PL_in_my = 0;
4958         }
4959         if (*PL_tokenbuf) {
4960             NEXTVAL_NEXTTOKE.ival = sigil;
4961             force_next('p'); /* force a signature pending identifier */
4962         }
4963         break;
4964
4965     case ')':
4966         PL_expect = XBLOCK;
4967         break;
4968     case ',': /* handle ($a,,$b) */
4969         break;
4970
4971     default:
4972         PL_in_my = 0;
4973         yyerror("A signature parameter must start with '$', '@' or '%'");
4974         /* very crude error recovery: skip to likely next signature
4975          * element */
4976         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4977             s++;
4978         break;
4979     }
4980
4981     TOKEN(sigil);
4982 }
4983
4984 static int
4985 yyl_dollar(pTHX_ char *s)
4986 {
4987     CLINE;
4988
4989     if (PL_expect == XPOSTDEREF) {
4990         if (s[1] == '#') {
4991             s++;
4992             POSTDEREF(DOLSHARP);
4993         }
4994         POSTDEREF('$');
4995     }
4996
4997     if (   s[1] == '#'
4998         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
4999             || memCHRs("{$:+-@", s[2])))
5000     {
5001         PL_tokenbuf[0] = '@';
5002         s = scan_ident(s + 1, PL_tokenbuf + 1,
5003                        sizeof PL_tokenbuf - 1, FALSE);
5004         if (PL_expect == XOPERATOR) {
5005             char *d = s;
5006             if (PL_bufptr > s) {
5007                 d = PL_bufptr-1;
5008                 PL_bufptr = PL_oldbufptr;
5009             }
5010             no_op("Array length", d);
5011         }
5012         if (!PL_tokenbuf[1])
5013             PREREF(DOLSHARP);
5014         PL_expect = XOPERATOR;
5015         force_ident_maybe_lex('#');
5016         TOKEN(DOLSHARP);
5017     }
5018
5019     PL_tokenbuf[0] = '$';
5020     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5021     if (PL_expect == XOPERATOR) {
5022         char *d = s;
5023         if (PL_bufptr > s) {
5024             d = PL_bufptr-1;
5025             PL_bufptr = PL_oldbufptr;
5026         }
5027         no_op("Scalar", d);
5028     }
5029     if (!PL_tokenbuf[1]) {
5030         if (s == PL_bufend)
5031             yyerror("Final $ should be \\$ or $name");
5032         PREREF('$');
5033     }
5034
5035     {
5036         const char tmp = *s;
5037         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5038             s = skipspace(s);
5039
5040         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5041             && intuit_more(s, PL_bufend)) {
5042             if (*s == '[') {
5043                 PL_tokenbuf[0] = '@';
5044                 if (ckWARN(WARN_SYNTAX)) {
5045                     char *t = s+1;
5046
5047                     while ( t < PL_bufend ) {
5048                         if (isSPACE(*t)) {
5049                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5050                             /* consumed one or more space chars */
5051                         } else if (*t == '$' || *t == '@') {
5052                             /* could be more than one '$' like $$ref or @$ref */
5053                             do { t++; } while (t < PL_bufend && *t == '$');
5054
5055                             /* could be an abigail style identifier like $ foo */
5056                             while (t < PL_bufend && *t == ' ') t++;
5057
5058                             /* strip off the name of the var */
5059                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5060                                 t += UTF ? UTF8SKIP(t) : 1;
5061                             /* consumed a varname */
5062                         } else if (isDIGIT(*t)) {
5063                             /* deal with hex constants like 0x11 */
5064                             if (t[0] == '0' && t[1] == 'x') {
5065                                 t += 2;
5066                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5067                             } else {
5068                                 /* deal with decimal/octal constants like 1 and 0123 */
5069                                 do { t++; } while (isDIGIT(*t));
5070                                 if (t<PL_bufend && *t == '.') {
5071                                     do { t++; } while (isDIGIT(*t));
5072                                 }
5073                             }
5074                             /* consumed a number */
5075                         } else {
5076                             /* not a var nor a space nor a number */
5077                             break;
5078                         }
5079                     }
5080                     if (t < PL_bufend && *t++ == ',') {
5081                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5082                         while (t < PL_bufend && *t != ']')
5083                             t++;
5084                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5085                                     "Multidimensional syntax %" UTF8f " not supported",
5086                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5087                     }
5088                 }
5089             }
5090             else if (*s == '{') {
5091                 char *t;
5092                 PL_tokenbuf[0] = '%';
5093                 if (    strEQ(PL_tokenbuf+1, "SIG")
5094                     && ckWARN(WARN_SYNTAX)
5095                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5096                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5097                 {
5098                     char tmpbuf[sizeof PL_tokenbuf];
5099                     do {
5100                         t++;
5101                     } while (isSPACE(*t));
5102                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5103                         STRLEN len;
5104                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5105                                         &len);
5106                         while (isSPACE(*t))
5107                             t++;
5108                         if (  *t == ';'
5109                             && get_cvn_flags(tmpbuf, len, UTF
5110                                                             ? SVf_UTF8
5111                                                             : 0))
5112                         {
5113                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5114                                 "You need to quote \"%" UTF8f "\"",
5115                                     UTF8fARG(UTF, len, tmpbuf));
5116                         }
5117                     }
5118                 }
5119             }
5120         }
5121
5122         PL_expect = XOPERATOR;
5123         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5124             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5125             if (!islop || PL_last_lop_op == OP_GREPSTART)
5126                 PL_expect = XOPERATOR;
5127             else if (memCHRs("$@\"'`q", *s))
5128                 PL_expect = XTERM;              /* e.g. print $fh "foo" */
5129             else if (   memCHRs("&*<%", *s)
5130                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5131             {
5132                 PL_expect = XTERM;              /* e.g. print $fh &sub */
5133             }
5134             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5135                 char tmpbuf[sizeof PL_tokenbuf];
5136                 int t2;
5137                 STRLEN len;
5138                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5139                 if ((t2 = keyword(tmpbuf, len, 0))) {
5140                     /* binary operators exclude handle interpretations */
5141                     switch (t2) {
5142                     case -KEY_x:
5143                     case -KEY_eq:
5144                     case -KEY_ne:
5145                     case -KEY_gt:
5146                     case -KEY_lt:
5147                     case -KEY_ge:
5148                     case -KEY_le:
5149                     case -KEY_cmp:
5150                         break;
5151                     default:
5152                         PL_expect = XTERM;      /* e.g. print $fh length() */
5153                         break;
5154                     }
5155                 }
5156                 else {
5157                     PL_expect = XTERM;  /* e.g. print $fh subr() */
5158                 }
5159             }
5160             else if (isDIGIT(*s))
5161                 PL_expect = XTERM;              /* e.g. print $fh 3 */
5162             else if (*s == '.' && isDIGIT(s[1]))
5163                 PL_expect = XTERM;              /* e.g. print $fh .3 */
5164             else if ((*s == '?' || *s == '-' || *s == '+')
5165                      && !isSPACE(s[1]) && s[1] != '=')
5166                 PL_expect = XTERM;              /* e.g. print $fh -1 */
5167             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5168                      && s[1] != '/')
5169                 PL_expect = XTERM;              /* e.g. print $fh /.../
5170                                                XXX except DORDOR operator
5171                                             */
5172             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5173                      && s[2] != '=')
5174                 PL_expect = XTERM;              /* print $fh <<"EOF" */
5175         }
5176     }
5177     force_ident_maybe_lex('$');
5178     TOKEN('$');
5179 }
5180
5181 static int
5182 yyl_sub(pTHX_ char *s, const int key)
5183 {
5184     char * const tmpbuf = PL_tokenbuf + 1;
5185     bool have_name, have_proto;
5186     STRLEN len;
5187     SV *format_name = NULL;
5188     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5189
5190     SSize_t off = s-SvPVX(PL_linestr);
5191     char *d;
5192
5193     s = skipspace(s); /* can move PL_linestr */
5194
5195     d = SvPVX(PL_linestr)+off;
5196
5197     SAVEBOOL(PL_parser->sig_seen);
5198     PL_parser->sig_seen = FALSE;
5199
5200     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5201         || *s == '\''
5202         || (*s == ':' && s[1] == ':'))
5203     {
5204
5205         PL_expect = XATTRBLOCK;
5206         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5207                       &len);
5208         if (key == KEY_format)
5209             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5210         *PL_tokenbuf = '&';
5211         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5212          || pad_findmy_pvn(
5213                 PL_tokenbuf, len + 1, 0
5214             ) != NOT_IN_PAD)
5215             sv_setpvn(PL_subname, tmpbuf, len);
5216         else {
5217             sv_setsv(PL_subname,PL_curstname);
5218             sv_catpvs(PL_subname,"::");
5219             sv_catpvn(PL_subname,tmpbuf,len);
5220         }
5221         if (SvUTF8(PL_linestr))
5222             SvUTF8_on(PL_subname);
5223         have_name = TRUE;
5224
5225         s = skipspace(d);
5226     }
5227     else {
5228         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5229             *d = '\0';
5230             /* diag_listed_as: Missing name in "%s sub" */
5231             Perl_croak(aTHX_
5232                       "Missing name in \"%s\"", PL_bufptr);
5233         }
5234         PL_expect = XATTRTERM;
5235         sv_setpvs(PL_subname,"?");
5236         have_name = FALSE;
5237     }
5238
5239     if (key == KEY_format) {
5240         if (format_name) {
5241             NEXTVAL_NEXTTOKE.opval
5242                 = newSVOP(OP_CONST,0, format_name);
5243             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5244             force_next(BAREWORD);
5245         }
5246         PREBLOCK(FORMAT);
5247     }
5248
5249     /* Look for a prototype */
5250     if (*s == '(' && !is_sigsub) {
5251         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5252         if (!s)
5253             Perl_croak(aTHX_ "Prototype not terminated");
5254         COPLINE_SET_FROM_MULTI_END;
5255         (void)validate_proto(PL_subname, PL_lex_stuff,
5256                              ckWARN(WARN_ILLEGALPROTO), 0);
5257         have_proto = TRUE;
5258
5259         s = skipspace(s);
5260     }
5261     else
5262         have_proto = FALSE;
5263
5264     if (  !(*s == ':' && s[1] != ':')
5265         && (*s != '{' && *s != '(') && key != KEY_format)
5266     {
5267         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5268                key == KEY_DESTROY || key == KEY_BEGIN ||
5269                key == KEY_UNITCHECK || key == KEY_CHECK ||
5270                key == KEY_INIT || key == KEY_END ||
5271                key == KEY_my || key == KEY_state ||
5272                key == KEY_our);
5273         if (!have_name)
5274             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5275         else if (*s != ';' && *s != '}')
5276             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5277     }
5278
5279     if (have_proto) {
5280         NEXTVAL_NEXTTOKE.opval =
5281             newSVOP(OP_CONST, 0, PL_lex_stuff);
5282         PL_lex_stuff = NULL;
5283         force_next(THING);
5284     }
5285     if (!have_name) {
5286         if (PL_curstash)
5287             sv_setpvs(PL_subname, "__ANON__");
5288         else
5289             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5290         if (is_sigsub)
5291             TOKEN(ANON_SIGSUB);
5292         else
5293             TOKEN(ANONSUB);
5294     }
5295     force_ident_maybe_lex('&');
5296     if (is_sigsub)
5297         TOKEN(SIGSUB);
5298     else
5299         TOKEN(SUB);
5300 }
5301
5302 static int
5303 yyl_interpcasemod(pTHX_ char *s)
5304 {
5305 #ifdef DEBUGGING
5306     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5307         Perl_croak(aTHX_
5308                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5309                    PL_bufptr, PL_bufend, *PL_bufptr);
5310 #endif
5311
5312     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5313         /* if at a \E */
5314         if (PL_lex_casemods) {
5315             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5316             PL_lex_casestack[PL_lex_casemods] = '\0';
5317
5318             if (PL_bufptr != PL_bufend
5319                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5320                     || oldmod == 'F')) {
5321                 PL_bufptr += 2;
5322                 PL_lex_state = LEX_INTERPCONCAT;
5323             }
5324             PL_lex_allbrackets--;
5325             return REPORT(')');
5326         }
5327         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5328            /* Got an unpaired \E */
5329            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5330                     "Useless use of \\E");
5331         }
5332         if (PL_bufptr != PL_bufend)
5333             PL_bufptr += 2;
5334         PL_lex_state = LEX_INTERPCONCAT;
5335         return yylex();
5336     }
5337     else {
5338         DEBUG_T({
5339             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5340         });
5341         s = PL_bufptr + 1;
5342         if (s[1] == '\\' && s[2] == 'E') {
5343             PL_bufptr = s + 3;
5344             PL_lex_state = LEX_INTERPCONCAT;
5345             return yylex();
5346         }
5347         else {
5348             I32 tmp;
5349             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5350                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5351             {
5352                 tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
5353             }
5354             if ((*s == 'L' || *s == 'U' || *s == 'F')
5355                 && (strpbrk(PL_lex_casestack, "LUF")))
5356             {
5357                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5358                 PL_lex_allbrackets--;
5359                 return REPORT(')');
5360             }
5361             if (PL_lex_casemods > 10)
5362                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5363             PL_lex_casestack[PL_lex_casemods++] = *s;
5364             PL_lex_casestack[PL_lex_casemods] = '\0';
5365             PL_lex_state = LEX_INTERPCONCAT;
5366             NEXTVAL_NEXTTOKE.ival = 0;
5367             force_next((2<<24)|'(');
5368             if (*s == 'l')
5369                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5370             else if (*s == 'u')
5371                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5372             else if (*s == 'L')
5373                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5374             else if (*s == 'U')
5375                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5376             else if (*s == 'Q')
5377                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5378             else if (*s == 'F')
5379                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5380             else
5381                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5382             PL_bufptr = s + 1;
5383         }
5384         force_next(FUNC);
5385         if (PL_lex_starts) {
5386             s = PL_bufptr;
5387             PL_lex_starts = 0;
5388             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5389             if (PL_lex_casemods == 1 && PL_lex_inpat)
5390                 TOKEN(',');
5391             else
5392                 AopNOASSIGN(OP_CONCAT);
5393         }
5394         else
5395             return yylex();
5396     }
5397 }
5398
5399 static int
5400 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5401                         GV **pgv, GV ***pgvp)
5402 {
5403     GV *ogv = NULL;     /* override (winner) */
5404     GV *hgv = NULL;     /* hidden (loser) */
5405     GV *gv = *pgv;
5406
5407     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5408         CV *cv;
5409         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5410                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5411                                     SVt_PVCV))
5412             && (cv = GvCVu(gv)))
5413         {
5414             if (GvIMPORTED_CV(gv))
5415                 ogv = gv;
5416             else if (! CvMETHOD(cv))
5417                 hgv = gv;
5418         }
5419         if (!ogv
5420             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5421             && (gv = **pgvp)
5422             && (isGV_with_GP(gv)
5423                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5424                 :   SvPCS_IMPORTED(gv)
5425                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5426                                                          len, 0), 1)))
5427         {
5428             ogv = gv;
5429         }
5430     }
5431
5432     *pgv = gv;
5433
5434     if (ogv) {
5435         *orig_keyword = key;
5436         return 0;               /* overridden by import or by GLOBAL */
5437     }
5438     else if (gv && !*pgvp
5439              && -key==KEY_lock  /* XXX generalizable kludge */
5440              && GvCVu(gv))
5441     {
5442         return 0;               /* any sub overrides "weak" keyword */
5443     }
5444     else {                      /* no override */
5445         key = -key;
5446         if (key == KEY_dump) {
5447             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5448         }
5449         *pgv = NULL;
5450         *pgvp = 0;
5451         if (hgv && key != KEY_x)        /* never ambiguous */
5452             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5453                            "Ambiguous call resolved as CORE::%s(), "
5454                            "qualify as such or use &",
5455                            GvENAME(hgv));
5456         return key;
5457     }
5458 }
5459
5460 static int
5461 yyl_qw(pTHX_ char *s, STRLEN len)
5462 {
5463     OP *words = NULL;
5464
5465     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5466     if (!s)
5467         missingterm(NULL, 0);
5468
5469     COPLINE_SET_FROM_MULTI_END;
5470     PL_expect = XOPERATOR;
5471     if (SvCUR(PL_lex_stuff)) {
5472         int warned_comma = !ckWARN(WARN_QW);
5473         int warned_comment = warned_comma;
5474         char *d = SvPV_force(PL_lex_stuff, len);
5475         while (len) {
5476             for (; isSPACE(*d) && len; --len, ++d)
5477                 /**/;
5478             if (len) {
5479                 SV *sv;
5480                 const char *b = d;
5481                 if (!warned_comma || !warned_comment) {
5482                     for (; !isSPACE(*d) && len; --len, ++d) {
5483                         if (!warned_comma && *d == ',') {
5484                             Perl_warner(aTHX_ packWARN(WARN_QW),
5485                                 "Possible attempt to separate words with commas");
5486                             ++warned_comma;
5487                         }
5488                         else if (!warned_comment && *d == '#') {
5489                             Perl_warner(aTHX_ packWARN(WARN_QW),
5490                                 "Possible attempt to put comments in qw() list");
5491                             ++warned_comment;
5492                         }
5493                     }
5494                 }
5495                 else {
5496                     for (; !isSPACE(*d) && len; --len, ++d)
5497                         /**/;
5498                 }
5499                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5500                 words = op_append_elem(OP_LIST, words,
5501                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5502             }
5503         }
5504     }
5505     if (!words)
5506         words = newNULLLIST();
5507     SvREFCNT_dec_NN(PL_lex_stuff);
5508     PL_lex_stuff = NULL;
5509     PL_expect = XOPERATOR;
5510     pl_yylval.opval = sawparens(words);
5511     TOKEN(QWLIST);
5512 }
5513
5514 static int
5515 yyl_hyphen(pTHX_ char *s)
5516 {
5517     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5518         I32 ftst = 0;
5519         char tmp;
5520
5521         s++;
5522         PL_bufptr = s;
5523         tmp = *s++;
5524
5525         while (s < PL_bufend && SPACE_OR_TAB(*s))
5526             s++;
5527
5528         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5529             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5530             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5531             OPERATOR('-');              /* unary minus */
5532         }
5533         switch (tmp) {
5534         case 'r': ftst = OP_FTEREAD;    break;
5535         case 'w': ftst = OP_FTEWRITE;   break;
5536         case 'x': ftst = OP_FTEEXEC;    break;
5537         case 'o': ftst = OP_FTEOWNED;   break;
5538         case 'R': ftst = OP_FTRREAD;    break;
5539         case 'W': ftst = OP_FTRWRITE;   break;
5540         case 'X': ftst = OP_FTREXEC;    break;
5541         case 'O': ftst = OP_FTROWNED;   break;
5542         case 'e': ftst = OP_FTIS;       break;
5543         case 'z': ftst = OP_FTZERO;     break;
5544         case 's': ftst = OP_FTSIZE;     break;
5545         case 'f': ftst = OP_FTFILE;     break;
5546         case 'd': ftst = OP_FTDIR;      break;
5547         case 'l': ftst = OP_FTLINK;     break;
5548         case 'p': ftst = OP_FTPIPE;     break;
5549         case 'S': ftst = OP_FTSOCK;     break;
5550         case 'u': ftst = OP_FTSUID;     break;
5551         case 'g': ftst = OP_FTSGID;     break;
5552         case 'k': ftst = OP_FTSVTX;     break;
5553         case 'b': ftst = OP_FTBLK;      break;
5554         case 'c': ftst = OP_FTCHR;      break;
5555         case 't': ftst = OP_FTTTY;      break;
5556         case 'T': ftst = OP_FTTEXT;     break;
5557         case 'B': ftst = OP_FTBINARY;   break;
5558         case 'M': case 'A': case 'C':
5559             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5560             switch (tmp) {
5561             case 'M': ftst = OP_FTMTIME; break;
5562             case 'A': ftst = OP_FTATIME; break;
5563             case 'C': ftst = OP_FTCTIME; break;
5564             default:                     break;
5565             }
5566             break;
5567         default:
5568             break;
5569         }
5570         if (ftst) {
5571             PL_last_uni = PL_oldbufptr;
5572             PL_last_lop_op = (OPCODE)ftst;
5573             DEBUG_T( {
5574                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5575             } );
5576             FTST(ftst);
5577         }
5578         else {
5579             /* Assume it was a minus followed by a one-letter named
5580              * subroutine call (or a -bareword), then. */
5581             DEBUG_T( {
5582                 PerlIO_printf(Perl_debug_log,
5583                     "### '-%c' looked like a file test but was not\n",
5584                     (int) tmp);
5585             } );
5586             s = --PL_bufptr;
5587         }
5588     }
5589     {
5590         const char tmp = *s++;
5591         if (*s == tmp) {
5592             s++;
5593             if (PL_expect == XOPERATOR)
5594                 TERM(POSTDEC);
5595             else
5596                 OPERATOR(PREDEC);
5597         }
5598         else if (*s == '>') {
5599             s++;
5600             s = skipspace(s);
5601             if (((*s == '$' || *s == '&') && s[1] == '*')
5602               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5603               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5604               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5605              )
5606             {
5607                 PL_expect = XPOSTDEREF;
5608                 TOKEN(ARROW);
5609             }
5610             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5611                 s = force_word(s,METHOD,FALSE,TRUE);
5612                 TOKEN(ARROW);
5613             }
5614             else if (*s == '$')
5615                 OPERATOR(ARROW);
5616             else
5617                 TERM(ARROW);
5618         }
5619         if (PL_expect == XOPERATOR) {
5620             if (*s == '='
5621                 && !PL_lex_allbrackets
5622                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5623             {
5624                 s--;
5625                 TOKEN(0);
5626             }
5627             Aop(OP_SUBTRACT);
5628         }
5629         else {
5630             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5631                 check_uni();
5632             OPERATOR('-');              /* unary minus */
5633         }
5634     }
5635 }
5636
5637 static int
5638 yyl_plus(pTHX_ char *s)
5639 {
5640     const char tmp = *s++;
5641     if (*s == tmp) {
5642         s++;
5643         if (PL_expect == XOPERATOR)
5644             TERM(POSTINC);
5645         else
5646             OPERATOR(PREINC);
5647     }
5648     if (PL_expect == XOPERATOR) {
5649         if (*s == '='
5650             && !PL_lex_allbrackets
5651             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5652         {
5653             s--;
5654             TOKEN(0);
5655         }
5656         Aop(OP_ADD);
5657     }
5658     else {
5659         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5660             check_uni();
5661         OPERATOR('+');
5662     }
5663 }
5664
5665 static int
5666 yyl_star(pTHX_ char *s)
5667 {
5668     if (PL_expect == XPOSTDEREF)
5669         POSTDEREF('*');
5670
5671     if (PL_expect != XOPERATOR) {
5672         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5673         PL_expect = XOPERATOR;
5674         force_ident(PL_tokenbuf, '*');
5675         if (!*PL_tokenbuf)
5676             PREREF('*');
5677         TERM('*');
5678     }
5679
5680     s++;
5681     if (*s == '*') {
5682         s++;
5683         if (*s == '=' && !PL_lex_allbrackets
5684             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5685         {
5686             s -= 2;
5687             TOKEN(0);
5688         }
5689         PWop(OP_POW);
5690     }
5691
5692     if (*s == '='
5693         && !PL_lex_allbrackets
5694         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5695     {
5696         s--;
5697         TOKEN(0);
5698     }
5699
5700     Mop(OP_MULTIPLY);
5701 }
5702
5703 static int
5704 yyl_percent(pTHX_ char *s)
5705 {
5706     if (PL_expect == XOPERATOR) {
5707         if (s[1] == '='
5708             && !PL_lex_allbrackets
5709             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5710         {
5711             TOKEN(0);
5712         }
5713         ++s;
5714         Mop(OP_MODULO);
5715     }
5716     else if (PL_expect == XPOSTDEREF)
5717         POSTDEREF('%');
5718
5719     PL_tokenbuf[0] = '%';
5720     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5721     pl_yylval.ival = 0;
5722     if (!PL_tokenbuf[1]) {
5723         PREREF('%');
5724     }
5725     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5726         && intuit_more(s, PL_bufend)) {
5727         if (*s == '[')
5728             PL_tokenbuf[0] = '@';
5729     }
5730     PL_expect = XOPERATOR;
5731     force_ident_maybe_lex('%');
5732     TERM('%');
5733 }
5734
5735 static int
5736 yyl_caret(pTHX_ char *s)
5737 {
5738     char *d = s;
5739     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5740     if (bof && s[1] == '.')
5741         s++;
5742     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5743             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5744     {
5745         s = d;
5746         TOKEN(0);
5747     }
5748     s++;
5749     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5750 }
5751
5752 static int
5753 yyl_colon(pTHX_ char *s)
5754 {
5755     OP *attrs;
5756
5757     switch (PL_expect) {
5758     case XOPERATOR:
5759         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5760             break;
5761         PL_bufptr = s;  /* update in case we back off */
5762         if (*s == '=') {
5763             Perl_croak(aTHX_
5764                        "Use of := for an empty attribute list is not allowed");
5765         }
5766         goto grabattrs;
5767     case XATTRBLOCK:
5768         PL_expect = XBLOCK;
5769         goto grabattrs;
5770     case XATTRTERM:
5771         PL_expect = XTERMBLOCK;
5772      grabattrs:
5773         /* NB: as well as parsing normal attributes, we also end up
5774          * here if there is something looking like attributes
5775          * following a signature (which is illegal, but used to be
5776          * legal in 5.20..5.26). If the latter, we still parse the
5777          * attributes so that error messages(s) are less confusing,
5778          * but ignore them (parser->sig_seen).
5779          */
5780         s = skipspace(s);
5781         attrs = NULL;
5782         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5783             bool sig = PL_parser->sig_seen;
5784             I32 tmp;
5785             SV *sv;
5786             STRLEN len;
5787             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5788             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5789                 if (tmp < 0) tmp = -tmp;
5790                 switch (tmp) {
5791                 case KEY_or:
5792                 case KEY_and:
5793                 case KEY_for:
5794                 case KEY_foreach:
5795                 case KEY_unless:
5796                 case KEY_if:
5797                 case KEY_while:
5798                 case KEY_until:
5799                     goto got_attrs;
5800                 default:
5801                     break;
5802                 }
5803             }
5804             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5805             if (*d == '(') {
5806                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5807                 if (!d) {
5808                     if (attrs)
5809                         op_free(attrs);
5810                     sv_free(sv);
5811                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5812                 }
5813                 COPLINE_SET_FROM_MULTI_END;
5814             }
5815             if (PL_lex_stuff) {
5816                 sv_catsv(sv, PL_lex_stuff);
5817                 attrs = op_append_elem(OP_LIST, attrs,
5818                                     newSVOP(OP_CONST, 0, sv));
5819                 SvREFCNT_dec_NN(PL_lex_stuff);
5820                 PL_lex_stuff = NULL;
5821             }
5822             else {
5823                 /* NOTE: any CV attrs applied here need to be part of
5824                    the CVf_BUILTIN_ATTRS define in cv.h! */
5825                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5826                     sv_free(sv);
5827                     if (!sig)
5828                         CvLVALUE_on(PL_compcv);
5829                 }
5830                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5831                     sv_free(sv);
5832                     if (!sig)
5833                         CvMETHOD_on(PL_compcv);
5834                 }
5835                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5836                     sv_free(sv);
5837                     if (!sig) {
5838                         Perl_ck_warner_d(aTHX_
5839                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5840                            ":const is experimental"
5841                         );
5842                         CvANONCONST_on(PL_compcv);
5843                         if (!CvANON(PL_compcv))
5844                             yyerror(":const is not permitted on named "
5845                                     "subroutines");
5846                     }
5847                 }
5848                 /* After we've set the flags, it could be argued that
5849                    we don't need to do the attributes.pm-based setting
5850                    process, and shouldn't bother appending recognized
5851                    flags.  To experiment with that, uncomment the
5852                    following "else".  (Note that's already been
5853                    uncommented.  That keeps the above-applied built-in
5854                    attributes from being intercepted (and possibly
5855                    rejected) by a package's attribute routines, but is
5856                    justified by the performance win for the common case
5857                    of applying only built-in attributes.) */
5858                 else
5859                     attrs = op_append_elem(OP_LIST, attrs,
5860                                         newSVOP(OP_CONST, 0,
5861                                                 sv));
5862             }
5863             s = skipspace(d);
5864             if (*s == ':' && s[1] != ':')
5865                 s = skipspace(s+1);
5866             else if (s == d)
5867                 break;  /* require real whitespace or :'s */
5868             /* XXX losing whitespace on sequential attributes here */
5869         }
5870
5871         if (*s != ';'
5872             && *s != '}'
5873             && !(PL_expect == XOPERATOR
5874                  ? (*s == '=' ||  *s == ')')
5875                  : (*s == '{' ||  *s == '(')))
5876         {
5877             const char q = ((*s == '\'') ? '"' : '\'');
5878             /* If here for an expression, and parsed no attrs, back off. */
5879             if (PL_expect == XOPERATOR && !attrs) {
5880                 s = PL_bufptr;
5881                 break;
5882             }
5883             /* MUST advance bufptr here to avoid bogus "at end of line"
5884                context messages from yyerror().
5885             */
5886             PL_bufptr = s;
5887             yyerror( (const char *)
5888                      (*s
5889                       ? Perl_form(aTHX_ "Invalid separator character "
5890                                   "%c%c%c in attribute list", q, *s, q)
5891                       : "Unterminated attribute list" ) );
5892             if (attrs)
5893                 op_free(attrs);
5894             OPERATOR(':');
5895         }
5896
5897     got_attrs:
5898         if (PL_parser->sig_seen) {
5899             /* see comment about about sig_seen and parser error
5900              * handling */
5901             if (attrs)
5902                 op_free(attrs);
5903             Perl_croak(aTHX_ "Subroutine attributes must come "
5904                              "before the signature");
5905         }
5906         if (attrs) {
5907             NEXTVAL_NEXTTOKE.opval = attrs;
5908             force_next(THING);
5909         }
5910         TOKEN(COLONATTR);
5911     }
5912
5913     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5914         s--;
5915         TOKEN(0);
5916     }
5917
5918     PL_lex_allbrackets--;
5919     OPERATOR(':');
5920 }
5921
5922 static int
5923 yyl_subproto(pTHX_ char *s, CV *cv)
5924 {
5925     STRLEN protolen = CvPROTOLEN(cv);
5926     const char *proto = CvPROTO(cv);
5927     bool optional;
5928
5929     proto = S_strip_spaces(aTHX_ proto, &protolen);
5930     if (!protolen)
5931         TERM(FUNC0SUB);
5932     if ((optional = *proto == ';')) {
5933         do {
5934             proto++;
5935         } while (*proto == ';');
5936     }
5937
5938     if (
5939         (
5940             (
5941                 *proto == '$' || *proto == '_'
5942              || *proto == '*' || *proto == '+'
5943             )
5944          && proto[1] == '\0'
5945         )
5946      || (
5947          *proto == '\\' && proto[1] && proto[2] == '\0'
5948         )
5949     ) {
5950         UNIPROTO(UNIOPSUB,optional);
5951     }
5952
5953     if (*proto == '\\' && proto[1] == '[') {
5954         const char *p = proto + 2;
5955         while(*p && *p != ']')
5956             ++p;
5957         if(*p == ']' && !p[1])
5958             UNIPROTO(UNIOPSUB,optional);
5959     }
5960
5961     if (*proto == '&' && *s == '{') {
5962         if (PL_curstash)
5963             sv_setpvs(PL_subname, "__ANON__");
5964         else
5965             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5966         if (!PL_lex_allbrackets
5967             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5968         {
5969             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5970         }
5971         PREBLOCK(LSTOPSUB);
5972     }
5973
5974     return KEY_NULL;
5975 }
5976
5977 static int
5978 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5979 {
5980     char *d;
5981     if (PL_lex_brackets > 100) {
5982         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5983     }
5984
5985     switch (PL_expect) {
5986     case XTERM:
5987     case XTERMORDORDOR:
5988         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5989         PL_lex_allbrackets++;
5990         OPERATOR(HASHBRACK);
5991     case XOPERATOR:
5992         while (s < PL_bufend && SPACE_OR_TAB(*s))
5993             s++;
5994         d = s;
5995         PL_tokenbuf[0] = '\0';
5996         if (d < PL_bufend && *d == '-') {
5997             PL_tokenbuf[0] = '-';
5998             d++;
5999             while (d < PL_bufend && SPACE_OR_TAB(*d))
6000                 d++;
6001         }
6002         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6003             STRLEN len;
6004             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6005                           FALSE, &len);
6006             while (d < PL_bufend && SPACE_OR_TAB(*d))
6007                 d++;
6008             if (*d == '}') {
6009                 const char minus = (PL_tokenbuf[0] == '-');
6010                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6011                 if (minus)
6012                     force_next('-');
6013             }
6014         }
6015         /* FALLTHROUGH */
6016     case XATTRTERM:
6017     case XTERMBLOCK:
6018         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6019         PL_lex_allbrackets++;
6020         PL_expect = XSTATE;
6021         break;
6022     case XATTRBLOCK:
6023     case XBLOCK:
6024         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6025         PL_lex_allbrackets++;
6026         PL_expect = XSTATE;
6027         break;
6028     case XBLOCKTERM:
6029         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6030         PL_lex_allbrackets++;
6031         PL_expect = XSTATE;
6032         break;
6033     default: {
6034             const char *t;
6035             if (PL_oldoldbufptr == PL_last_lop)
6036                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6037             else
6038                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6039             PL_lex_allbrackets++;
6040             s = skipspace(s);
6041             if (*s == '}') {
6042                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6043                     PL_expect = XTERM;
6044                     /* This hack is to get the ${} in the message. */
6045                     PL_bufptr = s+1;
6046                     yyerror("syntax error");
6047                     break;
6048                 }
6049                 OPERATOR(HASHBRACK);
6050             }
6051             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6052                 /* ${...} or @{...} etc., but not print {...}
6053                  * Skip the disambiguation and treat this as a block.
6054                  */
6055                 goto block_expectation;
6056             }
6057             /* This hack serves to disambiguate a pair of curlies
6058              * as being a block or an anon hash.  Normally, expectation
6059              * determines that, but in cases where we're not in a
6060              * position to expect anything in particular (like inside
6061              * eval"") we have to resolve the ambiguity.  This code
6062              * covers the case where the first term in the curlies is a
6063              * quoted string.  Most other cases need to be explicitly
6064              * disambiguated by prepending a "+" before the opening
6065              * curly in order to force resolution as an anon hash.
6066              *
6067              * XXX should probably propagate the outer expectation
6068              * into eval"" to rely less on this hack, but that could
6069              * potentially break current behavior of eval"".
6070              * GSAR 97-07-21
6071              */
6072             t = s;
6073             if (*s == '\'' || *s == '"' || *s == '`') {
6074                 /* common case: get past first string, handling escapes */
6075                 for (t++; t < PL_bufend && *t != *s;)
6076                     if (*t++ == '\\')
6077                         t++;
6078                 t++;
6079             }
6080             else if (*s == 'q') {
6081                 if (++t < PL_bufend
6082                     && (!isWORDCHAR(*t)
6083                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6084                             && !isWORDCHAR(*t))))
6085                 {
6086                     /* skip q//-like construct */
6087                     const char *tmps;
6088                     char open, close, term;
6089                     I32 brackets = 1;
6090
6091                     while (t < PL_bufend && isSPACE(*t))
6092                         t++;
6093                     /* check for q => */
6094                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6095                         OPERATOR(HASHBRACK);
6096                     }
6097                     term = *t;
6098                     open = term;
6099                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6100                         term = tmps[5];
6101                     close = term;
6102                     if (open == close)
6103                         for (t++; t < PL_bufend; t++) {
6104                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6105                                 t++;
6106                             else if (*t == open)
6107                                 break;
6108                         }
6109                     else {
6110                         for (t++; t < PL_bufend; t++) {
6111                             if (*t == '\\' && t+1 < PL_bufend)
6112                                 t++;
6113                             else if (*t == close && --brackets <= 0)
6114                                 break;
6115                             else if (*t == open)
6116                                 brackets++;
6117                         }
6118                     }
6119                     t++;
6120                 }
6121                 else
6122                     /* skip plain q word */
6123                     while (   t < PL_bufend
6124                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6125                     {
6126                         t += UTF ? UTF8SKIP(t) : 1;
6127                     }
6128             }
6129             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6130                 t += UTF ? UTF8SKIP(t) : 1;
6131                 while (   t < PL_bufend
6132                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6133                 {
6134                     t += UTF ? UTF8SKIP(t) : 1;
6135                 }
6136             }
6137             while (t < PL_bufend && isSPACE(*t))
6138                 t++;
6139             /* if comma follows first term, call it an anon hash */
6140             /* XXX it could be a comma expression with loop modifiers */
6141             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6142                                || (*t == '=' && t[1] == '>')))
6143                 OPERATOR(HASHBRACK);
6144             if (PL_expect == XREF) {
6145               block_expectation:
6146                 /* If there is an opening brace or 'sub:', treat it
6147                    as a term to make ${{...}}{k} and &{sub:attr...}
6148                    dwim.  Otherwise, treat it as a statement, so
6149                    map {no strict; ...} works.
6150                  */
6151                 s = skipspace(s);
6152                 if (*s == '{') {
6153                     PL_expect = XTERM;
6154                     break;
6155                 }
6156                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6157                     PL_bufptr = s;
6158                     d = s + 3;
6159                     d = skipspace(d);
6160                     s = PL_bufptr;
6161                     if (*d == ':') {
6162                         PL_expect = XTERM;
6163                         break;
6164                     }
6165                 }
6166                 PL_expect = XSTATE;
6167             }
6168             else {
6169                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6170                 PL_expect = XSTATE;
6171             }
6172         }
6173         break;
6174     }
6175
6176     pl_yylval.ival = CopLINE(PL_curcop);
6177     PL_copline = NOLINE;   /* invalidate current command line number */
6178     TOKEN(formbrack ? '=' : '{');
6179 }
6180
6181 static int
6182 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6183 {
6184     assert(s != PL_bufend);
6185     s++;
6186
6187     if (PL_lex_brackets <= 0)
6188         /* diag_listed_as: Unmatched right %s bracket */
6189         yyerror("Unmatched right curly bracket");
6190     else
6191         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6192
6193     PL_lex_allbrackets--;
6194
6195     if (PL_lex_state == LEX_INTERPNORMAL) {
6196         if (PL_lex_brackets == 0) {
6197             if (PL_expect & XFAKEBRACK) {
6198                 PL_expect &= XENUMMASK;
6199                 PL_lex_state = LEX_INTERPEND;
6200                 PL_bufptr = s;
6201                 return yylex(); /* ignore fake brackets */
6202             }
6203             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6204              && SvEVALED(PL_lex_repl))
6205                 PL_lex_state = LEX_INTERPEND;
6206             else if (*s == '-' && s[1] == '>')
6207                 PL_lex_state = LEX_INTERPENDMAYBE;
6208             else if (*s != '[' && *s != '{')
6209                 PL_lex_state = LEX_INTERPEND;
6210         }
6211     }
6212
6213     if (PL_expect & XFAKEBRACK) {
6214         PL_expect &= XENUMMASK;
6215         PL_bufptr = s;
6216         return yylex();         /* ignore fake brackets */
6217     }
6218
6219     force_next(formbrack ? '.' : '}');
6220     if (formbrack) LEAVE_with_name("lex_format");
6221     if (formbrack == 2) { /* means . where arguments were expected */
6222         force_next(';');
6223         TOKEN(FORMRBRACK);
6224     }
6225
6226     TOKEN(';');
6227 }
6228
6229 static int
6230 yyl_ampersand(pTHX_ char *s)
6231 {
6232     if (PL_expect == XPOSTDEREF)
6233         POSTDEREF('&');
6234
6235     s++;
6236     if (*s++ == '&') {
6237         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6238                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6239             s -= 2;
6240             TOKEN(0);
6241         }
6242         AOPERATOR(ANDAND);
6243     }
6244     s--;
6245
6246     if (PL_expect == XOPERATOR) {
6247         char *d;
6248         bool bof;
6249         if (   PL_bufptr == PL_linestart
6250             && ckWARN(WARN_SEMICOLON)
6251             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6252         {
6253             CopLINE_dec(PL_curcop);
6254             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6255             CopLINE_inc(PL_curcop);
6256         }
6257         d = s;
6258         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6259             s++;
6260         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6261                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6262             s = d;
6263             s--;
6264             TOKEN(0);
6265         }
6266         if (d == s)
6267             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6268         else
6269             BAop(OP_SBIT_AND);
6270     }
6271
6272     PL_tokenbuf[0] = '&';
6273     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6274     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6275
6276     if (PL_tokenbuf[1])
6277         force_ident_maybe_lex('&');
6278     else
6279         PREREF('&');
6280
6281     TERM('&');
6282 }
6283
6284 static int
6285 yyl_verticalbar(pTHX_ char *s)
6286 {
6287     char *d;
6288     bool bof;
6289
6290     s++;
6291     if (*s++ == '|') {
6292         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6293                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6294             s -= 2;
6295             TOKEN(0);
6296         }
6297         AOPERATOR(OROR);
6298     }
6299
6300     s--;
6301     d = s;
6302     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6303         s++;
6304
6305     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6306             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6307         s = d - 1;
6308         TOKEN(0);
6309     }
6310
6311     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6312 }
6313
6314 static int
6315 yyl_bang(pTHX_ char *s)
6316 {
6317     const char tmp = *s++;
6318     if (tmp == '=') {
6319         /* was this !=~ where !~ was meant?
6320          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6321
6322         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6323             const char *t = s+1;
6324
6325             while (t < PL_bufend && isSPACE(*t))
6326                 ++t;
6327
6328             if (*t == '/' || *t == '?'
6329                 || ((*t == 'm' || *t == 's' || *t == 'y')
6330                     && !isWORDCHAR(t[1]))
6331                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6332                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6333                             "!=~ should be !~");
6334         }
6335
6336         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6337             s -= 2;
6338             TOKEN(0);
6339         }
6340
6341         ChEop(OP_NE);
6342     }
6343
6344     if (tmp == '~')
6345         PMop(OP_NOT);
6346
6347     s--;
6348     OPERATOR('!');
6349 }
6350
6351 static int
6352 yyl_snail(pTHX_ char *s)
6353 {
6354     if (PL_expect == XPOSTDEREF)
6355         POSTDEREF('@');
6356     PL_tokenbuf[0] = '@';
6357     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6358     if (PL_expect == XOPERATOR) {
6359         char *d = s;
6360         if (PL_bufptr > s) {
6361             d = PL_bufptr-1;
6362             PL_bufptr = PL_oldbufptr;
6363         }
6364         no_op("Array", d);
6365     }
6366     pl_yylval.ival = 0;
6367     if (!PL_tokenbuf[1]) {
6368         PREREF('@');
6369     }
6370     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6371         s = skipspace(s);
6372     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6373         && intuit_more(s, PL_bufend))
6374     {
6375         if (*s == '{')
6376             PL_tokenbuf[0] = '%';
6377
6378         /* Warn about @ where they meant $. */
6379         if (*s == '[' || *s == '{') {
6380             if (ckWARN(WARN_SYNTAX)) {
6381                 S_check_scalar_slice(aTHX_ s);
6382             }
6383         }
6384     }
6385     PL_expect = XOPERATOR;
6386     force_ident_maybe_lex('@');
6387     TERM('@');
6388 }
6389
6390 static int
6391 yyl_slash(pTHX_ char *s)
6392 {
6393     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6394         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6396             TOKEN(0);
6397         s += 2;
6398         AOPERATOR(DORDOR);
6399     }
6400     else if (PL_expect == XOPERATOR) {
6401         s++;
6402         if (*s == '=' && !PL_lex_allbrackets
6403             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6404         {
6405             s--;
6406             TOKEN(0);
6407         }
6408         Mop(OP_DIVIDE);
6409     }
6410     else {
6411         /* Disable warning on "study /blah/" */
6412         if (    PL_oldoldbufptr == PL_last_uni
6413             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6414                 || memNE(PL_last_uni, "study", 5)
6415                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6416          ))
6417             check_uni();
6418         s = scan_pat(s,OP_MATCH);
6419         TERM(sublex_start());
6420     }
6421 }
6422
6423 static int
6424 yyl_leftsquare(pTHX_ char *s)
6425 {
6426     char tmp;
6427
6428     if (PL_lex_brackets > 100)
6429         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6430     PL_lex_brackstack[PL_lex_brackets++] = 0;
6431     PL_lex_allbrackets++;
6432     tmp = *s++;
6433     OPERATOR(tmp);
6434 }
6435
6436 static int
6437 yyl_rightsquare(pTHX_ char *s)
6438 {
6439     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6440         TOKEN(0);
6441     s++;
6442     if (PL_lex_brackets <= 0)
6443         /* diag_listed_as: Unmatched right %s bracket */
6444         yyerror("Unmatched right square bracket");
6445     else
6446         --PL_lex_brackets;
6447     PL_lex_allbrackets--;
6448     if (PL_lex_state == LEX_INTERPNORMAL) {
6449         if (PL_lex_brackets == 0) {
6450             if (*s == '-' && s[1] == '>')
6451                 PL_lex_state = LEX_INTERPENDMAYBE;
6452             else if (*s != '[' && *s != '{')
6453                 PL_lex_state = LEX_INTERPEND;
6454         }
6455     }
6456     TERM(']');
6457 }
6458
6459 static int
6460 yyl_tilde(pTHX_ char *s)
6461 {
6462     bool bof;
6463     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6464         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6465             TOKEN(0);
6466         s += 2;
6467         Perl_ck_warner_d(aTHX_
6468             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6469             "Smartmatch is experimental");
6470         NCEop(OP_SMARTMATCH);
6471     }
6472     s++;
6473     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6474         s++;
6475         BCop(OP_SCOMPLEMENT);
6476     }
6477     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6478 }
6479
6480 static int
6481 yyl_leftparen(pTHX_ char *s)
6482 {
6483     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6484         PL_oldbufptr = PL_oldoldbufptr;         /* allow print(STDOUT 123) */
6485     else
6486         PL_expect = XTERM;
6487     s = skipspace(s);
6488     PL_lex_allbrackets++;
6489     TOKEN('(');
6490 }
6491
6492 static int
6493 yyl_rightparen(pTHX_ char *s)
6494 {
6495     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6496         TOKEN(0);
6497     s++;
6498     PL_lex_allbrackets--;
6499     s = skipspace(s);
6500     if (*s == '{')
6501         PREBLOCK(')');
6502     TERM(')');
6503 }
6504
6505 static int
6506 yyl_leftpointy(pTHX_ char *s)
6507 {
6508     char tmp;
6509
6510     if (PL_expect != XOPERATOR) {
6511         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6512             check_uni();
6513         if (s[1] == '<' && s[2] != '>')
6514             s = scan_heredoc(s);
6515         else
6516             s = scan_inputsymbol(s);
6517         PL_expect = XOPERATOR;
6518         TOKEN(sublex_start());
6519     }
6520
6521     s++;
6522
6523     tmp = *s++;
6524     if (tmp == '<') {
6525         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6526             s -= 2;
6527             TOKEN(0);
6528         }
6529         SHop(OP_LEFT_SHIFT);
6530     }
6531     if (tmp == '=') {
6532         tmp = *s++;
6533         if (tmp == '>') {
6534             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6535                 s -= 3;
6536                 TOKEN(0);
6537             }
6538             NCEop(OP_NCMP);
6539         }
6540         s--;
6541         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6542             s -= 2;
6543             TOKEN(0);
6544         }
6545         ChRop(OP_LE);
6546     }
6547
6548     s--;
6549     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6550         s--;
6551         TOKEN(0);
6552     }
6553
6554     ChRop(OP_LT);
6555 }
6556
6557 static int
6558 yyl_rightpointy(pTHX_ char *s)
6559 {
6560     const char tmp = *s++;
6561
6562     if (tmp == '>') {
6563         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6564             s -= 2;
6565             TOKEN(0);
6566         }
6567         SHop(OP_RIGHT_SHIFT);
6568     }
6569     else if (tmp == '=') {
6570         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6571             s -= 2;
6572             TOKEN(0);
6573         }
6574         ChRop(OP_GE);
6575     }
6576
6577     s--;
6578     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6579         s--;
6580         TOKEN(0);
6581     }
6582
6583     ChRop(OP_GT);
6584 }
6585
6586 static int
6587 yyl_sglquote(pTHX_ char *s)
6588 {
6589     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6590     if (!s)
6591         missingterm(NULL, 0);
6592     COPLINE_SET_FROM_MULTI_END;
6593     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6594     if (PL_expect == XOPERATOR) {
6595         no_op("String",s);
6596     }
6597     pl_yylval.ival = OP_CONST;
6598     TERM(sublex_start());
6599 }
6600
6601 static int
6602 yyl_dblquote(pTHX_ char *s)
6603 {
6604     char *d;
6605     STRLEN len;
6606     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6607     DEBUG_T( {
6608         if (s)
6609             printbuf("### Saw string before %s\n", s);
6610         else
6611             PerlIO_printf(Perl_debug_log,
6612                          "### Saw unterminated string\n");
6613     } );
6614     if (PL_expect == XOPERATOR) {
6615             no_op("String",s);
6616     }
6617     if (!s)
6618         missingterm(NULL, 0);
6619     pl_yylval.ival = OP_CONST;
6620     /* FIXME. I think that this can be const if char *d is replaced by
6621        more localised variables.  */
6622     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6623         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6624             pl_yylval.ival = OP_STRINGIFY;
6625             break;
6626         }
6627     }
6628     if (pl_yylval.ival == OP_CONST)
6629         COPLINE_SET_FROM_MULTI_END;
6630     TERM(sublex_start());
6631 }
6632
6633 static int
6634 yyl_backtick(pTHX_ char *s)
6635 {
6636     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6637     DEBUG_T( {
6638         if (s)
6639             printbuf("### Saw backtick string before %s\n", s);
6640         else
6641             PerlIO_printf(Perl_debug_log,
6642                          "### Saw unterminated backtick string\n");
6643     } );
6644     if (PL_expect == XOPERATOR)
6645         no_op("Backticks",s);
6646     if (!s)
6647         missingterm(NULL, 0);
6648     pl_yylval.ival = OP_BACKTICK;
6649     TERM(sublex_start());
6650 }
6651
6652 static int
6653 yyl_backslash(pTHX_ char *s)
6654 {
6655     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6656         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6657                        *s, *s);
6658     if (PL_expect == XOPERATOR)
6659         no_op("Backslash",s);
6660     OPERATOR(REFGEN);
6661 }
6662
6663 static void
6664 yyl_data_handle(pTHX)
6665 {
6666     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6667                             ? PL_curstash
6668                             : PL_defstash;
6669     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6670
6671     if (!isGV(gv))
6672         gv_init(gv,stash,"DATA",4,0);
6673
6674     GvMULTI_on(gv);
6675     if (!GvIO(gv))
6676         GvIOp(gv) = newIO();
6677     IoIFP(GvIOp(gv)) = PL_rsfp;
6678
6679     /* Mark this internal pseudo-handle as clean */
6680     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6681     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6682         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6683     else
6684         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6685
6686 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6687     /* if the script was opened in binmode, we need to revert
6688      * it to text mode for compatibility; but only iff it has CRs
6689      * XXX this is a questionable hack at best. */
6690     if (PL_bufend-PL_bufptr > 2
6691         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6692     {
6693         Off_t loc = 0;
6694         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6695             loc = PerlIO_tell(PL_rsfp);
6696             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6697         }
6698         if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6699             if (loc > 0)
6700                 PerlIO_seek(PL_rsfp, loc, 0);
6701         }
6702     }
6703 #endif
6704
6705 #ifdef PERLIO_LAYERS
6706     if (!IN_BYTES) {
6707         if (UTF)
6708             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6709     }
6710 #endif
6711
6712     PL_rsfp = NULL;
6713 }
6714
6715 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6716     __attribute__noreturn__;
6717
6718 PERL_STATIC_NO_RET void
6719 yyl_croak_unrecognised(pTHX_ char *s)
6720 {
6721     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6722     const char *c;
6723     char *d;
6724     STRLEN len;
6725
6726     if (UTF) {
6727         STRLEN skiplen = UTF8SKIP(s);
6728         STRLEN stravail = PL_bufend - s;
6729         c = sv_uni_display(dsv, newSVpvn_flags(s,
6730                                                skiplen > stravail ? stravail : skiplen,
6731                                                SVs_TEMP | SVf_UTF8),
6732                            10, UNI_DISPLAY_ISPRINT);
6733     }
6734     else {
6735         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6736     }
6737
6738     if (s >= PL_linestart) {
6739         d = PL_linestart;
6740     }
6741     else {
6742         /* somehow (probably due to a parse failure), PL_linestart has advanced
6743          * pass PL_bufptr, get a reasonable beginning of line
6744          */
6745         d = s;
6746         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6747             --d;
6748     }
6749     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6750     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6751         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6752     }
6753
6754     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6755                       UTF8fARG(UTF, (s - d), d),
6756                      (int) len + 1);
6757 }
6758
6759 static int
6760 yyl_require(pTHX_ char *s, I32 orig_keyword)
6761 {
6762     s = skipspace(s);
6763     if (isDIGIT(*s)) {
6764         s = force_version(s, FALSE);
6765     }
6766     else if (*s != 'v' || !isDIGIT(s[1])
6767             || (s = force_version(s, TRUE), *s == 'v'))
6768     {
6769         *PL_tokenbuf = '\0';
6770         s = force_word(s,BAREWORD,TRUE,TRUE);
6771         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6772                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6773                                    UTF))
6774         {
6775             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6776                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6777         }
6778         else if (*s == '<')
6779             yyerror("<> at require-statement should be quotes");
6780     }
6781
6782     if (orig_keyword == KEY_require)
6783         pl_yylval.ival = 1;
6784     else
6785         pl_yylval.ival = 0;
6786
6787     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6788     PL_bufptr = s;
6789     PL_last_uni = PL_oldbufptr;
6790     PL_last_lop_op = OP_REQUIRE;
6791     s = skipspace(s);
6792     return REPORT( (int)REQUIRE );
6793 }
6794
6795 static int
6796 yyl_foreach(pTHX_ char *s)
6797 {
6798     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6799         return REPORT(0);
6800     pl_yylval.ival = CopLINE(PL_curcop);
6801     s = skipspace(s);
6802     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6803         char *p = s;
6804         SSize_t s_off = s - SvPVX(PL_linestr);
6805         STRLEN len;
6806
6807         if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6808             p += 2;
6809         }
6810         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6811             p += 3;
6812         }
6813
6814         p = skipspace(p);
6815         /* skip optional package name, as in "for my abc $x (..)" */
6816         if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6817             p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6818             p = skipspace(p);
6819         }
6820         if (*p != '$' && *p != '\\')
6821             Perl_croak(aTHX_ "Missing $ on loop variable");
6822
6823         /* The buffer may have been reallocated, update s */
6824         s = SvPVX(PL_linestr) + s_off;
6825     }
6826     OPERATOR(FOR);
6827 }
6828
6829 static int
6830 yyl_do(pTHX_ char *s, I32 orig_keyword)
6831 {
6832     s = skipspace(s);
6833     if (*s == '{')
6834         PRETERMBLOCK(DO);
6835     if (*s != '\'') {
6836         char *d;
6837         STRLEN len;
6838         *PL_tokenbuf = '&';
6839         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6840                       1, &len);
6841         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6842          && !keyword(PL_tokenbuf + 1, len, 0)) {
6843             SSize_t off = s-SvPVX(PL_linestr);
6844             d = skipspace(d);
6845             s = SvPVX(PL_linestr)+off;
6846             if (*d == '(') {
6847                 force_ident_maybe_lex('&');
6848                 s = d;
6849             }
6850         }
6851     }
6852     if (orig_keyword == KEY_do)
6853         pl_yylval.ival = 1;
6854     else
6855         pl_yylval.ival = 0;
6856     OPERATOR(DO);
6857 }
6858
6859 static int
6860 yyl_my(pTHX_ char *s, I32 my)
6861 {
6862     if (PL_in_my) {
6863         PL_bufptr = s;
6864         yyerror(Perl_form(aTHX_
6865                           "Can't redeclare \"%s\" in \"%s\"",
6866                            my       == KEY_my    ? "my" :
6867                            my       == KEY_state ? "state" : "our",
6868                            PL_in_my == KEY_my    ? "my" :
6869                            PL_in_my == KEY_state ? "state" : "our"));
6870     }
6871     PL_in_my = (U16)my;
6872     s = skipspace(s);
6873     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6874         STRLEN len;
6875         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6876         if (memEQs(PL_tokenbuf, len, "sub"))
6877             return yyl_sub(aTHX_ s, my);
6878         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6879         if (!PL_in_my_stash) {
6880             char tmpbuf[1024];
6881             int i;
6882             PL_bufptr = s;
6883             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6884             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6885             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6886         }
6887     }
6888     else if (*s == '\\') {
6889         if (!FEATURE_MYREF_IS_ENABLED)
6890             Perl_croak(aTHX_ "The experimental declared_refs "
6891                              "feature is not enabled");
6892         Perl_ck_warner_d(aTHX_
6893              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6894             "Declaring references is experimental");
6895     }
6896     OPERATOR(MY);
6897 }
6898
6899 static int yyl_try(pTHX_ char*);
6900
6901 static bool
6902 yyl_eol_needs_semicolon(pTHX_ char **ps)
6903 {
6904     char *s = *ps;
6905     if (PL_lex_state != LEX_NORMAL
6906         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6907     {
6908         const bool in_comment = *s == '#';
6909         char *d;
6910         if (*s == '#' && s == PL_linestart && PL_in_eval
6911          && !PL_rsfp && !PL_parser->filtered) {
6912             /* handle eval qq[#line 1 "foo"\n ...] */
6913             CopLINE_dec(PL_curcop);
6914             incline(s, PL_bufend);
6915         }
6916         d = s;
6917         while (d < PL_bufend && *d != '\n')
6918             d++;
6919         if (d < PL_bufend)
6920             d++;
6921         s = d;
6922         if (in_comment && d == PL_bufend
6923             && PL_lex_state == LEX_INTERPNORMAL
6924             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6925             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6926         else
6927             incline(s, PL_bufend);
6928         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6929             PL_lex_state = LEX_FORMLINE;
6930             force_next(FORMRBRACK);
6931             *ps = s;
6932             return TRUE;
6933         }
6934     }
6935     else {
6936         while (s < PL_bufend && *s != '\n')
6937             s++;
6938         if (s < PL_bufend) {
6939             s++;
6940             if (s < PL_bufend)
6941                 incline(s, PL_bufend);
6942         }
6943     }
6944     *ps = s;
6945     return FALSE;
6946 }
6947
6948 static int
6949 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6950 {
6951     char *d;
6952
6953     goto start;
6954
6955     do {
6956         fake_eof = 0;
6957         bof = cBOOL(PL_rsfp);
6958       start:
6959
6960         PL_bufptr = PL_bufend;
6961         COPLINE_INC_WITH_HERELINES;
6962         if (!lex_next_chunk(fake_eof)) {
6963             CopLINE_dec(PL_curcop);
6964             s = PL_bufptr;
6965             TOKEN(';'); /* not infinite loop because rsfp is NULL now */
6966         }
6967         CopLINE_dec(PL_curcop);
6968         s = PL_bufptr;
6969         /* If it looks like the start of a BOM or raw UTF-16,
6970          * check if it in fact is. */
6971         if (bof && PL_rsfp
6972             && (   *s == 0
6973                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6974                 || *(U8*)s >= 0xFE
6975                 || s[1] == 0))
6976         {
6977             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6978             bof = (offset == (Off_t)SvCUR(PL_linestr));
6979 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6980             /* offset may include swallowed CR */
6981             if (!bof)
6982                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6983 #endif
6984             if (bof) {
6985                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6986                 s = swallow_bom((U8*)s);
6987             }
6988         }
6989         if (PL_parser->in_pod) {
6990             /* Incest with pod. */
6991             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6992                 && !isALPHA(s[4]))
6993             {
6994                 SvPVCLEAR(PL_linestr);
6995                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6996                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6997                 PL_last_lop = PL_last_uni = NULL;
6998                 PL_parser->in_pod = 0;
6999             }
7000         }
7001         if (PL_rsfp || PL_parser->filtered)
7002             incline(s, PL_bufend);
7003     } while (PL_parser->in_pod);
7004
7005     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7006     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7007     PL_last_lop = PL_last_uni = NULL;
7008     if (CopLINE(PL_curcop) == 1) {
7009         while (s < PL_bufend && isSPACE(*s))
7010             s++;
7011         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7012             s++;
7013         d = NULL;
7014         if (!PL_in_eval) {
7015             if (*s == '#' && *(s+1) == '!')
7016                 d = s + 2;
7017 #ifdef ALTERNATE_SHEBANG
7018             else {
7019                 static char const as[] = ALTERNATE_SHEBANG;
7020                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7021                     d = s + (sizeof(as) - 1);
7022             }
7023 #endif /* ALTERNATE_SHEBANG */
7024         }
7025         if (d) {
7026             char *ipath;
7027             char *ipathend;
7028
7029             while (isSPACE(*d))
7030                 d++;
7031             ipath = d;
7032             while (*d && !isSPACE(*d))
7033                 d++;
7034             ipathend = d;
7035
7036 #ifdef ARG_ZERO_IS_SCRIPT
7037             if (ipathend > ipath) {
7038                 /*
7039                  * HP-UX (at least) sets argv[0] to the script name,
7040                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7041                  * at least, set argv[0] to the basename of the Perl
7042                  * interpreter. So, having found "#!", we'll set it right.
7043                  */
7044                 SV* copfilesv = CopFILESV(PL_curcop);
7045                 if (copfilesv) {
7046                     SV * const x =
7047                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7048                                          SVt_PV)); /* $^X */
7049                     assert(SvPOK(x) || SvGMAGICAL(x));
7050                     if (sv_eq(x, copfilesv)) {
7051                         sv_setpvn(x, ipath, ipathend - ipath);
7052                         SvSETMAGIC(x);
7053                     }
7054                     else {
7055                         STRLEN blen;
7056                         STRLEN llen;
7057                         const char *bstart = SvPV_const(copfilesv, blen);
7058                         const char * const lstart = SvPV_const(x, llen);
7059                         if (llen < blen) {
7060                             bstart += blen - llen;
7061                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7062                                 sv_setpvn(x, ipath, ipathend - ipath);
7063                                 SvSETMAGIC(x);
7064                             }
7065                         }
7066                     }
7067                 }
7068                 else {
7069                     /* Anything to do if no copfilesv? */
7070                 }
7071                 TAINT_NOT;      /* $^X is always tainted, but that's OK */
7072             }
7073 #endif /* ARG_ZERO_IS_SCRIPT */
7074
7075             /*
7076              * Look for options.
7077              */
7078             d = instr(s,"perl -");
7079             if (!d) {
7080                 d = instr(s,"perl");
7081 #if defined(DOSISH)
7082                 /* avoid getting into infinite loops when shebang
7083                  * line contains "Perl" rather than "perl" */
7084                 if (!d) {
7085                     for (d = ipathend-4; d >= ipath; --d) {
7086                         if (isALPHA_FOLD_EQ(*d, 'p')
7087                             && !ibcmp(d, "perl", 4))
7088                         {
7089                             break;
7090                         }
7091                     }
7092                     if (d < ipath)
7093                         d = NULL;
7094                 }
7095 #endif
7096             }
7097 #ifdef ALTERNATE_SHEBANG
7098             /*
7099              * If the ALTERNATE_SHEBANG on this system starts with a
7100              * character that can be part of a Perl expression, then if
7101              * we see it but not "perl", we're probably looking at the
7102              * start of Perl code, not a request to hand off to some
7103              * other interpreter.  Similarly, if "perl" is there, but
7104              * not in the first 'word' of the line, we assume the line
7105              * contains the start of the Perl program.
7106              */
7107             if (d && *s != '#') {
7108                 const char *c = ipath;
7109                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7110                     c++;
7111                 if (c < d)
7112                     d = NULL;   /* "perl" not in first word; ignore */
7113                 else
7114                     *s = '#';   /* Don't try to parse shebang line */
7115             }
7116 #endif /* ALTERNATE_SHEBANG */
7117             if (!d
7118                 && *s == '#'
7119                 && ipathend > ipath
7120                 && !PL_minus_c
7121                 && !instr(s,"indir")
7122                 && instr(PL_origargv[0],"perl"))
7123             {
7124                 char **newargv;
7125
7126                 *ipathend = '\0';
7127                 s = ipathend + 1;
7128                 while (s < PL_bufend && isSPACE(*s))
7129                     s++;
7130                 if (s < PL_bufend) {
7131                     Newx(newargv,PL_origargc+3,char*);
7132                     newargv[1] = s;
7133                     while (s < PL_bufend && !isSPACE(*s))
7134                         s++;
7135                     *s = '\0';
7136                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7137                 }
7138                 else
7139                     newargv = PL_origargv;
7140                 newargv[0] = ipath;
7141                 PERL_FPU_PRE_EXEC
7142                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7143                 PERL_FPU_POST_EXEC
7144                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7145             }
7146             if (d) {
7147                 while (*d && !isSPACE(*d))
7148                     d++;
7149                 while (SPACE_OR_TAB(*d))
7150                     d++;
7151
7152                 if (*d++ == '-') {
7153                     const bool switches_done = PL_doswitches;
7154                     const U32 oldpdb = PL_perldb;
7155                     const bool oldn = PL_minus_n;
7156                     const bool oldp = PL_minus_p;
7157                     const char *d1 = d;
7158
7159                     do {
7160                         bool baduni = FALSE;
7161                         if (*d1 == 'C') {
7162                             const char *d2 = d1 + 1;
7163                             if (parse_unicode_opts((const char **)&d2)
7164                                 != PL_unicode)
7165                                 baduni = TRUE;
7166                         }
7167                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7168                             const char * const m = d1;
7169                             while (*d1 && !isSPACE(*d1))
7170                                 d1++;
7171                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7172                                   (int)(d1 - m), m);
7173                         }
7174                         d1 = moreswitches(d1);
7175                     } while (d1);
7176                     if (PL_doswitches && !switches_done) {
7177                         int argc = PL_origargc;
7178                         char **argv = PL_origargv;
7179                         do {
7180                             argc--,argv++;
7181                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7182                         init_argv_symbols(argc,argv);
7183                     }
7184                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7185                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7186                           /* if we have already added "LINE: while (<>) {",
7187                              we must not do it again */
7188                     {
7189                         SvPVCLEAR(PL_linestr);
7190                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7191                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7192                         PL_last_lop = PL_last_uni = NULL;
7193                         PL_preambled = FALSE;
7194                         if (PERLDB_LINE_OR_SAVESRC)
7195                             (void)gv_fetchfile(PL_origfilename);
7196                         return YYL_RETRY;
7197                     }
7198                 }
7199             }
7200         }
7201     }
7202
7203     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7204         PL_lex_state = LEX_FORMLINE;
7205         force_next(FORMRBRACK);
7206         TOKEN(';');
7207     }
7208
7209     PL_bufptr = s;
7210     return YYL_RETRY;
7211 }
7212
7213 static int
7214 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7215 {
7216     CLINE;
7217     pl_yylval.opval
7218         = newSVOP(OP_CONST, 0,
7219                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7220     pl_yylval.opval->op_private = OPpCONST_BARE;
7221     TERM(BAREWORD);
7222 }
7223
7224 static int
7225 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7226 {
7227     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7228         && PL_parser->saw_infix_sigil)
7229     {
7230         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7231                          "Operator or semicolon missing before %c%" UTF8f,
7232                          lastchar,
7233                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7234                                   PL_tokenbuf));
7235         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7236                          "Ambiguous use of %c resolved as operator %c",
7237                          lastchar, lastchar);
7238     }
7239     TOKEN(BAREWORD);
7240 }
7241
7242 static int
7243 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7244 {
7245     if (sv) {
7246         op_free(rv2cv_op);
7247         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7248         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7249         if (SvTYPE(sv) == SVt_PVAV)
7250             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7251                                       pl_yylval.opval);
7252         else {
7253             pl_yylval.opval->op_private = 0;
7254             pl_yylval.opval->op_folded = 1;
7255             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7256         }
7257         TOKEN(BAREWORD);
7258     }
7259
7260     op_free(pl_yylval.opval);
7261     pl_yylval.opval =
7262         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7263     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7264     PL_last_lop = PL_oldbufptr;
7265     PL_last_lop_op = OP_ENTERSUB;
7266
7267     /* Is there a prototype? */
7268     if (SvPOK(cv)) {
7269         int k = yyl_subproto(aTHX_ s, cv);
7270         if (k != KEY_NULL)
7271             return k;
7272     }
7273
7274     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7275     PL_expect = XTERM;
7276     force_next(off ? PRIVATEREF : BAREWORD);
7277     if (!PL_lex_allbrackets
7278         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7279     {
7280         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7281     }
7282
7283     TOKEN(NOAMP);
7284 }
7285
7286 /* Honour "reserved word" warnings, and enforce strict subs */
7287 static void
7288 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7289 {
7290     /* after "print" and similar functions (corresponding to
7291      * "F? L" in opcode.pl), whatever wasn't already parsed as
7292      * a filehandle should be subject to "strict subs".
7293      * Likewise for the optional indirect-object argument to system
7294      * or exec, which can't be a bareword */
7295     if ((PL_last_lop_op == OP_PRINT
7296             || PL_last_lop_op == OP_PRTF
7297             || PL_last_lop_op == OP_SAY
7298             || PL_last_lop_op == OP_SYSTEM
7299             || PL_last_lop_op == OP_EXEC)
7300         && (PL_hints & HINT_STRICT_SUBS))
7301     {
7302         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7303     }
7304
7305     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7306         char *d = PL_tokenbuf;
7307         while (isLOWER(*d))
7308             d++;
7309         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7310             /* PL_warn_reserved is constant */
7311             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7312             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7313                         PL_tokenbuf);
7314             GCC_DIAG_RESTORE_STMT;
7315         }
7316     }
7317 }
7318
7319 static int
7320 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7321 {
7322     int pkgname = 0;
7323     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7324     bool safebw;
7325     bool no_op_error = FALSE;
7326     /* Use this var to track whether intuit_method has been
7327        called.  intuit_method returns 0 or > 255.  */
7328     int key = 1;
7329
7330     if (PL_expect == XOPERATOR) {
7331         if (PL_bufptr == PL_linestart) {
7332             CopLINE_dec(PL_curcop);
7333             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7334             CopLINE_inc(PL_curcop);
7335         }
7336         else
7337             /* We want to call no_op with s pointing after the
7338                bareword, so defer it.  But we want it to come
7339                before the Bad name croak.  */
7340             no_op_error = TRUE;
7341     }
7342
7343     /* Get the rest if it looks like a package qualifier */
7344
7345     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7346         STRLEN morelen;
7347         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7348                       TRUE, &morelen);
7349         if (no_op_error) {
7350             no_op("Bareword",s);
7351             no_op_error = FALSE;
7352         }
7353         if (!morelen)
7354             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7355                     UTF8fARG(UTF, len, PL_tokenbuf),
7356                     *s == '\'' ? "'" : "::");
7357         len += morelen;
7358         pkgname = 1;
7359     }
7360
7361     if (no_op_error)
7362         no_op("Bareword",s);
7363
7364     /* See if the name is "Foo::",
7365        in which case Foo is a bareword
7366        (and a package name). */
7367
7368     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7369         if (ckWARN(WARN_BAREWORD)
7370             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7371             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7372                         "Bareword \"%" UTF8f
7373                         "\" refers to nonexistent package",
7374                         UTF8fARG(UTF, len, PL_tokenbuf));
7375         len -= 2;
7376         PL_tokenbuf[len] = '\0';
7377         c.gv = NULL;
7378         c.gvp = 0;
7379         safebw = TRUE;
7380     }
7381     else {
7382         safebw = FALSE;
7383     }
7384
7385     /* if we saw a global override before, get the right name */
7386
7387     if (!c.sv)
7388         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7389     if (c.gvp) {
7390         SV *sv = newSVpvs("CORE::GLOBAL::");
7391         sv_catsv(sv, c.sv);
7392         SvREFCNT_dec(c.sv);
7393         c.sv = sv;
7394     }
7395
7396     /* Presume this is going to be a bareword of some sort. */
7397     CLINE;
7398     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7399     pl_yylval.opval->op_private = OPpCONST_BARE;
7400
7401     /* And if "Foo::", then that's what it certainly is. */
7402     if (safebw)
7403         return yyl_safe_bareword(aTHX_ s, lastchar);
7404
7405     if (!c.off) {
7406         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7407         const_op->op_private = OPpCONST_BARE;
7408         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7409         c.cv = c.lex
7410             ? isGV(c.gv)
7411                 ? GvCV(c.gv)
7412                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7413                     ? (CV *)SvRV(c.gv)
7414                     : ((CV *)c.gv)
7415             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7416     }
7417
7418     /* See if it's the indirect object for a list operator. */
7419
7420     if (PL_oldoldbufptr
7421         && PL_oldoldbufptr < PL_bufptr
7422         && (PL_oldoldbufptr == PL_last_lop
7423             || PL_oldoldbufptr == PL_last_uni)
7424         && /* NO SKIPSPACE BEFORE HERE! */
7425            (PL_expect == XREF
7426             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7427                                                    == OA_FILEREF))
7428     {
7429         bool immediate_paren = *s == '(';
7430         SSize_t s_off;
7431
7432         /* (Now we can afford to cross potential line boundary.) */
7433         s = skipspace(s);
7434
7435         /* intuit_method() can indirectly call lex_next_chunk(),
7436          * invalidating s
7437          */
7438         s_off = s - SvPVX(PL_linestr);
7439         /* Two barewords in a row may indicate method call. */
7440         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7441                 || *s == '$')
7442             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7443         {
7444             /* the code at method: doesn't use s */
7445             goto method;
7446         }
7447         s = SvPVX(PL_linestr) + s_off;
7448
7449         /* If not a declared subroutine, it's an indirect object. */
7450         /* (But it's an indir obj regardless for sort.) */
7451         /* Also, if "_" follows a filetest operator, it's a bareword */
7452
7453         if (
7454             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7455              || (!c.cv
7456                  && (PL_last_lop_op != OP_MAPSTART
7457                      && PL_last_lop_op != OP_GREPSTART))))
7458            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7459                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7460                                                 == OA_FILESTATOP))
7461            )
7462         {
7463             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7464             yyl_strictwarn_bareword(aTHX_ lastchar);
7465             op_free(c.rv2cv_op);
7466             return yyl_safe_bareword(aTHX_ s, lastchar);
7467         }
7468     }
7469
7470     PL_expect = XOPERATOR;
7471     s = skipspace(s);
7472
7473     /* Is this a word before a => operator? */
7474     if (*s == '=' && s[1] == '>' && !pkgname) {
7475         op_free(c.rv2cv_op);
7476         CLINE;
7477         if (c.gvp || (c.lex && !c.off)) {
7478             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7479             /* This is our own scalar, created a few lines
7480                above, so this is safe. */
7481             SvREADONLY_off(c.sv);
7482             sv_setpv(c.sv, PL_tokenbuf);
7483             if (UTF && !IN_BYTES
7484              && is_utf8_string((U8*)PL_tokenbuf, len))
7485                   SvUTF8_on(c.sv);
7486             SvREADONLY_on(c.sv);
7487         }
7488         TERM(BAREWORD);
7489     }
7490
7491     /* If followed by a paren, it's certainly a subroutine. */
7492     if (*s == '(') {
7493         CLINE;
7494         if (c.cv) {
7495             char *d = s + 1;
7496             while (SPACE_OR_TAB(*d))
7497                 d++;
7498             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7499                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7500         }
7501         NEXTVAL_NEXTTOKE.opval =
7502             c.off ? c.rv2cv_op : pl_yylval.opval;
7503         if (c.off)
7504              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7505         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7506         pl_yylval.ival = 0;
7507         TOKEN('&');
7508     }
7509
7510     /* If followed by var or block, call it a method (unless sub) */
7511
7512     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7513         op_free(c.rv2cv_op);
7514         PL_last_lop = PL_oldbufptr;
7515         PL_last_lop_op = OP_METHOD;
7516         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7517             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7518         PL_expect = XBLOCKTERM;
7519         PL_bufptr = s;
7520         return REPORT(METHOD);
7521     }
7522
7523     /* If followed by a bareword, see if it looks like indir obj. */
7524
7525     if (   key == 1
7526         && !orig_keyword
7527         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7528         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7529     {
7530       method:
7531         if (c.lex && !c.off) {
7532             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7533             SvREADONLY_off(c.sv);
7534             sv_setpvn(c.sv, PL_tokenbuf, len);
7535             if (UTF && !IN_BYTES
7536              && is_utf8_string((U8*)PL_tokenbuf, len))
7537                 SvUTF8_on(c.sv);
7538             else SvUTF8_off(c.sv);
7539         }
7540         op_free(c.rv2cv_op);
7541         if (key == METHOD && !PL_lex_allbrackets
7542             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7543         {
7544             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7545         }
7546         return REPORT(key);
7547     }
7548
7549     /* Not a method, so call it a subroutine (if defined) */
7550
7551     if (c.cv) {
7552         /* Check for a constant sub */
7553         c.sv = cv_const_sv_or_av(c.cv);
7554         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7555     }
7556
7557     /* Call it a bare word */
7558
7559     if (PL_hints & HINT_STRICT_SUBS)
7560         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7561     else
7562         yyl_strictwarn_bareword(aTHX_ lastchar);
7563
7564     op_free(c.rv2cv_op);
7565
7566     return yyl_safe_bareword(aTHX_ s, lastchar);
7567 }
7568
7569 static int
7570 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7571 {
7572     switch (key) {
7573     default:                    /* not a keyword */
7574         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7575
7576     case KEY___FILE__:
7577         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7578
7579     case KEY___LINE__:
7580         FUN0OP(
7581             newSVOP(OP_CONST, 0,
7582                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7583         );
7584
7585     case KEY___PACKAGE__:
7586         FUN0OP(
7587             newSVOP(OP_CONST, 0, (PL_curstash
7588                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7589                                      : &PL_sv_undef))
7590         );
7591
7592     case KEY___DATA__:
7593     case KEY___END__:
7594         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7595             yyl_data_handle(aTHX);
7596         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7597
7598     case KEY___SUB__:
7599         FUN0OP(CvCLONE(PL_compcv)
7600                     ? newOP(OP_RUNCV, 0)
7601                     : newPVOP(OP_RUNCV,0,NULL));
7602
7603     case KEY_AUTOLOAD:
7604     case KEY_DESTROY:
7605     case KEY_BEGIN:
7606     case KEY_UNITCHECK:
7607     case KEY_CHECK:
7608     case KEY_INIT:
7609     case KEY_END:
7610         if (PL_expect == XSTATE)
7611             return yyl_sub(aTHX_ PL_bufptr, key);
7612         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7613
7614     case KEY_abs:
7615         UNI(OP_ABS);
7616
7617     case KEY_alarm:
7618         UNI(OP_ALARM);
7619
7620     case KEY_accept:
7621         LOP(OP_ACCEPT,XTERM);
7622
7623     case KEY_and:
7624         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7625             return REPORT(0);
7626         OPERATOR(ANDOP);
7627
7628     case KEY_atan2:
7629         LOP(OP_ATAN2,XTERM);
7630
7631     case KEY_bind:
7632         LOP(OP_BIND,XTERM);
7633
7634     case KEY_binmode:
7635         LOP(OP_BINMODE,XTERM);
7636
7637     case KEY_bless:
7638         LOP(OP_BLESS,XTERM);
7639
7640     case KEY_break:
7641         FUN0(OP_BREAK);
7642
7643     case KEY_chop:
7644         UNI(OP_CHOP);
7645
7646     case KEY_continue:
7647         /* We have to disambiguate the two senses of
7648           "continue". If the next token is a '{' then
7649           treat it as the start of a continue block;
7650           otherwise treat it as a control operator.
7651          */
7652         s = skipspace(s);
7653         if (*s == '{')
7654             PREBLOCK(CONTINUE);
7655         else
7656             FUN0(OP_CONTINUE);
7657
7658     case KEY_chdir:
7659         /* may use HOME */
7660         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7661         UNI(OP_CHDIR);
7662
7663     case KEY_close:
7664         UNI(OP_CLOSE);
7665
7666     case KEY_closedir:
7667         UNI(OP_CLOSEDIR);
7668
7669     case KEY_cmp:
7670         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7671             return REPORT(0);
7672         NCEop(OP_SCMP);
7673
7674     case KEY_caller:
7675         UNI(OP_CALLER);
7676
7677     case KEY_crypt:
7678
7679         LOP(OP_CRYPT,XTERM);
7680
7681     case KEY_chmod:
7682         LOP(OP_CHMOD,XTERM);
7683
7684     case KEY_chown:
7685         LOP(OP_CHOWN,XTERM);
7686
7687     case KEY_connect:
7688         LOP(OP_CONNECT,XTERM);
7689
7690     case KEY_chr:
7691         UNI(OP_CHR);
7692
7693     case KEY_cos:
7694         UNI(OP_COS);
7695
7696     case KEY_chroot:
7697         UNI(OP_CHROOT);
7698
7699     case KEY_default:
7700         PREBLOCK(DEFAULT);
7701
7702     case KEY_do:
7703         return yyl_do(aTHX_ s, orig_keyword);
7704
7705     case KEY_die:
7706         PL_hints |= HINT_BLOCK_SCOPE;
7707         LOP(OP_DIE,XTERM);
7708
7709     case KEY_defined:
7710         UNI(OP_DEFINED);
7711
7712     case KEY_delete:
7713         UNI(OP_DELETE);
7714
7715     case KEY_dbmopen:
7716         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7717                           STR_WITH_LEN("NDBM_File::"),
7718                           STR_WITH_LEN("DB_File::"),
7719                           STR_WITH_LEN("GDBM_File::"),
7720                           STR_WITH_LEN("SDBM_File::"),
7721                           STR_WITH_LEN("ODBM_File::"),
7722                           NULL);
7723         LOP(OP_DBMOPEN,XTERM);
7724
7725     case KEY_dbmclose:
7726         UNI(OP_DBMCLOSE);
7727
7728     case KEY_dump:
7729         LOOPX(OP_DUMP);
7730
7731     case KEY_else:
7732         PREBLOCK(ELSE);
7733
7734     case KEY_elsif:
7735         pl_yylval.ival = CopLINE(PL_curcop);
7736         OPERATOR(ELSIF);
7737
7738     case KEY_eq:
7739         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7740             return REPORT(0);
7741         ChEop(OP_SEQ);
7742
7743     case KEY_exists:
7744         UNI(OP_EXISTS);
7745
7746     case KEY_exit:
7747         UNI(OP_EXIT);
7748
7749     case KEY_eval:
7750         s = skipspace(s);
7751         if (*s == '{') { /* block eval */
7752             PL_expect = XTERMBLOCK;
7753             UNIBRACK(OP_ENTERTRY);
7754         }
7755         else { /* string eval */
7756             PL_expect = XTERM;
7757             UNIBRACK(OP_ENTEREVAL);
7758         }
7759
7760     case KEY_evalbytes:
7761         PL_expect = XTERM;
7762         UNIBRACK(-OP_ENTEREVAL);
7763
7764     case KEY_eof:
7765         UNI(OP_EOF);
7766
7767     case KEY_exp:
7768         UNI(OP_EXP);
7769
7770     case KEY_each:
7771         UNI(OP_EACH);
7772
7773     case KEY_exec:
7774         LOP(OP_EXEC,XREF);
7775
7776     case KEY_endhostent:
7777         FUN0(OP_EHOSTENT);
7778
7779     case KEY_endnetent:
7780         FUN0(OP_ENETENT);
7781
7782     case KEY_endservent:
7783         FUN0(OP_ESERVENT);
7784
7785     case KEY_endprotoent:
7786         FUN0(OP_EPROTOENT);
7787
7788     case KEY_endpwent:
7789         FUN0(OP_EPWENT);
7790
7791     case KEY_endgrent:
7792         FUN0(OP_EGRENT);
7793
7794     case KEY_for:
7795     case KEY_foreach:
7796         return yyl_foreach(aTHX_ s);
7797
7798     case KEY_formline:
7799         LOP(OP_FORMLINE,XTERM);
7800
7801     case KEY_fork:
7802         FUN0(OP_FORK);
7803
7804     case KEY_fc:
7805         UNI(OP_FC);
7806
7807     case KEY_fcntl:
7808         LOP(OP_FCNTL,XTERM);
7809
7810     case KEY_fileno:
7811         UNI(OP_FILENO);
7812
7813     case KEY_flock:
7814         LOP(OP_FLOCK,XTERM);
7815
7816     case KEY_gt:
7817         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7818             return REPORT(0);
7819         ChRop(OP_SGT);
7820
7821     case KEY_ge:
7822         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7823             return REPORT(0);
7824         ChRop(OP_SGE);
7825
7826     case KEY_grep:
7827         LOP(OP_GREPSTART, XREF);
7828
7829     case KEY_goto:
7830         LOOPX(OP_GOTO);
7831
7832     case KEY_gmtime:
7833         UNI(OP_GMTIME);
7834
7835     case KEY_getc:
7836         UNIDOR(OP_GETC);
7837
7838     case KEY_getppid:
7839         FUN0(OP_GETPPID);
7840
7841     case KEY_getpgrp:
7842         UNI(OP_GETPGRP);
7843
7844     case KEY_getpriority:
7845         LOP(OP_GETPRIORITY,XTERM);
7846
7847     case KEY_getprotobyname:
7848         UNI(OP_GPBYNAME);
7849
7850     case KEY_getprotobynumber:
7851         LOP(OP_GPBYNUMBER,XTERM);
7852
7853     case KEY_getprotoent:
7854         FUN0(OP_GPROTOENT);
7855
7856     case KEY_getpwent:
7857         FUN0(OP_GPWENT);
7858
7859     case KEY_getpwnam:
7860         UNI(OP_GPWNAM);
7861
7862     case KEY_getpwuid:
7863         UNI(OP_GPWUID);
7864
7865     case KEY_getpeername:
7866         UNI(OP_GETPEERNAME);
7867
7868     case KEY_gethostbyname:
7869         UNI(OP_GHBYNAME);
7870
7871     case KEY_gethostbyaddr:
7872         LOP(OP_GHBYADDR,XTERM);
7873
7874     case KEY_gethostent:
7875         FUN0(OP_GHOSTENT);
7876
7877     case KEY_getnetbyname:
7878         UNI(OP_GNBYNAME);
7879
7880     case KEY_getnetbyaddr:
7881         LOP(OP_GNBYADDR,XTERM);
7882
7883     case KEY_getnetent:
7884         FUN0(OP_GNETENT);
7885
7886     case KEY_getservbyname:
7887         LOP(OP_GSBYNAME,XTERM);
7888
7889     case KEY_getservbyport:
7890         LOP(OP_GSBYPORT,XTERM);
7891
7892     case KEY_getservent:
7893         FUN0(OP_GSERVENT);
7894
7895     case KEY_getsockname:
7896         UNI(OP_GETSOCKNAME);
7897
7898     case KEY_getsockopt:
7899         LOP(OP_GSOCKOPT,XTERM);
7900
7901     case KEY_getgrent:
7902         FUN0(OP_GGRENT);
7903
7904     case KEY_getgrnam:
7905         UNI(OP_GGRNAM);
7906
7907     case KEY_getgrgid:
7908         UNI(OP_GGRGID);
7909
7910     case KEY_getlogin:
7911         FUN0(OP_GETLOGIN);
7912
7913     case KEY_given:
7914         pl_yylval.ival = CopLINE(PL_curcop);
7915         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7916                          "given is experimental");
7917         OPERATOR(GIVEN);
7918
7919     case KEY_glob:
7920         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7921
7922     case KEY_hex:
7923         UNI(OP_HEX);
7924
7925     case KEY_if:
7926         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7927             return REPORT(0);
7928         pl_yylval.ival = CopLINE(PL_curcop);
7929         OPERATOR(IF);
7930
7931     case KEY_index:
7932         LOP(OP_INDEX,XTERM);
7933
7934     case KEY_int:
7935         UNI(OP_INT);
7936
7937     case KEY_ioctl:
7938         LOP(OP_IOCTL,XTERM);
7939
7940     case KEY_isa:
7941         Perl_ck_warner_d(aTHX_
7942             packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7943         NCRop(OP_ISA);
7944
7945     case KEY_join:
7946         LOP(OP_JOIN,XTERM);
7947
7948     case KEY_keys:
7949         UNI(OP_KEYS);
7950
7951     case KEY_kill:
7952         LOP(OP_KILL,XTERM);
7953
7954     case KEY_last:
7955         LOOPX(OP_LAST);
7956
7957     case KEY_lc:
7958         UNI(OP_LC);
7959
7960     case KEY_lcfirst:
7961         UNI(OP_LCFIRST);
7962
7963     case KEY_local:
7964         OPERATOR(LOCAL);
7965
7966     case KEY_length:
7967         UNI(OP_LENGTH);
7968
7969     case KEY_lt:
7970         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7971             return REPORT(0);
7972         ChRop(OP_SLT);
7973
7974     case KEY_le:
7975         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7976             return REPORT(0);
7977         ChRop(OP_SLE);
7978
7979     case KEY_localtime:
7980         UNI(OP_LOCALTIME);
7981
7982     case KEY_log:
7983         UNI(OP_LOG);
7984
7985     case KEY_link:
7986         LOP(OP_LINK,XTERM);
7987
7988     case KEY_listen:
7989         LOP(OP_LISTEN,XTERM);
7990
7991     case KEY_lock:
7992         UNI(OP_LOCK);
7993
7994     case KEY_lstat:
7995         UNI(OP_LSTAT);
7996
7997     case KEY_m:
7998         s = scan_pat(s,OP_MATCH);
7999         TERM(sublex_start());
8000
8001     case KEY_map:
8002         LOP(OP_MAPSTART, XREF);
8003
8004     case KEY_mkdir:
8005         LOP(OP_MKDIR,XTERM);
8006
8007     case KEY_msgctl:
8008         LOP(OP_MSGCTL,XTERM);
8009
8010     case KEY_msgget:
8011         LOP(OP_MSGGET,XTERM);
8012
8013     case KEY_msgrcv:
8014         LOP(OP_MSGRCV,XTERM);
8015
8016     case KEY_msgsnd:
8017         LOP(OP_MSGSND,XTERM);
8018
8019     case KEY_our:
8020     case KEY_my:
8021     case KEY_state:
8022         return yyl_my(aTHX_ s, key);
8023
8024     case KEY_next:
8025         LOOPX(OP_NEXT);
8026
8027     case KEY_ne:
8028         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8029             return REPORT(0);
8030         ChEop(OP_SNE);
8031
8032     case KEY_no:
8033         s = tokenize_use(0, s);
8034         TOKEN(USE);
8035
8036     case KEY_not:
8037         if (*s == '(' || (s = skipspace(s), *s == '('))
8038             FUN1(OP_NOT);
8039         else {
8040             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8041                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8042             OPERATOR(NOTOP);
8043         }
8044
8045     case KEY_open:
8046         s = skipspace(s);
8047         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8048             const char *t;
8049             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8050             for (t=d; isSPACE(*t);)
8051                 t++;
8052             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8053                 /* [perl #16184] */
8054                 && !(t[0] == '=' && t[1] == '>')
8055                 && !(t[0] == ':' && t[1] == ':')
8056                 && !keyword(s, d-s, 0)
8057             ) {
8058                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8059                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8060                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8061             }
8062         }
8063         LOP(OP_OPEN,XTERM);
8064
8065     case KEY_or:
8066         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8067             return REPORT(0);
8068         pl_yylval.ival = OP_OR;
8069         OPERATOR(OROP);
8070
8071     case KEY_ord:
8072         UNI(OP_ORD);
8073
8074     case KEY_oct:
8075         UNI(OP_OCT);
8076
8077     case KEY_opendir:
8078         LOP(OP_OPEN_DIR,XTERM);
8079
8080     case KEY_print:
8081         checkcomma(s,PL_tokenbuf,"filehandle");
8082         LOP(OP_PRINT,XREF);
8083
8084     case KEY_printf:
8085         checkcomma(s,PL_tokenbuf,"filehandle");
8086         LOP(OP_PRTF,XREF);
8087
8088     case KEY_prototype:
8089         UNI(OP_PROTOTYPE);
8090
8091     case KEY_push:
8092         LOP(OP_PUSH,XTERM);
8093
8094     case KEY_pop:
8095         UNIDOR(OP_POP);
8096
8097     case KEY_pos:
8098         UNIDOR(OP_POS);
8099
8100     case KEY_pack:
8101         LOP(OP_PACK,XTERM);
8102
8103     case KEY_package:
8104         s = force_word(s,BAREWORD,FALSE,TRUE);
8105         s = skipspace(s);
8106         s = force_strict_version(s);
8107         PREBLOCK(PACKAGE);
8108
8109     case KEY_pipe:
8110         LOP(OP_PIPE_OP,XTERM);
8111
8112     case KEY_q:
8113         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8114         if (!s)
8115             missingterm(NULL, 0);
8116         COPLINE_SET_FROM_MULTI_END;
8117         pl_yylval.ival = OP_CONST;
8118         TERM(sublex_start());
8119
8120     case KEY_quotemeta:
8121         UNI(OP_QUOTEMETA);
8122
8123     case KEY_qw:
8124         return yyl_qw(aTHX_ s, len);
8125
8126     case KEY_qq:
8127         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8128         if (!s)
8129             missingterm(NULL, 0);
8130         pl_yylval.ival = OP_STRINGIFY;
8131         if (SvIVX(PL_lex_stuff) == '\'')
8132             SvIV_set(PL_lex_stuff, 0);  /* qq'$foo' should interpolate */
8133         TERM(sublex_start());
8134
8135     case KEY_qr:
8136         s = scan_pat(s,OP_QR);
8137         TERM(sublex_start());
8138
8139     case KEY_qx:
8140         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8141         if (!s)
8142             missingterm(NULL, 0);
8143         pl_yylval.ival = OP_BACKTICK;
8144         TERM(sublex_start());
8145
8146     case KEY_return:
8147         OLDLOP(OP_RETURN);
8148
8149     case KEY_require:
8150         return yyl_require(aTHX_ s, orig_keyword);
8151
8152     case KEY_reset:
8153         UNI(OP_RESET);
8154
8155     case KEY_redo:
8156         LOOPX(OP_REDO);
8157
8158     case KEY_rename:
8159         LOP(OP_RENAME,XTERM);
8160
8161     case KEY_rand:
8162         UNI(OP_RAND);
8163
8164     case KEY_rmdir:
8165         UNI(OP_RMDIR);
8166
8167     case KEY_rindex:
8168         LOP(OP_RINDEX,XTERM);
8169
8170     case KEY_read:
8171         LOP(OP_READ,XTERM);
8172
8173     case KEY_readdir:
8174         UNI(OP_READDIR);
8175
8176     case KEY_readline:
8177         UNIDOR(OP_READLINE);
8178
8179     case KEY_readpipe:
8180         UNIDOR(OP_BACKTICK);
8181
8182     case KEY_rewinddir:
8183         UNI(OP_REWINDDIR);
8184
8185     case KEY_recv:
8186         LOP(OP_RECV,XTERM);
8187
8188     case KEY_reverse:
8189         LOP(OP_REVERSE,XTERM);
8190
8191     case KEY_readlink:
8192         UNIDOR(OP_READLINK);
8193
8194     case KEY_ref:
8195         UNI(OP_REF);
8196
8197     case KEY_s:
8198         s = scan_subst(s);
8199         if (pl_yylval.opval)
8200             TERM(sublex_start());
8201         else
8202             TOKEN(1);   /* force error */
8203
8204     case KEY_say:
8205         checkcomma(s,PL_tokenbuf,"filehandle");
8206         LOP(OP_SAY,XREF);
8207
8208     case KEY_chomp:
8209         UNI(OP_CHOMP);
8210
8211     case KEY_scalar:
8212         UNI(OP_SCALAR);
8213
8214     case KEY_select:
8215         LOP(OP_SELECT,XTERM);
8216
8217     case KEY_seek:
8218         LOP(OP_SEEK,XTERM);
8219
8220     case KEY_semctl:
8221         LOP(OP_SEMCTL,XTERM);
8222
8223     case KEY_semget:
8224         LOP(OP_SEMGET,XTERM);
8225
8226     case KEY_semop:
8227         LOP(OP_SEMOP,XTERM);
8228
8229     case KEY_send:
8230         LOP(OP_SEND,XTERM);
8231
8232     case KEY_setpgrp:
8233         LOP(OP_SETPGRP,XTERM);
8234
8235     case KEY_setpriority:
8236         LOP(OP_SETPRIORITY,XTERM);
8237
8238     case KEY_sethostent:
8239         UNI(OP_SHOSTENT);
8240
8241     case KEY_setnetent:
8242         UNI(OP_SNETENT);
8243
8244     case KEY_setservent:
8245         UNI(OP_SSERVENT);
8246
8247     case KEY_setprotoent:
8248         UNI(OP_SPROTOENT);
8249
8250     case KEY_setpwent:
8251         FUN0(OP_SPWENT);
8252
8253     case KEY_setgrent:
8254         FUN0(OP_SGRENT);
8255
8256     case KEY_seekdir:
8257         LOP(OP_SEEKDIR,XTERM);
8258
8259     case KEY_setsockopt:
8260         LOP(OP_SSOCKOPT,XTERM);
8261
8262     case KEY_shift:
8263         UNIDOR(OP_SHIFT);
8264
8265     case KEY_shmctl:
8266         LOP(OP_SHMCTL,XTERM);
8267
8268     case KEY_shmget:
8269         LOP(OP_SHMGET,XTERM);
8270
8271     case KEY_shmread:
8272         LOP(OP_SHMREAD,XTERM);
8273
8274     case KEY_shmwrite:
8275         LOP(OP_SHMWRITE,XTERM);
8276
8277     case KEY_shutdown:
8278         LOP(OP_SHUTDOWN,XTERM);
8279
8280     case KEY_sin:
8281         UNI(OP_SIN);
8282
8283     case KEY_sleep:
8284         UNI(OP_SLEEP);
8285
8286     case KEY_socket:
8287         LOP(OP_SOCKET,XTERM);
8288
8289     case KEY_socketpair:
8290         LOP(OP_SOCKPAIR,XTERM);
8291
8292     case KEY_sort:
8293         checkcomma(s,PL_tokenbuf,"subroutine name");
8294         s = skipspace(s);
8295         PL_expect = XTERM;
8296         s = force_word(s,BAREWORD,TRUE,TRUE);
8297         LOP(OP_SORT,XREF);
8298
8299     case KEY_split:
8300         LOP(OP_SPLIT,XTERM);
8301
8302     case KEY_sprintf:
8303         LOP(OP_SPRINTF,XTERM);
8304
8305     case KEY_splice:
8306         LOP(OP_SPLICE,XTERM);
8307
8308     case KEY_sqrt:
8309         UNI(OP_SQRT);
8310
8311     case KEY_srand:
8312         UNI(OP_SRAND);
8313
8314     case KEY_stat:
8315         UNI(OP_STAT);
8316
8317     case KEY_study:
8318         UNI(OP_STUDY);
8319
8320     case KEY_substr:
8321         LOP(OP_SUBSTR,XTERM);
8322
8323     case KEY_format:
8324     case KEY_sub:
8325         return yyl_sub(aTHX_ s, key);
8326
8327     case KEY_system:
8328         LOP(OP_SYSTEM,XREF);
8329
8330     case KEY_symlink:
8331         LOP(OP_SYMLINK,XTERM);
8332
8333     case KEY_syscall:
8334         LOP(OP_SYSCALL,XTERM);
8335
8336     case KEY_sysopen:
8337         LOP(OP_SYSOPEN,XTERM);
8338
8339     case KEY_sysseek:
8340         LOP(OP_SYSSEEK,XTERM);
8341
8342     case KEY_sysread:
8343         LOP(OP_SYSREAD,XTERM);
8344
8345     case KEY_syswrite:
8346         LOP(OP_SYSWRITE,XTERM);
8347
8348     case KEY_tr:
8349     case KEY_y:
8350         s = scan_trans(s);
8351         TERM(sublex_start());
8352
8353     case KEY_tell:
8354         UNI(OP_TELL);
8355
8356     case KEY_telldir:
8357         UNI(OP_TELLDIR);
8358
8359     case KEY_tie:
8360         LOP(OP_TIE,XTERM);
8361
8362     case KEY_tied:
8363         UNI(OP_TIED);
8364
8365     case KEY_time:
8366         FUN0(OP_TIME);
8367
8368     case KEY_times:
8369         FUN0(OP_TMS);
8370
8371     case KEY_truncate:
8372         LOP(OP_TRUNCATE,XTERM);
8373
8374     case KEY_uc:
8375         UNI(OP_UC);
8376
8377     case KEY_ucfirst:
8378         UNI(OP_UCFIRST);
8379
8380     case KEY_untie:
8381         UNI(OP_UNTIE);
8382
8383     case KEY_until:
8384         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8385             return REPORT(0);
8386         pl_yylval.ival = CopLINE(PL_curcop);
8387         OPERATOR(UNTIL);
8388
8389     case KEY_unless:
8390         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8391             return REPORT(0);
8392         pl_yylval.ival = CopLINE(PL_curcop);
8393         OPERATOR(UNLESS);
8394
8395     case KEY_unlink:
8396         LOP(OP_UNLINK,XTERM);
8397
8398     case KEY_undef:
8399         UNIDOR(OP_UNDEF);
8400
8401     case KEY_unpack:
8402         LOP(OP_UNPACK,XTERM);
8403
8404     case KEY_utime:
8405         LOP(OP_UTIME,XTERM);
8406
8407     case KEY_umask:
8408         UNIDOR(OP_UMASK);
8409
8410     case KEY_unshift:
8411         LOP(OP_UNSHIFT,XTERM);
8412
8413     case KEY_use:
8414         s = tokenize_use(1, s);
8415         TOKEN(USE);
8416
8417     case KEY_values:
8418         UNI(OP_VALUES);
8419
8420     case KEY_vec:
8421         LOP(OP_VEC,XTERM);
8422
8423     case KEY_when:
8424         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8425             return REPORT(0);
8426         pl_yylval.ival = CopLINE(PL_curcop);
8427         Perl_ck_warner_d(aTHX_
8428             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8429             "when is experimental");
8430         OPERATOR(WHEN);
8431
8432     case KEY_while:
8433         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8434             return REPORT(0);
8435         pl_yylval.ival = CopLINE(PL_curcop);
8436         OPERATOR(WHILE);
8437
8438     case KEY_warn:
8439         PL_hints |= HINT_BLOCK_SCOPE;
8440         LOP(OP_WARN,XTERM);
8441
8442     case KEY_wait:
8443         FUN0(OP_WAIT);
8444
8445     case KEY_waitpid:
8446         LOP(OP_WAITPID,XTERM);
8447
8448     case KEY_wantarray:
8449         FUN0(OP_WANTARRAY);
8450
8451     case KEY_write:
8452         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8453          * we use the same number on EBCDIC */
8454         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8455         UNI(OP_ENTERWRITE);
8456
8457     case KEY_x:
8458         if (PL_expect == XOPERATOR) {
8459             if (*s == '=' && !PL_lex_allbrackets
8460                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8461             {
8462                 return REPORT(0);
8463             }
8464             Mop(OP_REPEAT);
8465         }
8466         check_uni();
8467         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8468
8469     case KEY_xor:
8470         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8471             return REPORT(0);
8472         pl_yylval.ival = OP_XOR;
8473         OPERATOR(OROP);
8474     }
8475 }
8476
8477 static int
8478 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8479 {
8480     I32 key = 0;
8481     I32 orig_keyword = 0;
8482     STRLEN olen = len;
8483     char *d = s;
8484     s += 2;
8485     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8486     if ((*s == ':' && s[1] == ':')
8487         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8488     {
8489         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8490         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8491     }
8492     if (!key)
8493         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8494                           UTF8fARG(UTF, len, PL_tokenbuf));
8495     if (key < 0)
8496         key = -key;
8497     else if (key == KEY_require || key == KEY_do
8498           || key == KEY_glob)
8499         /* that's a way to remember we saw "CORE::" */
8500         orig_keyword = key;
8501
8502     /* Known to be a reserved word at this point */
8503     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8504 }
8505
8506 static int
8507 yyl_keylookup(pTHX_ char *s, GV *gv)
8508 {
8509     STRLEN len;
8510     bool anydelim;
8511     I32 key;
8512     struct code c = no_code;
8513     I32 orig_keyword = 0;
8514     char *d;
8515
8516     c.gv = gv;
8517
8518     PL_bufptr = s;
8519     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8520
8521     /* Some keywords can be followed by any delimiter, including ':' */
8522     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8523
8524     /* x::* is just a word, unless x is "CORE" */
8525     if (!anydelim && *s == ':' && s[1] == ':') {
8526         if (memEQs(PL_tokenbuf, len, "CORE"))
8527             return yyl_key_core(aTHX_ s, len, c);
8528         return yyl_just_a_word(aTHX_ s, len, 0, c);
8529     }
8530
8531     d = s;
8532     while (d < PL_bufend && isSPACE(*d))
8533             d++;        /* no comments skipped here, or s### is misparsed */
8534
8535     /* Is this a word before a => operator? */
8536     if (*d == '=' && d[1] == '>') {
8537         return yyl_fatcomma(aTHX_ s, len);
8538     }
8539
8540     /* Check for plugged-in keyword */
8541     {
8542         OP *o;
8543         int result;
8544         char *saved_bufptr = PL_bufptr;
8545         PL_bufptr = s;
8546         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8547         s = PL_bufptr;
8548         if (result == KEYWORD_PLUGIN_DECLINE) {
8549             /* not a plugged-in keyword */
8550             PL_bufptr = saved_bufptr;
8551         } else if (result == KEYWORD_PLUGIN_STMT) {
8552             pl_yylval.opval = o;
8553             CLINE;
8554             if (!PL_nexttoke) PL_expect = XSTATE;
8555             return REPORT(PLUGSTMT);
8556         } else if (result == KEYWORD_PLUGIN_EXPR) {
8557             pl_yylval.opval = o;
8558             CLINE;
8559             if (!PL_nexttoke) PL_expect = XOPERATOR;
8560             return REPORT(PLUGEXPR);
8561         } else {
8562             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8563         }
8564     }
8565
8566     /* Is this a label? */
8567     if (!anydelim && PL_expect == XSTATE
8568           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8569         s = d + 1;
8570         pl_yylval.opval =
8571             newSVOP(OP_CONST, 0,
8572                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8573         CLINE;
8574         TOKEN(LABEL);
8575     }
8576
8577     /* Check for lexical sub */
8578     if (PL_expect != XOPERATOR) {
8579         char tmpbuf[sizeof PL_tokenbuf + 1];
8580         *tmpbuf = '&';
8581         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8582         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8583         if (c.off != NOT_IN_PAD) {
8584             assert(c.off); /* we assume this is boolean-true below */
8585             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8586                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8587                 HEK * const stashname = HvNAME_HEK(stash);
8588                 c.sv = newSVhek(stashname);
8589                 sv_catpvs(c.sv, "::");
8590                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8591                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8592                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8593                                   SVt_PVCV);
8594                 c.off = 0;
8595                 if (!c.gv) {
8596                     sv_free(c.sv);
8597                     c.sv = NULL;
8598                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8599                 }
8600             }
8601             else {
8602                 c.rv2cv_op = newOP(OP_PADANY, 0);
8603                 c.rv2cv_op->op_targ = c.off;
8604                 c.cv = find_lexical_cv(c.off);
8605             }
8606             c.lex = TRUE;
8607             return yyl_just_a_word(aTHX_ s, len, 0, c);
8608         }
8609         c.off = 0;
8610     }
8611
8612     /* Check for built-in keyword */
8613     key = keyword(PL_tokenbuf, len, 0);
8614
8615     if (key < 0)
8616         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8617
8618     if (key && key != KEY___DATA__ && key != KEY___END__
8619      && (!anydelim || *s != '#')) {
8620         /* no override, and not s### either; skipspace is safe here
8621          * check for => on following line */
8622         bool arrow;
8623         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8624         STRLEN   soff = s         - SvPVX(PL_linestr);
8625         s = peekspace(s);
8626         arrow = *s == '=' && s[1] == '>';
8627         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8628         s         = SvPVX(PL_linestr) +   soff;
8629         if (arrow)
8630             return yyl_fatcomma(aTHX_ s, len);
8631     }
8632
8633     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8634 }
8635
8636 static int
8637 yyl_try(pTHX_ char *s)
8638 {
8639     char *d;
8640     GV *gv = NULL;
8641     int tok;
8642
8643   retry:
8644     switch (*s) {
8645     default:
8646         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8647             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8648                 return tok;
8649             goto retry_bufptr;
8650         }
8651         yyl_croak_unrecognised(aTHX_ s);
8652
8653     case 4:
8654     case 26:
8655         /* emulate EOF on ^D or ^Z */
8656         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8657             return tok;
8658     retry_bufptr:
8659         s = PL_bufptr;
8660         goto retry;
8661
8662     case 0:
8663         if ((!PL_rsfp || PL_lex_inwhat)
8664          && (!PL_parser->filtered || s+1 < PL_bufend)) {
8665             PL_last_uni = 0;
8666             PL_last_lop = 0;
8667             if (PL_lex_brackets
8668                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8669             {
8670                 yyerror((const char *)
8671                         (PL_lex_formbrack
8672                          ? "Format not terminated"
8673                          : "Missing right curly or square bracket"));
8674             }
8675             DEBUG_T({
8676                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8677             });
8678             TOKEN(0);
8679         }
8680         if (s++ < PL_bufend)
8681             goto retry;  /* ignore stray nulls */
8682         PL_last_uni = 0;
8683         PL_last_lop = 0;
8684         if (!PL_in_eval && !PL_preambled) {
8685             PL_preambled = TRUE;
8686             if (PL_perldb) {
8687                 /* Generate a string of Perl code to load the debugger.
8688                  * If PERL5DB is set, it will return the contents of that,
8689                  * otherwise a compile-time require of perl5db.pl.  */
8690
8691                 const char * const pdb = PerlEnv_getenv("PERL5DB");
8692
8693                 if (pdb) {
8694                     sv_setpv(PL_linestr, pdb);
8695                     sv_catpvs(PL_linestr,";");
8696                 } else {
8697                     SETERRNO(0,SS_NORMAL);
8698                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8699                 }
8700                 PL_parser->preambling = CopLINE(PL_curcop);
8701             } else
8702                 SvPVCLEAR(PL_linestr);
8703             if (PL_preambleav) {
8704                 SV **svp = AvARRAY(PL_preambleav);
8705                 SV **const end = svp + AvFILLp(PL_preambleav);
8706                 while(svp <= end) {
8707                     sv_catsv(PL_linestr, *svp);
8708                     ++svp;
8709                     sv_catpvs(PL_linestr, ";");
8710                 }
8711                 sv_free(MUTABLE_SV(PL_preambleav));
8712                 PL_preambleav = NULL;
8713             }
8714             if (PL_minus_E)
8715                 sv_catpvs(PL_linestr,
8716                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8717             if (PL_minus_n || PL_minus_p) {
8718                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8719                 if (PL_minus_l)
8720                     sv_catpvs(PL_linestr,"chomp;");
8721                 if (PL_minus_a) {
8722                     if (PL_minus_F) {
8723                         if (   (   *PL_splitstr == '/'
8724                                 || *PL_splitstr == '\''
8725                                 || *PL_splitstr == '"')
8726                             && strchr(PL_splitstr + 1, *PL_splitstr))
8727                         {
8728                             /* strchr is ok, because -F pattern can't contain
8729                              * embeddded NULs */
8730                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8731                         }
8732                         else {
8733                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8734                                bytes can be used as quoting characters.  :-) */
8735                             const char *splits = PL_splitstr;
8736                             sv_catpvs(PL_linestr, "our @F=split(q\0");
8737                             do {
8738                                 /* Need to \ \s  */
8739                                 if (*splits == '\\')
8740                                     sv_catpvn(PL_linestr, splits, 1);
8741                                 sv_catpvn(PL_linestr, splits, 1);
8742                             } while (*splits++);
8743                             /* This loop will embed the trailing NUL of
8744                                PL_linestr as the last thing it does before
8745                                terminating.  */
8746                             sv_catpvs(PL_linestr, ");");
8747                         }
8748                     }
8749                     else
8750                         sv_catpvs(PL_linestr,"our @F=split(' ');");
8751                 }
8752             }
8753             sv_catpvs(PL_linestr, "\n");
8754             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8755             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8756             PL_last_lop = PL_last_uni = NULL;
8757             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8758                 update_debugger_info(PL_linestr, NULL, 0);
8759             goto retry;
8760         }
8761         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8762             return tok;
8763         goto retry_bufptr;
8764
8765     case '\r':
8766 #ifdef PERL_STRICT_CR
8767         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8768         Perl_croak(aTHX_
8769       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8770 #endif
8771     case ' ': case '\t': case '\f': case '\v':
8772         s++;
8773         goto retry;
8774
8775     case '#':
8776     case '\n': {
8777         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8778         if (needs_semicolon)
8779             TOKEN(';');
8780         else
8781             goto retry;
8782     }
8783
8784     case '-':
8785         return yyl_hyphen(aTHX_ s);
8786
8787     case '+':
8788         return yyl_plus(aTHX_ s);
8789
8790     case '*':
8791         return yyl_star(aTHX_ s);
8792
8793     case '%':
8794         return yyl_percent(aTHX_ s);
8795
8796     case '^':
8797         return yyl_caret(aTHX_ s);
8798
8799     case '[':
8800         return yyl_leftsquare(aTHX_ s);
8801
8802     case '~':
8803         return yyl_tilde(aTHX_ s);
8804
8805     case ',':
8806         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8807             TOKEN(0);
8808         s++;
8809         OPERATOR(',');
8810     case ':':
8811         if (s[1] == ':')
8812             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8813         return yyl_colon(aTHX_ s + 1);
8814
8815     case '(':
8816         return yyl_leftparen(aTHX_ s + 1);
8817
8818     case ';':
8819         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8820             TOKEN(0);
8821         CLINE;
8822         s++;
8823         PL_expect = XSTATE;
8824         TOKEN(';');
8825
8826     case ')':
8827         return yyl_rightparen(aTHX_ s);
8828
8829     case ']':
8830         return yyl_rightsquare(aTHX_ s);
8831
8832     case '{':
8833         return yyl_leftcurly(aTHX_ s + 1, 0);
8834
8835     case '}':
8836         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8837             TOKEN(0);
8838         return yyl_rightcurly(aTHX_ s, 0);
8839
8840     case '&':
8841         return yyl_ampersand(aTHX_ s);
8842
8843     case '|':
8844         return yyl_verticalbar(aTHX_ s);
8845
8846     case '=':
8847         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8848             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8849         {
8850             s = vcs_conflict_marker(s + 7);
8851             goto retry;
8852         }
8853
8854         s++;
8855         {
8856             const char tmp = *s++;
8857             if (tmp == '=') {
8858                 if (!PL_lex_allbrackets
8859                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8860                 {
8861                     s -= 2;
8862                     TOKEN(0);
8863                 }
8864                 ChEop(OP_EQ);
8865             }
8866             if (tmp == '>') {
8867                 if (!PL_lex_allbrackets
8868                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8869                 {
8870                     s -= 2;
8871                     TOKEN(0);
8872                 }
8873                 OPERATOR(',');
8874             }
8875             if (tmp == '~')
8876                 PMop(OP_MATCH);
8877             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8878                 && memCHRs("+-*/%.^&|<",tmp))
8879                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8880                             "Reversed %c= operator",(int)tmp);
8881             s--;
8882             if (PL_expect == XSTATE
8883                 && isALPHA(tmp)
8884                 && (s == PL_linestart+1 || s[-2] == '\n') )
8885             {
8886                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8887                     || PL_lex_state != LEX_NORMAL)
8888                 {
8889                     d = PL_bufend;
8890                     while (s < d) {
8891                         if (*s++ == '\n') {
8892                             incline(s, PL_bufend);
8893                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8894                             {
8895                                 s = (char *) memchr(s,'\n', d - s);
8896                                 if (s)
8897                                     s++;
8898                                 else
8899                                     s = d;
8900                                 incline(s, PL_bufend);
8901                                 goto retry;
8902                             }
8903                         }
8904                     }
8905                     goto retry;
8906                 }
8907                 s = PL_bufend;
8908                 PL_parser->in_pod = 1;
8909                 goto retry;
8910             }
8911         }
8912         if (PL_expect == XBLOCK) {
8913             const char *t = s;
8914 #ifdef PERL_STRICT_CR
8915             while (SPACE_OR_TAB(*t))
8916 #else
8917             while (SPACE_OR_TAB(*t) || *t == '\r')
8918 #endif
8919                 t++;
8920             if (*t == '\n' || *t == '#') {
8921                 ENTER_with_name("lex_format");
8922                 SAVEI8(PL_parser->form_lex_state);
8923                 SAVEI32(PL_lex_formbrack);
8924                 PL_parser->form_lex_state = PL_lex_state;
8925                 PL_lex_formbrack = PL_lex_brackets + 1;
8926                 PL_parser->sub_error_count = PL_error_count;
8927                 return yyl_leftcurly(aTHX_ s, 1);
8928             }
8929         }
8930         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8931             s--;
8932             TOKEN(0);
8933         }
8934         pl_yylval.ival = 0;
8935         OPERATOR(ASSIGNOP);
8936
8937     case '!':
8938         return yyl_bang(aTHX_ s + 1);
8939
8940     case '<':
8941         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8942             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8943         {
8944             s = vcs_conflict_marker(s + 7);
8945             goto retry;
8946         }
8947         return yyl_leftpointy(aTHX_ s);
8948
8949     case '>':
8950         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8951             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8952         {
8953             s = vcs_conflict_marker(s + 7);
8954             goto retry;
8955         }
8956         return yyl_rightpointy(aTHX_ s + 1);
8957
8958     case '$':
8959         return yyl_dollar(aTHX_ s);
8960
8961     case '@':
8962         return yyl_snail(aTHX_ s);
8963
8964     case '/':                   /* may be division, defined-or, or pattern */
8965         return yyl_slash(aTHX_ s);
8966
8967      case '?':                  /* conditional */
8968         s++;
8969         if (!PL_lex_allbrackets
8970             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8971         {
8972             s--;
8973             TOKEN(0);
8974         }
8975         PL_lex_allbrackets++;
8976         OPERATOR('?');
8977
8978     case '.':
8979         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8980 #ifdef PERL_STRICT_CR
8981             && s[1] == '\n'
8982 #else
8983             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8984 #endif
8985             && (s == PL_linestart || s[-1] == '\n') )
8986         {
8987             PL_expect = XSTATE;
8988             /* formbrack==2 means dot seen where arguments expected */
8989             return yyl_rightcurly(aTHX_ s, 2);
8990         }
8991         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
8992             s += 3;
8993             OPERATOR(YADAYADA);
8994         }
8995         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
8996             char tmp = *s++;
8997             if (*s == tmp) {
8998                 if (!PL_lex_allbrackets
8999                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9000                 {
9001                     s--;
9002                     TOKEN(0);
9003                 }
9004                 s++;
9005                 if (*s == tmp) {
9006                     s++;
9007                     pl_yylval.ival = OPf_SPECIAL;
9008                 }
9009                 else
9010                     pl_yylval.ival = 0;
9011                 OPERATOR(DOTDOT);
9012             }
9013             if (*s == '=' && !PL_lex_allbrackets
9014                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9015             {
9016                 s--;
9017                 TOKEN(0);
9018             }
9019             Aop(OP_CONCAT);
9020         }
9021         /* FALLTHROUGH */
9022     case '0': case '1': case '2': case '3': case '4':
9023     case '5': case '6': case '7': case '8': case '9':
9024         s = scan_num(s, &pl_yylval);
9025         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9026         if (PL_expect == XOPERATOR)
9027             no_op("Number",s);
9028         TERM(THING);
9029
9030     case '\'':
9031         return yyl_sglquote(aTHX_ s);
9032
9033     case '"':
9034         return yyl_dblquote(aTHX_ s);
9035
9036     case '`':
9037         return yyl_backtick(aTHX_ s);
9038
9039     case '\\':
9040         return yyl_backslash(aTHX_ s + 1);
9041
9042     case 'v':
9043         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9044             char *start = s + 2;
9045             while (isDIGIT(*start) || *start == '_')
9046                 start++;
9047             if (*start == '.' && isDIGIT(start[1])) {
9048                 s = scan_num(s, &pl_yylval);
9049                 TERM(THING);
9050             }
9051             else if ((*start == ':' && start[1] == ':')
9052                      || (PL_expect == XSTATE && *start == ':')) {
9053                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9054                     return tok;
9055                 goto retry_bufptr;
9056             }
9057             else if (PL_expect == XSTATE) {
9058                 d = start;
9059                 while (d < PL_bufend && isSPACE(*d)) d++;
9060                 if (*d == ':') {
9061                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9062                         return tok;
9063                     goto retry_bufptr;
9064                 }
9065             }
9066             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9067             if (!isALPHA(*start) && (PL_expect == XTERM
9068                         || PL_expect == XREF || PL_expect == XSTATE
9069                         || PL_expect == XTERMORDORDOR)) {
9070                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9071                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9072                 if (!gv) {
9073                     s = scan_num(s, &pl_yylval);
9074                     TERM(THING);
9075                 }
9076             }
9077         }
9078         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9079             return tok;
9080         goto retry_bufptr;
9081
9082     case 'x':
9083         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9084             s++;
9085             Mop(OP_REPEAT);
9086         }
9087         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9088             return tok;
9089         goto retry_bufptr;
9090
9091     case '_':
9092     case 'a': case 'A':
9093     case 'b': case 'B':
9094     case 'c': case 'C':
9095     case 'd': case 'D':
9096     case 'e': case 'E':
9097     case 'f': case 'F':
9098     case 'g': case 'G':
9099     case 'h': case 'H':
9100     case 'i': case 'I':
9101     case 'j': case 'J':
9102     case 'k': case 'K':
9103     case 'l': case 'L':
9104     case 'm': case 'M':
9105     case 'n': case 'N':
9106     case 'o': case 'O':
9107     case 'p': case 'P':
9108     case 'q': case 'Q':
9109     case 'r': case 'R':
9110     case 's': case 'S':
9111     case 't': case 'T':
9112     case 'u': case 'U':
9113               case 'V':
9114     case 'w': case 'W':
9115               case 'X':
9116     case 'y': case 'Y':
9117     case 'z': case 'Z':
9118         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9119             return tok;
9120         goto retry_bufptr;
9121     }
9122 }
9123
9124
9125 /*
9126   yylex
9127
9128   Works out what to call the token just pulled out of the input
9129   stream.  The yacc parser takes care of taking the ops we return and
9130   stitching them into a tree.
9131
9132   Returns:
9133     The type of the next token
9134
9135   Structure:
9136       Check if we have already built the token; if so, use it.
9137       Switch based on the current state:
9138           - if we have a case modifier in a string, deal with that
9139           - handle other cases of interpolation inside a string
9140           - scan the next line if we are inside a format
9141       In the normal state, switch on the next character:
9142           - default:
9143             if alphabetic, go to key lookup
9144             unrecognized character - croak
9145           - 0/4/26: handle end-of-line or EOF
9146           - cases for whitespace
9147           - \n and #: handle comments and line numbers
9148           - various operators, brackets and sigils
9149           - numbers
9150           - quotes
9151           - 'v': vstrings (or go to key lookup)
9152           - 'x' repetition operator (or go to key lookup)
9153           - other ASCII alphanumerics (key lookup begins here):
9154               word before => ?
9155               keyword plugin
9156               scan built-in keyword (but do nothing with it yet)
9157               check for statement label
9158               check for lexical subs
9159                   return yyl_just_a_word if there is one
9160               see whether built-in keyword is overridden
9161               switch on keyword number:
9162                   - default: return yyl_just_a_word:
9163                       not a built-in keyword; handle bareword lookup
9164                       disambiguate between method and sub call
9165                       fall back to bareword
9166                   - cases for built-in keywords
9167 */
9168
9169 #ifdef NETWARE
9170 #define RSFP_FILENO (PL_rsfp)
9171 #else
9172 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9173 #endif
9174
9175
9176 int
9177 Perl_yylex(pTHX)
9178 {
9179     char *s = PL_bufptr;
9180
9181     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9182         const U8* first_bad_char_loc;
9183         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9184                                                         PL_bufend - PL_bufptr,
9185                                                         &first_bad_char_loc)))
9186         {
9187             _force_out_malformed_utf8_message(first_bad_char_loc,
9188                                               (U8 *) PL_bufend,
9189                                               0,
9190                                               1 /* 1 means die */ );
9191             NOT_REACHED; /* NOTREACHED */
9192         }
9193         PL_parser->recheck_utf8_validity = FALSE;
9194     }
9195     DEBUG_T( {
9196         SV* tmp = newSVpvs("");
9197         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9198             (IV)CopLINE(PL_curcop),
9199             lex_state_names[PL_lex_state],
9200             exp_name[PL_expect],
9201             pv_display(tmp, s, strlen(s), 0, 60));
9202         SvREFCNT_dec(tmp);
9203     } );
9204
9205     /* when we've already built the next token, just pull it out of the queue */
9206     if (PL_nexttoke) {
9207         PL_nexttoke--;
9208         pl_yylval = PL_nextval[PL_nexttoke];
9209         {
9210             I32 next_type;
9211             next_type = PL_nexttype[PL_nexttoke];
9212             if (next_type & (7<<24)) {
9213                 if (next_type & (1<<24)) {
9214                     if (PL_lex_brackets > 100)
9215                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9216                     PL_lex_brackstack[PL_lex_brackets++] =
9217                         (char) ((next_type >> 16) & 0xff);
9218                 }
9219                 if (next_type & (2<<24))
9220                     PL_lex_allbrackets++;
9221                 if (next_type & (4<<24))
9222                     PL_lex_allbrackets--;
9223                 next_type &= 0xffff;
9224             }
9225             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9226         }
9227     }
9228
9229     switch (PL_lex_state) {
9230     case LEX_NORMAL:
9231     case LEX_INTERPNORMAL:
9232         break;
9233
9234     /* interpolated case modifiers like \L \U, including \Q and \E.
9235        when we get here, PL_bufptr is at the \
9236     */
9237     case LEX_INTERPCASEMOD:
9238         /* handle \E or end of string */
9239         return yyl_interpcasemod(aTHX_ s);
9240
9241     case LEX_INTERPPUSH:
9242         return REPORT(sublex_push());
9243
9244     case LEX_INTERPSTART:
9245         if (PL_bufptr == PL_bufend)
9246             return REPORT(sublex_done());
9247         DEBUG_T({
9248             if(*PL_bufptr != '(')
9249                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9250         });
9251         PL_expect = XTERM;
9252         /* for /@a/, we leave the joining for the regex engine to do
9253          * (unless we're within \Q etc) */
9254         PL_lex_dojoin = (*PL_bufptr == '@'
9255                             && (!PL_lex_inpat || PL_lex_casemods));
9256         PL_lex_state = LEX_INTERPNORMAL;
9257         if (PL_lex_dojoin) {
9258             NEXTVAL_NEXTTOKE.ival = 0;
9259             force_next(',');
9260             force_ident("\"", '$');
9261             NEXTVAL_NEXTTOKE.ival = 0;
9262             force_next('$');
9263             NEXTVAL_NEXTTOKE.ival = 0;
9264             force_next((2<<24)|'(');
9265             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
9266             force_next(FUNC);
9267         }
9268         /* Convert (?{...}) and friends to 'do {...}' */
9269         if (PL_lex_inpat && *PL_bufptr == '(') {
9270             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9271             PL_bufptr += 2;
9272             if (*PL_bufptr != '{')
9273                 PL_bufptr++;
9274             PL_expect = XTERMBLOCK;
9275             force_next(DO);
9276         }
9277
9278         if (PL_lex_starts++) {
9279             s = PL_bufptr;
9280             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9281             if (!PL_lex_casemods && PL_lex_inpat)
9282                 TOKEN(',');
9283             else
9284                 AopNOASSIGN(OP_CONCAT);
9285         }
9286         return yylex();
9287
9288     case LEX_INTERPENDMAYBE:
9289         if (intuit_more(PL_bufptr, PL_bufend)) {
9290             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
9291             break;
9292         }
9293         /* FALLTHROUGH */
9294
9295     case LEX_INTERPEND:
9296         if (PL_lex_dojoin) {
9297             const U8 dojoin_was = PL_lex_dojoin;
9298             PL_lex_dojoin = FALSE;
9299             PL_lex_state = LEX_INTERPCONCAT;
9300             PL_lex_allbrackets--;
9301             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9302         }
9303         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9304             && SvEVALED(PL_lex_repl))
9305         {
9306             if (PL_bufptr != PL_bufend)
9307                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9308             PL_lex_repl = NULL;
9309         }
9310         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9311            re_eval_str.  If the here-doc body’s length equals the previous
9312            value of re_eval_start, re_eval_start will now be null.  So
9313            check re_eval_str as well. */
9314         if (PL_parser->lex_shared->re_eval_start
9315          || PL_parser->lex_shared->re_eval_str) {
9316             SV *sv;
9317             if (*PL_bufptr != ')')
9318                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9319             PL_bufptr++;
9320             /* having compiled a (?{..}) expression, return the original
9321              * text too, as a const */
9322             if (PL_parser->lex_shared->re_eval_str) {
9323                 sv = PL_parser->lex_shared->re_eval_str;
9324                 PL_parser->lex_shared->re_eval_str = NULL;
9325                 SvCUR_set(sv,
9326                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9327                 SvPV_shrink_to_cur(sv);
9328             }
9329             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9330                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9331             NEXTVAL_NEXTTOKE.opval =
9332                     newSVOP(OP_CONST, 0,
9333                                  sv);
9334             force_next(THING);
9335             PL_parser->lex_shared->re_eval_start = NULL;
9336             PL_expect = XTERM;
9337             return REPORT(',');
9338         }
9339
9340         /* FALLTHROUGH */
9341     case LEX_INTERPCONCAT:
9342 #ifdef DEBUGGING
9343         if (PL_lex_brackets)
9344             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9345                        (long) PL_lex_brackets);
9346 #endif
9347         if (PL_bufptr == PL_bufend)
9348             return REPORT(sublex_done());
9349
9350         /* m'foo' still needs to be parsed for possible (?{...}) */
9351         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9352             SV *sv = newSVsv(PL_linestr);
9353             sv = tokeq(sv);
9354             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9355             s = PL_bufend;
9356         }
9357         else {
9358             int save_error_count = PL_error_count;
9359
9360             s = scan_const(PL_bufptr);
9361
9362             /* Set flag if this was a pattern and there were errors.  op.c will
9363              * refuse to compile a pattern with this flag set.  Otherwise, we
9364              * could get segfaults, etc. */
9365             if (PL_lex_inpat && PL_error_count > save_error_count) {
9366                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9367             }
9368             if (*s == '\\')
9369                 PL_lex_state = LEX_INTERPCASEMOD;
9370             else
9371                 PL_lex_state = LEX_INTERPSTART;
9372         }
9373
9374         if (s != PL_bufptr) {
9375             NEXTVAL_NEXTTOKE = pl_yylval;
9376             PL_expect = XTERM;
9377             force_next(THING);
9378             if (PL_lex_starts++) {
9379                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9380                 if (!PL_lex_casemods && PL_lex_inpat)
9381                     TOKEN(',');
9382                 else
9383                     AopNOASSIGN(OP_CONCAT);
9384             }
9385             else {
9386                 PL_bufptr = s;
9387                 return yylex();
9388             }
9389         }
9390
9391         return yylex();
9392     case LEX_FORMLINE:
9393         if (PL_parser->sub_error_count != PL_error_count) {
9394             /* There was an error parsing a formline, which tends to
9395                mess up the parser.
9396                Unlike interpolated sub-parsing, we can't treat any of
9397                these as recoverable, so no need to check sub_no_recover.
9398             */
9399             yyquit();
9400         }
9401         assert(PL_lex_formbrack);
9402         s = scan_formline(PL_bufptr);
9403         if (!PL_lex_formbrack)
9404             return yyl_rightcurly(aTHX_ s, 1);
9405         PL_bufptr = s;
9406         return yylex();
9407     }
9408
9409     /* We really do *not* want PL_linestr ever becoming a COW. */
9410     assert (!SvIsCOW(PL_linestr));
9411     s = PL_bufptr;
9412     PL_oldoldbufptr = PL_oldbufptr;
9413     PL_oldbufptr = s;
9414
9415     if (PL_in_my == KEY_sigvar) {
9416         PL_parser->saw_infix_sigil = 0;
9417         return yyl_sigvar(aTHX_ s);
9418     }
9419
9420     {
9421         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9422            On its return, we then need to set it to indicate whether the token
9423            we just encountered was an infix operator that (if we hadn't been
9424            expecting an operator) have been a sigil.
9425         */
9426         bool expected_operator = (PL_expect == XOPERATOR);
9427         int ret = yyl_try(aTHX_ s);
9428         switch (pl_yylval.ival) {
9429         case OP_BIT_AND:
9430         case OP_MODULO:
9431         case OP_MULTIPLY:
9432         case OP_NBIT_AND:
9433             if (expected_operator) {
9434                 PL_parser->saw_infix_sigil = 1;
9435                 break;
9436             }
9437             /* FALLTHROUGH */
9438         default:
9439             PL_parser->saw_infix_sigil = 0;
9440         }
9441         return ret;
9442     }
9443 }
9444
9445
9446 /*
9447   S_pending_ident
9448
9449   Looks up an identifier in the pad or in a package
9450
9451   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9452   rather than a plain pad var.
9453
9454   Returns:
9455     PRIVATEREF if this is a lexical name.
9456     BAREWORD   if this belongs to a package.
9457
9458   Structure:
9459       if we're in a my declaration
9460           croak if they tried to say my($foo::bar)
9461           build the ops for a my() declaration
9462       if it's an access to a my() variable
9463           build ops for access to a my() variable
9464       if in a dq string, and they've said @foo and we can't find @foo
9465           warn
9466       build ops for a bareword
9467 */
9468
9469 static int
9470 S_pending_ident(pTHX)
9471 {
9472     PADOFFSET tmp = 0;
9473     const char pit = (char)pl_yylval.ival;
9474     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9475     /* All routes through this function want to know if there is a colon.  */
9476     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9477
9478     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9479           "### Pending identifier '%s'\n", PL_tokenbuf); });
9480     assert(tokenbuf_len >= 2);
9481
9482     /* if we're in a my(), we can't allow dynamics here.
9483        $foo'bar has already been turned into $foo::bar, so
9484        just check for colons.
9485
9486        if it's a legal name, the OP is a PADANY.
9487     */
9488     if (PL_in_my) {
9489         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9490             if (has_colon)
9491                 /* diag_listed_as: No package name allowed for variable %s
9492                                    in "our" */
9493                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9494                                   "%s %s in \"our\"",
9495                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9496                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9497             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9498         }
9499         else {
9500             OP *o;
9501             if (has_colon) {
9502                 /* "my" variable %s can't be in a package */
9503                 /* PL_no_myglob is constant */
9504                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9505                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9506                             PL_in_my == KEY_my ? "my" : "state",
9507                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9508                             PL_tokenbuf),
9509                             UTF ? SVf_UTF8 : 0);
9510                 GCC_DIAG_RESTORE_STMT;
9511             }
9512
9513             if (PL_in_my == KEY_sigvar) {
9514                 /* A signature 'padop' needs in addition, an op_first to
9515                  * point to a child sigdefelem, and an extra field to hold
9516                  * the signature index. We can achieve both by using an
9517                  * UNOP_AUX and (ab)using the op_aux field to hold the
9518                  * index. If we ever need more fields, use a real malloced
9519                  * aux strut instead.
9520                  */
9521                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9522                                     INT2PTR(UNOP_AUX_item *,
9523                                         (PL_parser->sig_elems)));
9524                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9525                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9526                                   :                         OPpARGELEM_HV);
9527             }
9528             else
9529                 o = newOP(OP_PADANY, 0);
9530             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9531                                                         UTF ? SVf_UTF8 : 0);
9532             if (PL_in_my == KEY_sigvar)
9533                 PL_in_my = 0;
9534
9535             pl_yylval.opval = o;
9536             return PRIVATEREF;
9537         }
9538     }
9539
9540     /*
9541        build the ops for accesses to a my() variable.
9542     */
9543
9544     if (!has_colon) {
9545         if (!PL_in_my)
9546             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9547                                  0);
9548         if (tmp != NOT_IN_PAD) {
9549             /* might be an "our" variable" */
9550             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9551                 /* build ops for a bareword */
9552                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9553                 HEK * const stashname = HvNAME_HEK(stash);
9554                 SV *  const sym = newSVhek(stashname);
9555                 sv_catpvs(sym, "::");
9556                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9557                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9558                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9559                 if (pit != '&')
9560                   gv_fetchsv(sym,
9561                     GV_ADDMULTI,
9562                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9563                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9564                      : SVt_PVHV));
9565                 return BAREWORD;
9566             }
9567
9568             pl_yylval.opval = newOP(OP_PADANY, 0);
9569             pl_yylval.opval->op_targ = tmp;
9570             return PRIVATEREF;
9571         }
9572     }
9573
9574     /*
9575        Whine if they've said @foo or @foo{key} in a doublequoted string,
9576        and @foo (or %foo) isn't a variable we can find in the symbol
9577        table.
9578     */
9579     if (ckWARN(WARN_AMBIGUOUS)
9580         && pit == '@'
9581         && PL_lex_state != LEX_NORMAL
9582         && !PL_lex_brackets)
9583     {
9584         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9585                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9586                                          SVt_PVAV);
9587         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9588            )
9589         {
9590             /* Downgraded from fatal to warning 20000522 mjd */
9591             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9592                         "Possible unintended interpolation of %" UTF8f
9593                         " in string",
9594                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9595         }
9596     }
9597
9598     /* build ops for a bareword */
9599     pl_yylval.opval = newSVOP(OP_CONST, 0,
9600                                    newSVpvn_flags(PL_tokenbuf + 1,
9601                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9602                                                       UTF ? SVf_UTF8 : 0 ));
9603     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9604     if (pit != '&')
9605         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9606                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9607                      | ( UTF ? SVf_UTF8 : 0 ),
9608                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9609                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9610                       : SVt_PVHV));
9611     return BAREWORD;
9612 }
9613
9614 STATIC void
9615 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9616 {
9617     PERL_ARGS_ASSERT_CHECKCOMMA;
9618
9619     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9620         if (ckWARN(WARN_SYNTAX)) {
9621             int level = 1;
9622             const char *w;
9623             for (w = s+2; *w && level; w++) {
9624                 if (*w == '(')
9625                     ++level;
9626                 else if (*w == ')')
9627                     --level;
9628             }
9629             while (isSPACE(*w))
9630                 ++w;
9631             /* the list of chars below is for end of statements or
9632              * block / parens, boolean operators (&&, ||, //) and branch
9633              * constructs (or, and, if, until, unless, while, err, for).
9634              * Not a very solid hack... */
9635             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9636                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9637                             "%s (...) interpreted as function",name);
9638         }
9639     }
9640     while (s < PL_bufend && isSPACE(*s))
9641         s++;
9642     if (*s == '(')
9643         s++;
9644     while (s < PL_bufend && isSPACE(*s))
9645         s++;
9646     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9647         const char * const w = s;
9648         s += UTF ? UTF8SKIP(s) : 1;
9649         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9650             s += UTF ? UTF8SKIP(s) : 1;
9651         while (s < PL_bufend && isSPACE(*s))
9652             s++;
9653         if (*s == ',') {
9654             GV* gv;
9655             if (keyword(w, s - w, 0))
9656                 return;
9657
9658             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9659             if (gv && GvCVu(gv))
9660                 return;
9661             if (s - w <= 254) {
9662                 PADOFFSET off;
9663                 char tmpbuf[256];
9664                 Copy(w, tmpbuf+1, s - w, char);
9665                 *tmpbuf = '&';
9666                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9667                 if (off != NOT_IN_PAD) return;
9668             }
9669             Perl_croak(aTHX_ "No comma allowed after %s", what);
9670         }
9671     }
9672 }
9673
9674 /* S_new_constant(): do any overload::constant lookup.
9675
9676    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9677    Best used as sv=new_constant(..., sv, ...).
9678    If s, pv are NULL, calls subroutine with one argument,
9679    and <type> is used with error messages only.
9680    <type> is assumed to be well formed UTF-8.
9681
9682    If error_msg is not NULL, *error_msg will be set to any error encountered.
9683    Otherwise yyerror() will be used to output it */
9684
9685 STATIC SV *
9686 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9687                SV *sv, SV *pv, const char *type, STRLEN typelen,
9688                const char ** error_msg)
9689 {
9690     dSP;
9691     HV * table = GvHV(PL_hintgv);                /* ^H */
9692     SV *res;
9693     SV *errsv = NULL;
9694     SV **cvp;
9695     SV *cv, *typesv;
9696     const char *why1 = "", *why2 = "", *why3 = "";
9697     const char * optional_colon = ":";  /* Only some messages have a colon */
9698     char *msg;
9699
9700     PERL_ARGS_ASSERT_NEW_CONSTANT;
9701     /* We assume that this is true: */
9702     assert(type || s);
9703
9704     sv_2mortal(sv);                     /* Parent created it permanently */
9705
9706     if (   ! table
9707         || ! (PL_hints & HINT_LOCALIZE_HH))
9708     {
9709         why1 = "unknown";
9710         optional_colon = "";
9711         goto report;
9712     }
9713
9714     cvp = hv_fetch(table, key, keylen, FALSE);
9715     if (!cvp || !SvOK(*cvp)) {
9716         why1 = "$^H{";
9717         why2 = key;
9718         why3 = "} is not defined";
9719         goto report;
9720     }
9721
9722     cv = *cvp;
9723     if (!pv && s)
9724         pv = newSVpvn_flags(s, len, SVs_TEMP);
9725     if (type && pv)
9726         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9727     else
9728         typesv = &PL_sv_undef;
9729
9730     PUSHSTACKi(PERLSI_OVERLOAD);
9731     ENTER ;
9732     SAVETMPS;
9733
9734     PUSHMARK(SP) ;
9735     EXTEND(sp, 3);
9736     if (pv)
9737         PUSHs(pv);
9738     PUSHs(sv);
9739     if (pv)
9740         PUSHs(typesv);
9741     PUTBACK;
9742     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9743
9744     SPAGAIN ;
9745
9746     /* Check the eval first */
9747     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9748         STRLEN errlen;
9749         const char * errstr;
9750         sv_catpvs(errsv, "Propagated");
9751         errstr = SvPV_const(errsv, errlen);
9752         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9753         (void)POPs;
9754         res = SvREFCNT_inc_simple_NN(sv);
9755     }
9756     else {
9757         res = POPs;
9758         SvREFCNT_inc_simple_void_NN(res);
9759     }
9760
9761     PUTBACK ;
9762     FREETMPS ;
9763     LEAVE ;
9764     POPSTACK;
9765
9766     if (SvOK(res)) {
9767         return res;
9768     }
9769
9770     sv = res;
9771     (void)sv_2mortal(sv);
9772
9773     why1 = "Call to &{$^H{";
9774     why2 = key;
9775     why3 = "}} did not return a defined value";
9776
9777   report:
9778
9779     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9780                         (int)(type ? typelen : len),
9781                         (type ? type: s),
9782                         optional_colon,
9783                         why1, why2, why3);
9784     if (error_msg) {
9785         *error_msg = msg;
9786     }
9787     else {
9788         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9789     }
9790     return SvREFCNT_inc_simple_NN(sv);
9791 }
9792
9793 PERL_STATIC_INLINE void
9794 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9795                     bool is_utf8, bool check_dollar, bool tick_warn)
9796 {
9797     int saw_tick = 0;
9798     const char *olds = *s;
9799     PERL_ARGS_ASSERT_PARSE_IDENT;
9800
9801     while (*s < PL_bufend) {
9802         if (*d >= e)
9803             Perl_croak(aTHX_ "%s", ident_too_long);
9804         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9805              /* The UTF-8 case must come first, otherwise things
9806              * like c\N{COMBINING TILDE} would start failing, as the
9807              * isWORDCHAR_A case below would gobble the 'c' up.
9808              */
9809
9810             char *t = *s + UTF8SKIP(*s);
9811             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9812                 t += UTF8SKIP(t);
9813             }
9814             if (*d + (t - *s) > e)
9815                 Perl_croak(aTHX_ "%s", ident_too_long);
9816             Copy(*s, *d, t - *s, char);
9817             *d += t - *s;
9818             *s = t;
9819         }
9820         else if ( isWORDCHAR_A(**s) ) {
9821             do {
9822                 *(*d)++ = *(*s)++;
9823             } while (isWORDCHAR_A(**s) && *d < e);
9824         }
9825         else if (   allow_package
9826                  && **s == '\''
9827                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9828         {
9829             *(*d)++ = ':';
9830             *(*d)++ = ':';
9831             (*s)++;
9832             saw_tick++;
9833         }
9834         else if (allow_package && **s == ':' && (*s)[1] == ':'
9835            /* Disallow things like Foo::$bar. For the curious, this is
9836             * the code path that triggers the "Bad name after" warning
9837             * when looking for barewords.
9838             */
9839            && !(check_dollar && (*s)[2] == '$')) {
9840             *(*d)++ = *(*s)++;
9841             *(*d)++ = *(*s)++;
9842         }
9843         else
9844             break;
9845     }
9846     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9847               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9848         char *this_d;
9849         char *d2;
9850         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9851         d2 = this_d;
9852         SAVEFREEPV(this_d);
9853         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9854                          "Old package separator used in string");
9855         if (olds[-1] == '#')
9856             *d2++ = olds[-2];
9857         *d2++ = olds[-1];
9858         while (olds < *s) {
9859             if (*olds == '\'') {
9860                 *d2++ = '\\';
9861                 *d2++ = *olds++;
9862             }
9863             else
9864                 *d2++ = *olds++;
9865         }
9866         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9867                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9868                           UTF8fARG(is_utf8, d2-this_d, this_d));
9869     }
9870     return;
9871 }
9872
9873 /* Returns a NUL terminated string, with the length of the string written to
9874    *slp
9875    */
9876 char *
9877 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9878 {
9879     char *d = dest;
9880     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9881     bool is_utf8 = cBOOL(UTF);
9882
9883     PERL_ARGS_ASSERT_SCAN_WORD;
9884
9885     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9886     *d = '\0';
9887     *slp = d - dest;
9888     return s;
9889 }
9890
9891 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9892  * iff Unicode semantics are to be used.  The legal ones are any of:
9893  *  a) all ASCII characters except:
9894  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9895  *          2) '{'
9896  *     The final case currently doesn't get this far in the program, so we
9897  *     don't test for it.  If that were to change, it would be ok to allow it.
9898  *  b) When not under Unicode rules, any upper Latin1 character
9899  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9900  *
9901  *      Because all ASCII characters have the same representation whether
9902  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9903  *      '{' without knowing if is UTF-8 or not. */
9904 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9905     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9906                          ? isIDFIRST_utf8_safe(s, e)                        \
9907                          : (isGRAPH_L1(*s)                                  \
9908                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9909
9910 STATIC char *
9911 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9912 {
9913     I32 herelines = PL_parser->herelines;
9914     SSize_t bracket = -1;
9915     char funny = *s++;
9916     char *d = dest;
9917     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9918     bool is_utf8 = cBOOL(UTF);
9919     I32 orig_copline = 0, tmp_copline = 0;
9920
9921     PERL_ARGS_ASSERT_SCAN_IDENT;
9922
9923     if (isSPACE(*s) || !*s)
9924         s = skipspace(s);
9925     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9926         bool is_zero= *s == '0' ? TRUE : FALSE;
9927         char *digit_start= d;
9928         *d++ = *s++;
9929         while (s < PL_bufend && isDIGIT(*s)) {
9930             if (d >= e)
9931                 Perl_croak(aTHX_ "%s", ident_too_long);
9932             *d++ = *s++;
9933         }
9934         if (is_zero && d - digit_start > 1)
9935             Perl_croak(aTHX_ ident_var_zero_multi_digit);
9936     }
9937     else {  /* See if it is a "normal" identifier */
9938         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9939     }
9940     *d = '\0';
9941     d = dest;
9942     if (*d) {
9943         /* Either a digit variable, or parse_ident() found an identifier
9944            (anything valid as a bareword), so job done and return.  */
9945         if (PL_lex_state != LEX_NORMAL)
9946             PL_lex_state = LEX_INTERPENDMAYBE;
9947         return s;
9948     }
9949
9950     /* Here, it is not a run-of-the-mill identifier name */
9951
9952     if (*s == '$' && s[1]
9953         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9954             || isDIGIT_A((U8)s[1])
9955             || s[1] == '$'
9956             || s[1] == '{'
9957             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9958     {
9959         /* Dereferencing a value in a scalar variable.
9960            The alternatives are different syntaxes for a scalar variable.
9961            Using ' as a leading package separator isn't allowed. :: is.   */
9962         return s;
9963     }
9964     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9965     if (*s == '{') {
9966         bracket = s - SvPVX(PL_linestr);
9967         s++;
9968         orig_copline = CopLINE(PL_curcop);
9969         if (s < PL_bufend && isSPACE(*s)) {
9970             s = skipspace(s);
9971         }
9972     }
9973     if ((s <= PL_bufend - ((is_utf8)
9974                           ? UTF8SKIP(s)
9975                           : 1))
9976         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9977     {
9978         if (is_utf8) {
9979             const STRLEN skip = UTF8SKIP(s);
9980             STRLEN i;
9981             d[skip] = '\0';
9982             for ( i = 0; i < skip; i++ )
9983                 d[i] = *s++;
9984         }
9985         else {
9986             *d = *s++;
9987             /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
9988             if (isDIGIT(*d)) {
9989                 bool is_zero= *d == '0' ? TRUE : FALSE;
9990                 char *digit_start= d;
9991                 while (s < PL_bufend && isDIGIT(*s)) {
9992                     d++;
9993                     if (d >= e)
9994                         Perl_croak(aTHX_ "%s", ident_too_long);
9995                     *d= *s++;
9996                 }
9997                 if (is_zero && d - digit_start > 1)
9998                     Perl_croak(aTHX_ ident_var_zero_multi_digit);
9999             }
10000             d[1] = '\0';
10001         }
10002     }
10003     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10004     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10005         *d = toCTRL(*s);
10006         s++;
10007     }
10008     /* Warn about ambiguous code after unary operators if {...} notation isn't
10009        used.  There's no difference in ambiguity; it's merely a heuristic
10010        about when not to warn.  */
10011     else if (ck_uni && bracket == -1)
10012         check_uni();
10013     if (bracket != -1) {
10014         bool skip;
10015         char *s2;
10016         /* If we were processing {...} notation then...  */
10017         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10018             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10019                  && isWORDCHAR(*s))
10020         ) {
10021             /* note we have to check for a normal identifier first,
10022              * as it handles utf8 symbols, and only after that has
10023              * been ruled out can we look at the caret words */
10024             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10025                 /* if it starts as a valid identifier, assume that it is one.
10026                    (the later check for } being at the expected point will trap
10027                    cases where this doesn't pan out.)  */
10028                 d += is_utf8 ? UTF8SKIP(d) : 1;
10029                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10030                 *d = '\0';
10031             }
10032             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10033                 d++;
10034                 while (isWORDCHAR(*s) && d < e) {
10035                     *d++ = *s++;
10036                 }
10037                 if (d >= e)
10038                     Perl_croak(aTHX_ "%s", ident_too_long);
10039                 *d = '\0';
10040             }
10041             tmp_copline = CopLINE(PL_curcop);
10042             if (s < PL_bufend && isSPACE(*s)) {
10043                 s = skipspace(s);
10044             }
10045             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10046                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10047                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10048                     const char * const brack =
10049                         (const char *)
10050                         ((*s == '[') ? "[...]" : "{...}");
10051                     orig_copline = CopLINE(PL_curcop);
10052                     CopLINE_set(PL_curcop, tmp_copline);
10053    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10054                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10055                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10056                         funny, dest, brack, funny, dest, brack);
10057                     CopLINE_set(PL_curcop, orig_copline);
10058                 }
10059                 bracket++;
10060                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10061                 PL_lex_allbrackets++;
10062                 return s;
10063             }
10064         }
10065
10066         if ( !tmp_copline )
10067             tmp_copline = CopLINE(PL_curcop);
10068         if ((skip = s < PL_bufend && isSPACE(*s))) {
10069             /* Avoid incrementing line numbers or resetting PL_linestart,
10070                in case we have to back up.  */
10071             STRLEN s_off = s - SvPVX(PL_linestr);
10072             s2 = peekspace(s);
10073             s = SvPVX(PL_linestr) + s_off;
10074         }
10075         else
10076             s2 = s;
10077
10078         /* Expect to find a closing } after consuming any trailing whitespace.
10079          */
10080         if (*s2 == '}') {
10081             /* Now increment line numbers if applicable.  */
10082             if (skip)
10083                 s = skipspace(s);
10084             s++;
10085             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10086                 PL_lex_state = LEX_INTERPEND;
10087                 PL_expect = XREF;
10088             }
10089             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10090                 if (ckWARN(WARN_AMBIGUOUS)
10091                     && (keyword(dest, d - dest, 0)
10092                         || get_cvn_flags(dest, d - dest, is_utf8
10093                            ? SVf_UTF8
10094                            : 0)))
10095                 {
10096                     SV *tmp = newSVpvn_flags( dest, d - dest,
10097                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10098                     if (funny == '#')
10099                         funny = '@';
10100                     orig_copline = CopLINE(PL_curcop);
10101                     CopLINE_set(PL_curcop, tmp_copline);
10102                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10103                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10104                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10105                     CopLINE_set(PL_curcop, orig_copline);
10106                 }
10107             }
10108         }
10109         else {
10110             /* Didn't find the closing } at the point we expected, so restore
10111                state such that the next thing to process is the opening { and */
10112             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10113             CopLINE_set(PL_curcop, orig_copline);
10114             PL_parser->herelines = herelines;
10115             *dest = '\0';
10116             PL_parser->sub_no_recover = TRUE;
10117         }
10118     }
10119     else if (   PL_lex_state == LEX_INTERPNORMAL
10120              && !PL_lex_brackets
10121              && !intuit_more(s, PL_bufend))
10122         PL_lex_state = LEX_INTERPEND;
10123     return s;
10124 }
10125
10126 static bool
10127 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10128
10129     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10130      * found in the parse starting at 's', based on the subset that are valid
10131      * in this context input to this routine in 'valid_flags'. Advances s.
10132      * Returns TRUE if the input should be treated as a valid flag, so the next
10133      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10134      * upon first call on the current regex.  This routine will set it to any
10135      * charset modifier found.  The caller shouldn't change it.  This way,
10136      * another charset modifier encountered in the parse can be detected as an
10137      * error, as we have decided to allow only one */
10138
10139     const char c = **s;
10140     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10141
10142     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10143         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10144             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10145                        UTF ? SVf_UTF8 : 0);
10146             (*s) += charlen;
10147             /* Pretend that it worked, so will continue processing before
10148              * dieing */
10149             return TRUE;
10150         }
10151         return FALSE;
10152     }
10153
10154     switch (c) {
10155
10156         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10157         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10158         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10159         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10160         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10161         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10162         case LOCALE_PAT_MOD:
10163             if (*charset) {
10164                 goto multiple_charsets;
10165             }
10166             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10167             *charset = c;
10168             break;
10169         case UNICODE_PAT_MOD:
10170             if (*charset) {
10171                 goto multiple_charsets;
10172             }
10173             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10174             *charset = c;
10175             break;
10176         case ASCII_RESTRICT_PAT_MOD:
10177             if (! *charset) {
10178                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10179             }
10180             else {
10181
10182                 /* Error if previous modifier wasn't an 'a', but if it was, see
10183                  * if, and accept, a second occurrence (only) */
10184                 if (*charset != 'a'
10185                     || get_regex_charset(*pmfl)
10186                         != REGEX_ASCII_RESTRICTED_CHARSET)
10187                 {
10188                         goto multiple_charsets;
10189                 }
10190                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10191             }
10192             *charset = c;
10193             break;
10194         case DEPENDS_PAT_MOD:
10195             if (*charset) {
10196                 goto multiple_charsets;
10197             }
10198             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10199             *charset = c;
10200             break;
10201     }
10202
10203     (*s)++;
10204     return TRUE;
10205
10206     multiple_charsets:
10207         if (*charset != c) {
10208             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10209         }
10210         else if (c == 'a') {
10211   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10212             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10213         }
10214         else {
10215             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10216         }
10217
10218         /* Pretend that it worked, so will continue processing before dieing */
10219         (*s)++;
10220         return TRUE;
10221 }
10222
10223 STATIC char *
10224 S_scan_pat(pTHX_ char *start, I32 type)
10225 {
10226     PMOP *pm;
10227     char *s;
10228     const char * const valid_flags =
10229         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10230     char charset = '\0';    /* character set modifier */
10231     unsigned int x_mod_count = 0;
10232
10233     PERL_ARGS_ASSERT_SCAN_PAT;
10234
10235     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10236     if (!s)
10237         Perl_croak(aTHX_ "Search pattern not terminated");
10238
10239     pm = (PMOP*)newPMOP(type, 0);
10240     if (PL_multi_open == '?') {
10241         /* This is the only point in the code that sets PMf_ONCE:  */
10242         pm->op_pmflags |= PMf_ONCE;
10243
10244         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10245            allows us to restrict the list needed by reset to just the ??
10246            matches.  */
10247         assert(type != OP_TRANS);
10248         if (PL_curstash) {
10249             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10250             U32 elements;
10251             if (!mg) {
10252                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10253                                  0);
10254             }
10255             elements = mg->mg_len / sizeof(PMOP**);
10256             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10257             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10258             mg->mg_len = elements * sizeof(PMOP**);
10259             PmopSTASH_set(pm,PL_curstash);
10260         }
10261     }
10262
10263     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10264      * anon CV. False positives like qr/[(?{]/ are harmless */
10265
10266     if (type == OP_QR) {
10267         STRLEN len;
10268         char *e, *p = SvPV(PL_lex_stuff, len);
10269         e = p + len;
10270         for (; p < e; p++) {
10271             if (p[0] == '(' && p[1] == '?'
10272                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10273             {
10274                 pm->op_pmflags |= PMf_HAS_CV;
10275                 break;
10276             }
10277         }
10278         pm->op_pmflags |= PMf_IS_QR;
10279     }
10280
10281     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10282                                 &s, &charset, &x_mod_count))
10283     {};
10284     /* issue a warning if /c is specified,but /g is not */
10285     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10286     {
10287         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10288                        "Use of /c modifier is meaningless without /g" );
10289     }
10290
10291     PL_lex_op = (OP*)pm;
10292     pl_yylval.ival = OP_MATCH;
10293     return s;
10294 }
10295
10296 STATIC char *
10297 S_scan_subst(pTHX_ char *start)
10298 {
10299     char *s;
10300     PMOP *pm;
10301     I32 first_start;
10302     line_t first_line;
10303     line_t linediff = 0;
10304     I32 es = 0;
10305     char charset = '\0';    /* character set modifier */
10306     unsigned int x_mod_count = 0;
10307     char *t;
10308
10309     PERL_ARGS_ASSERT_SCAN_SUBST;
10310
10311     pl_yylval.ival = OP_NULL;
10312
10313     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10314
10315     if (!s)
10316         Perl_croak(aTHX_ "Substitution pattern not terminated");
10317
10318     s = t;
10319
10320     first_start = PL_multi_start;
10321     first_line = CopLINE(PL_curcop);
10322     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10323     if (!s) {
10324         SvREFCNT_dec_NN(PL_lex_stuff);
10325         PL_lex_stuff = NULL;
10326         Perl_croak(aTHX_ "Substitution replacement not terminated");
10327     }
10328     PL_multi_start = first_start;       /* so whole substitution is taken together */
10329
10330     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10331
10332
10333     while (*s) {
10334         if (*s == EXEC_PAT_MOD) {
10335             s++;
10336             es++;
10337         }
10338         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10339                                   &s, &charset, &x_mod_count))
10340         {
10341             break;
10342         }
10343     }
10344
10345     if ((pm->op_pmflags & PMf_CONTINUE)) {
10346         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10347     }
10348
10349     if (es) {
10350         SV * const repl = newSVpvs("");
10351
10352         PL_multi_end = 0;
10353         pm->op_pmflags |= PMf_EVAL;
10354         for (; es > 1; es--) {
10355             sv_catpvs(repl, "eval ");
10356         }
10357         sv_catpvs(repl, "do {");
10358         sv_catsv(repl, PL_parser->lex_sub_repl);
10359         sv_catpvs(repl, "}");
10360         SvREFCNT_dec(PL_parser->lex_sub_repl);
10361         PL_parser->lex_sub_repl = repl;
10362     }
10363
10364
10365     linediff = CopLINE(PL_curcop) - first_line;
10366     if (linediff)
10367         CopLINE_set(PL_curcop, first_line);
10368
10369     if (linediff || es) {
10370         /* the IVX field indicates that the replacement string is a s///e;
10371          * the NVX field indicates how many src code lines the replacement
10372          * spreads over */
10373         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10374         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10375         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10376                                                                     cBOOL(es);
10377     }
10378
10379     PL_lex_op = (OP*)pm;
10380     pl_yylval.ival = OP_SUBST;
10381     return s;
10382 }
10383
10384 STATIC char *
10385 S_scan_trans(pTHX_ char *start)
10386 {
10387     char* s;
10388     OP *o;
10389     U8 squash;
10390     U8 del;
10391     U8 complement;
10392     bool nondestruct = 0;
10393     char *t;
10394
10395     PERL_ARGS_ASSERT_SCAN_TRANS;
10396
10397     pl_yylval.ival = OP_NULL;
10398
10399     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10400     if (!s)
10401         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10402
10403     s = t;
10404
10405     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10406     if (!s) {
10407         SvREFCNT_dec_NN(PL_lex_stuff);
10408         PL_lex_stuff = NULL;
10409         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10410     }
10411
10412     complement = del = squash = 0;
10413     while (1) {
10414         switch (*s) {
10415         case 'c':
10416             complement = OPpTRANS_COMPLEMENT;
10417             break;
10418         case 'd':
10419             del = OPpTRANS_DELETE;
10420             break;
10421         case 's':
10422             squash = OPpTRANS_SQUASH;
10423             break;
10424         case 'r':
10425             nondestruct = 1;
10426             break;
10427         default:
10428             goto no_more;
10429         }
10430         s++;
10431     }
10432   no_more:
10433
10434     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10435     o->op_private &= ~OPpTRANS_ALL;
10436     o->op_private |= del|squash|complement;
10437
10438     PL_lex_op = o;
10439     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10440
10441
10442     return s;
10443 }
10444
10445 /* scan_heredoc
10446    Takes a pointer to the first < in <<FOO.
10447    Returns a pointer to the byte following <<FOO.
10448
10449    This function scans a heredoc, which involves different methods
10450    depending on whether we are in a string eval, quoted construct, etc.
10451    This is because PL_linestr could containing a single line of input, or
10452    a whole string being evalled, or the contents of the current quote-
10453    like operator.
10454
10455    The two basic methods are:
10456     - Steal lines from the input stream
10457     - Scan the heredoc in PL_linestr and remove it therefrom
10458
10459    In a file scope or filtered eval, the first method is used; in a
10460    string eval, the second.
10461
10462    In a quote-like operator, we have to choose between the two,
10463    depending on where we can find a newline.  We peek into outer lex-
10464    ing scopes until we find one with a newline in it.  If we reach the
10465    outermost lexing scope and it is a file, we use the stream method.
10466    Otherwise it is treated as an eval.
10467 */
10468
10469 STATIC char *
10470 S_scan_heredoc(pTHX_ char *s)
10471 {
10472     I32 op_type = OP_SCALAR;
10473     I32 len;
10474     SV *tmpstr;
10475     char term;
10476     char *d;
10477     char *e;
10478     char *peek;
10479     char *indent = 0;
10480     I32 indent_len = 0;
10481     bool indented = FALSE;
10482     const bool infile = PL_rsfp || PL_parser->filtered;
10483     const line_t origline = CopLINE(PL_curcop);
10484     LEXSHARED *shared = PL_parser->lex_shared;
10485
10486     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10487
10488     s += 2;
10489     d = PL_tokenbuf + 1;
10490     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10491     *PL_tokenbuf = '\n';
10492     peek = s;
10493
10494     if (*peek == '~') {
10495         indented = TRUE;
10496         peek++; s++;
10497     }
10498
10499     while (SPACE_OR_TAB(*peek))
10500         peek++;
10501
10502     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10503         s = peek;
10504         term = *s++;
10505         s = delimcpy(d, e, s, PL_bufend, term, &len);
10506         if (s == PL_bufend)
10507             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10508         d += len;
10509         s++;
10510     }
10511     else {
10512         if (*s == '\\')
10513             /* <<\FOO is equivalent to <<'FOO' */
10514             s++, term = '\'';
10515         else
10516             term = '"';
10517
10518         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10519             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10520
10521         peek = s;
10522
10523         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10524             peek += UTF ? UTF8SKIP(peek) : 1;
10525         }
10526
10527         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10528         Copy(s, d, len, char);
10529         s += len;
10530         d += len;
10531     }
10532
10533     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10534         Perl_croak(aTHX_ "Delimiter for here document is too long");
10535
10536     *d++ = '\n';
10537     *d = '\0';
10538     len = d - PL_tokenbuf;
10539
10540 #ifndef PERL_STRICT_CR
10541     d = (char *) memchr(s, '\r', PL_bufend - s);
10542     if (d) {
10543         char * const olds = s;
10544         s = d;
10545         while (s < PL_bufend) {
10546             if (*s == '\r') {
10547                 *d++ = '\n';
10548                 if (*++s == '\n')
10549                     s++;
10550             }
10551             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10552                 *d++ = *s++;
10553                 s++;
10554             }
10555             else
10556                 *d++ = *s++;
10557         }
10558         *d = '\0';
10559         PL_bufend = d;
10560         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10561         s = olds;
10562     }
10563 #endif
10564
10565     tmpstr = newSV_type(SVt_PVIV);
10566     SvGROW(tmpstr, 80);
10567     if (term == '\'') {
10568         op_type = OP_CONST;
10569         SvIV_set(tmpstr, -1);
10570     }
10571     else if (term == '`') {
10572         op_type = OP_BACKTICK;
10573         SvIV_set(tmpstr, '\\');
10574     }
10575
10576     PL_multi_start = origline + 1 + PL_parser->herelines;
10577     PL_multi_open = PL_multi_close = '<';
10578
10579     /* inside a string eval or quote-like operator */
10580     if (!infile || PL_lex_inwhat) {
10581         SV *linestr;
10582         char *bufend;
10583         char * const olds = s;
10584         PERL_CONTEXT * const cx = CX_CUR();
10585         /* These two fields are not set until an inner lexing scope is
10586            entered.  But we need them set here. */
10587         shared->ls_bufptr  = s;
10588         shared->ls_linestr = PL_linestr;
10589
10590         if (PL_lex_inwhat) {
10591             /* Look for a newline.  If the current buffer does not have one,
10592              peek into the line buffer of the parent lexing scope, going
10593              up as many levels as necessary to find one with a newline
10594              after bufptr.
10595             */
10596             while (!(s = (char *)memchr(
10597                                 (void *)shared->ls_bufptr, '\n',
10598                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10599                 )))
10600             {
10601                 shared = shared->ls_prev;
10602                 /* shared is only null if we have gone beyond the outermost
10603                    lexing scope.  In a file, we will have broken out of the
10604                    loop in the previous iteration.  In an eval, the string buf-
10605                    fer ends with "\n;", so the while condition above will have
10606                    evaluated to false.  So shared can never be null.  Or so you
10607                    might think.  Odd syntax errors like s;@{<<; can gobble up
10608                    the implicit semicolon at the end of a flie, causing the
10609                    file handle to be closed even when we are not in a string
10610                    eval.  So shared may be null in that case.
10611                    (Closing '>>}' here to balance the earlier open brace for
10612                    editors that look for matched pairs.) */
10613                 if (UNLIKELY(!shared))
10614                     goto interminable;
10615                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10616                    most lexing scope.  In a file, shared->ls_linestr at that
10617                    level is just one line, so there is no body to steal. */
10618                 if (infile && !shared->ls_prev) {
10619                     s = olds;
10620                     goto streaming;
10621                 }
10622             }
10623         }
10624         else {  /* eval or we've already hit EOF */
10625             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10626             if (!s)
10627                 goto interminable;
10628         }
10629
10630         linestr = shared->ls_linestr;
10631         bufend = SvEND(linestr);
10632         d = s;
10633         if (indented) {
10634             char *myolds = s;
10635
10636             while (s < bufend - len + 1) {
10637                 if (*s++ == '\n')
10638                     ++PL_parser->herelines;
10639
10640                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10641                     char *backup = s;
10642                     indent_len = 0;
10643
10644                     /* Only valid if it's preceded by whitespace only */
10645                     while (backup != myolds && --backup >= myolds) {
10646                         if (! SPACE_OR_TAB(*backup)) {
10647                             break;
10648                         }
10649                         indent_len++;
10650                     }
10651
10652                     /* No whitespace or all! */
10653                     if (backup == s || *backup == '\n') {
10654                         Newx(indent, indent_len + 1, char);
10655                         memcpy(indent, backup + 1, indent_len);
10656                         indent[indent_len] = 0;
10657                         s--; /* before our delimiter */
10658                         PL_parser->herelines--; /* this line doesn't count */
10659                         break;
10660                     }
10661                 }
10662             }
10663         }
10664         else {
10665             while (s < bufend - len + 1
10666                    && memNE(s,PL_tokenbuf,len) )
10667             {
10668                 if (*s++ == '\n')
10669                     ++PL_parser->herelines;
10670             }
10671         }
10672
10673         if (s >= bufend - len + 1) {
10674             goto interminable;
10675         }
10676
10677         sv_setpvn(tmpstr,d+1,s-d);
10678         s += len - 1;
10679         /* the preceding stmt passes a newline */
10680         PL_parser->herelines++;
10681
10682         /* s now points to the newline after the heredoc terminator.
10683            d points to the newline before the body of the heredoc.
10684          */
10685
10686         /* We are going to modify linestr in place here, so set
10687            aside copies of the string if necessary for re-evals or
10688            (caller $n)[6]. */
10689         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10690            check shared->re_eval_str. */
10691         if (shared->re_eval_start || shared->re_eval_str) {
10692             /* Set aside the rest of the regexp */
10693             if (!shared->re_eval_str)
10694                 shared->re_eval_str =
10695                        newSVpvn(shared->re_eval_start,
10696                                 bufend - shared->re_eval_start);
10697             shared->re_eval_start -= s-d;
10698         }
10699
10700         if (cxstack_ix >= 0
10701             && CxTYPE(cx) == CXt_EVAL
10702             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10703             && cx->blk_eval.cur_text == linestr)
10704         {
10705             cx->blk_eval.cur_text = newSVsv(linestr);
10706             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10707         }
10708
10709         /* Copy everything from s onwards back to d. */
10710         Move(s,d,bufend-s + 1,char);
10711         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10712         /* Setting PL_bufend only applies when we have not dug deeper
10713            into other scopes, because sublex_done sets PL_bufend to
10714            SvEND(PL_linestr). */
10715         if (shared == PL_parser->lex_shared)
10716             PL_bufend = SvEND(linestr);
10717         s = olds;
10718     }
10719     else {
10720         SV *linestr_save;
10721         char *oldbufptr_save;
10722         char *oldoldbufptr_save;
10723       streaming:
10724         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10725         term = PL_tokenbuf[1];
10726         len--;
10727         linestr_save = PL_linestr; /* must restore this afterwards */
10728         d = s;                   /* and this */
10729         oldbufptr_save = PL_oldbufptr;
10730         oldoldbufptr_save = PL_oldoldbufptr;
10731         PL_linestr = newSVpvs("");
10732         PL_bufend = SvPVX(PL_linestr);
10733
10734         while (1) {
10735             PL_bufptr = PL_bufend;
10736             CopLINE_set(PL_curcop,
10737                         origline + 1 + PL_parser->herelines);
10738
10739             if (   !lex_next_chunk(LEX_NO_TERM)
10740                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10741             {
10742                 /* Simply freeing linestr_save might seem simpler here, as it
10743                    does not matter what PL_linestr points to, since we are
10744                    about to croak; but in a quote-like op, linestr_save
10745                    will have been prospectively freed already, via
10746                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10747                    restore PL_linestr. */
10748                 SvREFCNT_dec_NN(PL_linestr);
10749                 PL_linestr = linestr_save;
10750                 PL_oldbufptr = oldbufptr_save;
10751                 PL_oldoldbufptr = oldoldbufptr_save;
10752                 goto interminable;
10753             }
10754
10755             CopLINE_set(PL_curcop, origline);
10756
10757             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10758                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10759                 /* ^That should be enough to avoid this needing to grow:  */
10760                 sv_catpvs(PL_linestr, "\n\0");
10761                 assert(s == SvPVX(PL_linestr));
10762                 PL_bufend = SvEND(PL_linestr);
10763             }
10764
10765             s = PL_bufptr;
10766             PL_parser->herelines++;
10767             PL_last_lop = PL_last_uni = NULL;
10768
10769 #ifndef PERL_STRICT_CR
10770             if (PL_bufend - PL_linestart >= 2) {
10771                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10772                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10773                 {
10774                     PL_bufend[-2] = '\n';
10775                     PL_bufend--;
10776                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10777                 }
10778                 else if (PL_bufend[-1] == '\r')
10779                     PL_bufend[-1] = '\n';
10780             }
10781             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10782                 PL_bufend[-1] = '\n';
10783 #endif
10784
10785             if (indented && (PL_bufend-s) >= len) {
10786                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10787
10788                 if (found) {
10789                     char *backup = found;
10790                     indent_len = 0;
10791
10792                     /* Only valid if it's preceded by whitespace only */
10793                     while (backup != s && --backup >= s) {
10794                         if (! SPACE_OR_TAB(*backup)) {
10795                             break;
10796                         }
10797                         indent_len++;
10798                     }
10799
10800                     /* All whitespace or none! */
10801                     if (backup == found || SPACE_OR_TAB(*backup)) {
10802                         Newx(indent, indent_len + 1, char);
10803                         memcpy(indent, backup, indent_len);
10804                         indent[indent_len] = 0;
10805                         SvREFCNT_dec(PL_linestr);
10806                         PL_linestr = linestr_save;
10807                         PL_linestart = SvPVX(linestr_save);
10808                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10809                         PL_oldbufptr = oldbufptr_save;
10810                         PL_oldoldbufptr = oldoldbufptr_save;
10811                         s = d;
10812                         break;
10813                     }
10814                 }
10815
10816                 /* Didn't find it */
10817                 sv_catsv(tmpstr,PL_linestr);
10818             }
10819             else {
10820                 if (*s == term && PL_bufend-s >= len
10821                     && memEQ(s,PL_tokenbuf + 1,len))
10822                 {
10823                     SvREFCNT_dec(PL_linestr);
10824                     PL_linestr = linestr_save;
10825                     PL_linestart = SvPVX(linestr_save);
10826                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10827                     PL_oldbufptr = oldbufptr_save;
10828                     PL_oldoldbufptr = oldoldbufptr_save;
10829                     s = d;
10830                     break;
10831                 }
10832                 else {
10833                     sv_catsv(tmpstr,PL_linestr);
10834                 }
10835             }
10836         } /* while (1) */
10837     }
10838
10839     PL_multi_end = origline + PL_parser->herelines;
10840
10841     if (indented && indent) {
10842         STRLEN linecount = 1;
10843         STRLEN herelen = SvCUR(tmpstr);
10844         char *ss = SvPVX(tmpstr);
10845         char *se = ss + herelen;
10846         SV *newstr = newSV(herelen+1);
10847         SvPOK_on(newstr);
10848
10849         /* Trim leading whitespace */
10850         while (ss < se) {
10851             /* newline only? Copy and move on */
10852             if (*ss == '\n') {
10853                 sv_catpvs(newstr,"\n");
10854                 ss++;
10855                 linecount++;
10856
10857             /* Found our indentation? Strip it */
10858             }
10859             else if (se - ss >= indent_len
10860                        && memEQ(ss, indent, indent_len))
10861             {
10862                 STRLEN le = 0;
10863                 ss += indent_len;
10864
10865                 while ((ss + le) < se && *(ss + le) != '\n')
10866                     le++;
10867
10868                 sv_catpvn(newstr, ss, le);
10869                 ss += le;
10870
10871             /* Line doesn't begin with our indentation? Croak */
10872             }
10873             else {
10874                 Safefree(indent);
10875                 Perl_croak(aTHX_
10876                     "Indentation on line %d of here-doc doesn't match delimiter",
10877                     (int)linecount
10878                 );
10879             }
10880         } /* while */
10881
10882         /* avoid sv_setsv() as we dont wan't to COW here */
10883         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10884         Safefree(indent);
10885         SvREFCNT_dec_NN(newstr);
10886     }
10887
10888     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10889         SvPV_shrink_to_cur(tmpstr);
10890     }
10891
10892     if (!IN_BYTES) {
10893         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10894             SvUTF8_on(tmpstr);
10895     }
10896
10897     PL_lex_stuff = tmpstr;
10898     pl_yylval.ival = op_type;
10899     return s;
10900
10901   interminable:
10902     if (indent)
10903         Safefree(indent);
10904     SvREFCNT_dec(tmpstr);
10905     CopLINE_set(PL_curcop, origline);
10906     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10907 }
10908
10909
10910 /* scan_inputsymbol
10911    takes: position of first '<' in input buffer
10912    returns: position of first char following the matching '>' in
10913             input buffer
10914    side-effects: pl_yylval and lex_op are set.
10915
10916    This code handles:
10917
10918    <>           read from ARGV
10919    <<>>         read from ARGV without magic open
10920    <FH>         read from filehandle
10921    <pkg::FH>    read from package qualified filehandle
10922    <pkg'FH>     read from package qualified filehandle
10923    <$fh>        read from filehandle in $fh
10924    <*.h>        filename glob
10925
10926 */
10927
10928 STATIC char *
10929 S_scan_inputsymbol(pTHX_ char *start)
10930 {
10931     char *s = start;            /* current position in buffer */
10932     char *end;
10933     I32 len;
10934     bool nomagicopen = FALSE;
10935     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10936     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10937
10938     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10939
10940     end = (char *) memchr(s, '\n', PL_bufend - s);
10941     if (!end)
10942         end = PL_bufend;
10943     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10944         nomagicopen = TRUE;
10945         *d = '\0';
10946         len = 0;
10947         s += 3;
10948     }
10949     else
10950         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10951
10952     /* die if we didn't have space for the contents of the <>,
10953        or if it didn't end, or if we see a newline
10954     */
10955
10956     if (len >= (I32)sizeof PL_tokenbuf)
10957         Perl_croak(aTHX_ "Excessively long <> operator");
10958     if (s >= end)
10959         Perl_croak(aTHX_ "Unterminated <> operator");
10960
10961     s++;
10962
10963     /* check for <$fh>
10964        Remember, only scalar variables are interpreted as filehandles by
10965        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10966        treated as a glob() call.
10967        This code makes use of the fact that except for the $ at the front,
10968        a scalar variable and a filehandle look the same.
10969     */
10970     if (*d == '$' && d[1]) d++;
10971
10972     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10973     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10974         d += UTF ? UTF8SKIP(d) : 1;
10975     }
10976
10977     /* If we've tried to read what we allow filehandles to look like, and
10978        there's still text left, then it must be a glob() and not a getline.
10979        Use scan_str to pull out the stuff between the <> and treat it
10980        as nothing more than a string.
10981     */
10982
10983     if (d - PL_tokenbuf != len) {
10984         pl_yylval.ival = OP_GLOB;
10985         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10986         if (!s)
10987            Perl_croak(aTHX_ "Glob not terminated");
10988         return s;
10989     }
10990     else {
10991         bool readline_overriden = FALSE;
10992         GV *gv_readline;
10993         /* we're in a filehandle read situation */
10994         d = PL_tokenbuf;
10995
10996         /* turn <> into <ARGV> */
10997         if (!len)
10998             Copy("ARGV",d,5,char);
10999
11000         /* Check whether readline() is overriden */
11001         if ((gv_readline = gv_override("readline",8)))
11002             readline_overriden = TRUE;
11003
11004         /* if <$fh>, create the ops to turn the variable into a
11005            filehandle
11006         */
11007         if (*d == '$') {
11008             /* try to find it in the pad for this block, otherwise find
11009                add symbol table ops
11010             */
11011             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11012             if (tmp != NOT_IN_PAD) {
11013                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11014                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11015                     HEK * const stashname = HvNAME_HEK(stash);
11016                     SV * const sym = sv_2mortal(newSVhek(stashname));
11017                     sv_catpvs(sym, "::");
11018                     sv_catpv(sym, d+1);
11019                     d = SvPVX(sym);
11020                     goto intro_sym;
11021                 }
11022                 else {
11023                     OP * const o = newOP(OP_PADSV, 0);
11024                     o->op_targ = tmp;
11025                     PL_lex_op = readline_overriden
11026                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11027                                 op_append_elem(OP_LIST, o,
11028                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11029                         : newUNOP(OP_READLINE, 0, o);
11030                 }
11031             }
11032             else {
11033                 GV *gv;
11034                 ++d;
11035               intro_sym:
11036                 gv = gv_fetchpv(d,
11037                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11038                                 SVt_PV);
11039                 PL_lex_op = readline_overriden
11040                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11041                             op_append_elem(OP_LIST,
11042                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11043                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11044                     : newUNOP(OP_READLINE, 0,
11045                             newUNOP(OP_RV2SV, 0,
11046                                 newGVOP(OP_GV, 0, gv)));
11047             }
11048             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11049             pl_yylval.ival = OP_NULL;
11050         }
11051
11052         /* If it's none of the above, it must be a literal filehandle
11053            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11054         else {
11055             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11056             PL_lex_op = readline_overriden
11057                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11058                         op_append_elem(OP_LIST,
11059                             newGVOP(OP_GV, 0, gv),
11060                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11061                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11062             pl_yylval.ival = OP_NULL;
11063         }
11064     }
11065
11066     return s;
11067 }
11068
11069
11070 /* scan_str
11071    takes:
11072         start                   position in buffer
11073         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11074                                 only if they are of the open/close form
11075         keep_delims             preserve the delimiters around the string
11076         re_reparse              compiling a run-time /(?{})/:
11077                                    collapse // to /,  and skip encoding src
11078         delimp                  if non-null, this is set to the position of
11079                                 the closing delimiter, or just after it if
11080                                 the closing and opening delimiters differ
11081                                 (i.e., the opening delimiter of a substitu-
11082                                 tion replacement)
11083    returns: position to continue reading from buffer
11084    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11085         updates the read buffer.
11086
11087    This subroutine pulls a string out of the input.  It is called for:
11088         q               single quotes           q(literal text)
11089         '               single quotes           'literal text'
11090         qq              double quotes           qq(interpolate $here please)
11091         "               double quotes           "interpolate $here please"
11092         qx              backticks               qx(/bin/ls -l)
11093         `               backticks               `/bin/ls -l`
11094         qw              quote words             @EXPORT_OK = qw( func() $spam )
11095         m//             regexp match            m/this/
11096         s///            regexp substitute       s/this/that/
11097         tr///           string transliterate    tr/this/that/
11098         y///            string transliterate    y/this/that/
11099         ($*@)           sub prototypes          sub foo ($)
11100         (stuff)         sub attr parameters     sub foo : attr(stuff)
11101         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11102
11103    In most of these cases (all but <>, patterns and transliterate)
11104    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11105    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11106    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11107    calls scan_str().
11108
11109    It skips whitespace before the string starts, and treats the first
11110    character as the delimiter.  If the delimiter is one of ([{< then
11111    the corresponding "close" character )]}> is used as the closing
11112    delimiter.  It allows quoting of delimiters, and if the string has
11113    balanced delimiters ([{<>}]) it allows nesting.
11114
11115    On success, the SV with the resulting string is put into lex_stuff or,
11116    if that is already non-NULL, into lex_repl. The second case occurs only
11117    when parsing the RHS of the special constructs s/// and tr/// (y///).
11118    For convenience, the terminating delimiter character is stuffed into
11119    SvIVX of the SV.
11120 */
11121
11122 char *
11123 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11124                  char **delimp
11125     )
11126 {
11127     SV *sv;                     /* scalar value: string */
11128     const char *tmps;           /* temp string, used for delimiter matching */
11129     char *s = start;            /* current position in the buffer */
11130     char term;                  /* terminating character */
11131     char *to;                   /* current position in the sv's data */
11132     I32 brackets = 1;           /* bracket nesting level */
11133     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
11134     IV termcode;                /* terminating char. code */
11135     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11136     STRLEN termlen;             /* length of terminating string */
11137     line_t herelines;
11138
11139     /* The delimiters that have a mirror-image closing one */
11140     const char * opening_delims = "([{<";
11141     const char * closing_delims = ")]}>";
11142
11143     /* The only non-UTF character that isn't a stand alone grapheme is
11144      * white-space, hence can't be a delimiter. */
11145     const char * non_grapheme_msg = "Use of unassigned code point or"
11146                                     " non-standalone grapheme for a delimiter"
11147                                     " is not allowed";
11148     PERL_ARGS_ASSERT_SCAN_STR;
11149
11150     /* skip space before the delimiter */
11151     if (isSPACE(*s)) {
11152         s = skipspace(s);
11153     }
11154
11155     /* mark where we are, in case we need to report errors */
11156     CLINE;
11157
11158     /* after skipping whitespace, the next character is the terminator */
11159     term = *s;
11160     if (!UTF || UTF8_IS_INVARIANT(term)) {
11161         termcode = termstr[0] = term;
11162         termlen = 1;
11163     }
11164     else {
11165         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11166         if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11167                                            (U8 *) s,
11168                                            (U8 *) PL_bufend,
11169                                                   termcode)))
11170         {
11171             yyerror(non_grapheme_msg);
11172         }
11173
11174         Copy(s, termstr, termlen, U8);
11175     }
11176
11177     /* mark where we are */
11178     PL_multi_start = CopLINE(PL_curcop);
11179     PL_multi_open = termcode;
11180     herelines = PL_parser->herelines;
11181
11182     /* If the delimiter has a mirror-image closing one, get it */
11183     if (term && (tmps = strchr(opening_delims, term))) {
11184         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11185     }
11186
11187     PL_multi_close = termcode;
11188
11189     if (PL_multi_open == PL_multi_close) {
11190         keep_bracketed_quoted = FALSE;
11191     }
11192
11193     /* create a new SV to hold the contents.  79 is the SV's initial length.
11194        What a random number. */
11195     sv = newSV_type(SVt_PVIV);
11196     SvGROW(sv, 80);
11197     SvIV_set(sv, termcode);
11198     (void)SvPOK_only(sv);               /* validate pointer */
11199
11200     /* move past delimiter and try to read a complete string */
11201     if (keep_delims)
11202         sv_catpvn(sv, s, termlen);
11203     s += termlen;
11204     for (;;) {
11205         /* extend sv if need be */
11206         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11207         /* set 'to' to the next character in the sv's string */
11208         to = SvPVX(sv)+SvCUR(sv);
11209
11210         /* if open delimiter is the close delimiter read unbridle */
11211         if (PL_multi_open == PL_multi_close) {
11212             for (; s < PL_bufend; s++,to++) {
11213                 /* embedded newlines increment the current line number */
11214                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11215                     COPLINE_INC_WITH_HERELINES;
11216                 /* handle quoted delimiters */
11217                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11218                     if (!keep_bracketed_quoted
11219                         && (s[1] == term
11220                             || (re_reparse && s[1] == '\\'))
11221                     )
11222                         s++;
11223                     else /* any other quotes are simply copied straight through */
11224                         *to++ = *s++;
11225                 }
11226                 /* terminate when run out of buffer (the for() condition), or
11227                    have found the terminator */
11228                 else if (*s == term) {  /* First byte of terminator matches */
11229                     if (termlen == 1)   /* If is the only byte, are done */
11230                         break;
11231
11232                     /* If the remainder of the terminator matches, also are
11233                      * done, after checking that is a separate grapheme */
11234                     if (   s + termlen <= PL_bufend
11235                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11236                     {
11237                         if (   UTF
11238                             && UNLIKELY(! is_grapheme((U8 *) start,
11239                                                        (U8 *) s,
11240                                                        (U8 *) PL_bufend,
11241                                                               termcode)))
11242                         {
11243                             yyerror(non_grapheme_msg);
11244                         }
11245                         break;
11246                     }
11247                 }
11248                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11249                     d_is_utf8 = TRUE;
11250                 }
11251
11252                 *to = *s;
11253             }
11254         }
11255
11256         /* if the terminator isn't the same as the start character (e.g.,
11257            matched brackets), we have to allow more in the quoting, and
11258            be prepared for nested brackets.
11259         */
11260         else {
11261             /* read until we run out of string, or we find the terminator */
11262             for (; s < PL_bufend; s++,to++) {
11263                 /* embedded newlines increment the line count */
11264                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11265                     COPLINE_INC_WITH_HERELINES;
11266                 /* backslashes can escape the open or closing characters */
11267                 if (*s == '\\' && s+1 < PL_bufend) {
11268                     if (!keep_bracketed_quoted
11269                        && ( ((UV)s[1] == PL_multi_open)
11270                          || ((UV)s[1] == PL_multi_close) ))
11271                     {
11272                         s++;
11273                     }
11274                     else
11275                         *to++ = *s++;
11276                 }
11277                 /* allow nested opens and closes */
11278                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11279                     break;
11280                 else if ((UV)*s == PL_multi_open)
11281                     brackets++;
11282                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11283                     d_is_utf8 = TRUE;
11284                 *to = *s;
11285             }
11286         }
11287         /* terminate the copied string and update the sv's end-of-string */
11288         *to = '\0';
11289         SvCUR_set(sv, to - SvPVX_const(sv));
11290
11291         /*
11292          * this next chunk reads more into the buffer if we're not done yet
11293          */
11294
11295         if (s < PL_bufend)
11296             break;              /* handle case where we are done yet :-) */
11297
11298 #ifndef PERL_STRICT_CR
11299         if (to - SvPVX_const(sv) >= 2) {
11300             if (   (to[-2] == '\r' && to[-1] == '\n')
11301                 || (to[-2] == '\n' && to[-1] == '\r'))
11302             {
11303                 to[-2] = '\n';
11304                 to--;
11305                 SvCUR_set(sv, to - SvPVX_const(sv));
11306             }
11307             else if (to[-1] == '\r')
11308                 to[-1] = '\n';
11309         }
11310         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11311             to[-1] = '\n';
11312 #endif
11313
11314         /* if we're out of file, or a read fails, bail and reset the current
11315            line marker so we can report where the unterminated string began
11316         */
11317         COPLINE_INC_WITH_HERELINES;
11318         PL_bufptr = PL_bufend;
11319         if (!lex_next_chunk(0)) {
11320             sv_free(sv);
11321             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11322             return NULL;
11323         }
11324         s = start = PL_bufptr;
11325     }
11326
11327     /* at this point, we have successfully read the delimited string */
11328
11329     if (keep_delims)
11330             sv_catpvn(sv, s, termlen);
11331     s += termlen;
11332
11333     if (d_is_utf8)
11334         SvUTF8_on(sv);
11335
11336     PL_multi_end = CopLINE(PL_curcop);
11337     CopLINE_set(PL_curcop, PL_multi_start);
11338     PL_parser->herelines = herelines;
11339
11340     /* if we allocated too much space, give some back */
11341     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11342         SvLEN_set(sv, SvCUR(sv) + 1);
11343         SvPV_shrink_to_cur(sv);
11344     }
11345
11346     /* decide whether this is the first or second quoted string we've read
11347        for this op
11348     */
11349
11350     if (PL_lex_stuff)
11351         PL_parser->lex_sub_repl = sv;
11352     else
11353         PL_lex_stuff = sv;
11354     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11355     return s;
11356 }
11357
11358 /*
11359   scan_num
11360   takes: pointer to position in buffer
11361   returns: pointer to new position in buffer
11362   side-effects: builds ops for the constant in pl_yylval.op
11363
11364   Read a number in any of the formats that Perl accepts:
11365
11366   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
11367   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
11368   0b[01](_?[01])*                                       binary integers
11369   0[0-7](_?[0-7])*                                      octal integers
11370   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11371   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11372
11373   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11374   thing it reads.
11375
11376   If it reads a number without a decimal point or an exponent, it will
11377   try converting the number to an integer and see if it can do so
11378   without loss of precision.
11379 */
11380
11381 char *
11382 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11383 {
11384     const char *s = start;      /* current position in buffer */
11385     char *d;                    /* destination in temp buffer */
11386     char *e;                    /* end of temp buffer */
11387     NV nv;                              /* number read, as a double */
11388     SV *sv = NULL;                      /* place to put the converted number */
11389     bool floatit;                       /* boolean: int or float? */
11390     const char *lastub = NULL;          /* position of last underbar */
11391     static const char* const number_too_long = "Number too long";
11392     bool warned_about_underscore = 0;
11393     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11394 #define WARN_ABOUT_UNDERSCORE() \
11395         do { \
11396             if (!warned_about_underscore) { \
11397                 warned_about_underscore = 1; \
11398                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11399                                "Misplaced _ in number"); \
11400             } \
11401         } while(0)
11402     /* Hexadecimal floating point.
11403      *
11404      * In many places (where we have quads and NV is IEEE 754 double)
11405      * we can fit the mantissa bits of a NV into an unsigned quad.
11406      * (Note that UVs might not be quads even when we have quads.)
11407      * This will not work everywhere, though (either no quads, or
11408      * using long doubles), in which case we have to resort to NV,
11409      * which will probably mean horrible loss of precision due to
11410      * multiple fp operations. */
11411     bool hexfp = FALSE;
11412     int total_bits = 0;
11413     int significant_bits = 0;
11414 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11415 #  define HEXFP_UQUAD
11416     Uquad_t hexfp_uquad = 0;
11417     int hexfp_frac_bits = 0;
11418 #else
11419 #  define HEXFP_NV
11420     NV hexfp_nv = 0.0;
11421 #endif
11422     NV hexfp_mult = 1.0;
11423     UV high_non_zero = 0; /* highest digit */
11424     int non_zero_integer_digits = 0;
11425
11426     PERL_ARGS_ASSERT_SCAN_NUM;
11427
11428     /* We use the first character to decide what type of number this is */
11429
11430     switch (*s) {
11431     default:
11432         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11433
11434     /* if it starts with a 0, it could be an octal number, a decimal in
11435        0.13 disguise, or a hexadecimal number, or a binary number. */
11436     case '0':
11437         {
11438           /* variables:
11439              u          holds the "number so far"
11440              overflowed was the number more than we can hold?
11441
11442              Shift is used when we add a digit.  It also serves as an "are
11443              we in octal/hex/binary?" indicator to disallow hex characters
11444              when in octal mode.
11445            */
11446             NV n = 0.0;
11447             UV u = 0;
11448             bool overflowed = FALSE;
11449             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11450             bool has_digs = FALSE;
11451             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11452             static const char* const bases[5] =
11453               { "", "binary", "", "octal", "hexadecimal" };
11454             static const char* const Bases[5] =
11455               { "", "Binary", "", "Octal", "Hexadecimal" };
11456             static const char* const maxima[5] =
11457               { "",
11458                 "0b11111111111111111111111111111111",
11459                 "",
11460                 "037777777777",
11461                 "0xffffffff" };
11462             const char *base, *Base, *max;
11463
11464             /* check for hex */
11465             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11466                 shift = 4;
11467                 s += 2;
11468                 just_zero = FALSE;
11469             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11470                 shift = 1;
11471                 s += 2;
11472                 just_zero = FALSE;
11473             }
11474             /* check for a decimal in disguise */
11475             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11476                 goto decimal;
11477             /* so it must be octal */
11478             else {
11479                 shift = 3;
11480                 s++;
11481             }
11482
11483             if (*s == '_') {
11484                 WARN_ABOUT_UNDERSCORE();
11485                lastub = s++;
11486             }
11487
11488             base = bases[shift];
11489             Base = Bases[shift];
11490             max  = maxima[shift];
11491
11492             /* read the rest of the number */
11493             for (;;) {
11494                 /* x is used in the overflow test,
11495                    b is the digit we're adding on. */
11496                 UV x, b;
11497
11498                 switch (*s) {
11499
11500                 /* if we don't mention it, we're done */
11501                 default:
11502                     goto out;
11503
11504                 /* _ are ignored -- but warned about if consecutive */
11505                 case '_':
11506                     if (lastub && s == lastub + 1)
11507                         WARN_ABOUT_UNDERSCORE();
11508                     lastub = s++;
11509                     break;
11510
11511                 /* 8 and 9 are not octal */
11512                 case '8': case '9':
11513                     if (shift == 3)
11514                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11515                     /* FALLTHROUGH */
11516
11517                 /* octal digits */
11518                 case '2': case '3': case '4':
11519                 case '5': case '6': case '7':
11520                     if (shift == 1)
11521                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11522                     /* FALLTHROUGH */
11523
11524                 case '0': case '1':
11525                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11526                     goto digit;
11527
11528                 /* hex digits */
11529                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11530                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11531                     /* make sure they said 0x */
11532                     if (shift != 4)
11533                         goto out;
11534                     b = (*s++ & 7) + 9;
11535
11536                     /* Prepare to put the digit we have onto the end
11537                        of the number so far.  We check for overflows.
11538                     */
11539
11540                   digit:
11541                     just_zero = FALSE;
11542                     has_digs = TRUE;
11543                     if (!overflowed) {
11544                         assert(shift >= 0);
11545                         x = u << shift; /* make room for the digit */
11546
11547                         total_bits += shift;
11548
11549                         if ((x >> shift) != u
11550                             && !(PL_hints & HINT_NEW_BINARY)) {
11551                             overflowed = TRUE;
11552                             n = (NV) u;
11553                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11554                                              "Integer overflow in %s number",
11555                                              base);
11556                         } else
11557                             u = x | b;          /* add the digit to the end */
11558                     }
11559                     if (overflowed) {
11560                         n *= nvshift[shift];
11561                         /* If an NV has not enough bits in its
11562                          * mantissa to represent an UV this summing of
11563                          * small low-order numbers is a waste of time
11564                          * (because the NV cannot preserve the
11565                          * low-order bits anyway): we could just
11566                          * remember when did we overflow and in the
11567                          * end just multiply n by the right
11568                          * amount. */
11569                         n += (NV) b;
11570                     }
11571
11572                     if (high_non_zero == 0 && b > 0)
11573                         high_non_zero = b;
11574
11575                     if (high_non_zero)
11576                         non_zero_integer_digits++;
11577
11578                     /* this could be hexfp, but peek ahead
11579                      * to avoid matching ".." */
11580                     if (UNLIKELY(HEXFP_PEEK(s))) {
11581                         goto out;
11582                     }
11583
11584                     break;
11585                 }
11586             }
11587
11588           /* if we get here, we had success: make a scalar value from
11589              the number.
11590           */
11591           out:
11592
11593             /* final misplaced underbar check */
11594             if (s[-1] == '_')
11595                 WARN_ABOUT_UNDERSCORE();
11596
11597             if (UNLIKELY(HEXFP_PEEK(s))) {
11598                 /* Do sloppy (on the underbars) but quick detection
11599                  * (and value construction) for hexfp, the decimal
11600                  * detection will shortly be more thorough with the
11601                  * underbar checks. */
11602                 const char* h = s;
11603                 significant_bits = non_zero_integer_digits * shift;
11604 #ifdef HEXFP_UQUAD
11605                 hexfp_uquad = u;
11606 #else /* HEXFP_NV */
11607                 hexfp_nv = u;
11608 #endif
11609                 /* Ignore the leading zero bits of
11610                  * the high (first) non-zero digit. */
11611                 if (high_non_zero) {
11612                     if (high_non_zero < 0x8)
11613                         significant_bits--;
11614                     if (high_non_zero < 0x4)
11615                         significant_bits--;
11616                     if (high_non_zero < 0x2)
11617                         significant_bits--;
11618                 }
11619
11620                 if (*h == '.') {
11621 #ifdef HEXFP_NV
11622                     NV nv_mult = 1.0;
11623 #endif
11624                     bool accumulate = TRUE;
11625                     U8 b;
11626                     int lim = 1 << shift;
11627                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11628                                *h == '_'); h++) {
11629                         if (isXDIGIT(*h)) {
11630                             significant_bits += shift;
11631 #ifdef HEXFP_UQUAD
11632                             if (accumulate) {
11633                                 if (significant_bits < NV_MANT_DIG) {
11634                                     /* We are in the long "run" of xdigits,
11635                                      * accumulate the full four bits. */
11636                                     assert(shift >= 0);
11637                                     hexfp_uquad <<= shift;
11638                                     hexfp_uquad |= b;
11639                                     hexfp_frac_bits += shift;
11640                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11641                                     /* We are at a hexdigit either at,
11642                                      * or straddling, the edge of mantissa.
11643                                      * We will try grabbing as many as
11644                                      * possible bits. */
11645                                     int tail =
11646                                       significant_bits - NV_MANT_DIG;
11647                                     if (tail <= 0)
11648                                        tail += shift;
11649                                     assert(tail >= 0);
11650                                     hexfp_uquad <<= tail;
11651                                     assert((shift - tail) >= 0);
11652                                     hexfp_uquad |= b >> (shift - tail);
11653                                     hexfp_frac_bits += tail;
11654
11655                                     /* Ignore the trailing zero bits
11656                                      * of the last non-zero xdigit.
11657                                      *
11658                                      * The assumption here is that if
11659                                      * one has input of e.g. the xdigit
11660                                      * eight (0x8), there is only one
11661                                      * bit being input, not the full
11662                                      * four bits.  Conversely, if one
11663                                      * specifies a zero xdigit, the
11664                                      * assumption is that one really
11665                                      * wants all those bits to be zero. */
11666                                     if (b) {
11667                                         if ((b & 0x1) == 0x0) {
11668                                             significant_bits--;
11669                                             if ((b & 0x2) == 0x0) {
11670                                                 significant_bits--;
11671                                                 if ((b & 0x4) == 0x0) {
11672                                                     significant_bits--;
11673                                                 }
11674                                             }
11675                                         }
11676                                     }
11677
11678                                     accumulate = FALSE;
11679                                 }
11680                             } else {
11681                                 /* Keep skipping the xdigits, and
11682                                  * accumulating the significant bits,
11683                                  * but do not shift the uquad
11684                                  * (which would catastrophically drop
11685                                  * high-order bits) or accumulate the
11686                                  * xdigits anymore. */
11687                             }
11688 #else /* HEXFP_NV */
11689                             if (accumulate) {
11690                                 nv_mult /= nvshift[shift];
11691                                 if (nv_mult > 0.0)
11692                                     hexfp_nv += b * nv_mult;
11693                                 else
11694                                     accumulate = FALSE;
11695                             }
11696 #endif
11697                         }
11698                         if (significant_bits >= NV_MANT_DIG)
11699                             accumulate = FALSE;
11700                     }
11701                 }
11702
11703                 if ((total_bits > 0 || significant_bits > 0) &&
11704                     isALPHA_FOLD_EQ(*h, 'p')) {
11705                     bool negexp = FALSE;
11706                     h++;
11707                     if (*h == '+')
11708                         h++;
11709                     else if (*h == '-') {
11710                         negexp = TRUE;
11711                         h++;
11712                     }
11713                     if (isDIGIT(*h)) {
11714                         I32 hexfp_exp = 0;
11715                         while (isDIGIT(*h) || *h == '_') {
11716                             if (isDIGIT(*h)) {
11717                                 hexfp_exp *= 10;
11718                                 hexfp_exp += *h - '0';
11719 #ifdef NV_MIN_EXP
11720                                 if (negexp
11721                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11722                                     /* NOTE: this means that the exponent
11723                                      * underflow warning happens for
11724                                      * the IEEE 754 subnormals (denormals),
11725                                      * because DBL_MIN_EXP etc are the lowest
11726                                      * possible binary (or, rather, DBL_RADIX-base)
11727                                      * exponent for normals, not subnormals.
11728                                      *
11729                                      * This may or may not be a good thing. */
11730                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11731                                                    "Hexadecimal float: exponent underflow");
11732                                     break;
11733                                 }
11734 #endif
11735 #ifdef NV_MAX_EXP
11736                                 if (!negexp
11737                                     && hexfp_exp > NV_MAX_EXP - 1) {
11738                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11739                                                    "Hexadecimal float: exponent overflow");
11740                                     break;
11741                                 }
11742 #endif
11743                             }
11744                             h++;
11745                         }
11746                         if (negexp)
11747                             hexfp_exp = -hexfp_exp;
11748 #ifdef HEXFP_UQUAD
11749                         hexfp_exp -= hexfp_frac_bits;
11750 #endif
11751                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11752                         hexfp = TRUE;
11753                         goto decimal;
11754                     }
11755                 }
11756             }
11757
11758             if (shift != 3 && !has_digs) {
11759                 /* 0x or 0b with no digits, treat it as an error.
11760                    Originally this backed up the parse before the b or
11761                    x, but that has the potential for silent changes in
11762                    behaviour, like for: "0x.3" and "0x+$foo".
11763                 */
11764                 const char *d = s;
11765                 char *oldbp = PL_bufptr;
11766                 if (*d) ++d; /* so the user sees the bad non-digit */
11767                 PL_bufptr = (char *)d; /* so yyerror reports the context */
11768                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11769                                   shift == 4 ? "hexadecimal" : "binary"));
11770                 PL_bufptr = oldbp;
11771             }
11772
11773             if (overflowed) {
11774                 if (n > 4294967295.0)
11775                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11776                                    "%s number > %s non-portable",
11777                                    Base, max);
11778                 sv = newSVnv(n);
11779             }
11780             else {
11781 #if UVSIZE > 4
11782                 if (u > 0xffffffff)
11783                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11784                                    "%s number > %s non-portable",
11785                                    Base, max);
11786 #endif
11787                 sv = newSVuv(u);
11788             }
11789             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11790                 sv = new_constant(start, s - start, "integer",
11791                                   sv, NULL, NULL, 0, NULL);
11792             else if (PL_hints & HINT_NEW_BINARY)
11793                 sv = new_constant(start, s - start, "binary",
11794                                   sv, NULL, NULL, 0, NULL);
11795         }
11796         break;
11797
11798     /*
11799       handle decimal numbers.
11800       we're also sent here when we read a 0 as the first digit
11801     */
11802     case '1': case '2': case '3': case '4': case '5':
11803     case '6': case '7': case '8': case '9': case '.':
11804       decimal:
11805         d = PL_tokenbuf;
11806         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11807         floatit = FALSE;
11808         if (hexfp) {
11809             floatit = TRUE;
11810             *d++ = '0';
11811             switch (shift) {
11812             case 4:
11813                 *d++ = 'x';
11814                 s = start + 2;
11815                 break;
11816             case 3:
11817                 s = start + 1;
11818                 break;
11819             case 1:
11820                 *d++ = 'b';
11821                 s = start + 2;
11822                 break;
11823             default:
11824                 NOT_REACHED; /* NOTREACHED */
11825             }
11826         }
11827
11828         /* read next group of digits and _ and copy into d */
11829         while (isDIGIT(*s)
11830                || *s == '_'
11831                || UNLIKELY(hexfp && isXDIGIT(*s)))
11832         {
11833             /* skip underscores, checking for misplaced ones
11834                if -w is on
11835             */
11836             if (*s == '_') {
11837                 if (lastub && s == lastub + 1)
11838                     WARN_ABOUT_UNDERSCORE();
11839                 lastub = s++;
11840             }
11841             else {
11842                 /* check for end of fixed-length buffer */
11843                 if (d >= e)
11844                     Perl_croak(aTHX_ "%s", number_too_long);
11845                 /* if we're ok, copy the character */
11846                 *d++ = *s++;
11847             }
11848         }
11849
11850         /* final misplaced underbar check */
11851         if (lastub && s == lastub + 1)
11852             WARN_ABOUT_UNDERSCORE();
11853
11854         /* read a decimal portion if there is one.  avoid
11855            3..5 being interpreted as the number 3. followed
11856            by .5
11857         */
11858         if (*s == '.' && s[1] != '.') {
11859             floatit = TRUE;
11860             *d++ = *s++;
11861
11862             if (*s == '_') {
11863                 WARN_ABOUT_UNDERSCORE();
11864                 lastub = s;
11865             }
11866
11867             /* copy, ignoring underbars, until we run out of digits.
11868             */
11869             for (; isDIGIT(*s)
11870                    || *s == '_'
11871                    || UNLIKELY(hexfp && isXDIGIT(*s));
11872                  s++)
11873             {
11874                 /* fixed length buffer check */
11875                 if (d >= e)
11876                     Perl_croak(aTHX_ "%s", number_too_long);
11877                 if (*s == '_') {
11878                    if (lastub && s == lastub + 1)
11879                         WARN_ABOUT_UNDERSCORE();
11880                    lastub = s;
11881                 }
11882                 else
11883                     *d++ = *s;
11884             }
11885             /* fractional part ending in underbar? */
11886             if (s[-1] == '_')
11887                 WARN_ABOUT_UNDERSCORE();
11888             if (*s == '.' && isDIGIT(s[1])) {
11889                 /* oops, it's really a v-string, but without the "v" */
11890                 s = start;
11891                 goto vstring;
11892             }
11893         }
11894
11895         /* read exponent part, if present */
11896         if ((isALPHA_FOLD_EQ(*s, 'e')
11897               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11898             && memCHRs("+-0123456789_", s[1]))
11899         {
11900             int exp_digits = 0;
11901             const char *save_s = s;
11902             char * save_d = d;
11903
11904             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11905                ditto for p (hexfloats) */
11906             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11907                 /* At least some Mach atof()s don't grok 'E' */
11908                 *d++ = 'e';
11909             }
11910             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11911                 *d++ = 'p';
11912             }
11913
11914             s++;
11915
11916
11917             /* stray preinitial _ */
11918             if (*s == '_') {
11919                 WARN_ABOUT_UNDERSCORE();
11920                 lastub = s++;
11921             }
11922
11923             /* allow positive or negative exponent */
11924             if (*s == '+' || *s == '-')
11925                 *d++ = *s++;
11926
11927             /* stray initial _ */
11928             if (*s == '_') {
11929                 WARN_ABOUT_UNDERSCORE();
11930                 lastub = s++;
11931             }
11932
11933             /* read digits of exponent */
11934             while (isDIGIT(*s) || *s == '_') {
11935                 if (isDIGIT(*s)) {
11936                     ++exp_digits;
11937                     if (d >= e)
11938                         Perl_croak(aTHX_ "%s", number_too_long);
11939                     *d++ = *s++;
11940                 }
11941                 else {
11942                    if (((lastub && s == lastub + 1)
11943                         || (!isDIGIT(s[1]) && s[1] != '_')))
11944                         WARN_ABOUT_UNDERSCORE();
11945                    lastub = s++;
11946                 }
11947             }
11948
11949             if (!exp_digits) {
11950                 /* no exponent digits, the [eEpP] could be for something else,
11951                  * though in practice we don't get here for p since that's preparsed
11952                  * earlier, and results in only the 0xX being consumed, so behave similarly
11953                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11954                  * next token.
11955                  */
11956                 s = save_s;
11957                 d = save_d;
11958             }
11959             else {
11960                 floatit = TRUE;
11961             }
11962         }
11963
11964
11965         /*
11966            We try to do an integer conversion first if no characters
11967            indicating "float" have been found.
11968          */
11969
11970         if (!floatit) {
11971             UV uv;
11972             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11973
11974             if (flags == IS_NUMBER_IN_UV) {
11975               if (uv <= IV_MAX)
11976                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11977               else
11978                 sv = newSVuv(uv);
11979             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11980               if (uv <= (UV) IV_MIN)
11981                 sv = newSViv(-(IV)uv);
11982               else
11983                 floatit = TRUE;
11984             } else
11985               floatit = TRUE;
11986         }
11987         if (floatit) {
11988             /* terminate the string */
11989             *d = '\0';
11990             if (UNLIKELY(hexfp)) {
11991 #  ifdef NV_MANT_DIG
11992                 if (significant_bits > NV_MANT_DIG)
11993                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11994                                    "Hexadecimal float: mantissa overflow");
11995 #  endif
11996 #ifdef HEXFP_UQUAD
11997                 nv = hexfp_uquad * hexfp_mult;
11998 #else /* HEXFP_NV */
11999                 nv = hexfp_nv * hexfp_mult;
12000 #endif
12001             } else {
12002                 nv = Atof(PL_tokenbuf);
12003             }
12004             sv = newSVnv(nv);
12005         }
12006
12007         if ( floatit
12008              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12009             const char *const key = floatit ? "float" : "integer";
12010             const STRLEN keylen = floatit ? 5 : 7;
12011             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12012                                 key, keylen, sv, NULL, NULL, 0, NULL);
12013         }
12014         break;
12015
12016     /* if it starts with a v, it could be a v-string */
12017     case 'v':
12018     vstring:
12019                 sv = newSV(5); /* preallocate storage space */
12020                 ENTER_with_name("scan_vstring");
12021                 SAVEFREESV(sv);
12022                 s = scan_vstring(s, PL_bufend, sv);
12023                 SvREFCNT_inc_simple_void_NN(sv);
12024                 LEAVE_with_name("scan_vstring");
12025         break;
12026     }
12027
12028     /* make the op for the constant and return */
12029
12030     if (sv)
12031         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12032     else
12033         lvalp->opval = NULL;
12034
12035     return (char *)s;
12036 }
12037
12038 STATIC char *
12039 S_scan_formline(pTHX_ char *s)
12040 {
12041     SV * const stuff = newSVpvs("");
12042     bool needargs = FALSE;
12043     bool eofmt = FALSE;
12044
12045     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12046
12047     while (!needargs) {
12048         char *eol;
12049         if (*s == '.') {
12050             char *t = s+1;
12051 #ifdef PERL_STRICT_CR
12052             while (SPACE_OR_TAB(*t))
12053                 t++;
12054 #else
12055             while (SPACE_OR_TAB(*t) || *t == '\r')
12056                 t++;
12057 #endif
12058             if (*t == '\n' || t == PL_bufend) {
12059                 eofmt = TRUE;
12060                 break;
12061             }
12062         }
12063         eol = (char *) memchr(s,'\n',PL_bufend-s);
12064         if (!eol++)
12065                 eol = PL_bufend;
12066         if (*s != '#') {
12067             char *t;
12068             for (t = s; t < eol; t++) {
12069                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12070                     needargs = FALSE;
12071                     goto enough;        /* ~~ must be first line in formline */
12072                 }
12073                 if (*t == '@' || *t == '^')
12074                     needargs = TRUE;
12075             }
12076             if (eol > s) {
12077                 sv_catpvn(stuff, s, eol-s);
12078 #ifndef PERL_STRICT_CR
12079                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12080                     char *end = SvPVX(stuff) + SvCUR(stuff);
12081                     end[-2] = '\n';
12082                     end[-1] = '\0';
12083                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12084                 }
12085 #endif
12086             }
12087             else
12088               break;
12089         }
12090         s = (char*)eol;
12091         if ((PL_rsfp || PL_parser->filtered)
12092          && PL_parser->form_lex_state == LEX_NORMAL) {
12093             bool got_some;
12094             PL_bufptr = PL_bufend;
12095             COPLINE_INC_WITH_HERELINES;
12096             got_some = lex_next_chunk(0);
12097             CopLINE_dec(PL_curcop);
12098             s = PL_bufptr;
12099             if (!got_some)
12100                 break;
12101         }
12102         incline(s, PL_bufend);
12103     }
12104   enough:
12105     if (!SvCUR(stuff) || needargs)
12106         PL_lex_state = PL_parser->form_lex_state;
12107     if (SvCUR(stuff)) {
12108         PL_expect = XSTATE;
12109         if (needargs) {
12110             const char *s2 = s;
12111             while (isSPACE(*s2) && *s2 != '\n')
12112                 s2++;
12113             if (*s2 == '{') {
12114                 PL_expect = XTERMBLOCK;
12115                 NEXTVAL_NEXTTOKE.ival = 0;
12116                 force_next(DO);
12117             }
12118             NEXTVAL_NEXTTOKE.ival = 0;
12119             force_next(FORMLBRACK);
12120         }
12121         if (!IN_BYTES) {
12122             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12123                 SvUTF8_on(stuff);
12124         }
12125         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12126         force_next(THING);
12127     }
12128     else {
12129         SvREFCNT_dec(stuff);
12130         if (eofmt)
12131             PL_lex_formbrack = 0;
12132     }
12133     return s;
12134 }
12135
12136 I32
12137 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12138 {
12139     const I32 oldsavestack_ix = PL_savestack_ix;
12140     CV* const outsidecv = PL_compcv;
12141
12142     SAVEI32(PL_subline);
12143     save_item(PL_subname);
12144     SAVESPTR(PL_compcv);
12145
12146     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12147     CvFLAGS(PL_compcv) |= flags;
12148
12149     PL_subline = CopLINE(PL_curcop);
12150     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12151     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12152     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12153     if (outsidecv && CvPADLIST(outsidecv))
12154         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12155
12156     return oldsavestack_ix;
12157 }
12158
12159
12160 /* Do extra initialisation of a CV (typically one just created by
12161  * start_subparse()) if that CV is for a named sub
12162  */
12163
12164 void
12165 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12166 {
12167     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12168
12169     if (nameop->op_type == OP_CONST) {
12170         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12171         if (   strEQ(name, "BEGIN")
12172             || strEQ(name, "END")
12173             || strEQ(name, "INIT")
12174             || strEQ(name, "CHECK")
12175             || strEQ(name, "UNITCHECK")
12176         )
12177           CvSPECIAL_on(cv);
12178     }
12179     else
12180     /* State subs inside anonymous subs need to be
12181      clonable themselves. */
12182     if (   CvANON(CvOUTSIDE(cv))
12183         || CvCLONE(CvOUTSIDE(cv))
12184         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12185                         CvOUTSIDE(cv)
12186                      ))[nameop->op_targ])
12187     )
12188       CvCLONE_on(cv);
12189 }
12190
12191
12192 static int
12193 S_yywarn(pTHX_ const char *const s, U32 flags)
12194 {
12195     PERL_ARGS_ASSERT_YYWARN;
12196
12197     PL_in_eval |= EVAL_WARNONLY;
12198     yyerror_pv(s, flags);
12199     return 0;
12200 }
12201
12202 void
12203 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12204 {
12205     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12206
12207     if (PL_minus_c)
12208         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12209     else {
12210         Perl_croak(aTHX_
12211                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12212     }
12213     NOT_REACHED; /* NOTREACHED */
12214 }
12215
12216 void
12217 Perl_yyquit(pTHX)
12218 {
12219     /* Called, after at least one error has been found, to abort the parse now,
12220      * instead of trying to forge ahead */
12221
12222     yyerror_pvn(NULL, 0, 0);
12223 }
12224
12225 int
12226 Perl_yyerror(pTHX_ const char *const s)
12227 {
12228     PERL_ARGS_ASSERT_YYERROR;
12229     return yyerror_pvn(s, strlen(s), 0);
12230 }
12231
12232 int
12233 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12234 {
12235     PERL_ARGS_ASSERT_YYERROR_PV;
12236     return yyerror_pvn(s, strlen(s), flags);
12237 }
12238
12239 int
12240 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12241 {
12242     const char *context = NULL;
12243     int contlen = -1;
12244     SV *msg;
12245     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12246     int yychar  = PL_parser->yychar;
12247
12248     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12249      * apply.  If the number of errors found is large enough, it abandons
12250      * parsing.  If 's' is NULL, there is no message, and it abandons
12251      * processing unconditionally */
12252
12253     if (s != NULL) {
12254         if (!yychar || (yychar == ';' && !PL_rsfp))
12255             sv_catpvs(where_sv, "at EOF");
12256         else if (   PL_oldoldbufptr
12257                  && PL_bufptr > PL_oldoldbufptr
12258                  && PL_bufptr - PL_oldoldbufptr < 200
12259                  && PL_oldoldbufptr != PL_oldbufptr
12260                  && PL_oldbufptr != PL_bufptr)
12261         {
12262             /*
12263                     Only for NetWare:
12264                     The code below is removed for NetWare because it
12265                     abends/crashes on NetWare when the script has error such as
12266                     not having the closing quotes like:
12267                         if ($var eq "value)
12268                     Checking of white spaces is anyway done in NetWare code.
12269             */
12270 #ifndef NETWARE
12271             while (isSPACE(*PL_oldoldbufptr))
12272                 PL_oldoldbufptr++;
12273 #endif
12274             context = PL_oldoldbufptr;
12275             contlen = PL_bufptr - PL_oldoldbufptr;
12276         }
12277         else if (  PL_oldbufptr
12278                 && PL_bufptr > PL_oldbufptr
12279                 && PL_bufptr - PL_oldbufptr < 200
12280                 && PL_oldbufptr != PL_bufptr) {
12281             /*
12282                     Only for NetWare:
12283                     The code below is removed for NetWare because it
12284                     abends/crashes on NetWare when the script has error such as
12285                     not having the closing quotes like:
12286                         if ($var eq "value)
12287                     Checking of white spaces is anyway done in NetWare code.
12288             */
12289 #ifndef NETWARE
12290             while (isSPACE(*PL_oldbufptr))
12291                 PL_oldbufptr++;
12292 #endif
12293             context = PL_oldbufptr;
12294             contlen = PL_bufptr - PL_oldbufptr;
12295         }
12296         else if (yychar > 255)
12297             sv_catpvs(where_sv, "next token ???");
12298         else if (yychar == YYEMPTY) {
12299             if (PL_lex_state == LEX_NORMAL)
12300                 sv_catpvs(where_sv, "at end of line");
12301             else if (PL_lex_inpat)
12302                 sv_catpvs(where_sv, "within pattern");
12303             else
12304                 sv_catpvs(where_sv, "within string");
12305         }
12306         else {
12307             sv_catpvs(where_sv, "next char ");
12308             if (yychar < 32)
12309                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12310             else if (isPRINT_LC(yychar)) {
12311                 const char string = yychar;
12312                 sv_catpvn(where_sv, &string, 1);
12313             }
12314             else
12315                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12316         }
12317         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12318         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12319             OutCopFILE(PL_curcop),
12320             (IV)(PL_parser->preambling == NOLINE
12321                    ? CopLINE(PL_curcop)
12322                    : PL_parser->preambling));
12323         if (context)
12324             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12325                                  UTF8fARG(UTF, contlen, context));
12326         else
12327             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12328         if (   PL_multi_start < PL_multi_end
12329             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12330         {
12331             Perl_sv_catpvf(aTHX_ msg,
12332             "  (Might be a runaway multi-line %c%c string starting on"
12333             " line %" IVdf ")\n",
12334                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12335             PL_multi_end = 0;
12336         }
12337         if (PL_in_eval & EVAL_WARNONLY) {
12338             PL_in_eval &= ~EVAL_WARNONLY;
12339             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12340         }
12341         else {
12342             qerror(msg);
12343         }
12344     }
12345     if (s == NULL || PL_error_count >= 10) {
12346         const char * msg = "";
12347         const char * const name = OutCopFILE(PL_curcop);
12348
12349         if (PL_in_eval) {
12350             SV * errsv = ERRSV;
12351             if (SvCUR(errsv)) {
12352                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12353             }
12354         }
12355
12356         if (s == NULL) {
12357             abort_execution(msg, name);
12358         }
12359         else {
12360             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12361         }
12362     }
12363     PL_in_my = 0;
12364     PL_in_my_stash = NULL;
12365     return 0;
12366 }
12367
12368 STATIC char*
12369 S_swallow_bom(pTHX_ U8 *s)
12370 {
12371     const STRLEN slen = SvCUR(PL_linestr);
12372
12373     PERL_ARGS_ASSERT_SWALLOW_BOM;
12374
12375     switch (s[0]) {
12376     case 0xFF:
12377         if (s[1] == 0xFE) {
12378             /* UTF-16 little-endian? (or UTF-32LE?) */
12379             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12380                 /* diag_listed_as: Unsupported script encoding %s */
12381                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12382 #ifndef PERL_NO_UTF16_FILTER
12383 #ifdef DEBUGGING
12384             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12385 #endif
12386             s += 2;
12387             if (PL_bufend > (char*)s) {
12388                 s = add_utf16_textfilter(s, TRUE);
12389             }
12390 #else
12391             /* diag_listed_as: Unsupported script encoding %s */
12392             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12393 #endif
12394         }
12395         break;
12396     case 0xFE:
12397         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12398 #ifndef PERL_NO_UTF16_FILTER
12399 #ifdef DEBUGGING
12400             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12401 #endif
12402             s += 2;
12403             if (PL_bufend > (char *)s) {
12404                 s = add_utf16_textfilter(s, FALSE);
12405             }
12406 #else
12407             /* diag_listed_as: Unsupported script encoding %s */
12408             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12409 #endif
12410         }
12411         break;
12412     case BOM_UTF8_FIRST_BYTE: {
12413         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12414 #ifdef DEBUGGING
12415             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12416 #endif
12417             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12418         }
12419         break;
12420     }
12421     case 0:
12422         if (slen > 3) {
12423              if (s[1] == 0) {
12424                   if (s[2] == 0xFE && s[3] == 0xFF) {
12425                        /* UTF-32 big-endian */
12426                        /* diag_listed_as: Unsupported script encoding %s */
12427                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12428                   }
12429              }
12430              else if (s[2] == 0 && s[3] != 0) {
12431                   /* Leading bytes
12432                    * 00 xx 00 xx
12433                    * are a good indicator of UTF-16BE. */
12434 #ifndef PERL_NO_UTF16_FILTER
12435 #ifdef DEBUGGING
12436                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12437 #endif
12438                   s = add_utf16_textfilter(s, FALSE);
12439 #else
12440                   /* diag_listed_as: Unsupported script encoding %s */
12441                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12442 #endif
12443              }
12444         }
12445         break;
12446
12447     default:
12448          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12449                   /* Leading bytes
12450                    * xx 00 xx 00
12451                    * are a good indicator of UTF-16LE. */
12452 #ifndef PERL_NO_UTF16_FILTER
12453 #ifdef DEBUGGING
12454               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12455 #endif
12456               s = add_utf16_textfilter(s, TRUE);
12457 #else
12458               /* diag_listed_as: Unsupported script encoding %s */
12459               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12460 #endif
12461          }
12462     }
12463     return (char*)s;
12464 }
12465
12466
12467 #ifndef PERL_NO_UTF16_FILTER
12468 static I32
12469 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12470 {
12471     SV *const filter = FILTER_DATA(idx);
12472     /* We re-use this each time round, throwing the contents away before we
12473        return.  */
12474     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12475     SV *const utf8_buffer = filter;
12476     IV status = IoPAGE(filter);
12477     const bool reverse = cBOOL(IoLINES(filter));
12478     I32 retval;
12479
12480     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12481
12482     /* As we're automatically added, at the lowest level, and hence only called
12483        from this file, we can be sure that we're not called in block mode. Hence
12484        don't bother writing code to deal with block mode.  */
12485     if (maxlen) {
12486         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12487     }
12488     if (status < 0) {
12489         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12490     }
12491     DEBUG_P(PerlIO_printf(Perl_debug_log,
12492                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12493                           FPTR2DPTR(void *, S_utf16_textfilter),
12494                           reverse ? 'l' : 'b', idx, maxlen, status,
12495                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12496
12497     while (1) {
12498         STRLEN chars;
12499         STRLEN have;
12500         Size_t newlen;
12501         U8 *end;
12502         /* First, look in our buffer of existing UTF-8 data:  */
12503         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12504
12505         if (nl) {
12506             ++nl;
12507         } else if (status == 0) {
12508             /* EOF */
12509             IoPAGE(filter) = 0;
12510             nl = SvEND(utf8_buffer);
12511         }
12512         if (nl) {
12513             STRLEN got = nl - SvPVX(utf8_buffer);
12514             /* Did we have anything to append?  */
12515             retval = got != 0;
12516             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12517             /* Everything else in this code works just fine if SVp_POK isn't
12518                set.  This, however, needs it, and we need it to work, else
12519                we loop infinitely because the buffer is never consumed.  */
12520             sv_chop(utf8_buffer, nl);
12521             break;
12522         }
12523
12524         /* OK, not a complete line there, so need to read some more UTF-16.
12525            Read an extra octect if the buffer currently has an odd number. */
12526         while (1) {
12527             if (status <= 0)
12528                 break;
12529             if (SvCUR(utf16_buffer) >= 2) {
12530                 /* Location of the high octet of the last complete code point.
12531                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12532                    *coupled* with all the benefits of partial reads and
12533                    endianness.  */
12534                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12535                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12536
12537                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12538                     break;
12539                 }
12540
12541                 /* We have the first half of a surrogate. Read more.  */
12542                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12543             }
12544
12545             status = FILTER_READ(idx + 1, utf16_buffer,
12546                                  160 + (SvCUR(utf16_buffer) & 1));
12547             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12548             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12549             if (status < 0) {
12550                 /* Error */
12551                 IoPAGE(filter) = status;
12552                 return status;
12553             }
12554         }
12555
12556         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12557          * require 4 bytes per char */
12558         chars = SvCUR(utf16_buffer) >> 1;
12559         have = SvCUR(utf8_buffer);
12560
12561         /* Assume the worst case size as noted by the functions: twice the
12562          * number of input bytes */
12563         SvGROW(utf8_buffer, have + chars * 4 + 1);
12564
12565         if (reverse) {
12566             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12567                                          (U8*)SvPVX_const(utf8_buffer) + have,
12568                                          chars * 2, &newlen);
12569         } else {
12570             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12571                                 (U8*)SvPVX_const(utf8_buffer) + have,
12572                                 chars * 2, &newlen);
12573         }
12574         SvCUR_set(utf8_buffer, have + newlen);
12575         *end = '\0';
12576
12577         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12578            it's private to us, and utf16_to_utf8{,reversed} take a
12579            (pointer,length) pair, rather than a NUL-terminated string.  */
12580         if(SvCUR(utf16_buffer) & 1) {
12581             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12582             SvCUR_set(utf16_buffer, 1);
12583         } else {
12584             SvCUR_set(utf16_buffer, 0);
12585         }
12586     }
12587     DEBUG_P(PerlIO_printf(Perl_debug_log,
12588                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12589                           status,
12590                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12591     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12592     return retval;
12593 }
12594
12595 static U8 *
12596 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12597 {
12598     SV *filter = filter_add(S_utf16_textfilter, NULL);
12599
12600     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12601
12602     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12603     SvPVCLEAR(filter);
12604     IoLINES(filter) = reversed;
12605     IoPAGE(filter) = 1; /* Not EOF */
12606
12607     /* Sadly, we have to return a valid pointer, come what may, so we have to
12608        ignore any error return from this.  */
12609     SvCUR_set(PL_linestr, 0);
12610     if (FILTER_READ(0, PL_linestr, 0)) {
12611         SvUTF8_on(PL_linestr);
12612     } else {
12613         SvUTF8_on(PL_linestr);
12614     }
12615     PL_bufend = SvEND(PL_linestr);
12616     return (U8*)SvPVX(PL_linestr);
12617 }
12618 #endif
12619
12620 /*
12621 Returns a pointer to the next character after the parsed
12622 vstring, as well as updating the passed in sv.
12623
12624 Function must be called like
12625
12626         sv = sv_2mortal(newSV(5));
12627         s = scan_vstring(s,e,sv);
12628
12629 where s and e are the start and end of the string.
12630 The sv should already be large enough to store the vstring
12631 passed in, for performance reasons.
12632
12633 This function may croak if fatal warnings are enabled in the
12634 calling scope, hence the sv_2mortal in the example (to prevent
12635 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12636 sv_2mortal.
12637
12638 */
12639
12640 char *
12641 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12642 {
12643     const char *pos = s;
12644     const char *start = s;
12645
12646     PERL_ARGS_ASSERT_SCAN_VSTRING;
12647
12648     if (*pos == 'v') pos++;  /* get past 'v' */
12649     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12650         pos++;
12651     if ( *pos != '.') {
12652         /* this may not be a v-string if followed by => */
12653         const char *next = pos;
12654         while (next < e && isSPACE(*next))
12655             ++next;
12656         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12657             /* return string not v-string */
12658             sv_setpvn(sv,(char *)s,pos-s);
12659             return (char *)pos;
12660         }
12661     }
12662
12663     if (!isALPHA(*pos)) {
12664         U8 tmpbuf[UTF8_MAXBYTES+1];
12665
12666         if (*s == 'v')
12667             s++;  /* get past 'v' */
12668
12669         SvPVCLEAR(sv);
12670
12671         for (;;) {
12672             /* this is atoi() that tolerates underscores */
12673             U8 *tmpend;
12674             UV rev = 0;
12675             const char *end = pos;
12676             UV mult = 1;
12677             while (--end >= s) {
12678                 if (*end != '_') {
12679                     const UV orev = rev;
12680                     rev += (*end - '0') * mult;
12681                     mult *= 10;
12682                     if (orev > rev)
12683                         /* diag_listed_as: Integer overflow in %s number */
12684                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12685                                          "Integer overflow in decimal number");
12686                 }
12687             }
12688
12689             /* Append native character for the rev point */
12690             tmpend = uvchr_to_utf8(tmpbuf, rev);
12691             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12692             if (!UVCHR_IS_INVARIANT(rev))
12693                  SvUTF8_on(sv);
12694             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12695                  s = ++pos;
12696             else {
12697                  s = pos;
12698                  break;
12699             }
12700             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12701                  pos++;
12702         }
12703         SvPOK_on(sv);
12704         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12705         SvRMAGICAL_on(sv);
12706     }
12707     return (char *)s;
12708 }
12709
12710 int
12711 Perl_keyword_plugin_standard(pTHX_
12712         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12713 {
12714     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12715     PERL_UNUSED_CONTEXT;
12716     PERL_UNUSED_ARG(keyword_ptr);
12717     PERL_UNUSED_ARG(keyword_len);
12718     PERL_UNUSED_ARG(op_ptr);
12719     return KEYWORD_PLUGIN_DECLINE;
12720 }
12721
12722 /*
12723 =for apidoc wrap_keyword_plugin
12724
12725 Puts a C function into the chain of keyword plugins.  This is the
12726 preferred way to manipulate the L</PL_keyword_plugin> variable.
12727 C<new_plugin> is a pointer to the C function that is to be added to the
12728 keyword plugin chain, and C<old_plugin_p> points to the storage location
12729 where a pointer to the next function in the chain will be stored.  The
12730 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12731 while the value previously stored there is written to C<*old_plugin_p>.
12732
12733 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12734 to hook keyword parsing may find itself invoked more than once per
12735 process, typically in different threads.  To handle that situation, this
12736 function is idempotent.  The location C<*old_plugin_p> must initially
12737 (once per process) contain a null pointer.  A C variable of static
12738 duration (declared at file scope, typically also marked C<static> to give
12739 it internal linkage) will be implicitly initialised appropriately, if it
12740 does not have an explicit initialiser.  This function will only actually
12741 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12742 function is also thread safe on the small scale.  It uses appropriate
12743 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12744
12745 When this function is called, the function referenced by C<new_plugin>
12746 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12747 In a threading situation, C<new_plugin> may be called immediately, even
12748 before this function has returned.  C<*old_plugin_p> will always be
12749 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12750 decides not to do anything special with the identifier that it is given
12751 (which is the usual case for most calls to a keyword plugin), it must
12752 chain the plugin function referenced by C<*old_plugin_p>.
12753
12754 Taken all together, XS code to install a keyword plugin should typically
12755 look something like this:
12756
12757     static Perl_keyword_plugin_t next_keyword_plugin;
12758     static OP *my_keyword_plugin(pTHX_
12759         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12760     {
12761         if (memEQs(keyword_ptr, keyword_len,
12762                    "my_new_keyword")) {
12763             ...
12764         } else {
12765             return next_keyword_plugin(aTHX_
12766                 keyword_ptr, keyword_len, op_ptr);
12767         }
12768     }
12769     BOOT:
12770         wrap_keyword_plugin(my_keyword_plugin,
12771                             &next_keyword_plugin);
12772
12773 Direct access to L</PL_keyword_plugin> should be avoided.
12774
12775 =cut
12776 */
12777
12778 void
12779 Perl_wrap_keyword_plugin(pTHX_
12780     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12781 {
12782
12783     PERL_UNUSED_CONTEXT;
12784     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12785     if (*old_plugin_p) return;
12786     KEYWORD_PLUGIN_MUTEX_LOCK;
12787     if (!*old_plugin_p) {
12788         *old_plugin_p = PL_keyword_plugin;
12789         PL_keyword_plugin = new_plugin;
12790     }
12791     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12792 }
12793
12794 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12795 static void
12796 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12797 {
12798     SAVEI32(PL_lex_brackets);
12799     if (PL_lex_brackets > 100)
12800         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12801     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12802     SAVEI32(PL_lex_allbrackets);
12803     PL_lex_allbrackets = 0;
12804     SAVEI8(PL_lex_fakeeof);
12805     PL_lex_fakeeof = (U8)fakeeof;
12806     if(yyparse(gramtype) && !PL_parser->error_count)
12807         qerror(Perl_mess(aTHX_ "Parse error"));
12808 }
12809
12810 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12811 static OP *
12812 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12813 {
12814     OP *o;
12815     ENTER;
12816     SAVEVPTR(PL_eval_root);
12817     PL_eval_root = NULL;
12818     parse_recdescent(gramtype, fakeeof);
12819     o = PL_eval_root;
12820     LEAVE;
12821     return o;
12822 }
12823
12824 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12825 static OP *
12826 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12827 {
12828     OP *exprop;
12829     if (flags & ~PARSE_OPTIONAL)
12830         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12831     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12832     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12833         if (!PL_parser->error_count)
12834             qerror(Perl_mess(aTHX_ "Parse error"));
12835         exprop = newOP(OP_NULL, 0);
12836     }
12837     return exprop;
12838 }
12839
12840 /*
12841 =for apidoc parse_arithexpr
12842
12843 Parse a Perl arithmetic expression.  This may contain operators of precedence
12844 down to the bit shift operators.  The expression must be followed (and thus
12845 terminated) either by a comparison or lower-precedence operator or by
12846 something that would normally terminate an expression such as semicolon.
12847 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12848 otherwise it is mandatory.  It is up to the caller to ensure that the
12849 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12850 the source of the code to be parsed and the lexical context for the
12851 expression.
12852
12853 The op tree representing the expression is returned.  If an optional
12854 expression is absent, a null pointer is returned, otherwise the pointer
12855 will be non-null.
12856
12857 If an error occurs in parsing or compilation, in most cases a valid op
12858 tree is returned anyway.  The error is reflected in the parser state,
12859 normally resulting in a single exception at the top level of parsing
12860 which covers all the compilation errors that occurred.  Some compilation
12861 errors, however, will throw an exception immediately.
12862
12863 =for apidoc Amnh||PARSE_OPTIONAL
12864
12865 =cut
12866
12867 */
12868
12869 OP *
12870 Perl_parse_arithexpr(pTHX_ U32 flags)
12871 {
12872     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12873 }
12874
12875 /*
12876 =for apidoc parse_termexpr
12877
12878 Parse a Perl term expression.  This may contain operators of precedence
12879 down to the assignment operators.  The expression must be followed (and thus
12880 terminated) either by a comma or lower-precedence operator or by
12881 something that would normally terminate an expression such as semicolon.
12882 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12883 otherwise it is mandatory.  It is up to the caller to ensure that the
12884 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12885 the source of the code to be parsed and the lexical context for the
12886 expression.
12887
12888 The op tree representing the expression is returned.  If an optional
12889 expression is absent, a null pointer is returned, otherwise the pointer
12890 will be non-null.
12891
12892 If an error occurs in parsing or compilation, in most cases a valid op
12893 tree is returned anyway.  The error is reflected in the parser state,
12894 normally resulting in a single exception at the top level of parsing
12895 which covers all the compilation errors that occurred.  Some compilation
12896 errors, however, will throw an exception immediately.
12897
12898 =cut
12899 */
12900
12901 OP *
12902 Perl_parse_termexpr(pTHX_ U32 flags)
12903 {
12904     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12905 }
12906
12907 /*
12908 =for apidoc parse_listexpr
12909
12910 Parse a Perl list expression.  This may contain operators of precedence
12911 down to the comma operator.  The expression must be followed (and thus
12912 terminated) either by a low-precedence logic operator such as C<or> or by
12913 something that would normally terminate an expression such as semicolon.
12914 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12915 otherwise it is mandatory.  It is up to the caller to ensure that the
12916 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12917 the source of the code to be parsed and the lexical context for the
12918 expression.
12919
12920 The op tree representing the expression is returned.  If an optional
12921 expression is absent, a null pointer is returned, otherwise the pointer
12922 will be non-null.
12923
12924 If an error occurs in parsing or compilation, in most cases a valid op
12925 tree is returned anyway.  The error is reflected in the parser state,
12926 normally resulting in a single exception at the top level of parsing
12927 which covers all the compilation errors that occurred.  Some compilation
12928 errors, however, will throw an exception immediately.
12929
12930 =cut
12931 */
12932
12933 OP *
12934 Perl_parse_listexpr(pTHX_ U32 flags)
12935 {
12936     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12937 }
12938
12939 /*
12940 =for apidoc parse_fullexpr
12941
12942 Parse a single complete Perl expression.  This allows the full
12943 expression grammar, including the lowest-precedence operators such
12944 as C<or>.  The expression must be followed (and thus terminated) by a
12945 token that an expression would normally be terminated by: end-of-file,
12946 closing bracketing punctuation, semicolon, or one of the keywords that
12947 signals a postfix expression-statement modifier.  If C<flags> has the
12948 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12949 mandatory.  It is up to the caller to ensure that the dynamic parser
12950 state (L</PL_parser> et al) is correctly set to reflect the source of
12951 the code to be parsed and the lexical context for the expression.
12952
12953 The op tree representing the expression is returned.  If an optional
12954 expression is absent, a null pointer is returned, otherwise the pointer
12955 will be non-null.
12956
12957 If an error occurs in parsing or compilation, in most cases a valid op
12958 tree is returned anyway.  The error is reflected in the parser state,
12959 normally resulting in a single exception at the top level of parsing
12960 which covers all the compilation errors that occurred.  Some compilation
12961 errors, however, will throw an exception immediately.
12962
12963 =cut
12964 */
12965
12966 OP *
12967 Perl_parse_fullexpr(pTHX_ U32 flags)
12968 {
12969     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12970 }
12971
12972 /*
12973 =for apidoc parse_block
12974
12975 Parse a single complete Perl code block.  This consists of an opening
12976 brace, a sequence of statements, and a closing brace.  The block
12977 constitutes a lexical scope, so C<my> variables and various compile-time
12978 effects can be contained within it.  It is up to the caller to ensure
12979 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12980 reflect the source of the code to be parsed and the lexical context for
12981 the statement.
12982
12983 The op tree representing the code block is returned.  This is always a
12984 real op, never a null pointer.  It will normally be a C<lineseq> list,
12985 including C<nextstate> or equivalent ops.  No ops to construct any kind
12986 of runtime scope are included by virtue of it being a block.
12987
12988 If an error occurs in parsing or compilation, in most cases a valid op
12989 tree (most likely null) is returned anyway.  The error is reflected in
12990 the parser state, normally resulting in a single exception at the top
12991 level of parsing which covers all the compilation errors that occurred.
12992 Some compilation errors, however, will throw an exception immediately.
12993
12994 The C<flags> parameter is reserved for future use, and must always
12995 be zero.
12996
12997 =cut
12998 */
12999
13000 OP *
13001 Perl_parse_block(pTHX_ U32 flags)
13002 {
13003     if (flags)
13004         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13005     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13006 }
13007
13008 /*
13009 =for apidoc parse_barestmt
13010
13011 Parse a single unadorned Perl statement.  This may be a normal imperative
13012 statement or a declaration that has compile-time effect.  It does not
13013 include any label or other affixture.  It is up to the caller to ensure
13014 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13015 reflect the source of the code to be parsed and the lexical context for
13016 the statement.
13017
13018 The op tree representing the statement is returned.  This may be a
13019 null pointer if the statement is null, for example if it was actually
13020 a subroutine definition (which has compile-time side effects).  If not
13021 null, it will be ops directly implementing the statement, suitable to
13022 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13023 equivalent op (except for those embedded in a scope contained entirely
13024 within the statement).
13025
13026 If an error occurs in parsing or compilation, in most cases a valid op
13027 tree (most likely null) is returned anyway.  The error is reflected in
13028 the parser state, normally resulting in a single exception at the top
13029 level of parsing which covers all the compilation errors that occurred.
13030 Some compilation errors, however, will throw an exception immediately.
13031
13032 The C<flags> parameter is reserved for future use, and must always
13033 be zero.
13034
13035 =cut
13036 */
13037
13038 OP *
13039 Perl_parse_barestmt(pTHX_ U32 flags)
13040 {
13041     if (flags)
13042         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13043     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13044 }
13045
13046 /*
13047 =for apidoc parse_label
13048
13049 Parse a single label, possibly optional, of the type that may prefix a
13050 Perl statement.  It is up to the caller to ensure that the dynamic parser
13051 state (L</PL_parser> et al) is correctly set to reflect the source of
13052 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13053 label is optional, otherwise it is mandatory.
13054
13055 The name of the label is returned in the form of a fresh scalar.  If an
13056 optional label is absent, a null pointer is returned.
13057
13058 If an error occurs in parsing, which can only occur if the label is
13059 mandatory, a valid label is returned anyway.  The error is reflected in
13060 the parser state, normally resulting in a single exception at the top
13061 level of parsing which covers all the compilation errors that occurred.
13062
13063 =cut
13064 */
13065
13066 SV *
13067 Perl_parse_label(pTHX_ U32 flags)
13068 {
13069     if (flags & ~PARSE_OPTIONAL)
13070         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13071     if (PL_nexttoke) {
13072         PL_parser->yychar = yylex();
13073         if (PL_parser->yychar == LABEL) {
13074             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13075             PL_parser->yychar = YYEMPTY;
13076             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13077             op_free(pl_yylval.opval);
13078             return labelsv;
13079         } else {
13080             yyunlex();
13081             goto no_label;
13082         }
13083     } else {
13084         char *s, *t;
13085         STRLEN wlen, bufptr_pos;
13086         lex_read_space(0);
13087         t = s = PL_bufptr;
13088         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13089             goto no_label;
13090         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13091         if (word_takes_any_delimiter(s, wlen))
13092             goto no_label;
13093         bufptr_pos = s - SvPVX(PL_linestr);
13094         PL_bufptr = t;
13095         lex_read_space(LEX_KEEP_PREVIOUS);
13096         t = PL_bufptr;
13097         s = SvPVX(PL_linestr) + bufptr_pos;
13098         if (t[0] == ':' && t[1] != ':') {
13099             PL_oldoldbufptr = PL_oldbufptr;
13100             PL_oldbufptr = s;
13101             PL_bufptr = t+1;
13102             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13103         } else {
13104             PL_bufptr = s;
13105             no_label:
13106             if (flags & PARSE_OPTIONAL) {
13107                 return NULL;
13108             } else {
13109                 qerror(Perl_mess(aTHX_ "Parse error"));
13110                 return newSVpvs("x");
13111             }
13112         }
13113     }
13114 }
13115
13116 /*
13117 =for apidoc parse_fullstmt
13118
13119 Parse a single complete Perl statement.  This may be a normal imperative
13120 statement or a declaration that has compile-time effect, and may include
13121 optional labels.  It is up to the caller to ensure that the dynamic
13122 parser state (L</PL_parser> et al) is correctly set to reflect the source
13123 of the code to be parsed and the lexical context for the statement.
13124
13125 The op tree representing the statement is returned.  This may be a
13126 null pointer if the statement is null, for example if it was actually
13127 a subroutine definition (which has compile-time side effects).  If not
13128 null, it will be the result of a L</newSTATEOP> call, normally including
13129 a C<nextstate> or equivalent op.
13130
13131 If an error occurs in parsing or compilation, in most cases a valid op
13132 tree (most likely null) is returned anyway.  The error is reflected in
13133 the parser state, normally resulting in a single exception at the top
13134 level of parsing which covers all the compilation errors that occurred.
13135 Some compilation errors, however, will throw an exception immediately.
13136
13137 The C<flags> parameter is reserved for future use, and must always
13138 be zero.
13139
13140 =cut
13141 */
13142
13143 OP *
13144 Perl_parse_fullstmt(pTHX_ U32 flags)
13145 {
13146     if (flags)
13147         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13148     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13149 }
13150
13151 /*
13152 =for apidoc parse_stmtseq
13153
13154 Parse a sequence of zero or more Perl statements.  These may be normal
13155 imperative statements, including optional labels, or declarations
13156 that have compile-time effect, or any mixture thereof.  The statement
13157 sequence ends when a closing brace or end-of-file is encountered in a
13158 place where a new statement could have validly started.  It is up to
13159 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13160 is correctly set to reflect the source of the code to be parsed and the
13161 lexical context for the statements.
13162
13163 The op tree representing the statement sequence is returned.  This may
13164 be a null pointer if the statements were all null, for example if there
13165 were no statements or if there were only subroutine definitions (which
13166 have compile-time side effects).  If not null, it will be a C<lineseq>
13167 list, normally including C<nextstate> or equivalent ops.
13168
13169 If an error occurs in parsing or compilation, in most cases a valid op
13170 tree is returned anyway.  The error is reflected in the parser state,
13171 normally resulting in a single exception at the top level of parsing
13172 which covers all the compilation errors that occurred.  Some compilation
13173 errors, however, will throw an exception immediately.
13174
13175 The C<flags> parameter is reserved for future use, and must always
13176 be zero.
13177
13178 =cut
13179 */
13180
13181 OP *
13182 Perl_parse_stmtseq(pTHX_ U32 flags)
13183 {
13184     OP *stmtseqop;
13185     I32 c;
13186     if (flags)
13187         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13188     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13189     c = lex_peek_unichar(0);
13190     if (c != -1 && c != /*{*/'}')
13191         qerror(Perl_mess(aTHX_ "Parse error"));
13192     return stmtseqop;
13193 }
13194
13195 /*
13196 =for apidoc parse_subsignature
13197
13198 Parse a subroutine signature declaration. This is the contents of the
13199 parentheses following a named or anonymous subroutine declaration when the
13200 C<signatures> feature is enabled. Note that this function neither expects
13201 nor consumes the opening and closing parentheses around the signature; it
13202 is the caller's job to handle these.
13203
13204 This function must only be called during parsing of a subroutine; after
13205 L</start_subparse> has been called. It might allocate lexical variables on
13206 the pad for the current subroutine.
13207
13208 The op tree to unpack the arguments from the stack at runtime is returned.
13209 This op tree should appear at the beginning of the compiled function. The
13210 caller may wish to use L</op_append_list> to build their function body
13211 after it, or splice it together with the body before calling L</newATTRSUB>.
13212
13213 The C<flags> parameter is reserved for future use, and must always
13214 be zero.
13215
13216 =cut
13217 */
13218
13219 OP *
13220 Perl_parse_subsignature(pTHX_ U32 flags)
13221 {
13222     if (flags)
13223         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13224     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13225 }
13226
13227 /*
13228  * ex: set ts=8 sts=4 sw=4 et:
13229  */