toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmnU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "invlist_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g, h)  \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char ident_too_long[] = "Identifier too long";
  97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
 151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
 152    can also return it.
 153
 154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
 155    other token values are 258 or higher (see perly.h), so -1 should be
 156    a safe value here.
 157 */
 158 #define YYL_RETRY (-1)
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #include "keywords.h"
 177
 178 /* CLINE is a macro that ensures PL_copline has a sane value */
 179
 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 181
 182 /*
 183  * Convenience functions to return different tokens and prime the
 184  * lexer for the next token.  They all take an argument.
 185  *
 186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 187  * OPERATOR     : generic operator
 188  * AOPERATOR    : assignment operator
 189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 191  * PREREF       : *EXPR where EXPR is not a simple identifier
 192  * TERM         : expression term
 193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 194  * LOOPX        : loop exiting command (goto, last, dump, etc)
 195  * FTST         : file test operator
 196  * FUN0         : zero-argument function
 197  * FUN0OP       : zero-argument function, with its op created in this file
 198  * FUN1         : not used, except for not, which isn't a UNIOP
 199  * BOop         : bitwise or or xor
 200  * BAop         : bitwise and
 201  * BCop         : bitwise complement
 202  * SHop         : shift operator
 203  * PWop         : power operator
 204  * PMop         : pattern-matching operator
 205  * Aop          : addition-level operator
 206  * AopNOASSIGN  : addition-level operator that is never part of .=
 207  * Mop          : multiplication-level operator
 208  * ChEop        : chaining equality-testing operator
 209  * NCEop        : non-chaining comparison operator at equality precedence
 210  * ChRop        : chaining relational operator <= != gt
 211  * NCRop        : non-chaining relational operator isa
 212  *
 213  * Also see LOP and lop() below.
 214  */
 215
 216 #ifdef DEBUGGING /* Serve -DT. */
 217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 218 #else
 219 #   define REPORT(retval) (retval)
 220 #endif
 221
 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 231                          pl_yylval.ival=f, \
 232                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 233                          REPORT((int)LOOPEX))
 234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 241                        REPORT(PERLY_TILDE)
 242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
 252
 253 /* This bit of chicanery makes a unary function followed by
 254  * a parenthesis into a function with one argument, highest precedence.
 255  * The UNIDOR macro is for unary functions that can be followed by the //
 256  * operator (such as C<shift // 0>).
 257  */
 258 #define UNI3(f,x,have_x) { \
 259         pl_yylval.ival = f; \
 260         if (have_x) PL_expect = x; \
 261         PL_bufptr = s; \
 262         PL_last_uni = PL_oldbufptr; \
 263         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 264         if (*s == '(') \
 265             return REPORT( (int)FUNC1 ); \
 266         s = skipspace(s); \
 267         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 268         }
 269 #define UNI(f)    UNI3(f,XTERM,1)
 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 271 #define UNIPROTO(f,optional) { \
 272         if (optional) PL_last_uni = PL_oldbufptr; \
 273         OPERATOR(f); \
 274         }
 275
 276 #define UNIBRACK(f) UNI3(f,0,0)
 277
 278 /* return has special case parsing.
 279  *
 280  * List operators have low precedence. Functions have high precedence.
 281  * Every built in, *except return*, if written with () around its arguments, is
 282  * parsed as a function. Hence every other list built in:
 283  *
 284  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
 285  * 429
 286  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
 287  * 639
 288  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
 289  * Useless use of a constant (2) in void context at -e line 1.
 290  * Useless use of a constant (4) in void context at -e line 1.
 291  *
 292  * $
 293  *
 294  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
 295  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
 296  *
 297  * Whereas return:
 298  *
 299  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
 300  * 2
 301  * 4
 302  * 9
 303  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
 304  * Useless use of a constant (2) in void context at -e line 1.
 305  * Useless use of a constant (4) in void context at -e line 1.
 306  * 9
 307  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
 308  * Useless use of a constant (2) in void context at -e line 1.
 309  * Useless use of a constant (4) in void context at -e line 1.
 310  * 9
 311  * $
 312  *
 313  * and:
 314  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
 315  * 2
 316  * 4
 317  * 6
 318  *
 319  * This last example is what we expect, but it's clearly inconsistent with how
 320  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
 321  * followed.
 322  *
 323  *
 324  * Perl 3 attempted to be consistent:
 325  *
 326  *   The rules are more consistent about where parens are needed and
 327  *   where they are not.  In particular, unary operators and list operators now
 328  *   behave like functions if they're called like functions.
 329  *
 330  * However, the behaviour for return was reverted to the "old" parsing with
 331  * patches 9-12:
 332  *
 333  *   The construct
 334  *   return (1,2,3);
 335  *   did not do what was expected, since return was swallowing the
 336  *   parens in order to consider itself a function.  The solution,
 337  *   since return never wants any trailing expression such as
 338  *   return (1,2,3) + 2;
 339  *   is to simply make return an exception to the paren-makes-a-function
 340  *   rule, and treat it the way it always was, so that it doesn't
 341  *   strip the parens.
 342  *
 343  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
 344  * LOP(OP_RETURN, XTERM);
 345  *
 346  * and constructs such as
 347  *
 348  *     return (Internals::V())[2]
 349  *
 350  * turn into syntax errors
 351  */
 352
 353 #define OLDLOP(f) \
 354         do { \
 355             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 356                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 357             pl_yylval.ival = (f); \
 358             PL_expect = XTERM; \
 359             PL_bufptr = s; \
 360             return (int)LSTOP; \
 361         } while(0)
 362
 363 #define COPLINE_INC_WITH_HERELINES                  \
 364     STMT_START {                                     \
 365         CopLINE_inc(PL_curcop);                       \
 366         if (PL_parser->herelines)                      \
 367             CopLINE(PL_curcop) += PL_parser->herelines, \
 368             PL_parser->herelines = 0;                    \
 369     } STMT_END
 370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 371  * is no sublex_push to follow. */
 372 #define COPLINE_SET_FROM_MULTI_END            \
 373     STMT_START {                               \
 374         CopLINE_set(PL_curcop, PL_multi_end);   \
 375         if (PL_multi_end != PL_multi_start)      \
 376             PL_parser->herelines = 0;             \
 377     } STMT_END
 378
 379
 380 /* A file-local structure for passing around information about subroutines and
 381  * related definable words */
 382 struct code {
 383     SV *sv;
 384     CV *cv;
 385     GV *gv, **gvp;
 386     OP *rv2cv_op;
 387     PADOFFSET off;
 388     bool lex;
 389 };
 390
 391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
 392
 393 #ifdef DEBUGGING
 394
 395 /* how to interpret the pl_yylval associated with the token */
 396 enum token_type {
 397     TOKENTYPE_NONE,
 398     TOKENTYPE_IVAL,
 399     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 400     TOKENTYPE_PVAL,
 401     TOKENTYPE_OPVAL
 402 };
 403
 404 #define DEBUG_TOKEN(Type, Name)                                         \
 405     { Name, TOKENTYPE_##Type, #Name }
 406
 407 static struct debug_tokens {
 408     const int token;
 409     enum token_type type;
 410     const char *name;
 411 } const debug_tokens[] =
 412 {
 413     DEBUG_TOKEN (OPNUM, ADDOP),
 414     DEBUG_TOKEN (NONE,  ANDAND),
 415     DEBUG_TOKEN (NONE,  ANDOP),
 416     DEBUG_TOKEN (NONE,  ARROW),
 417     DEBUG_TOKEN (OPNUM, ASSIGNOP),
 418     DEBUG_TOKEN (OPNUM, BITANDOP),
 419     DEBUG_TOKEN (OPNUM, BITOROP),
 420     DEBUG_TOKEN (OPNUM, CHEQOP),
 421     DEBUG_TOKEN (OPNUM, CHRELOP),
 422     DEBUG_TOKEN (NONE,  COLONATTR),
 423     DEBUG_TOKEN (NONE,  DOLSHARP),
 424     DEBUG_TOKEN (NONE,  DORDOR),
 425     DEBUG_TOKEN (IVAL,  DOTDOT),
 426     DEBUG_TOKEN (NONE,  FORMLBRACK),
 427     DEBUG_TOKEN (NONE,  FORMRBRACK),
 428     DEBUG_TOKEN (OPNUM, FUNC),
 429     DEBUG_TOKEN (OPNUM, FUNC0),
 430     DEBUG_TOKEN (OPVAL, FUNC0OP),
 431     DEBUG_TOKEN (OPVAL, FUNC0SUB),
 432     DEBUG_TOKEN (OPNUM, FUNC1),
 433     DEBUG_TOKEN (NONE,  HASHBRACK),
 434     DEBUG_TOKEN (IVAL,  KW_CATCH),
 435     DEBUG_TOKEN (IVAL,  KW_CONTINUE),
 436     DEBUG_TOKEN (IVAL,  KW_DEFAULT),
 437     DEBUG_TOKEN (IVAL,  KW_DO),
 438     DEBUG_TOKEN (IVAL,  KW_ELSE),
 439     DEBUG_TOKEN (IVAL,  KW_ELSIF),
 440     DEBUG_TOKEN (IVAL,  KW_GIVEN),
 441     DEBUG_TOKEN (IVAL,  KW_FOR),
 442     DEBUG_TOKEN (IVAL,  KW_FORMAT),
 443     DEBUG_TOKEN (IVAL,  KW_IF),
 444     DEBUG_TOKEN (IVAL,  KW_LOCAL),
 445     DEBUG_TOKEN (IVAL,  KW_MY),
 446     DEBUG_TOKEN (IVAL,  KW_PACKAGE),
 447     DEBUG_TOKEN (IVAL,  KW_REQUIRE),
 448     DEBUG_TOKEN (IVAL,  KW_SUB_anon),
 449     DEBUG_TOKEN (IVAL,  KW_SUB_anon_sig),
 450     DEBUG_TOKEN (IVAL,  KW_SUB_named),
 451     DEBUG_TOKEN (IVAL,  KW_SUB_named_sig),
 452     DEBUG_TOKEN (IVAL,  KW_TRY),
 453     DEBUG_TOKEN (IVAL,  KW_USE_or_NO),
 454     DEBUG_TOKEN (IVAL,  KW_UNLESS),
 455     DEBUG_TOKEN (IVAL,  KW_UNTIL),
 456     DEBUG_TOKEN (IVAL,  KW_WHEN),
 457     DEBUG_TOKEN (IVAL,  KW_WHILE),
 458     DEBUG_TOKEN (OPVAL, LABEL),
 459     DEBUG_TOKEN (OPNUM, LOOPEX),
 460     DEBUG_TOKEN (OPNUM, LSTOP),
 461     DEBUG_TOKEN (OPVAL, LSTOPSUB),
 462     DEBUG_TOKEN (OPNUM, MATCHOP),
 463     DEBUG_TOKEN (OPVAL, METHCALL),
 464     DEBUG_TOKEN (OPVAL, METHCALL0),
 465     DEBUG_TOKEN (OPNUM, MULOP),
 466     DEBUG_TOKEN (OPNUM, NCEQOP),
 467     DEBUG_TOKEN (OPNUM, NCRELOP),
 468     DEBUG_TOKEN (NONE,  NOAMP),
 469     DEBUG_TOKEN (NONE,  NOTOP),
 470     DEBUG_TOKEN (IVAL,  OROP),
 471     DEBUG_TOKEN (NONE,  OROR),
 472     DEBUG_TOKEN (IVAL,  PERLY_AMPERSAND),
 473     DEBUG_TOKEN (IVAL,  PERLY_BRACE_CLOSE),
 474     DEBUG_TOKEN (IVAL,  PERLY_BRACE_OPEN),
 475     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_CLOSE),
 476     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_OPEN),
 477     DEBUG_TOKEN (IVAL,  PERLY_COLON),
 478     DEBUG_TOKEN (IVAL,  PERLY_COMMA),
 479     DEBUG_TOKEN (IVAL,  PERLY_DOT),
 480     DEBUG_TOKEN (IVAL,  PERLY_EQUAL_SIGN),
 481     DEBUG_TOKEN (IVAL,  PERLY_EXCLAMATION_MARK),
 482     DEBUG_TOKEN (IVAL,  PERLY_MINUS),
 483     DEBUG_TOKEN (IVAL,  PERLY_PAREN_OPEN),
 484     DEBUG_TOKEN (IVAL,  PERLY_PERCENT_SIGN),
 485     DEBUG_TOKEN (IVAL,  PERLY_PLUS),
 486     DEBUG_TOKEN (IVAL,  PERLY_QUESTION_MARK),
 487     DEBUG_TOKEN (IVAL,  PERLY_SEMICOLON),
 488     DEBUG_TOKEN (IVAL,  PERLY_SLASH),
 489     DEBUG_TOKEN (IVAL,  PERLY_SNAIL),
 490     DEBUG_TOKEN (IVAL,  PERLY_STAR),
 491     DEBUG_TOKEN (IVAL,  PERLY_TILDE),
 492     DEBUG_TOKEN (OPVAL, PLUGEXPR),
 493     DEBUG_TOKEN (OPVAL, PLUGSTMT),
 494     DEBUG_TOKEN (OPVAL, PMFUNC),
 495     DEBUG_TOKEN (NONE,  POSTJOIN),
 496     DEBUG_TOKEN (NONE,  POSTDEC),
 497     DEBUG_TOKEN (NONE,  POSTINC),
 498     DEBUG_TOKEN (OPNUM, POWOP),
 499     DEBUG_TOKEN (NONE,  PREDEC),
 500     DEBUG_TOKEN (NONE,  PREINC),
 501     DEBUG_TOKEN (OPVAL, PRIVATEREF),
 502     DEBUG_TOKEN (OPVAL, QWLIST),
 503     DEBUG_TOKEN (NONE,  REFGEN),
 504     DEBUG_TOKEN (OPNUM, SHIFTOP),
 505     DEBUG_TOKEN (NONE,  SUBLEXEND),
 506     DEBUG_TOKEN (NONE,  SUBLEXSTART),
 507     DEBUG_TOKEN (OPVAL, THING),
 508     DEBUG_TOKEN (NONE,  UMINUS),
 509     DEBUG_TOKEN (OPNUM, UNIOP),
 510     DEBUG_TOKEN (OPVAL, UNIOPSUB),
 511     DEBUG_TOKEN (OPVAL, BAREWORD),
 512     DEBUG_TOKEN (IVAL,  YADAYADA),
 513     { 0,                TOKENTYPE_NONE,         NULL }
 514 };
 515
 516 #undef DEBUG_TOKEN
 517
 518 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 519
 520 STATIC int
 521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 522 {
 523     PERL_ARGS_ASSERT_TOKEREPORT;
 524
 525     if (DEBUG_T_TEST) {
 526         const char *name = NULL;
 527         enum token_type type = TOKENTYPE_NONE;
 528         const struct debug_tokens *p;
 529         SV* const report = newSVpvs("<== ");
 530
 531         for (p = debug_tokens; p->token; p++) {
 532             if (p->token == (int)rv) {
 533                 name = p->name;
 534                 type = p->type;
 535                 break;
 536             }
 537         }
 538         if (name)
 539             Perl_sv_catpv(aTHX_ report, name);
 540         else if (isGRAPH(rv))
 541         {
 542             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 543             if ((char)rv == 'p')
 544                 sv_catpvs(report, " (pending identifier)");
 545         }
 546         else if (!rv)
 547             sv_catpvs(report, "EOF");
 548         else
 549             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 550         switch (type) {
 551         case TOKENTYPE_NONE:
 552             break;
 553         case TOKENTYPE_IVAL:
 554             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 555             break;
 556         case TOKENTYPE_OPNUM:
 557             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 558                                     PL_op_name[lvalp->ival]);
 559             break;
 560         case TOKENTYPE_PVAL:
 561             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 562             break;
 563         case TOKENTYPE_OPVAL:
 564             if (lvalp->opval) {
 565                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 566                                     PL_op_name[lvalp->opval->op_type]);
 567                 if (lvalp->opval->op_type == OP_CONST) {
 568                     Perl_sv_catpvf(aTHX_ report, " %s",
 569                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 570                 }
 571
 572             }
 573             else
 574                 sv_catpvs(report, "(opval=null)");
 575             break;
 576         }
 577         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 578     };
 579     return (int)rv;
 580 }
 581
 582
 583 /* print the buffer with suitable escapes */
 584
 585 STATIC void
 586 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 587 {
 588     SV* const tmp = newSVpvs("");
 589
 590     PERL_ARGS_ASSERT_PRINTBUF;
 591
 592     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 593     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 594     GCC_DIAG_RESTORE_STMT;
 595     SvREFCNT_dec(tmp);
 596 }
 597
 598 #endif
 599
 600 /*
 601  * S_ao
 602  *
 603  * This subroutine looks for an '=' next to the operator that has just been
 604  * parsed and turns it into an ASSIGNOP if it finds one.
 605  */
 606
 607 STATIC int
 608 S_ao(pTHX_ int toketype)
 609 {
 610     if (*PL_bufptr == '=') {
 611         PL_bufptr++;
 612
 613         switch (toketype) {
 614             case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
 615             case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
 616             case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
 617         }
 618
 619         toketype = ASSIGNOP;
 620     }
 621     return REPORT(toketype);
 622 }
 623
 624 /*
 625  * S_no_op
 626  * When Perl expects an operator and finds something else, no_op
 627  * prints the warning.  It always prints "<something> found where
 628  * operator expected.  It prints "Missing semicolon on previous line?"
 629  * if the surprise occurs at the start of the line.  "do you need to
 630  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 631  * where the compiler doesn't know if foo is a method call or a function.
 632  * It prints "Missing operator before end of line" if there's nothing
 633  * after the missing operator, or "... before <...>" if there is something
 634  * after the missing operator.
 635  *
 636  * PL_bufptr is expected to point to the start of the thing that was found,
 637  * and s after the next token or partial token.
 638  */
 639
 640 STATIC void
 641 S_no_op(pTHX_ const char *const what, char *s)
 642 {
 643     char * const oldbp = PL_bufptr;
 644     const bool is_first = (PL_oldbufptr == PL_linestart);
 645
 646     PERL_ARGS_ASSERT_NO_OP;
 647
 648     if (!s)
 649         s = oldbp;
 650     else
 651         PL_bufptr = s;
 652     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 653     if (ckWARN_d(WARN_SYNTAX)) {
 654         if (is_first)
 655             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 656                     "\t(Missing semicolon on previous line?)\n");
 657         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 658                                                            PL_bufend,
 659                                                            UTF))
 660         {
 661             const char *t;
 662             for (t = PL_oldoldbufptr;
 663                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 664                  t += UTF ? UTF8SKIP(t) : 1)
 665             {
 666                 NOOP;
 667             }
 668             if (t < PL_bufptr && isSPACE(*t))
 669                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 670                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 671                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 672         }
 673         else {
 674             assert(s >= oldbp);
 675             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 676                     "\t(Missing operator before %" UTF8f "?)\n",
 677                      UTF8fARG(UTF, s - oldbp, oldbp));
 678         }
 679     }
 680     PL_bufptr = oldbp;
 681 }
 682
 683 /*
 684  * S_missingterm
 685  * Complain about missing quote/regexp/heredoc terminator.
 686  * If it's called with NULL then it cauterizes the line buffer.
 687  * If we're in a delimited string and the delimiter is a control
 688  * character, it's reformatted into a two-char sequence like ^C.
 689  * This is fatal.
 690  */
 691
 692 STATIC void
 693 S_missingterm(pTHX_ char *s, STRLEN len)
 694 {
 695     char tmpbuf[UTF8_MAXBYTES + 1];
 696     char q;
 697     bool uni = FALSE;
 698     if (s) {
 699         char * const nl = (char *) my_memrchr(s, '\n', len);
 700         if (nl) {
 701             *nl = '\0';
 702             len = nl - s;
 703         }
 704         uni = UTF;
 705     }
 706     else if (PL_multi_close < 32) {
 707         *tmpbuf = '^';
 708         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 709         tmpbuf[2] = '\0';
 710         s = tmpbuf;
 711         len = 2;
 712     }
 713     else {
 714         if (! UTF && LIKELY(PL_multi_close < 256)) {
 715             *tmpbuf = (char)PL_multi_close;
 716             tmpbuf[1] = '\0';
 717             len = 1;
 718         }
 719         else {
 720             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 721             *end = '\0';
 722             len = end - tmpbuf;
 723             uni = TRUE;
 724         }
 725         s = tmpbuf;
 726     }
 727     q = memchr(s, '"', len) ? '\'' : '"';
 728     Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
 729                      " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
 730 }
 731
 732 #include "feature.h"
 733
 734 /*
 735  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 736  * utf16-to-utf8-reversed.
 737  */
 738
 739 #ifdef PERL_CR_FILTER
 740 static void
 741 strip_return(SV *sv)
 742 {
 743     const char *s = SvPVX_const(sv);
 744     const char * const e = s + SvCUR(sv);
 745
 746     PERL_ARGS_ASSERT_STRIP_RETURN;
 747
 748     /* outer loop optimized to do nothing if there are no CR-LFs */
 749     while (s < e) {
 750         if (*s++ == '\r' && *s == '\n') {
 751             /* hit a CR-LF, need to copy the rest */
 752             char *d = s - 1;
 753             *d++ = *s++;
 754             while (s < e) {
 755                 if (*s == '\r' && s[1] == '\n')
 756                     s++;
 757                 *d++ = *s++;
 758             }
 759             SvCUR(sv) -= s - d;
 760             return;
 761         }
 762     }
 763 }
 764
 765 STATIC I32
 766 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 767 {
 768     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 769     if (count > 0 && !maxlen)
 770         strip_return(sv);
 771     return count;
 772 }
 773 #endif
 774
 775 /*
 776 =for apidoc lex_start
 777
 778 Creates and initialises a new lexer/parser state object, supplying
 779 a context in which to lex and parse from a new source of Perl code.
 780 A pointer to the new state object is placed in L</PL_parser>.  An entry
 781 is made on the save stack so that upon unwinding, the new state object
 782 will be destroyed and the former value of L</PL_parser> will be restored.
 783 Nothing else need be done to clean up the parsing context.
 784
 785 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 786 non-null, provides a string (in SV form) containing code to be parsed.
 787 A copy of the string is made, so subsequent modification of C<line>
 788 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 789 from which code will be read to be parsed.  If both are non-null, the
 790 code in C<line> comes first and must consist of complete lines of input,
 791 and C<rsfp> supplies the remainder of the source.
 792
 793 The C<flags> parameter is reserved for future use.  Currently it is only
 794 used by perl internally, so extensions should always pass zero.
 795
 796 =cut
 797 */
 798
 799 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 800    can share filters with the current parser.
 801    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 802    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 803    destruction. This is used to handle the case of defaulting to reading the
 804    script from the standard input because no filename was given on the command
 805    line (without getting confused by situation where STDIN has been closed, so
 806    the script handle is opened on fd 0)  */
 807
 808 void
 809 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 810 {
 811     const char *s = NULL;
 812     yy_parser *parser, *oparser;
 813
 814     if (flags && flags & ~LEX_START_FLAGS)
 815         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 816
 817     /* create and initialise a parser */
 818
 819     Newxz(parser, 1, yy_parser);
 820     parser->old_parser = oparser = PL_parser;
 821     PL_parser = parser;
 822
 823     parser->stack = NULL;
 824     parser->stack_max1 = NULL;
 825     parser->ps = NULL;
 826
 827     /* on scope exit, free this parser and restore any outer one */
 828     SAVEPARSER(parser);
 829     parser->saved_curcop = PL_curcop;
 830
 831     /* initialise lexer state */
 832
 833     parser->nexttoke = 0;
 834     parser->error_count = oparser ? oparser->error_count : 0;
 835     parser->copline = parser->preambling = NOLINE;
 836     parser->lex_state = LEX_NORMAL;
 837     parser->expect = XSTATE;
 838     parser->rsfp = rsfp;
 839     parser->recheck_utf8_validity = TRUE;
 840     parser->rsfp_filters =
 841       !(flags & LEX_START_SAME_FILTER) || !oparser
 842         ? NULL
 843         : MUTABLE_AV(SvREFCNT_inc(
 844             oparser->rsfp_filters
 845              ? oparser->rsfp_filters
 846              : (oparser->rsfp_filters = newAV())
 847           ));
 848
 849     Newx(parser->lex_brackstack, 120, char);
 850     Newx(parser->lex_casestack, 12, char);
 851     *parser->lex_casestack = '\0';
 852     Newxz(parser->lex_shared, 1, LEXSHARED);
 853
 854     if (line) {
 855         Size_t len;
 856         const U8* first_bad_char_loc;
 857
 858         s = SvPV_const(line, len);
 859
 860         if (   SvUTF8(line)
 861             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 862                                              SvCUR(line),
 863                                              &first_bad_char_loc)))
 864         {
 865             _force_out_malformed_utf8_message(first_bad_char_loc,
 866                                               (U8 *) s + SvCUR(line),
 867                                               0,
 868                                               1 /* 1 means die */ );
 869             NOT_REACHED; /* NOTREACHED */
 870         }
 871
 872         parser->linestr = flags & LEX_START_COPIED
 873                             ? SvREFCNT_inc_simple_NN(line)
 874                             : newSVpvn_flags(s, len, SvUTF8(line));
 875         if (!rsfp)
 876             sv_catpvs(parser->linestr, "\n;");
 877     } else {
 878         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 879     }
 880
 881     parser->oldoldbufptr =
 882         parser->oldbufptr =
 883         parser->bufptr =
 884         parser->linestart = SvPVX(parser->linestr);
 885     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 886     parser->last_lop = parser->last_uni = NULL;
 887
 888     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 889                                                         |LEX_DONT_CLOSE_RSFP));
 890     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 891                                                         |LEX_DONT_CLOSE_RSFP));
 892
 893     parser->in_pod = parser->filtered = 0;
 894 }
 895
 896
 897 /* delete a parser object */
 898
 899 void
 900 Perl_parser_free(pTHX_  const yy_parser *parser)
 901 {
 902     PERL_ARGS_ASSERT_PARSER_FREE;
 903
 904     PL_curcop = parser->saved_curcop;
 905     SvREFCNT_dec(parser->linestr);
 906
 907     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 908         PerlIO_clearerr(parser->rsfp);
 909     else if (parser->rsfp && (!parser->old_parser
 910           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 911         PerlIO_close(parser->rsfp);
 912     SvREFCNT_dec(parser->rsfp_filters);
 913     SvREFCNT_dec(parser->lex_stuff);
 914     SvREFCNT_dec(parser->lex_sub_repl);
 915
 916     Safefree(parser->lex_brackstack);
 917     Safefree(parser->lex_casestack);
 918     Safefree(parser->lex_shared);
 919     PL_parser = parser->old_parser;
 920     Safefree(parser);
 921 }
 922
 923 void
 924 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 925 {
 926     I32 nexttoke = parser->nexttoke;
 927     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 928     while (nexttoke--) {
 929         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 930          && parser->nextval[nexttoke].opval
 931          && parser->nextval[nexttoke].opval->op_slabbed
 932          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 933             op_free(parser->nextval[nexttoke].opval);
 934             parser->nextval[nexttoke].opval = NULL;
 935         }
 936     }
 937 }
 938
 939
 940 /*
 941 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
 942
 943 Buffer scalar containing the chunk currently under consideration of the
 944 text currently being lexed.  This is always a plain string scalar (for
 945 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 946 normal scalar means; instead refer to the buffer directly by the pointer
 947 variables described below.
 948
 949 The lexer maintains various C<char*> pointers to things in the
 950 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 951 reallocated, all of these pointers must be updated.  Don't attempt to
 952 do this manually, but rather use L</lex_grow_linestr> if you need to
 953 reallocate the buffer.
 954
 955 The content of the text chunk in the buffer is commonly exactly one
 956 complete line of input, up to and including a newline terminator,
 957 but there are situations where it is otherwise.  The octets of the
 958 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 959 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 960 flag on this scalar, which may disagree with it.
 961
 962 For direct examination of the buffer, the variable
 963 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 964 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 965 of these pointers is usually preferable to examination of the scalar
 966 through normal scalar means.
 967
 968 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
 969
 970 Direct pointer to the end of the chunk of text currently being lexed, the
 971 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 972 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 973 always located at the end of the buffer, and does not count as part of
 974 the buffer's contents.
 975
 976 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
 977
 978 Points to the current position of lexing inside the lexer buffer.
 979 Characters around this point may be freely examined, within
 980 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 981 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 982 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 983
 984 Lexing code (whether in the Perl core or not) moves this pointer past
 985 the characters that it consumes.  It is also expected to perform some
 986 bookkeeping whenever a newline character is consumed.  This movement
 987 can be more conveniently performed by the function L</lex_read_to>,
 988 which handles newlines appropriately.
 989
 990 Interpretation of the buffer's octets can be abstracted out by
 991 using the slightly higher-level functions L</lex_peek_unichar> and
 992 L</lex_read_unichar>.
 993
 994 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
 995
 996 Points to the start of the current line inside the lexer buffer.
 997 This is useful for indicating at which column an error occurred, and
 998 not much else.  This must be updated by any lexing code that consumes
 999 a newline; the function L</lex_read_to> handles this detail.
1000
1001 =cut
1002 */
1003
1004 /*
1005 =for apidoc lex_bufutf8
1006
1007 Indicates whether the octets in the lexer buffer
1008 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1009 of Unicode characters.  If not, they should be interpreted as Latin-1
1010 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1011
1012 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1013 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1014 encoding.
1015
1016 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1017 is significant, but not the whole story regarding the input character
1018 encoding.  Normally, when a file is being read, the scalar contains octets
1019 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1020 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1021 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1022 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1023 is in effect.  This logic may change in the future; use this function
1024 instead of implementing the logic yourself.
1025
1026 =cut
1027 */
1028
1029 bool
1030 Perl_lex_bufutf8(pTHX)
1031 {
1032     return UTF;
1033 }
1034
1035 /*
1036 =for apidoc lex_grow_linestr
1037
1038 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1039 at least C<len> octets (including terminating C<NUL>).  Returns a
1040 pointer to the reallocated buffer.  This is necessary before making
1041 any direct modification of the buffer that would increase its length.
1042 L</lex_stuff_pvn> provides a more convenient way to insert text into
1043 the buffer.
1044
1045 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1046 this function updates all of the lexer's variables that point directly
1047 into the buffer.
1048
1049 =cut
1050 */
1051
1052 char *
1053 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1054 {
1055     SV *linestr;
1056     char *buf;
1057     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1058     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1059     bool current;
1060
1061     linestr = PL_parser->linestr;
1062     buf = SvPVX(linestr);
1063     if (len <= SvLEN(linestr))
1064         return buf;
1065
1066     /* Is the lex_shared linestr SV the same as the current linestr SV?
1067      * Only in this case does re_eval_start need adjusting, since it
1068      * points within lex_shared->ls_linestr's buffer */
1069     current = (   !PL_parser->lex_shared->ls_linestr
1070                || linestr == PL_parser->lex_shared->ls_linestr);
1071
1072     bufend_pos = PL_parser->bufend - buf;
1073     bufptr_pos = PL_parser->bufptr - buf;
1074     oldbufptr_pos = PL_parser->oldbufptr - buf;
1075     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1076     linestart_pos = PL_parser->linestart - buf;
1077     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1078     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1079     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1080                             PL_parser->lex_shared->re_eval_start - buf : 0;
1081
1082     buf = sv_grow(linestr, len);
1083
1084     PL_parser->bufend = buf + bufend_pos;
1085     PL_parser->bufptr = buf + bufptr_pos;
1086     PL_parser->oldbufptr = buf + oldbufptr_pos;
1087     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1088     PL_parser->linestart = buf + linestart_pos;
1089     if (PL_parser->last_uni)
1090         PL_parser->last_uni = buf + last_uni_pos;
1091     if (PL_parser->last_lop)
1092         PL_parser->last_lop = buf + last_lop_pos;
1093     if (current && PL_parser->lex_shared->re_eval_start)
1094         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1095     return buf;
1096 }
1097
1098 /*
1099 =for apidoc lex_stuff_pvn
1100
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary.  This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1108
1109 The string to be inserted is represented by C<len> octets starting
1110 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1111 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1112 The characters are recoded for the lexer buffer, according to how the
1113 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1114 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1115 function is more convenient.
1116
1117 =for apidoc Amnh||LEX_STUFF_UTF8
1118
1119 =cut
1120 */
1121
1122 void
1123 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1124 {
1125     char *bufptr;
1126     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1127     if (flags & ~(LEX_STUFF_UTF8))
1128         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1129     if (UTF) {
1130         if (flags & LEX_STUFF_UTF8) {
1131             goto plain_copy;
1132         } else {
1133             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1134                                                        (U8 *) pv + len);
1135             const char *p, *e = pv+len;;
1136             if (!highhalf)
1137                 goto plain_copy;
1138             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1139             bufptr = PL_parser->bufptr;
1140             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1141             SvCUR_set(PL_parser->linestr,
1142                 SvCUR(PL_parser->linestr) + len+highhalf);
1143             PL_parser->bufend += len+highhalf;
1144             for (p = pv; p != e; p++) {
1145                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1146             }
1147         }
1148     } else {
1149         if (flags & LEX_STUFF_UTF8) {
1150             STRLEN highhalf = 0;
1151             const char *p, *e = pv+len;
1152             for (p = pv; p != e; p++) {
1153                 U8 c = (U8)*p;
1154                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1155                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1156                                 "non-Latin-1 character into Latin-1 input");
1157                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1158                     p++;
1159                     highhalf++;
1160                 } else assert(UTF8_IS_INVARIANT(c));
1161             }
1162             if (!highhalf)
1163                 goto plain_copy;
1164             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1165             bufptr = PL_parser->bufptr;
1166             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1167             SvCUR_set(PL_parser->linestr,
1168                 SvCUR(PL_parser->linestr) + len-highhalf);
1169             PL_parser->bufend += len-highhalf;
1170             p = pv;
1171             while (p < e) {
1172                 if (UTF8_IS_INVARIANT(*p)) {
1173                     *bufptr++ = *p;
1174                     p++;
1175                 }
1176                 else {
1177                     assert(p < e -1 );
1178                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1179                     p += 2;
1180                 }
1181             }
1182         } else {
1183           plain_copy:
1184             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1185             bufptr = PL_parser->bufptr;
1186             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1187             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1188             PL_parser->bufend += len;
1189             Copy(pv, bufptr, len, char);
1190         }
1191     }
1192 }
1193
1194 /*
1195 =for apidoc lex_stuff_pv
1196
1197 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1198 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1199 reallocating the buffer if necessary.  This means that lexing code that
1200 runs later will see the characters as if they had appeared in the input.
1201 It is not recommended to do this as part of normal parsing, and most
1202 uses of this facility run the risk of the inserted characters being
1203 interpreted in an unintended manner.
1204
1205 The string to be inserted is represented by octets starting at C<pv>
1206 and continuing to the first nul.  These octets are interpreted as either
1207 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1208 in C<flags>.  The characters are recoded for the lexer buffer, according
1209 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1210 If it is not convenient to nul-terminate a string to be inserted, the
1211 L</lex_stuff_pvn> function is more appropriate.
1212
1213 =cut
1214 */
1215
1216 void
1217 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1218 {
1219     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1220     lex_stuff_pvn(pv, strlen(pv), flags);
1221 }
1222
1223 /*
1224 =for apidoc lex_stuff_sv
1225
1226 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1227 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1228 reallocating the buffer if necessary.  This means that lexing code that
1229 runs later will see the characters as if they had appeared in the input.
1230 It is not recommended to do this as part of normal parsing, and most
1231 uses of this facility run the risk of the inserted characters being
1232 interpreted in an unintended manner.
1233
1234 The string to be inserted is the string value of C<sv>.  The characters
1235 are recoded for the lexer buffer, according to how the buffer is currently
1236 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1237 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1238 need to construct a scalar.
1239
1240 =cut
1241 */
1242
1243 void
1244 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1245 {
1246     char *pv;
1247     STRLEN len;
1248     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1249     if (flags)
1250         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1251     pv = SvPV(sv, len);
1252     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1253 }
1254
1255 /*
1256 =for apidoc lex_unstuff
1257
1258 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1259 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1260 This hides the discarded text from any lexing code that runs later,
1261 as if the text had never appeared.
1262
1263 This is not the normal way to consume lexed text.  For that, use
1264 L</lex_read_to>.
1265
1266 =cut
1267 */
1268
1269 void
1270 Perl_lex_unstuff(pTHX_ char *ptr)
1271 {
1272     char *buf, *bufend;
1273     STRLEN unstuff_len;
1274     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1275     buf = PL_parser->bufptr;
1276     if (ptr < buf)
1277         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1278     if (ptr == buf)
1279         return;
1280     bufend = PL_parser->bufend;
1281     if (ptr > bufend)
1282         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1283     unstuff_len = ptr - buf;
1284     Move(ptr, buf, bufend+1-ptr, char);
1285     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1286     PL_parser->bufend = bufend - unstuff_len;
1287 }
1288
1289 /*
1290 =for apidoc lex_read_to
1291
1292 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1293 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1294 performing the correct bookkeeping whenever a newline character is passed.
1295 This is the normal way to consume lexed text.
1296
1297 Interpretation of the buffer's octets can be abstracted out by
1298 using the slightly higher-level functions L</lex_peek_unichar> and
1299 L</lex_read_unichar>.
1300
1301 =cut
1302 */
1303
1304 void
1305 Perl_lex_read_to(pTHX_ char *ptr)
1306 {
1307     char *s;
1308     PERL_ARGS_ASSERT_LEX_READ_TO;
1309     s = PL_parser->bufptr;
1310     if (ptr < s || ptr > PL_parser->bufend)
1311         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1312     for (; s != ptr; s++)
1313         if (*s == '\n') {
1314             COPLINE_INC_WITH_HERELINES;
1315             PL_parser->linestart = s+1;
1316         }
1317     PL_parser->bufptr = ptr;
1318 }
1319
1320 /*
1321 =for apidoc lex_discard_to
1322
1323 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1324 up to C<ptr>.  The remaining content of the buffer will be moved, and
1325 all pointers into the buffer updated appropriately.  C<ptr> must not
1326 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1327 it is not permitted to discard text that has yet to be lexed.
1328
1329 Normally it is not necessarily to do this directly, because it suffices to
1330 use the implicit discarding behaviour of L</lex_next_chunk> and things
1331 based on it.  However, if a token stretches across multiple lines,
1332 and the lexing code has kept multiple lines of text in the buffer for
1333 that purpose, then after completion of the token it would be wise to
1334 explicitly discard the now-unneeded earlier lines, to avoid future
1335 multi-line tokens growing the buffer without bound.
1336
1337 =cut
1338 */
1339
1340 void
1341 Perl_lex_discard_to(pTHX_ char *ptr)
1342 {
1343     char *buf;
1344     STRLEN discard_len;
1345     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1346     buf = SvPVX(PL_parser->linestr);
1347     if (ptr < buf)
1348         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1349     if (ptr == buf)
1350         return;
1351     if (ptr > PL_parser->bufptr)
1352         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1353     discard_len = ptr - buf;
1354     if (PL_parser->oldbufptr < ptr)
1355         PL_parser->oldbufptr = ptr;
1356     if (PL_parser->oldoldbufptr < ptr)
1357         PL_parser->oldoldbufptr = ptr;
1358     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1359         PL_parser->last_uni = NULL;
1360     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1361         PL_parser->last_lop = NULL;
1362     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1363     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1364     PL_parser->bufend -= discard_len;
1365     PL_parser->bufptr -= discard_len;
1366     PL_parser->oldbufptr -= discard_len;
1367     PL_parser->oldoldbufptr -= discard_len;
1368     if (PL_parser->last_uni)
1369         PL_parser->last_uni -= discard_len;
1370     if (PL_parser->last_lop)
1371         PL_parser->last_lop -= discard_len;
1372 }
1373
1374 void
1375 Perl_notify_parser_that_changed_to_utf8(pTHX)
1376 {
1377     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1378      * off to on.  At compile time, this has the effect of entering a 'use
1379      * utf8' section.  This means that any input was not previously checked for
1380      * UTF-8 (because it was off), but now we do need to check it, or our
1381      * assumptions about the input being sane could be wrong, and we could
1382      * segfault.  This routine just sets a flag so that the next time we look
1383      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1384      * proper phase, there may not be a parser object, but if there is, setting
1385      * the flag is harmless */
1386
1387     if (PL_parser) {
1388         PL_parser->recheck_utf8_validity = TRUE;
1389     }
1390 }
1391
1392 /*
1393 =for apidoc lex_next_chunk
1394
1395 Reads in the next chunk of text to be lexed, appending it to
1396 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1397 looked to the end of the current chunk and wants to know more.  It is
1398 usual, but not necessary, for lexing to have consumed the entirety of
1399 the current chunk at this time.
1400
1401 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1402 chunk (i.e., the current chunk has been entirely consumed), normally the
1403 current chunk will be discarded at the same time that the new chunk is
1404 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1405 will not be discarded.  If the current chunk has not been entirely
1406 consumed, then it will not be discarded regardless of the flag.
1407
1408 Returns true if some new text was added to the buffer, or false if the
1409 buffer has reached the end of the input text.
1410
1411 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1412
1413 =cut
1414 */
1415
1416 #define LEX_FAKE_EOF 0x80000000
1417 #define LEX_NO_TERM  0x40000000 /* here-doc */
1418
1419 bool
1420 Perl_lex_next_chunk(pTHX_ U32 flags)
1421 {
1422     SV *linestr;
1423     char *buf;
1424     STRLEN old_bufend_pos, new_bufend_pos;
1425     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1426     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1427     bool got_some_for_debugger = 0;
1428     bool got_some;
1429
1430     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1431         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1432     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1433         return FALSE;
1434     linestr = PL_parser->linestr;
1435     buf = SvPVX(linestr);
1436     if (!(flags & LEX_KEEP_PREVIOUS)
1437           && PL_parser->bufptr == PL_parser->bufend)
1438     {
1439         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1440         linestart_pos = 0;
1441         if (PL_parser->last_uni != PL_parser->bufend)
1442             PL_parser->last_uni = NULL;
1443         if (PL_parser->last_lop != PL_parser->bufend)
1444             PL_parser->last_lop = NULL;
1445         last_uni_pos = last_lop_pos = 0;
1446         *buf = 0;
1447         SvCUR_set(linestr, 0);
1448     } else {
1449         old_bufend_pos = PL_parser->bufend - buf;
1450         bufptr_pos = PL_parser->bufptr - buf;
1451         oldbufptr_pos = PL_parser->oldbufptr - buf;
1452         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1453         linestart_pos = PL_parser->linestart - buf;
1454         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1455         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1456     }
1457     if (flags & LEX_FAKE_EOF) {
1458         goto eof;
1459     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1460         got_some = 0;
1461     } else if (filter_gets(linestr, old_bufend_pos)) {
1462         got_some = 1;
1463         got_some_for_debugger = 1;
1464     } else if (flags & LEX_NO_TERM) {
1465         got_some = 0;
1466     } else {
1467         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1468             SvPVCLEAR(linestr);
1469         eof:
1470         /* End of real input.  Close filehandle (unless it was STDIN),
1471          * then add implicit termination.
1472          */
1473         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1474             PerlIO_clearerr(PL_parser->rsfp);
1475         else if (PL_parser->rsfp)
1476             (void)PerlIO_close(PL_parser->rsfp);
1477         PL_parser->rsfp = NULL;
1478         PL_parser->in_pod = PL_parser->filtered = 0;
1479         if (!PL_in_eval && PL_minus_p) {
1480             sv_catpvs(linestr,
1481                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1482             PL_minus_n = PL_minus_p = 0;
1483         } else if (!PL_in_eval && PL_minus_n) {
1484             sv_catpvs(linestr, /*{*/";}");
1485             PL_minus_n = 0;
1486         } else
1487             sv_catpvs(linestr, ";");
1488         got_some = 1;
1489     }
1490     buf = SvPVX(linestr);
1491     new_bufend_pos = SvCUR(linestr);
1492     PL_parser->bufend = buf + new_bufend_pos;
1493     PL_parser->bufptr = buf + bufptr_pos;
1494
1495     if (UTF) {
1496         const U8* first_bad_char_loc;
1497         if (UNLIKELY(! is_utf8_string_loc(
1498                             (U8 *) PL_parser->bufptr,
1499                                    PL_parser->bufend - PL_parser->bufptr,
1500                                    &first_bad_char_loc)))
1501         {
1502             _force_out_malformed_utf8_message(first_bad_char_loc,
1503                                               (U8 *) PL_parser->bufend,
1504                                               0,
1505                                               1 /* 1 means die */ );
1506             NOT_REACHED; /* NOTREACHED */
1507         }
1508     }
1509
1510     PL_parser->oldbufptr = buf + oldbufptr_pos;
1511     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1512     PL_parser->linestart = buf + linestart_pos;
1513     if (PL_parser->last_uni)
1514         PL_parser->last_uni = buf + last_uni_pos;
1515     if (PL_parser->last_lop)
1516         PL_parser->last_lop = buf + last_lop_pos;
1517     if (PL_parser->preambling != NOLINE) {
1518         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1519         PL_parser->preambling = NOLINE;
1520     }
1521     if (   got_some_for_debugger
1522         && PERLDB_LINE_OR_SAVESRC
1523         && PL_curstash != PL_debstash)
1524     {
1525         /* debugger active and we're not compiling the debugger code,
1526          * so store the line into the debugger's array of lines
1527          */
1528         update_debugger_info(NULL, buf+old_bufend_pos,
1529             new_bufend_pos-old_bufend_pos);
1530     }
1531     return got_some;
1532 }
1533
1534 /*
1535 =for apidoc lex_peek_unichar
1536
1537 Looks ahead one (Unicode) character in the text currently being lexed.
1538 Returns the codepoint (unsigned integer value) of the next character,
1539 or -1 if lexing has reached the end of the input text.  To consume the
1540 peeked character, use L</lex_read_unichar>.
1541
1542 If the next character is in (or extends into) the next chunk of input
1543 text, the next chunk will be read in.  Normally the current chunk will be
1544 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1545 bit set, then the current chunk will not be discarded.
1546
1547 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1548 is encountered, an exception is generated.
1549
1550 =cut
1551 */
1552
1553 I32
1554 Perl_lex_peek_unichar(pTHX_ U32 flags)
1555 {
1556     char *s, *bufend;
1557     if (flags & ~(LEX_KEEP_PREVIOUS))
1558         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1559     s = PL_parser->bufptr;
1560     bufend = PL_parser->bufend;
1561     if (UTF) {
1562         U8 head;
1563         I32 unichar;
1564         STRLEN len, retlen;
1565         if (s == bufend) {
1566             if (!lex_next_chunk(flags))
1567                 return -1;
1568             s = PL_parser->bufptr;
1569             bufend = PL_parser->bufend;
1570         }
1571         head = (U8)*s;
1572         if (UTF8_IS_INVARIANT(head))
1573             return head;
1574         if (UTF8_IS_START(head)) {
1575             len = UTF8SKIP(&head);
1576             while ((STRLEN)(bufend-s) < len) {
1577                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1578                     break;
1579                 s = PL_parser->bufptr;
1580                 bufend = PL_parser->bufend;
1581             }
1582         }
1583         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1584         if (retlen == (STRLEN)-1) {
1585             _force_out_malformed_utf8_message((U8 *) s,
1586                                               (U8 *) bufend,
1587                                               0,
1588                                               1 /* 1 means die */ );
1589             NOT_REACHED; /* NOTREACHED */
1590         }
1591         return unichar;
1592     } else {
1593         if (s == bufend) {
1594             if (!lex_next_chunk(flags))
1595                 return -1;
1596             s = PL_parser->bufptr;
1597         }
1598         return (U8)*s;
1599     }
1600 }
1601
1602 /*
1603 =for apidoc lex_read_unichar
1604
1605 Reads the next (Unicode) character in the text currently being lexed.
1606 Returns the codepoint (unsigned integer value) of the character read,
1607 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1608 if lexing has reached the end of the input text.  To non-destructively
1609 examine the next character, use L</lex_peek_unichar> instead.
1610
1611 If the next character is in (or extends into) the next chunk of input
1612 text, the next chunk will be read in.  Normally the current chunk will be
1613 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1614 bit set, then the current chunk will not be discarded.
1615
1616 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1617 is encountered, an exception is generated.
1618
1619 =cut
1620 */
1621
1622 I32
1623 Perl_lex_read_unichar(pTHX_ U32 flags)
1624 {
1625     I32 c;
1626     if (flags & ~(LEX_KEEP_PREVIOUS))
1627         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1628     c = lex_peek_unichar(flags);
1629     if (c != -1) {
1630         if (c == '\n')
1631             COPLINE_INC_WITH_HERELINES;
1632         if (UTF)
1633             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1634         else
1635             ++(PL_parser->bufptr);
1636     }
1637     return c;
1638 }
1639
1640 /*
1641 =for apidoc lex_read_space
1642
1643 Reads optional spaces, in Perl style, in the text currently being
1644 lexed.  The spaces may include ordinary whitespace characters and
1645 Perl-style comments.  C<#line> directives are processed if encountered.
1646 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1647 at a non-space character (or the end of the input text).
1648
1649 If spaces extend into the next chunk of input text, the next chunk will
1650 be read in.  Normally the current chunk will be discarded at the same
1651 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1652 chunk will not be discarded.
1653
1654 =cut
1655 */
1656
1657 #define LEX_NO_INCLINE    0x40000000
1658 #define LEX_NO_NEXT_CHUNK 0x80000000
1659
1660 void
1661 Perl_lex_read_space(pTHX_ U32 flags)
1662 {
1663     char *s, *bufend;
1664     const bool can_incline = !(flags & LEX_NO_INCLINE);
1665     bool need_incline = 0;
1666     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1667         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1668     s = PL_parser->bufptr;
1669     bufend = PL_parser->bufend;
1670     while (1) {
1671         char c = *s;
1672         if (c == '#') {
1673             do {
1674                 c = *++s;
1675             } while (!(c == '\n' || (c == 0 && s == bufend)));
1676         } else if (c == '\n') {
1677             s++;
1678             if (can_incline) {
1679                 PL_parser->linestart = s;
1680                 if (s == bufend)
1681                     need_incline = 1;
1682                 else
1683                     incline(s, bufend);
1684             }
1685         } else if (isSPACE(c)) {
1686             s++;
1687         } else if (c == 0 && s == bufend) {
1688             bool got_more;
1689             line_t l;
1690             if (flags & LEX_NO_NEXT_CHUNK)
1691                 break;
1692             PL_parser->bufptr = s;
1693             l = CopLINE(PL_curcop);
1694             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1695             got_more = lex_next_chunk(flags);
1696             CopLINE_set(PL_curcop, l);
1697             s = PL_parser->bufptr;
1698             bufend = PL_parser->bufend;
1699             if (!got_more)
1700                 break;
1701             if (can_incline && need_incline && PL_parser->rsfp) {
1702                 incline(s, bufend);
1703                 need_incline = 0;
1704             }
1705         } else if (!c) {
1706             s++;
1707         } else {
1708             break;
1709         }
1710     }
1711     PL_parser->bufptr = s;
1712 }
1713
1714 /*
1715
1716 =for apidoc validate_proto
1717
1718 This function performs syntax checking on a prototype, C<proto>.
1719 If C<warn> is true, any illegal characters or mismatched brackets
1720 will trigger illegalproto warnings, declaring that they were
1721 detected in the prototype for C<name>.
1722
1723 The return value is C<true> if this is a valid prototype, and
1724 C<false> if it is not, regardless of whether C<warn> was C<true> or
1725 C<false>.
1726
1727 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1728
1729 =cut
1730
1731  */
1732
1733 bool
1734 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1735 {
1736     STRLEN len, origlen;
1737     char *p;
1738     bool bad_proto = FALSE;
1739     bool in_brackets = FALSE;
1740     bool after_slash = FALSE;
1741     char greedy_proto = ' ';
1742     bool proto_after_greedy_proto = FALSE;
1743     bool must_be_last = FALSE;
1744     bool underscore = FALSE;
1745     bool bad_proto_after_underscore = FALSE;
1746
1747     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1748
1749     if (!proto)
1750         return TRUE;
1751
1752     p = SvPV(proto, len);
1753     origlen = len;
1754     for (; len--; p++) {
1755         if (!isSPACE(*p)) {
1756             if (must_be_last)
1757                 proto_after_greedy_proto = TRUE;
1758             if (underscore) {
1759                 if (!memCHRs(";@%", *p))
1760                     bad_proto_after_underscore = TRUE;
1761                 underscore = FALSE;
1762             }
1763             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1764                 bad_proto = TRUE;
1765             }
1766             else {
1767                 if (*p == '[')
1768                     in_brackets = TRUE;
1769                 else if (*p == ']')
1770                     in_brackets = FALSE;
1771                 else if ((*p == '@' || *p == '%')
1772                          && !after_slash
1773                          && !in_brackets )
1774                 {
1775                     must_be_last = TRUE;
1776                     greedy_proto = *p;
1777                 }
1778                 else if (*p == '_')
1779                     underscore = TRUE;
1780             }
1781             if (*p == '\\')
1782                 after_slash = TRUE;
1783             else
1784                 after_slash = FALSE;
1785         }
1786     }
1787
1788     if (warn) {
1789         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1790         p -= origlen;
1791         p = SvUTF8(proto)
1792             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1793                              origlen, UNI_DISPLAY_ISPRINT)
1794             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1795
1796         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1797             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1798             sv_catpvs(name2, "::");
1799             sv_catsv(name2, (SV *)name);
1800             name = name2;
1801         }
1802
1803         if (proto_after_greedy_proto)
1804             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1805                         "Prototype after '%c' for %" SVf " : %s",
1806                         greedy_proto, SVfARG(name), p);
1807         if (in_brackets)
1808             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1809                         "Missing ']' in prototype for %" SVf " : %s",
1810                         SVfARG(name), p);
1811         if (bad_proto)
1812             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1813                         "Illegal character in prototype for %" SVf " : %s",
1814                         SVfARG(name), p);
1815         if (bad_proto_after_underscore)
1816             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1817                         "Illegal character after '_' in prototype for %" SVf " : %s",
1818                         SVfARG(name), p);
1819     }
1820
1821     return (! (proto_after_greedy_proto || bad_proto) );
1822 }
1823
1824 /*
1825  * S_incline
1826  * This subroutine has nothing to do with tilting, whether at windmills
1827  * or pinball tables.  Its name is short for "increment line".  It
1828  * increments the current line number in CopLINE(PL_curcop) and checks
1829  * to see whether the line starts with a comment of the form
1830  *    # line 500 "foo.pm"
1831  * If so, it sets the current line number and file to the values in the comment.
1832  */
1833
1834 STATIC void
1835 S_incline(pTHX_ const char *s, const char *end)
1836 {
1837     const char *t;
1838     const char *n;
1839     const char *e;
1840     line_t line_num;
1841     UV uv;
1842
1843     PERL_ARGS_ASSERT_INCLINE;
1844
1845     assert(end >= s);
1846
1847     COPLINE_INC_WITH_HERELINES;
1848     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1849      && s+1 == PL_bufend && *s == ';') {
1850         /* fake newline in string eval */
1851         CopLINE_dec(PL_curcop);
1852         return;
1853     }
1854     if (*s++ != '#')
1855         return;
1856     while (SPACE_OR_TAB(*s))
1857         s++;
1858     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1859         s += sizeof("line") - 1;
1860     else
1861         return;
1862     if (SPACE_OR_TAB(*s))
1863         s++;
1864     else
1865         return;
1866     while (SPACE_OR_TAB(*s))
1867         s++;
1868     if (!isDIGIT(*s))
1869         return;
1870
1871     n = s;
1872     while (isDIGIT(*s))
1873         s++;
1874     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1875         return;
1876     while (SPACE_OR_TAB(*s))
1877         s++;
1878     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1879         s++;
1880         e = t + 1;
1881     }
1882     else {
1883         t = s;
1884         while (*t && !isSPACE(*t))
1885             t++;
1886         e = t;
1887     }
1888     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1889         e++;
1890     if (*e != '\n' && *e != '\0')
1891         return;         /* false alarm */
1892
1893     if (!grok_atoUV(n, &uv, &e))
1894         return;
1895     line_num = ((line_t)uv) - 1;
1896
1897     if (t - s > 0) {
1898         const STRLEN len = t - s;
1899
1900         if (!PL_rsfp && !PL_parser->filtered) {
1901             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1902              * to *{"::_<newfilename"} */
1903             /* However, the long form of evals is only turned on by the
1904                debugger - usually they're "(eval %lu)" */
1905             GV * const cfgv = CopFILEGV(PL_curcop);
1906             if (cfgv) {
1907                 char smallbuf[128];
1908                 STRLEN tmplen2 = len;
1909                 char *tmpbuf2;
1910                 GV *gv2;
1911
1912                 if (tmplen2 + 2 <= sizeof smallbuf)
1913                     tmpbuf2 = smallbuf;
1914                 else
1915                     Newx(tmpbuf2, tmplen2 + 2, char);
1916
1917                 tmpbuf2[0] = '_';
1918                 tmpbuf2[1] = '<';
1919
1920                 memcpy(tmpbuf2 + 2, s, tmplen2);
1921                 tmplen2 += 2;
1922
1923                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1924                 if (!isGV(gv2)) {
1925                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1926                     /* adjust ${"::_<newfilename"} to store the new file name */
1927                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1928                     /* The line number may differ. If that is the case,
1929                        alias the saved lines that are in the array.
1930                        Otherwise alias the whole array. */
1931                     if (CopLINE(PL_curcop) == line_num) {
1932                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1933                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1934                     }
1935                     else if (GvAV(cfgv)) {
1936                         AV * const av = GvAV(cfgv);
1937                         const line_t start = CopLINE(PL_curcop)+1;
1938                         SSize_t items = AvFILLp(av) - start;
1939                         if (items > 0) {
1940                             AV * const av2 = GvAVn(gv2);
1941                             SV **svp = AvARRAY(av) + start;
1942                             Size_t l = line_num+1;
1943                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1944                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1945                         }
1946                     }
1947                 }
1948
1949                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1950             }
1951         }
1952         CopFILE_free(PL_curcop);
1953         CopFILE_setn(PL_curcop, s, len);
1954     }
1955     CopLINE_set(PL_curcop, line_num);
1956 }
1957
1958 STATIC void
1959 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1960 {
1961     AV *av = CopFILEAVx(PL_curcop);
1962     if (av) {
1963         SV * sv;
1964         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1965         else {
1966             sv = *av_fetch(av, 0, 1);
1967             SvUPGRADE(sv, SVt_PVMG);
1968         }
1969         if (!SvPOK(sv)) SvPVCLEAR(sv);
1970         if (orig_sv)
1971             sv_catsv(sv, orig_sv);
1972         else
1973             sv_catpvn(sv, buf, len);
1974         if (!SvIOK(sv)) {
1975             (void)SvIOK_on(sv);
1976             SvIV_set(sv, 0);
1977         }
1978         if (PL_parser->preambling == NOLINE)
1979             av_store(av, CopLINE(PL_curcop), sv);
1980     }
1981 }
1982
1983 /*
1984  * skipspace
1985  * Called to gobble the appropriate amount and type of whitespace.
1986  * Skips comments as well.
1987  * Returns the next character after the whitespace that is skipped.
1988  *
1989  * peekspace
1990  * Same thing, but look ahead without incrementing line numbers or
1991  * adjusting PL_linestart.
1992  */
1993
1994 #define skipspace(s) skipspace_flags(s, 0)
1995 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1996
1997 char *
1998 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1999 {
2000     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2001     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2002         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2003             s++;
2004     } else {
2005         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2006         PL_bufptr = s;
2007         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2008                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2009                     LEX_NO_NEXT_CHUNK : 0));
2010         s = PL_bufptr;
2011         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2012         if (PL_linestart > PL_bufptr)
2013             PL_bufptr = PL_linestart;
2014         return s;
2015     }
2016     return s;
2017 }
2018
2019 /*
2020  * S_check_uni
2021  * Check the unary operators to ensure there's no ambiguity in how they're
2022  * used.  An ambiguous piece of code would be:
2023  *     rand + 5
2024  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2025  * the +5 is its argument.
2026  */
2027
2028 STATIC void
2029 S_check_uni(pTHX)
2030 {
2031     const char *s;
2032
2033     if (PL_oldoldbufptr != PL_last_uni)
2034         return;
2035     while (isSPACE(*PL_last_uni))
2036         PL_last_uni++;
2037     s = PL_last_uni;
2038     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2039         s += UTF ? UTF8SKIP(s) : 1;
2040     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2041         return;
2042
2043     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2044                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2045                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2046 }
2047
2048 /*
2049  * LOP : macro to build a list operator.  Its behaviour has been replaced
2050  * with a subroutine, S_lop() for which LOP is just another name.
2051  */
2052
2053 #define LOP(f,x) return lop(f,x,s)
2054
2055 /*
2056  * S_lop
2057  * Build a list operator (or something that might be one).  The rules:
2058  *  - if we have a next token, then it's a list operator (no parens) for
2059  *    which the next token has already been parsed; e.g.,
2060  *       sort foo @args
2061  *       sort foo (@args)
2062  *  - if the next thing is an opening paren, then it's a function
2063  *  - else it's a list operator
2064  */
2065
2066 STATIC I32
2067 S_lop(pTHX_ I32 f, U8 x, char *s)
2068 {
2069     PERL_ARGS_ASSERT_LOP;
2070
2071     pl_yylval.ival = f;
2072     CLINE;
2073     PL_bufptr = s;
2074     PL_last_lop = PL_oldbufptr;
2075     PL_last_lop_op = (OPCODE)f;
2076     if (PL_nexttoke)
2077         goto lstop;
2078     PL_expect = x;
2079     if (*s == '(')
2080         return REPORT(FUNC);
2081     s = skipspace(s);
2082     if (*s == '(')
2083         return REPORT(FUNC);
2084     else {
2085         lstop:
2086         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2087             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2088         return REPORT(LSTOP);
2089     }
2090 }
2091
2092 /*
2093  * S_force_next
2094  * When the lexer realizes it knows the next token (for instance,
2095  * it is reordering tokens for the parser) then it can call S_force_next
2096  * to know what token to return the next time the lexer is called.  Caller
2097  * will need to set PL_nextval[] and possibly PL_expect to ensure
2098  * the lexer handles the token correctly.
2099  */
2100
2101 STATIC void
2102 S_force_next(pTHX_ I32 type)
2103 {
2104 #ifdef DEBUGGING
2105     if (DEBUG_T_TEST) {
2106         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2107         tokereport(type, &NEXTVAL_NEXTTOKE);
2108     }
2109 #endif
2110     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2111     PL_nexttype[PL_nexttoke] = type;
2112     PL_nexttoke++;
2113 }
2114
2115 /*
2116  * S_postderef
2117  *
2118  * This subroutine handles postfix deref syntax after the arrow has already
2119  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2120  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2121  * only the first, leaving yylex to find the next.
2122  */
2123
2124 static int
2125 S_postderef(pTHX_ int const funny, char const next)
2126 {
2127     assert(funny == DOLSHARP
2128         || funny == PERLY_DOLLAR
2129         || funny == PERLY_SNAIL
2130         || funny == PERLY_PERCENT_SIGN
2131         || funny == PERLY_AMPERSAND
2132         || funny == PERLY_STAR
2133     );
2134     if (next == '*') {
2135         PL_expect = XOPERATOR;
2136         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2137             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2138             PL_lex_state = LEX_INTERPEND;
2139             if (PERLY_SNAIL == funny)
2140                 force_next(POSTJOIN);
2141         }
2142         force_next(PERLY_STAR);
2143         PL_bufptr+=2;
2144     }
2145     else {
2146         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2147          && !PL_lex_brackets)
2148             PL_lex_dojoin = 2;
2149         PL_expect = XOPERATOR;
2150         PL_bufptr++;
2151     }
2152     return funny;
2153 }
2154
2155 void
2156 Perl_yyunlex(pTHX)
2157 {
2158     int yyc = PL_parser->yychar;
2159     if (yyc != YYEMPTY) {
2160         if (yyc) {
2161             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2162             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2163                 PL_lex_allbrackets--;
2164                 PL_lex_brackets--;
2165                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2166             } else if (yyc == PERLY_PAREN_OPEN) {
2167                 PL_lex_allbrackets--;
2168                 yyc |= (2<<24);
2169             }
2170             force_next(yyc);
2171         }
2172         PL_parser->yychar = YYEMPTY;
2173     }
2174 }
2175
2176 STATIC SV *
2177 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2178 {
2179     SV * const sv = newSVpvn_utf8(start, len,
2180                     ! IN_BYTES
2181                   &&  UTF
2182                   &&  len != 0
2183                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2184     return sv;
2185 }
2186
2187 /*
2188  * S_force_word
2189  * When the lexer knows the next thing is a word (for instance, it has
2190  * just seen -> and it knows that the next char is a word char, then
2191  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2192  * lookahead.
2193  *
2194  * Arguments:
2195  *   char *start : buffer position (must be within PL_linestr)
2196  *   int token   : PL_next* will be this type of bare word
2197  *                 (e.g., METHCALL0,BAREWORD)
2198  *   int check_keyword : if true, Perl checks to make sure the word isn't
2199  *       a keyword (do this if the word is a label, e.g. goto FOO)
2200  *   int allow_pack : if true, : characters will also be allowed (require,
2201  *       use, etc. do this)
2202  */
2203
2204 STATIC char *
2205 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2206 {
2207     char *s;
2208     STRLEN len;
2209
2210     PERL_ARGS_ASSERT_FORCE_WORD;
2211
2212     start = skipspace(start);
2213     s = start;
2214     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2215         || (allow_pack && *s == ':' && s[1] == ':') )
2216     {
2217         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2218         if (check_keyword) {
2219           char *s2 = PL_tokenbuf;
2220           STRLEN len2 = len;
2221           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2222             s2 += sizeof("CORE::") - 1;
2223             len2 -= sizeof("CORE::") - 1;
2224           }
2225           if (keyword(s2, len2, 0))
2226             return start;
2227         }
2228         if (token == METHCALL0) {
2229             s = skipspace(s);
2230             if (*s == '(')
2231                 PL_expect = XTERM;
2232             else {
2233                 PL_expect = XOPERATOR;
2234             }
2235         }
2236         NEXTVAL_NEXTTOKE.opval
2237             = newSVOP(OP_CONST,0,
2238                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2239         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2240         force_next(token);
2241     }
2242     return s;
2243 }
2244
2245 /*
2246  * S_force_ident
2247  * Called when the lexer wants $foo *foo &foo etc, but the program
2248  * text only contains the "foo" portion.  The first argument is a pointer
2249  * to the "foo", and the second argument is the type symbol to prefix.
2250  * Forces the next token to be a "BAREWORD".
2251  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2252  */
2253
2254 STATIC void
2255 S_force_ident(pTHX_ const char *s, int kind)
2256 {
2257     PERL_ARGS_ASSERT_FORCE_IDENT;
2258
2259     if (s[0]) {
2260         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2261         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2262                                                                 UTF ? SVf_UTF8 : 0));
2263         NEXTVAL_NEXTTOKE.opval = o;
2264         force_next(BAREWORD);
2265         if (kind) {
2266             o->op_private = OPpCONST_ENTERED;
2267             /* XXX see note in pp_entereval() for why we forgo typo
2268                warnings if the symbol must be introduced in an eval.
2269                GSAR 96-10-12 */
2270             gv_fetchpvn_flags(s, len,
2271                               (PL_in_eval ? GV_ADDMULTI
2272                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2273                               kind == PERLY_DOLLAR ? SVt_PV :
2274                               kind == PERLY_SNAIL ? SVt_PVAV :
2275                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2276                               SVt_PVGV
2277                               );
2278         }
2279     }
2280 }
2281
2282 static void
2283 S_force_ident_maybe_lex(pTHX_ char pit)
2284 {
2285     NEXTVAL_NEXTTOKE.ival = pit;
2286     force_next('p');
2287 }
2288
2289 NV
2290 Perl_str_to_version(pTHX_ SV *sv)
2291 {
2292     NV retval = 0.0;
2293     NV nshift = 1.0;
2294     STRLEN len;
2295     const char *start = SvPV_const(sv,len);
2296     const char * const end = start + len;
2297     const bool utf = cBOOL(SvUTF8(sv));
2298
2299     PERL_ARGS_ASSERT_STR_TO_VERSION;
2300
2301     while (start < end) {
2302         STRLEN skip;
2303         UV n;
2304         if (utf)
2305             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2306         else {
2307             n = *(U8*)start;
2308             skip = 1;
2309         }
2310         retval += ((NV)n)/nshift;
2311         start += skip;
2312         nshift *= 1000;
2313     }
2314     return retval;
2315 }
2316
2317 /*
2318  * S_force_version
2319  * Forces the next token to be a version number.
2320  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2321  * and if "guessing" is TRUE, then no new token is created (and the caller
2322  * must use an alternative parsing method).
2323  */
2324
2325 STATIC char *
2326 S_force_version(pTHX_ char *s, int guessing)
2327 {
2328     OP *version = NULL;
2329     char *d;
2330
2331     PERL_ARGS_ASSERT_FORCE_VERSION;
2332
2333     s = skipspace(s);
2334
2335     d = s;
2336     if (*d == 'v')
2337         d++;
2338     if (isDIGIT(*d)) {
2339         while (isDIGIT(*d) || *d == '_' || *d == '.')
2340             d++;
2341         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2342             SV *ver;
2343             s = scan_num(s, &pl_yylval);
2344             version = pl_yylval.opval;
2345             ver = cSVOPx(version)->op_sv;
2346             if (SvPOK(ver) && !SvNIOK(ver)) {
2347                 SvUPGRADE(ver, SVt_PVNV);
2348                 SvNV_set(ver, str_to_version(ver));
2349                 SvNOK_on(ver);          /* hint that it is a version */
2350             }
2351         }
2352         else if (guessing) {
2353             return s;
2354         }
2355     }
2356
2357     /* NOTE: The parser sees the package name and the VERSION swapped */
2358     NEXTVAL_NEXTTOKE.opval = version;
2359     force_next(BAREWORD);
2360
2361     return s;
2362 }
2363
2364 /*
2365  * S_force_strict_version
2366  * Forces the next token to be a version number using strict syntax rules.
2367  */
2368
2369 STATIC char *
2370 S_force_strict_version(pTHX_ char *s)
2371 {
2372     OP *version = NULL;
2373     const char *errstr = NULL;
2374
2375     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2376
2377     while (isSPACE(*s)) /* leading whitespace */
2378         s++;
2379
2380     if (is_STRICT_VERSION(s,&errstr)) {
2381         SV *ver = newSV_type(SVt_NULL);
2382         s = (char *)scan_version(s, ver, 0);
2383         version = newSVOP(OP_CONST, 0, ver);
2384     }
2385     else if ((*s != ';' && *s != '{' && *s != '}' )
2386              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2387     {
2388         PL_bufptr = s;
2389         if (errstr)
2390             yyerror(errstr); /* version required */
2391         return s;
2392     }
2393
2394     /* NOTE: The parser sees the package name and the VERSION swapped */
2395     NEXTVAL_NEXTTOKE.opval = version;
2396     force_next(BAREWORD);
2397
2398     return s;
2399 }
2400
2401 /*
2402  * S_tokeq
2403  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2404  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2405  * unchanged, and a new SV containing the modified input is returned.
2406  */
2407
2408 STATIC SV *
2409 S_tokeq(pTHX_ SV *sv)
2410 {
2411     char *s;
2412     char *send;
2413     char *d;
2414     SV *pv = sv;
2415
2416     PERL_ARGS_ASSERT_TOKEQ;
2417
2418     assert (SvPOK(sv));
2419     assert (SvLEN(sv));
2420     assert (!SvIsCOW(sv));
2421     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2422         goto finish;
2423     s = SvPVX(sv);
2424     send = SvEND(sv);
2425     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2426     while (s < send && !(*s == '\\' && s[1] == '\\'))
2427         s++;
2428     if (s == send)
2429         goto finish;
2430     d = s;
2431     if ( PL_hints & HINT_NEW_STRING ) {
2432         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2433                             SVs_TEMP | SvUTF8(sv));
2434     }
2435     while (s < send) {
2436         if (*s == '\\') {
2437             if (s + 1 < send && (s[1] == '\\'))
2438                 s++;            /* all that, just for this */
2439         }
2440         *d++ = *s++;
2441     }
2442     *d = '\0';
2443     SvCUR_set(sv, d - SvPVX_const(sv));
2444   finish:
2445     if ( PL_hints & HINT_NEW_STRING )
2446        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2447     return sv;
2448 }
2449
2450 /*
2451  * Now come three functions related to double-quote context,
2452  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2453  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2454  * interact with PL_lex_state, and create fake ( ... ) argument lists
2455  * to handle functions and concatenation.
2456  * For example,
2457  *   "foo\lbar"
2458  * is tokenised as
2459  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2460  */
2461
2462 /*
2463  * S_sublex_start
2464  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2465  *
2466  * Pattern matching will set PL_lex_op to the pattern-matching op to
2467  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2468  *
2469  * OP_CONST is easy--just make the new op and return.
2470  *
2471  * Everything else becomes a FUNC.
2472  *
2473  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2474  * had an OP_CONST.  This just sets us up for a
2475  * call to S_sublex_push().
2476  */
2477
2478 STATIC I32
2479 S_sublex_start(pTHX)
2480 {
2481     const I32 op_type = pl_yylval.ival;
2482
2483     if (op_type == OP_NULL) {
2484         pl_yylval.opval = PL_lex_op;
2485         PL_lex_op = NULL;
2486         return THING;
2487     }
2488     if (op_type == OP_CONST) {
2489         SV *sv = PL_lex_stuff;
2490         PL_lex_stuff = NULL;
2491         sv = tokeq(sv);
2492
2493         if (SvTYPE(sv) == SVt_PVIV) {
2494             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2495             STRLEN len;
2496             const char * const p = SvPV_const(sv, len);
2497             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2498             SvREFCNT_dec(sv);
2499             sv = nsv;
2500         }
2501         pl_yylval.opval = newSVOP(op_type, 0, sv);
2502         return THING;
2503     }
2504
2505     PL_parser->lex_super_state = PL_lex_state;
2506     PL_parser->lex_sub_inwhat = (U16)op_type;
2507     PL_parser->lex_sub_op = PL_lex_op;
2508     PL_parser->sub_no_recover = FALSE;
2509     PL_parser->sub_error_count = PL_error_count;
2510     PL_lex_state = LEX_INTERPPUSH;
2511
2512     PL_expect = XTERM;
2513     if (PL_lex_op) {
2514         pl_yylval.opval = PL_lex_op;
2515         PL_lex_op = NULL;
2516         return PMFUNC;
2517     }
2518     else
2519         return FUNC;
2520 }
2521
2522 /*
2523  * S_sublex_push
2524  * Create a new scope to save the lexing state.  The scope will be
2525  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2526  * to the uc, lc, etc. found before.
2527  * Sets PL_lex_state to LEX_INTERPCONCAT.
2528  */
2529
2530 STATIC I32
2531 S_sublex_push(pTHX)
2532 {
2533     LEXSHARED *shared;
2534     const bool is_heredoc = PL_multi_close == '<';
2535     ENTER;
2536
2537     PL_lex_state = PL_parser->lex_super_state;
2538     SAVEI8(PL_lex_dojoin);
2539     SAVEI32(PL_lex_brackets);
2540     SAVEI32(PL_lex_allbrackets);
2541     SAVEI32(PL_lex_formbrack);
2542     SAVEI8(PL_lex_fakeeof);
2543     SAVEI32(PL_lex_casemods);
2544     SAVEI32(PL_lex_starts);
2545     SAVEI8(PL_lex_state);
2546     SAVESPTR(PL_lex_repl);
2547     SAVEVPTR(PL_lex_inpat);
2548     SAVEI16(PL_lex_inwhat);
2549     if (is_heredoc)
2550     {
2551         SAVECOPLINE(PL_curcop);
2552         SAVEI32(PL_multi_end);
2553         SAVEI32(PL_parser->herelines);
2554         PL_parser->herelines = 0;
2555     }
2556     SAVEIV(PL_multi_close);
2557     SAVEPPTR(PL_bufptr);
2558     SAVEPPTR(PL_bufend);
2559     SAVEPPTR(PL_oldbufptr);
2560     SAVEPPTR(PL_oldoldbufptr);
2561     SAVEPPTR(PL_last_lop);
2562     SAVEPPTR(PL_last_uni);
2563     SAVEPPTR(PL_linestart);
2564     SAVESPTR(PL_linestr);
2565     SAVEGENERICPV(PL_lex_brackstack);
2566     SAVEGENERICPV(PL_lex_casestack);
2567     SAVEGENERICPV(PL_parser->lex_shared);
2568     SAVEBOOL(PL_parser->lex_re_reparsing);
2569     SAVEI32(PL_copline);
2570
2571     /* The here-doc parser needs to be able to peek into outer lexing
2572        scopes to find the body of the here-doc.  So we put PL_linestr and
2573        PL_bufptr into lex_shared, to 'share' those values.
2574      */
2575     PL_parser->lex_shared->ls_linestr = PL_linestr;
2576     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2577
2578     PL_linestr = PL_lex_stuff;
2579     PL_lex_repl = PL_parser->lex_sub_repl;
2580     PL_lex_stuff = NULL;
2581     PL_parser->lex_sub_repl = NULL;
2582
2583     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2584        set for an inner quote-like operator and then an error causes scope-
2585        popping.  We must not have a PL_lex_stuff value left dangling, as
2586        that breaks assumptions elsewhere.  See bug #123617.  */
2587     SAVEGENERICSV(PL_lex_stuff);
2588     SAVEGENERICSV(PL_parser->lex_sub_repl);
2589
2590     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2591         = SvPVX(PL_linestr);
2592     PL_bufend += SvCUR(PL_linestr);
2593     PL_last_lop = PL_last_uni = NULL;
2594     SAVEFREESV(PL_linestr);
2595     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2596
2597     PL_lex_dojoin = FALSE;
2598     PL_lex_brackets = PL_lex_formbrack = 0;
2599     PL_lex_allbrackets = 0;
2600     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2601     Newx(PL_lex_brackstack, 120, char);
2602     Newx(PL_lex_casestack, 12, char);
2603     PL_lex_casemods = 0;
2604     *PL_lex_casestack = '\0';
2605     PL_lex_starts = 0;
2606     PL_lex_state = LEX_INTERPCONCAT;
2607     if (is_heredoc)
2608         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2609     PL_copline = NOLINE;
2610
2611     Newxz(shared, 1, LEXSHARED);
2612     shared->ls_prev = PL_parser->lex_shared;
2613     PL_parser->lex_shared = shared;
2614
2615     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2616     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2617     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2618         PL_lex_inpat = PL_parser->lex_sub_op;
2619     else
2620         PL_lex_inpat = NULL;
2621
2622     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2623     PL_in_eval &= ~EVAL_RE_REPARSING;
2624
2625     return SUBLEXSTART;
2626 }
2627
2628 /*
2629  * S_sublex_done
2630  * Restores lexer state after a S_sublex_push.
2631  */
2632
2633 STATIC I32
2634 S_sublex_done(pTHX)
2635 {
2636     if (!PL_lex_starts++) {
2637         SV * const sv = newSVpvs("");
2638         if (SvUTF8(PL_linestr))
2639             SvUTF8_on(sv);
2640         PL_expect = XOPERATOR;
2641         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2642         return THING;
2643     }
2644
2645     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2646         PL_lex_state = LEX_INTERPCASEMOD;
2647         return yylex();
2648     }
2649
2650     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2651     assert(PL_lex_inwhat != OP_TRANSR);
2652     if (PL_lex_repl) {
2653         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2654         PL_linestr = PL_lex_repl;
2655         PL_lex_inpat = 0;
2656         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2657         PL_bufend += SvCUR(PL_linestr);
2658         PL_last_lop = PL_last_uni = NULL;
2659         PL_lex_dojoin = FALSE;
2660         PL_lex_brackets = 0;
2661         PL_lex_allbrackets = 0;
2662         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2663         PL_lex_casemods = 0;
2664         *PL_lex_casestack = '\0';
2665         PL_lex_starts = 0;
2666         if (SvEVALED(PL_lex_repl)) {
2667             PL_lex_state = LEX_INTERPNORMAL;
2668             PL_lex_starts++;
2669             /*  we don't clear PL_lex_repl here, so that we can check later
2670                 whether this is an evalled subst; that means we rely on the
2671                 logic to ensure sublex_done() is called again only via the
2672                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2673         }
2674         else {
2675             PL_lex_state = LEX_INTERPCONCAT;
2676             PL_lex_repl = NULL;
2677         }
2678         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2679             CopLINE(PL_curcop) +=
2680                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2681                  + PL_parser->herelines;
2682             PL_parser->herelines = 0;
2683         }
2684         return PERLY_SLASH;
2685     }
2686     else {
2687         const line_t l = CopLINE(PL_curcop);
2688         LEAVE;
2689         if (PL_parser->sub_error_count != PL_error_count) {
2690             if (PL_parser->sub_no_recover) {
2691                 yyquit();
2692                 NOT_REACHED;
2693             }
2694         }
2695         if (PL_multi_close == '<')
2696             PL_parser->herelines += l - PL_multi_end;
2697         PL_bufend = SvPVX(PL_linestr);
2698         PL_bufend += SvCUR(PL_linestr);
2699         PL_expect = XOPERATOR;
2700         return SUBLEXEND;
2701     }
2702 }
2703
2704 HV *
2705 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2706                           const STRLEN context_len, const char ** error_msg)
2707 {
2708     /* Load the official _charnames module if not already there.  The
2709      * parameters are just to give info for any error messages generated:
2710      *  char_name   a name to look up which is the reason for loading this
2711      *  context     'char_name' in the context in the input in which it appears
2712      *  context_len how many bytes 'context' occupies
2713      *  error_msg   *error_msg will be set to any error
2714      *
2715      *  Returns the ^H table if success; otherwise NULL */
2716
2717     unsigned int i;
2718     HV * table;
2719     SV **cvp;
2720     SV * res;
2721
2722     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2723
2724     /* This loop is executed 1 1/2 times.  On the first time through, if it
2725      * isn't already loaded, try loading it, and iterate just once to see if it
2726      * worked.  */
2727     for (i = 0; i < 2; i++) {
2728         table = GvHV(PL_hintgv);                 /* ^H */
2729
2730         if (    table
2731             && (PL_hints & HINT_LOCALIZE_HH)
2732             && (cvp = hv_fetchs(table, "charnames", FALSE))
2733             &&  SvOK(*cvp))
2734         {
2735             return table;   /* Quit if already loaded */
2736         }
2737
2738         if (i == 0) {
2739             Perl_load_module(aTHX_
2740                 0,
2741                 newSVpvs("_charnames"),
2742
2743                 /* version parameter; no need to specify it, as if we get too early
2744                 * a version, will fail anyway, not being able to find 'charnames'
2745                 * */
2746                 NULL,
2747                 newSVpvs(":full"),
2748                 newSVpvs(":short"),
2749                 NULL);
2750         }
2751     }
2752
2753     /* Here, it failed; new_constant will give appropriate error messages */
2754     *error_msg = NULL;
2755     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2756                         context, context_len, error_msg);
2757     SvREFCNT_dec(res);
2758
2759     return NULL;
2760 }
2761
2762 STATIC SV*
2763 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2764 {
2765     /* This justs wraps get_and_check_backslash_N_name() to output any error
2766      * message it returns. */
2767
2768     const char * error_msg = NULL;
2769     SV * result;
2770
2771     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2772
2773     /* charnames doesn't work well if there have been errors found */
2774     if (PL_error_count > 0) {
2775         return NULL;
2776     }
2777
2778     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2779
2780     if (error_msg) {
2781         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2782     }
2783
2784     return result;
2785 }
2786
2787 SV*
2788 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2789                                           const char* e,
2790                                           const bool is_utf8,
2791                                           const char ** error_msg)
2792 {
2793     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2794      * interior, hence to the "}".  Finds what the name resolves to, returning
2795      * an SV* containing it; NULL if no valid one found.
2796      *
2797      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2798      * doesn't have to be. */
2799
2800     SV* char_name;
2801     SV* res;
2802     HV * table;
2803     SV **cvp;
2804     SV *cv;
2805     SV *rv;
2806     HV *stash;
2807
2808     /* Points to the beginning of the \N{... so that any messages include the
2809      * context of what's failing*/
2810     const char* context = s - 3;
2811     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2812
2813
2814     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2815
2816     assert(e >= s);
2817     assert(s > (char *) 3);
2818
2819     while (s < e && isBLANK(*s)) {
2820         s++;
2821     }
2822
2823     while (s < e && isBLANK(*(e - 1))) {
2824         e--;
2825     }
2826
2827     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2828
2829     if (!SvCUR(char_name)) {
2830         SvREFCNT_dec_NN(char_name);
2831         /* diag_listed_as: Unknown charname '%s' */
2832         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2833         return NULL;
2834     }
2835
2836     /* Autoload the charnames module */
2837
2838     table = load_charnames(char_name, context, context_len, error_msg);
2839     if (table == NULL) {
2840         return NULL;
2841     }
2842
2843     *error_msg = NULL;
2844     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2845                         context, context_len, error_msg);
2846     if (*error_msg) {
2847         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2848
2849         SvREFCNT_dec(res);
2850         return NULL;
2851     }
2852
2853     /* See if the charnames handler is the Perl core's, and if so, we can skip
2854      * the validation needed for a user-supplied one, as Perl's does its own
2855      * validation. */
2856     cvp = hv_fetchs(table, "charnames", FALSE);
2857     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2858         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2859     {
2860         const char * const name = HvNAME(stash);
2861          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2862            return res;
2863        }
2864     }
2865
2866     /* Here, it isn't Perl's charname handler.  We can't rely on a
2867      * user-supplied handler to validate the input name.  For non-ut8 input,
2868      * look to see that the first character is legal.  Then loop through the
2869      * rest checking that each is a continuation */
2870
2871     /* This code makes the reasonable assumption that the only Latin1-range
2872      * characters that begin a character name alias are alphabetic, otherwise
2873      * would have to create a isCHARNAME_BEGIN macro */
2874
2875     if (! is_utf8) {
2876         if (! isALPHAU(*s)) {
2877             goto bad_charname;
2878         }
2879         s++;
2880         while (s < e) {
2881             if (! isCHARNAME_CONT(*s)) {
2882                 goto bad_charname;
2883             }
2884             if (*s == ' ' && *(s-1) == ' ') {
2885                 goto multi_spaces;
2886             }
2887             s++;
2888         }
2889     }
2890     else {
2891         /* Similarly for utf8.  For invariants can check directly; for other
2892          * Latin1, can calculate their code point and check; otherwise  use an
2893          * inversion list */
2894         if (UTF8_IS_INVARIANT(*s)) {
2895             if (! isALPHAU(*s)) {
2896                 goto bad_charname;
2897             }
2898             s++;
2899         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2900             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2901                 goto bad_charname;
2902             }
2903             s += 2;
2904         }
2905         else {
2906             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2907                                        utf8_to_uvchr_buf((U8 *) s,
2908                                                          (U8 *) e,
2909                                                          NULL)))
2910             {
2911                 goto bad_charname;
2912             }
2913             s += UTF8SKIP(s);
2914         }
2915
2916         while (s < e) {
2917             if (UTF8_IS_INVARIANT(*s)) {
2918                 if (! isCHARNAME_CONT(*s)) {
2919                     goto bad_charname;
2920                 }
2921                 if (*s == ' ' && *(s-1) == ' ') {
2922                     goto multi_spaces;
2923                 }
2924                 s++;
2925             }
2926             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2927                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2928                 {
2929                     goto bad_charname;
2930                 }
2931                 s += 2;
2932             }
2933             else {
2934                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2935                                            utf8_to_uvchr_buf((U8 *) s,
2936                                                              (U8 *) e,
2937                                                              NULL)))
2938                 {
2939                     goto bad_charname;
2940                 }
2941                 s += UTF8SKIP(s);
2942             }
2943         }
2944     }
2945     if (*(s-1) == ' ') {
2946         /* diag_listed_as: charnames alias definitions may not contain
2947                            trailing white-space; marked by <-- HERE in %s
2948          */
2949         *error_msg = Perl_form(aTHX_
2950             "charnames alias definitions may not contain trailing "
2951             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2952             (int)(s - context + 1), context,
2953             (int)(e - s + 1), s + 1);
2954         return NULL;
2955     }
2956
2957     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2958         const U8* first_bad_char_loc;
2959         STRLEN len;
2960         const char* const str = SvPV_const(res, len);
2961         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2962                                           &first_bad_char_loc)))
2963         {
2964             _force_out_malformed_utf8_message(first_bad_char_loc,
2965                                               (U8 *) PL_parser->bufend,
2966                                               0,
2967                                               0 /* 0 means don't die */ );
2968             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2969                                immediately after '%s' */
2970             *error_msg = Perl_form(aTHX_
2971                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2972                  (int) context_len, context,
2973                  (int) ((char *) first_bad_char_loc - str), str);
2974             return NULL;
2975         }
2976     }
2977
2978     return res;
2979
2980   bad_charname: {
2981
2982         /* The final %.*s makes sure that should the trailing NUL be missing
2983          * that this print won't run off the end of the string */
2984         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2985                            in \N{%s} */
2986         *error_msg = Perl_form(aTHX_
2987             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2988             (int)(s - context + 1), context,
2989             (int)(e - s + 1), s + 1);
2990         return NULL;
2991     }
2992
2993   multi_spaces:
2994         /* diag_listed_as: charnames alias definitions may not contain a
2995                            sequence of multiple spaces; marked by <-- HERE
2996                            in %s */
2997         *error_msg = Perl_form(aTHX_
2998             "charnames alias definitions may not contain a sequence of "
2999             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3000             (int)(s - context + 1), context,
3001             (int)(e - s + 1), s + 1);
3002         return NULL;
3003 }
3004
3005 /*
3006   scan_const
3007
3008   Extracts the next constant part of a pattern, double-quoted string,
3009   or transliteration.  This is terrifying code.
3010
3011   For example, in parsing the double-quoted string "ab\x63$d", it would
3012   stop at the '$' and return an OP_CONST containing 'abc'.
3013
3014   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3015   processing a pattern (PL_lex_inpat is true), a transliteration
3016   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3017
3018   Returns a pointer to the character scanned up to. If this is
3019   advanced from the start pointer supplied (i.e. if anything was
3020   successfully parsed), will leave an OP_CONST for the substring scanned
3021   in pl_yylval. Caller must intuit reason for not parsing further
3022   by looking at the next characters herself.
3023
3024   In patterns:
3025     expand:
3026       \N{FOO}  => \N{U+hex_for_character_FOO}
3027       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3028
3029     pass through:
3030         all other \-char, including \N and \N{ apart from \N{ABC}
3031
3032     stops on:
3033         @ and $ where it appears to be a var, but not for $ as tail anchor
3034         \l \L \u \U \Q \E
3035         (?{  or  (??{
3036
3037   In transliterations:
3038     characters are VERY literal, except for - not at the start or end
3039     of the string, which indicates a range.  However some backslash sequences
3040     are recognized: \r, \n, and the like
3041                     \007 \o{}, \x{}, \N{}
3042     If all elements in the transliteration are below 256,
3043     scan_const expands the range to the full set of intermediate
3044     characters. If the range is in utf8, the hyphen is replaced with
3045     a certain range mark which will be handled by pmtrans() in op.c.
3046
3047   In double-quoted strings:
3048     backslashes:
3049       all those recognized in transliterations
3050       deprecated backrefs: \1 (in substitution replacements)
3051       case and quoting: \U \Q \E
3052     stops on @ and $
3053
3054   scan_const does *not* construct ops to handle interpolated strings.
3055   It stops processing as soon as it finds an embedded $ or @ variable
3056   and leaves it to the caller to work out what's going on.
3057
3058   embedded arrays (whether in pattern or not) could be:
3059       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3060
3061   $ in double-quoted strings must be the symbol of an embedded scalar.
3062
3063   $ in pattern could be $foo or could be tail anchor.  Assumption:
3064   it's a tail anchor if $ is the last thing in the string, or if it's
3065   followed by one of "()| \r\n\t"
3066
3067   \1 (backreferences) are turned into $1 in substitutions
3068
3069   The structure of the code is
3070       while (there's a character to process) {
3071           handle transliteration ranges
3072           skip regexp comments /(?#comment)/ and codes /(?{code})/
3073           skip #-initiated comments in //x patterns
3074           check for embedded arrays
3075           check for embedded scalars
3076           if (backslash) {
3077               deprecate \1 in substitution replacements
3078               handle string-changing backslashes \l \U \Q \E, etc.
3079               switch (what was escaped) {
3080                   handle \- in a transliteration (becomes a literal -)
3081                   if a pattern and not \N{, go treat as regular character
3082                   handle \132 (octal characters)
3083                   handle \x15 and \x{1234} (hex characters)
3084                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3085                   handle \cV (control characters)
3086                   handle printf-style backslashes (\f, \r, \n, etc)
3087               } (end switch)
3088               continue
3089           } (end if backslash)
3090           handle regular character
3091     } (end while character to read)
3092
3093 */
3094
3095 STATIC char *
3096 S_scan_const(pTHX_ char *start)
3097 {
3098     const char * const send = PL_bufend;/* end of the constant */
3099     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3100                                            on sizing. */
3101     char *s = start;                    /* start of the constant */
3102     char *d = SvPVX(sv);                /* destination for copies */
3103     bool dorange = FALSE;               /* are we in a translit range? */
3104     bool didrange = FALSE;              /* did we just finish a range? */
3105     bool in_charclass = FALSE;          /* within /[...]/ */
3106     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3107                                            UTF8?  But, this can show as true
3108                                            when the source isn't utf8, as for
3109                                            example when it is entirely composed
3110                                            of hex constants */
3111     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3112     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3113                                            number of characters found so far
3114                                            that will expand (into 2 bytes)
3115                                            should we have to convert to
3116                                            UTF-8) */
3117     SV *res;                            /* result from charnames */
3118     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3119                                    high-end character is temporarily placed */
3120
3121     /* Does something require special handling in tr/// ?  This avoids extra
3122      * work in a less likely case.  As such, khw didn't feel it was worth
3123      * adding any branches to the more mainline code to handle this, which
3124      * means that this doesn't get set in some circumstances when things like
3125      * \x{100} get expanded out.  As a result there needs to be extra testing
3126      * done in the tr code */
3127     bool has_above_latin1 = FALSE;
3128
3129     /* Note on sizing:  The scanned constant is placed into sv, which is
3130      * initialized by newSV() assuming one byte of output for every byte of
3131      * input.  This routine expects newSV() to allocate an extra byte for a
3132      * trailing NUL, which this routine will append if it gets to the end of
3133      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3134      * CAPITAL LETTER A}), or more output than input if the constant ends up
3135      * recoded to utf8, but each time a construct is found that might increase
3136      * the needed size, SvGROW() is called.  Its size parameter each time is
3137      * based on the best guess estimate at the time, namely the length used so
3138      * far, plus the length the current construct will occupy, plus room for
3139      * the trailing NUL, plus one byte for every input byte still unscanned */
3140
3141     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3142                        before set */
3143 #ifdef EBCDIC
3144     int backslash_N = 0;            /* ? was the character from \N{} */
3145     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3146                                        platform-specific like \x65 */
3147 #endif
3148
3149     PERL_ARGS_ASSERT_SCAN_CONST;
3150
3151     assert(PL_lex_inwhat != OP_TRANSR);
3152
3153     /* Protect sv from errors and fatal warnings. */
3154     ENTER_with_name("scan_const");
3155     SAVEFREESV(sv);
3156
3157     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3158      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3159      * valid */
3160     assert(*send == '\0');
3161
3162     while (s < send
3163            || dorange   /* Handle tr/// range at right edge of input */
3164     ) {
3165
3166         /* get transliterations out of the way (they're most literal) */
3167         if (PL_lex_inwhat == OP_TRANS) {
3168
3169             /* But there isn't any special handling necessary unless there is a
3170              * range, so for most cases we just drop down and handle the value
3171              * as any other.  There are two exceptions.
3172              *
3173              * 1.  A hyphen indicates that we are actually going to have a
3174              *     range.  In this case, skip the '-', set a flag, then drop
3175              *     down to handle what should be the end range value.
3176              * 2.  After we've handled that value, the next time through, that
3177              *     flag is set and we fix up the range.
3178              *
3179              * Ranges entirely within Latin1 are expanded out entirely, in
3180              * order to make the transliteration a simple table look-up.
3181              * Ranges that extend above Latin1 have to be done differently, so
3182              * there is no advantage to expanding them here, so they are
3183              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3184              * a byte that can't occur in legal UTF-8, and hence can signify a
3185              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3186              * the range is expressed as Unicode, the Latin1 portion is
3187              * expanded out even if the range extends above Latin1.  This is
3188              * because each code point in it has to be processed here
3189              * individually to get its native translation */
3190
3191             if (! dorange) {
3192
3193                 /* Here, we don't think we're in a range.  If the new character
3194                  * is not a hyphen; or if it is a hyphen, but it's too close to
3195                  * either edge to indicate a range, or if we haven't output any
3196                  * characters yet then it's a regular character. */
3197                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3198                 {
3199
3200                     /* A regular character.  Process like any other, but first
3201                      * clear any flags */
3202                     didrange = FALSE;
3203                     dorange = FALSE;
3204 #ifdef EBCDIC
3205                     non_portable_endpoint = 0;
3206                     backslash_N = 0;
3207 #endif
3208                     /* The tests here for being above Latin1 and similar ones
3209                      * in the following 'else' suffice to find all such
3210                      * occurences in the constant, except those added by a
3211                      * backslash escape sequence, like \x{100}.  Mostly, those
3212                      * set 'has_above_latin1' as appropriate */
3213                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3214                         has_above_latin1 = TRUE;
3215                     }
3216
3217                     /* Drops down to generic code to process current byte */
3218                 }
3219                 else {  /* Is a '-' in the context where it means a range */
3220                     if (didrange) { /* Something like y/A-C-Z// */
3221                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3222                                          " operator");
3223                     }
3224
3225                     dorange = TRUE;
3226
3227                     s++;    /* Skip past the hyphen */
3228
3229                     /* d now points to where the end-range character will be
3230                      * placed.  Drop down to get that character.  We'll finish
3231                      * processing the range the next time through the loop */
3232
3233                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3234                         has_above_latin1 = TRUE;
3235                     }
3236
3237                     /* Drops down to generic code to process current byte */
3238                 }
3239             }  /* End of not a range */
3240             else {
3241                 /* Here we have parsed a range.  Now must handle it.  At this
3242                  * point:
3243                  * 'sv' is a SV* that contains the output string we are
3244                  *      constructing.  The final two characters in that string
3245                  *      are the range start and range end, in order.
3246                  * 'd'  points to just beyond the range end in the 'sv' string,
3247                  *      where we would next place something
3248                  */
3249                 char * max_ptr;
3250                 char * min_ptr;
3251                 IV range_min;
3252                 IV range_max;   /* last character in range */
3253                 STRLEN grow;
3254                 Size_t offset_to_min = 0;
3255                 Size_t extras = 0;
3256 #ifdef EBCDIC
3257                 bool convert_unicode;
3258                 IV real_range_max = 0;
3259 #endif
3260                 /* Get the code point values of the range ends. */
3261                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3262                 offset_to_max = max_ptr - SvPVX_const(sv);
3263                 if (d_is_utf8) {
3264                     /* We know the utf8 is valid, because we just constructed
3265                      * it ourselves in previous loop iterations */
3266                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3267                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3268                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3269
3270                     /* This compensates for not all code setting
3271                      * 'has_above_latin1', so that we don't skip stuff that
3272                      * should be executed */
3273                     if (range_max > 255) {
3274                         has_above_latin1 = TRUE;
3275                     }
3276                 }
3277                 else {
3278                     min_ptr = max_ptr - 1;
3279                     range_min = * (U8*) min_ptr;
3280                     range_max = * (U8*) max_ptr;
3281                 }
3282
3283                 /* If the range is just a single code point, like tr/a-a/.../,
3284                  * that code point is already in the output, twice.  We can
3285                  * just back up over the second instance and avoid all the rest
3286                  * of the work.  But if it is a variant character, it's been
3287                  * counted twice, so decrement.  (This unlikely scenario is
3288                  * special cased, like the one for a range of 2 code points
3289                  * below, only because the main-line code below needs a range
3290                  * of 3 or more to work without special casing.  Might as well
3291                  * get it out of the way now.) */
3292                 if (UNLIKELY(range_max == range_min)) {
3293                     d = max_ptr;
3294                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3295                         utf8_variant_count--;
3296                     }
3297                     goto range_done;
3298                 }
3299
3300 #ifdef EBCDIC
3301                 /* On EBCDIC platforms, we may have to deal with portable
3302                  * ranges.  These happen if at least one range endpoint is a
3303                  * Unicode value (\N{...}), or if the range is a subset of
3304                  * [A-Z] or [a-z], and both ends are literal characters,
3305                  * like 'A', and not like \x{C1} */
3306                 convert_unicode =
3307                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3308                                                        hence portable range */
3309                     || (     ! non_portable_endpoint
3310                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3311                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3312                 if (convert_unicode) {
3313
3314                     /* Special handling is needed for these portable ranges.
3315                      * They are defined to be in Unicode terms, which includes
3316                      * all the Unicode code points between the end points.
3317                      * Convert to Unicode to get the Unicode range.  Later we
3318                      * will convert each code point in the range back to
3319                      * native.  */
3320                     range_min = NATIVE_TO_UNI(range_min);
3321                     range_max = NATIVE_TO_UNI(range_max);
3322                 }
3323 #endif
3324
3325                 if (range_min > range_max) {
3326 #ifdef EBCDIC
3327                     if (convert_unicode) {
3328                         /* Need to convert back to native for meaningful
3329                          * messages for this platform */
3330                         range_min = UNI_TO_NATIVE(range_min);
3331                         range_max = UNI_TO_NATIVE(range_max);
3332                     }
3333 #endif
3334                     /* Use the characters themselves for the error message if
3335                      * ASCII printables; otherwise some visible representation
3336                      * of them */
3337                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3338                         Perl_croak(aTHX_
3339                          "Invalid range \"%c-%c\" in transliteration operator",
3340                          (char)range_min, (char)range_max);
3341                     }
3342 #ifdef EBCDIC
3343                     else if (convert_unicode) {
3344         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3345                         Perl_croak(aTHX_
3346                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3347                            UVXf "}\" in transliteration operator",
3348                            range_min, range_max);
3349                     }
3350 #endif
3351                     else {
3352         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3353                         Perl_croak(aTHX_
3354                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3355                            " in transliteration operator",
3356                            range_min, range_max);
3357                     }
3358                 }
3359
3360                 /* If the range is exactly two code points long, they are
3361                  * already both in the output */
3362                 if (UNLIKELY(range_min + 1 == range_max)) {
3363                     goto range_done;
3364                 }
3365
3366                 /* Here the range contains at least 3 code points */
3367
3368                 if (d_is_utf8) {
3369
3370                     /* If everything in the transliteration is below 256, we
3371                      * can avoid special handling later.  A translation table
3372                      * for each of those bytes is created by op.c.  So we
3373                      * expand out all ranges to their constituent code points.
3374                      * But if we've encountered something above 255, the
3375                      * expanding won't help, so skip doing that.  But if it's
3376                      * EBCDIC, we may have to look at each character below 256
3377                      * if we have to convert to/from Unicode values */
3378                     if (   has_above_latin1
3379 #ifdef EBCDIC
3380                         && (range_min > 255 || ! convert_unicode)
3381 #endif
3382                     ) {
3383                         const STRLEN off = d - SvPVX(sv);
3384                         const STRLEN extra = 1 + (send - s) + 1;
3385                         char *e;
3386
3387                         /* Move the high character one byte to the right; then
3388                          * insert between it and the range begin, an illegal
3389                          * byte which serves to indicate this is a range (using
3390                          * a '-' would be ambiguous). */
3391
3392                         if (off + extra > SvLEN(sv)) {
3393                             d = off + SvGROW(sv, off + extra);
3394                             max_ptr = d - off + offset_to_max;
3395                         }
3396
3397                         e = d++;
3398                         while (e-- > max_ptr) {
3399                             *(e + 1) = *e;
3400                         }
3401                         *(e + 1) = (char) RANGE_INDICATOR;
3402                         goto range_done;
3403                     }
3404
3405                     /* Here, we're going to expand out the range.  For EBCDIC
3406                      * the range can extend above 255 (not so in ASCII), so
3407                      * for EBCDIC, split it into the parts above and below
3408                      * 255/256 */
3409 #ifdef EBCDIC
3410                     if (range_max > 255) {
3411                         real_range_max = range_max;
3412                         range_max = 255;
3413                     }
3414 #endif
3415                 }
3416
3417                 /* Here we need to expand out the string to contain each
3418                  * character in the range.  Grow the output to handle this.
3419                  * For non-UTF8, we need a byte for each code point in the
3420                  * range, minus the three that we've already allocated for: the
3421                  * hyphen, the min, and the max.  For UTF-8, we need this
3422                  * plus an extra byte for each code point that occupies two
3423                  * bytes (is variant) when in UTF-8 (except we've already
3424                  * allocated for the end points, including if they are
3425                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3426                  * platforms, it's easy to calculate a precise number.  To
3427                  * start, we count the variants in the range, which we need
3428                  * elsewhere in this function anyway.  (For the case where it
3429                  * isn't easy to calculate, 'extras' has been initialized to 0,
3430                  * and the calculation is done in a loop further down.) */
3431 #ifdef EBCDIC
3432                 if (convert_unicode)
3433 #endif
3434                 {
3435                     /* This is executed unconditionally on ASCII, and for
3436                      * Unicode ranges on EBCDIC.  Under these conditions, all
3437                      * code points above a certain value are variant; and none
3438                      * under that value are.  We just need to find out how much
3439                      * of the range is above that value.  We don't count the
3440                      * end points here, as they will already have been counted
3441                      * as they were parsed. */
3442                     if (range_min >= UTF_CONTINUATION_MARK) {
3443
3444                         /* The whole range is made up of variants */
3445                         extras = (range_max - 1) - (range_min + 1) + 1;
3446                     }
3447                     else if (range_max >= UTF_CONTINUATION_MARK) {
3448
3449                         /* Only the higher portion of the range is variants */
3450                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3451                     }
3452
3453                     utf8_variant_count += extras;
3454                 }
3455
3456                 /* The base growth is the number of code points in the range,
3457                  * not including the endpoints, which have already been sized
3458                  * for (and output).  We don't subtract for the hyphen, as it
3459                  * has been parsed but not output, and the SvGROW below is
3460                  * based only on what's been output plus what's left to parse.
3461                  * */
3462                 grow = (range_max - 1) - (range_min + 1) + 1;
3463
3464                 if (d_is_utf8) {
3465 #ifdef EBCDIC
3466                     /* In some cases in EBCDIC, we haven't yet calculated a
3467                      * precise amount needed for the UTF-8 variants.  Just
3468                      * assume the worst case, that everything will expand by a
3469                      * byte */
3470                     if (! convert_unicode) {
3471                         grow *= 2;
3472                     }
3473                     else
3474 #endif
3475                     {
3476                         /* Otherwise we know exactly how many variants there
3477                          * are in the range. */
3478                         grow += extras;
3479                     }
3480                 }
3481
3482                 /* Grow, but position the output to overwrite the range min end
3483                  * point, because in some cases we overwrite that */
3484                 SvCUR_set(sv, d - SvPVX_const(sv));
3485                 offset_to_min = min_ptr - SvPVX_const(sv);
3486
3487                 /* See Note on sizing above. */
3488                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3489                                              + (send - s)
3490                                              + grow
3491                                              + 1 /* Trailing NUL */ );
3492
3493                 /* Now, we can expand out the range. */
3494 #ifdef EBCDIC
3495                 if (convert_unicode) {
3496                     SSize_t i;
3497
3498                     /* Recall that the min and max are now in Unicode terms, so
3499                      * we have to convert each character to its native
3500                      * equivalent */
3501                     if (d_is_utf8) {
3502                         for (i = range_min; i <= range_max; i++) {
3503                             append_utf8_from_native_byte(
3504                                                     LATIN1_TO_NATIVE((U8) i),
3505                                                     (U8 **) &d);
3506                         }
3507                     }
3508                     else {
3509                         for (i = range_min; i <= range_max; i++) {
3510                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3511                         }
3512                     }
3513                 }
3514                 else
3515 #endif
3516                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3517                 {
3518                     /* Here, no conversions are necessary, which means that the
3519                      * first character in the range is already in 'd' and
3520                      * valid, so we can skip overwriting it */
3521                     if (d_is_utf8) {
3522                         SSize_t i;
3523                         d += UTF8SKIP(d);
3524                         for (i = range_min + 1; i <= range_max; i++) {
3525                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3526                         }
3527                     }
3528                     else {
3529                         SSize_t i;
3530                         d++;
3531                         assert(range_min + 1 <= range_max);
3532                         for (i = range_min + 1; i < range_max; i++) {
3533 #ifdef EBCDIC
3534                             /* In this case on EBCDIC, we haven't calculated
3535                              * the variants.  Do it here, as we go along */
3536                             if (! UVCHR_IS_INVARIANT(i)) {
3537                                 utf8_variant_count++;
3538                             }
3539 #endif
3540                             *d++ = (char)i;
3541                         }
3542
3543                         /* The range_max is done outside the loop so as to
3544                          * avoid having to special case not incrementing
3545                          * 'utf8_variant_count' on EBCDIC (it's already been
3546                          * counted when originally parsed) */
3547                         *d++ = (char) range_max;
3548                     }
3549                 }
3550
3551 #ifdef EBCDIC
3552                 /* If the original range extended above 255, add in that
3553                  * portion. */
3554                 if (real_range_max) {
3555                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3556                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3557                     if (real_range_max > 0x100) {
3558                         if (real_range_max > 0x101) {
3559                             *d++ = (char) RANGE_INDICATOR;
3560                         }
3561                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3562                     }
3563                 }
3564 #endif
3565
3566               range_done:
3567                 /* mark the range as done, and continue */
3568                 didrange = TRUE;
3569                 dorange = FALSE;
3570 #ifdef EBCDIC
3571                 non_portable_endpoint = 0;
3572                 backslash_N = 0;
3573 #endif
3574                 continue;
3575             } /* End of is a range */
3576         } /* End of transliteration.  Joins main code after these else's */
3577         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3578             char *s1 = s-1;
3579             int esc = 0;
3580             while (s1 >= start && *s1-- == '\\')
3581                 esc = !esc;
3582             if (!esc)
3583                 in_charclass = TRUE;
3584         }
3585         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3586             char *s1 = s-1;
3587             int esc = 0;
3588             while (s1 >= start && *s1-- == '\\')
3589                 esc = !esc;
3590             if (!esc)
3591                 in_charclass = FALSE;
3592         }
3593             /* skip for regexp comments /(?#comment)/, except for the last
3594              * char, which will be done separately.  Stop on (?{..}) and
3595              * friends */
3596         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3597             if (s[2] == '#') {
3598                 if (s_is_utf8) {
3599                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3600
3601                     while (s + len < send && *s != ')') {
3602                         Copy(s, d, len, U8);
3603                         d += len;
3604                         s += len;
3605                         len = UTF8_SAFE_SKIP(s, send);
3606                     }
3607                 }
3608                 else while (s+1 < send && *s != ')') {
3609                     *d++ = *s++;
3610                 }
3611             }
3612             else if (!PL_lex_casemods
3613                      && (    s[2] == '{' /* This should match regcomp.c */
3614                          || (s[2] == '?' && s[3] == '{')))
3615             {
3616                 break;
3617             }
3618         }
3619             /* likewise skip #-initiated comments in //x patterns */
3620         else if (*s == '#'
3621                  && PL_lex_inpat
3622                  && !in_charclass
3623                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3624         {
3625             while (s < send && *s != '\n')
3626                 *d++ = *s++;
3627         }
3628             /* no further processing of single-quoted regex */
3629         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3630             goto default_action;
3631
3632             /* check for embedded arrays
3633              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3634              */
3635         else if (*s == '@' && s[1]) {
3636             if (UTF
3637                ? isIDFIRST_utf8_safe(s+1, send)
3638                : isWORDCHAR_A(s[1]))
3639             {
3640                 break;
3641             }
3642             if (memCHRs(":'{$", s[1]))
3643                 break;
3644             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3645                 break; /* in regexp, neither @+ nor @- are interpolated */
3646         }
3647             /* check for embedded scalars.  only stop if we're sure it's a
3648              * variable.  */
3649         else if (*s == '$') {
3650             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3651                 break;
3652             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3653                 if (s[1] == '\\') {
3654                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3655                                    "Possible unintended interpolation of $\\ in regex");
3656                 }
3657                 break;          /* in regexp, $ might be tail anchor */
3658             }
3659         }
3660
3661         /* End of else if chain - OP_TRANS rejoin rest */
3662
3663         if (UNLIKELY(s >= send)) {
3664             assert(s == send);
3665             break;
3666         }
3667
3668         /* backslashes */
3669         if (*s == '\\' && s+1 < send) {
3670             char* bslash = s;   /* point to beginning \ */
3671             char* rbrace;       /* point to ending '}' */
3672             char* e;            /* 1 past the meat (non-blanks) before the
3673                                    brace */
3674             s++;
3675
3676             /* warn on \1 - \9 in substitution replacements, but note that \11
3677              * is an octal; and \19 is \1 followed by '9' */
3678             if (PL_lex_inwhat == OP_SUBST
3679                 && !PL_lex_inpat
3680                 && isDIGIT(*s)
3681                 && *s != '0'
3682                 && !isDIGIT(s[1]))
3683             {
3684                 /* diag_listed_as: \%d better written as $%d */
3685                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3686                 s = bslash;
3687                 *s = '$';
3688                 break;
3689             }
3690
3691             /* string-change backslash escapes */
3692             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3693                 s = bslash;
3694                 break;
3695             }
3696             /* In a pattern, process \N, but skip any other backslash escapes.
3697              * This is because we don't want to translate an escape sequence
3698              * into a meta symbol and have the regex compiler use the meta
3699              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3700              * in spite of this, we do have to process \N here while the proper
3701              * charnames handler is in scope.  See bugs #56444 and #62056.
3702              *
3703              * There is a complication because \N in a pattern may also stand
3704              * for 'match a non-nl', and not mean a charname, in which case its
3705              * processing should be deferred to the regex compiler.  To be a
3706              * charname it must be followed immediately by a '{', and not look
3707              * like \N followed by a curly quantifier, i.e., not something like
3708              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3709              * quantifier */
3710             else if (PL_lex_inpat
3711                     && (*s != 'N'
3712                         || s[1] != '{'
3713                         || regcurly(s + 1, send, NULL)))
3714             {
3715                 *d++ = '\\';
3716                 goto default_action;
3717             }
3718
3719             switch (*s) {
3720             default:
3721                 {
3722                     if ((isALPHANUMERIC(*s)))
3723                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3724                                        "Unrecognized escape \\%c passed through",
3725                                        *s);
3726                     /* default action is to copy the quoted character */
3727                     goto default_action;
3728                 }
3729
3730             /* eg. \132 indicates the octal constant 0132 */
3731             case '0': case '1': case '2': case '3':
3732             case '4': case '5': case '6': case '7':
3733                 {
3734                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3735                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3736                     STRLEN len = 3;
3737                     uv = grok_oct(s, &len, &flags, NULL);
3738                     s += len;
3739                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3740                         && s < send
3741                         && isDIGIT(*s)  /* like \08, \178 */
3742                         && ckWARN(WARN_MISC))
3743                     {
3744                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3745                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3746                     }
3747                 }
3748                 goto NUM_ESCAPE_INSERT;
3749
3750             /* eg. \o{24} indicates the octal constant \024 */
3751             case 'o':
3752                 {
3753                     const char* error;
3754
3755                     if (! grok_bslash_o(&s, send,
3756                                                &uv, &error,
3757                                                NULL,
3758                                                FALSE, /* Not strict */
3759                                                FALSE, /* No illegal cp's */
3760                                                UTF))
3761                     {
3762                         yyerror(error);
3763                         uv = 0; /* drop through to ensure range ends are set */
3764                     }
3765                     goto NUM_ESCAPE_INSERT;
3766                 }
3767
3768             /* eg. \x24 indicates the hex constant 0x24 */
3769             case 'x':
3770                 {
3771                     const char* error;
3772
3773                     if (! grok_bslash_x(&s, send,
3774                                                &uv, &error,
3775                                                NULL,
3776                                                FALSE, /* Not strict */
3777                                                FALSE, /* No illegal cp's */
3778                                                UTF))
3779                     {
3780                         yyerror(error);
3781                         uv = 0; /* drop through to ensure range ends are set */
3782                     }
3783                 }
3784
3785               NUM_ESCAPE_INSERT:
3786                 /* Insert oct or hex escaped character. */
3787
3788                 /* Here uv is the ordinal of the next character being added */
3789                 if (UVCHR_IS_INVARIANT(uv)) {
3790                     *d++ = (char) uv;
3791                 }
3792                 else {
3793                     if (!d_is_utf8 && uv > 255) {
3794
3795                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3796                          * If we've only seen invariants so far, all we have to
3797                          * do is turn on the flag */
3798                         if (utf8_variant_count == 0) {
3799                             SvUTF8_on(sv);
3800                         }
3801                         else {
3802                             SvCUR_set(sv, d - SvPVX_const(sv));
3803                             SvPOK_on(sv);
3804                             *d = '\0';
3805
3806                             sv_utf8_upgrade_flags_grow(
3807                                            sv,
3808                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3809
3810                                            /* Since we're having to grow here,
3811                                             * make sure we have enough room for
3812                                             * this escape and a NUL, so the
3813                                             * code immediately below won't have
3814                                             * to actually grow again */
3815                                           UVCHR_SKIP(uv)
3816                                         + (STRLEN)(send - s) + 1);
3817                             d = SvPVX(sv) + SvCUR(sv);
3818                         }
3819
3820                         has_above_latin1 = TRUE;
3821                         d_is_utf8 = TRUE;
3822                     }
3823
3824                     if (! d_is_utf8) {
3825                         *d++ = (char)uv;
3826                         utf8_variant_count++;
3827                     }
3828                     else {
3829                        /* Usually, there will already be enough room in 'sv'
3830                         * since such escapes are likely longer than any UTF-8
3831                         * sequence they can end up as.  This isn't the case on
3832                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3833                         * UTF-8 for it contains 14.  And, we have to allow for
3834                         * a trailing NUL.  It probably can't happen on ASCII
3835                         * platforms, but be safe.  See Note on sizing above. */
3836                         const STRLEN needed = d - SvPVX(sv)
3837                                             + UVCHR_SKIP(uv)
3838                                             + (send - s)
3839                                             + 1;
3840                         if (UNLIKELY(needed > SvLEN(sv))) {
3841                             SvCUR_set(sv, d - SvPVX_const(sv));
3842                             d = SvCUR(sv) + SvGROW(sv, needed);
3843                         }
3844
3845                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3846                                                    (ckWARN(WARN_PORTABLE))
3847                                                    ? UNICODE_WARN_PERL_EXTENDED
3848                                                    : 0);
3849                     }
3850                 }
3851 #ifdef EBCDIC
3852                 non_portable_endpoint++;
3853 #endif
3854                 continue;
3855
3856             case 'N':
3857                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3858                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3859                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3860                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3861                  * convenience all three forms are referred to as "named
3862                  * characters" below.
3863                  *
3864                  * For patterns, \N also can mean to match a non-newline.  Code
3865                  * before this 'switch' statement should already have handled
3866                  * this situation, and hence this code only has to deal with
3867                  * the named character cases.
3868                  *
3869                  * For non-patterns, the named characters are converted to
3870                  * their string equivalents.  In patterns, named characters are
3871                  * not converted to their ultimate forms for the same reasons
3872                  * that other escapes aren't (mainly that the ultimate
3873                  * character could be considered a meta-symbol by the regex
3874                  * compiler).  Instead, they are converted to the \N{U+...}
3875                  * form to get the value from the charnames that is in effect
3876                  * right now, while preserving the fact that it was a named
3877                  * character, so that the regex compiler knows this.
3878                  *
3879                  * The structure of this section of code (besides checking for
3880                  * errors and upgrading to utf8) is:
3881                  *    If the named character is of the form \N{U+...}, pass it
3882                  *      through if a pattern; otherwise convert the code point
3883                  *      to utf8
3884                  *    Otherwise must be some \N{NAME}: convert to
3885                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3886                  *
3887                  * Transliteration is an exception.  The conversion to utf8 is
3888                  * only done if the code point requires it to be representable.
3889                  *
3890                  * Here, 's' points to the 'N'; the test below is guaranteed to
3891                  * succeed if we are being called on a pattern, as we already
3892                  * know from a test above that the next character is a '{'.  A
3893                  * non-pattern \N must mean 'named character', which requires
3894                  * braces */
3895                 s++;
3896                 if (*s != '{') {
3897                     yyerror("Missing braces on \\N{}");
3898                     *d++ = '\0';
3899                     continue;
3900                 }
3901                 s++;
3902
3903                 /* If there is no matching '}', it is an error. */
3904                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3905                     if (! PL_lex_inpat) {
3906                         yyerror("Missing right brace on \\N{}");
3907                     } else {
3908                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3909                     }
3910                     yyquit(); /* Have exhausted the input. */
3911                 }
3912
3913                 /* Here it looks like a named character */
3914                 while (s < rbrace && isBLANK(*s)) {
3915                     s++;
3916                 }
3917
3918                 e = rbrace;
3919                 while (s < e && isBLANK(*(e - 1))) {
3920                     e--;
3921                 }
3922
3923                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3924                     s += 2;         /* Skip to next char after the 'U+' */
3925                     if (PL_lex_inpat) {
3926
3927                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3928                         /* Check the syntax.  */
3929                         if (!isXDIGIT(*s)) {
3930                           bad_NU:
3931                             yyerror(
3932                                 "Invalid hexadecimal number in \\N{U+...}"
3933                             );
3934                             s = rbrace + 1;
3935                             *d++ = '\0';
3936                             continue;
3937                         }
3938                         while (++s < e) {
3939                             if (isXDIGIT(*s))
3940                                 continue;
3941                             else if ((*s == '.' || *s == '_')
3942                                   && isXDIGIT(s[1]))
3943                                 continue;
3944                             goto bad_NU;
3945                         }
3946
3947                         /* Pass everything through unchanged.
3948                          * +1 is to include the '}' */
3949                         Copy(bslash, d, rbrace - bslash + 1, char);
3950                         d += rbrace - bslash + 1;
3951                     }
3952                     else {  /* Not a pattern: convert the hex to string */
3953                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3954                                   | PERL_SCAN_SILENT_ILLDIGIT
3955                                   | PERL_SCAN_SILENT_OVERFLOW
3956                                   | PERL_SCAN_DISALLOW_PREFIX;
3957                         STRLEN len = e - s;
3958
3959                         uv = grok_hex(s, &len, &flags, NULL);
3960                         if (len == 0 || (len != (STRLEN)(e - s)))
3961                             goto bad_NU;
3962
3963                         if (    uv > MAX_LEGAL_CP
3964                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3965                         {
3966                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3967                             uv = 0; /* drop through to ensure range ends are
3968                                        set */
3969                         }
3970
3971                          /* For non-tr///, if the destination is not in utf8,
3972                           * unconditionally recode it to be so.  This is
3973                           * because \N{} implies Unicode semantics, and scalars
3974                           * have to be in utf8 to guarantee those semantics.
3975                           * tr/// doesn't care about Unicode rules, so no need
3976                           * there to upgrade to UTF-8 for small enough code
3977                           * points */
3978                         if (! d_is_utf8 && (   uv > 0xFF
3979                                            || PL_lex_inwhat != OP_TRANS))
3980                         {
3981                             /* See Note on sizing above.  */
3982                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3983
3984                             SvCUR_set(sv, d - SvPVX_const(sv));
3985                             SvPOK_on(sv);
3986                             *d = '\0';
3987
3988                             if (utf8_variant_count == 0) {
3989                                 SvUTF8_on(sv);
3990                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3991                             }
3992                             else {
3993                                 sv_utf8_upgrade_flags_grow(
3994                                                sv,
3995                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3996                                                extra);
3997                                 d = SvPVX(sv) + SvCUR(sv);
3998                             }
3999
4000                             d_is_utf8 = TRUE;
4001                             has_above_latin1 = TRUE;
4002                         }
4003
4004                         /* Add the (Unicode) code point to the output. */
4005                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4006                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4007                         }
4008                         else {
4009                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4010                                                    (ckWARN(WARN_PORTABLE))
4011                                                    ? UNICODE_WARN_PERL_EXTENDED
4012                                                    : 0);
4013                         }
4014                     }
4015                 }
4016                 else     /* Here is \N{NAME} but not \N{U+...}. */
4017                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4018                 {   /* Failed.  We should die eventually, but for now use a NUL
4019                        to keep parsing */
4020                     *d++ = '\0';
4021                 }
4022                 else {  /* Successfully evaluated the name */
4023                     STRLEN len;
4024                     const char *str = SvPV_const(res, len);
4025                     if (PL_lex_inpat) {
4026
4027                         if (! len) { /* The name resolved to an empty string */
4028                             const char empty_N[] = "\\N{_}";
4029                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4030                             d += sizeof(empty_N) - 1;
4031                         }
4032                         else {
4033                             /* In order to not lose information for the regex
4034                             * compiler, pass the result in the specially made
4035                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4036                             * the code points in hex of each character
4037                             * returned by charnames */
4038
4039                             const char *str_end = str + len;
4040                             const STRLEN off = d - SvPVX_const(sv);
4041
4042                             if (! SvUTF8(res)) {
4043                                 /* For the non-UTF-8 case, we can determine the
4044                                  * exact length needed without having to parse
4045                                  * through the string.  Each character takes up
4046                                  * 2 hex digits plus either a trailing dot or
4047                                  * the "}" */
4048                                 const char initial_text[] = "\\N{U+";
4049                                 const STRLEN initial_len = sizeof(initial_text)
4050                                                            - 1;
4051                                 d = off + SvGROW(sv, off
4052                                                     + 3 * len
4053
4054                                                     /* +1 for trailing NUL */
4055                                                     + initial_len + 1
4056
4057                                                     + (STRLEN)(send - rbrace));
4058                                 Copy(initial_text, d, initial_len, char);
4059                                 d += initial_len;
4060                                 while (str < str_end) {
4061                                     char hex_string[4];
4062                                     int len =
4063                                         my_snprintf(hex_string,
4064                                                   sizeof(hex_string),
4065                                                   "%02X.",
4066
4067                                                   /* The regex compiler is
4068                                                    * expecting Unicode, not
4069                                                    * native */
4070                                                   NATIVE_TO_LATIN1(*str));
4071                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4072                                                            sizeof(hex_string));
4073                                     Copy(hex_string, d, 3, char);
4074                                     d += 3;
4075                                     str++;
4076                                 }
4077                                 d--;    /* Below, we will overwrite the final
4078                                            dot with a right brace */
4079                             }
4080                             else {
4081                                 STRLEN char_length; /* cur char's byte length */
4082
4083                                 /* and the number of bytes after this is
4084                                  * translated into hex digits */
4085                                 STRLEN output_length;
4086
4087                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4088                                  * for max('U+', '.'); and 1 for NUL */
4089                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4090
4091                                 /* Get the first character of the result. */
4092                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4093                                                         len,
4094                                                         &char_length,
4095                                                         UTF8_ALLOW_ANYUV);
4096                                 /* Convert first code point to Unicode hex,
4097                                  * including the boiler plate before it. */
4098                                 output_length =
4099                                     my_snprintf(hex_string, sizeof(hex_string),
4100                                              "\\N{U+%X",
4101                                              (unsigned int) NATIVE_TO_UNI(uv));
4102
4103                                 /* Make sure there is enough space to hold it */
4104                                 d = off + SvGROW(sv, off
4105                                                     + output_length
4106                                                     + (STRLEN)(send - rbrace)
4107                                                     + 2);       /* '}' + NUL */
4108                                 /* And output it */
4109                                 Copy(hex_string, d, output_length, char);
4110                                 d += output_length;
4111
4112                                 /* For each subsequent character, append dot and
4113                                 * its Unicode code point in hex */
4114                                 while ((str += char_length) < str_end) {
4115                                     const STRLEN off = d - SvPVX_const(sv);
4116                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4117                                                             str_end - str,
4118                                                             &char_length,
4119                                                             UTF8_ALLOW_ANYUV);
4120                                     output_length =
4121                                         my_snprintf(hex_string,
4122                                              sizeof(hex_string),
4123                                              ".%X",
4124                                              (unsigned int) NATIVE_TO_UNI(uv));
4125
4126                                     d = off + SvGROW(sv, off
4127                                                         + output_length
4128                                                         + (STRLEN)(send - rbrace)
4129                                                         + 2);   /* '}' +  NUL */
4130                                     Copy(hex_string, d, output_length, char);
4131                                     d += output_length;
4132                                 }
4133                             }
4134
4135                             *d++ = '}'; /* Done.  Add the trailing brace */
4136                         }
4137                     }
4138                     else { /* Here, not in a pattern.  Convert the name to a
4139                             * string. */
4140
4141                         if (PL_lex_inwhat == OP_TRANS) {
4142                             str = SvPV_const(res, len);
4143                             if (len > ((SvUTF8(res))
4144                                        ? UTF8SKIP(str)
4145                                        : 1U))
4146                             {
4147                                 yyerror(Perl_form(aTHX_
4148                                     "%.*s must not be a named sequence"
4149                                     " in transliteration operator",
4150                                         /*  +1 to include the "}" */
4151                                     (int) (rbrace + 1 - start), start));
4152                                 *d++ = '\0';
4153                                 goto end_backslash_N;
4154                             }
4155
4156                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4157                                 has_above_latin1 = TRUE;
4158                             }
4159
4160                         }
4161                         else if (! SvUTF8(res)) {
4162                             /* Make sure \N{} return is UTF-8.  This is because
4163                              * \N{} implies Unicode semantics, and scalars have
4164                              * to be in utf8 to guarantee those semantics; but
4165                              * not needed in tr/// */
4166                             sv_utf8_upgrade_flags(res, 0);
4167                             str = SvPV_const(res, len);
4168                         }
4169
4170                          /* Upgrade destination to be utf8 if this new
4171                           * component is */
4172                         if (! d_is_utf8 && SvUTF8(res)) {
4173                             /* See Note on sizing above.  */
4174                             const STRLEN extra = len + (send - s) + 1;
4175
4176                             SvCUR_set(sv, d - SvPVX_const(sv));
4177                             SvPOK_on(sv);
4178                             *d = '\0';
4179
4180                             if (utf8_variant_count == 0) {
4181                                 SvUTF8_on(sv);
4182                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4183                             }
4184                             else {
4185                                 sv_utf8_upgrade_flags_grow(sv,
4186                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4187                                                 extra);
4188                                 d = SvPVX(sv) + SvCUR(sv);
4189                             }
4190                             d_is_utf8 = TRUE;
4191                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4192
4193                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4194                              * set correctly here). */
4195                             const STRLEN extra = len + (send - rbrace) + 1;
4196                             const STRLEN off = d - SvPVX_const(sv);
4197                             d = off + SvGROW(sv, off + extra);
4198                         }
4199                         Copy(str, d, len, char);
4200                         d += len;
4201                     }
4202
4203                     SvREFCNT_dec(res);
4204
4205                 } /* End \N{NAME} */
4206
4207               end_backslash_N:
4208 #ifdef EBCDIC
4209                 backslash_N++; /* \N{} is defined to be Unicode */
4210 #endif
4211                 s = rbrace + 1;  /* Point to just after the '}' */
4212                 continue;
4213
4214             /* \c is a control character */
4215             case 'c':
4216                 s++;
4217                 if (s < send) {
4218                     const char * message;
4219
4220                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4221                         yyerror(message);
4222                         yyquit();   /* Have always immediately croaked on
4223                                        errors in this */
4224                     }
4225                     d++;
4226                 }
4227                 else {
4228                     yyerror("Missing control char name in \\c");
4229                     yyquit();   /* Are at end of input, no sense continuing */
4230                 }
4231 #ifdef EBCDIC
4232                 non_portable_endpoint++;
4233 #endif
4234                 break;
4235
4236             /* printf-style backslashes, formfeeds, newlines, etc */
4237             case 'b':
4238                 *d++ = '\b';
4239                 break;
4240             case 'n':
4241                 *d++ = '\n';
4242                 break;
4243             case 'r':
4244                 *d++ = '\r';
4245                 break;
4246             case 'f':
4247                 *d++ = '\f';
4248                 break;
4249             case 't':
4250                 *d++ = '\t';
4251                 break;
4252             case 'e':
4253                 *d++ = ESC_NATIVE;
4254                 break;
4255             case 'a':
4256                 *d++ = '\a';
4257                 break;
4258             } /* end switch */
4259
4260             s++;
4261             continue;
4262         } /* end if (backslash) */
4263
4264     default_action:
4265         /* Just copy the input to the output, though we may have to convert
4266          * to/from UTF-8.
4267          *
4268          * If the input has the same representation in UTF-8 as not, it will be
4269          * a single byte, and we don't care about UTF8ness; just copy the byte */
4270         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4271             *d++ = *s++;
4272         }
4273         else if (! s_is_utf8 && ! d_is_utf8) {
4274             /* If neither source nor output is UTF-8, is also a single byte,
4275              * just copy it; but this byte counts should we later have to
4276              * convert to UTF-8 */
4277             *d++ = *s++;
4278             utf8_variant_count++;
4279         }
4280         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4281             const STRLEN len = UTF8SKIP(s);
4282
4283             /* We expect the source to have already been checked for
4284              * malformedness */
4285             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4286
4287             Copy(s, d, len, U8);
4288             d += len;
4289             s += len;
4290         }
4291         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4292             STRLEN need = send - s + 1; /* See Note on sizing above. */
4293
4294             SvCUR_set(sv, d - SvPVX_const(sv));
4295             SvPOK_on(sv);
4296             *d = '\0';
4297
4298             if (utf8_variant_count == 0) {
4299                 SvUTF8_on(sv);
4300                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4301             }
4302             else {
4303                 sv_utf8_upgrade_flags_grow(sv,
4304                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4305                                            need);
4306                 d = SvPVX(sv) + SvCUR(sv);
4307             }
4308             d_is_utf8 = TRUE;
4309             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4310         }
4311         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4312                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4313                    the input byte since we haven't incremented 's' yet. See
4314                    Note on sizing above. */
4315             const STRLEN off = d - SvPVX(sv);
4316             const STRLEN extra = 2 + (send - s - 1) + 1;
4317             if (off + extra > SvLEN(sv)) {
4318                 d = off + SvGROW(sv, off + extra);
4319             }
4320             *d++ = UTF8_EIGHT_BIT_HI(*s);
4321             *d++ = UTF8_EIGHT_BIT_LO(*s);
4322             s++;
4323         }
4324     } /* while loop to process each character */
4325
4326     {
4327         const STRLEN off = d - SvPVX(sv);
4328
4329         /* See if room for the terminating NUL */
4330         if (UNLIKELY(off >= SvLEN(sv))) {
4331
4332 #ifndef DEBUGGING
4333
4334             if (off > SvLEN(sv))
4335 #endif
4336                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4337                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4338
4339             /* Whew!  Here we don't have room for the terminating NUL, but
4340              * everything else so far has fit.  It's not too late to grow
4341              * to fit the NUL and continue on.  But it is a bug, as the code
4342              * above was supposed to have made room for this, so under
4343              * DEBUGGING builds, we panic anyway.  */
4344             d = off + SvGROW(sv, off + 1);
4345         }
4346     }
4347
4348     /* terminate the string and set up the sv */
4349     *d = '\0';
4350     SvCUR_set(sv, d - SvPVX_const(sv));
4351
4352     SvPOK_on(sv);
4353     if (d_is_utf8) {
4354         SvUTF8_on(sv);
4355     }
4356
4357     /* shrink the sv if we allocated more than we used */
4358     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4359         SvPV_shrink_to_cur(sv);
4360     }
4361
4362     /* return the substring (via pl_yylval) only if we parsed anything */
4363     if (s > start) {
4364         char *s2 = start;
4365         for (; s2 < s; s2++) {
4366             if (*s2 == '\n')
4367                 COPLINE_INC_WITH_HERELINES;
4368         }
4369         SvREFCNT_inc_simple_void_NN(sv);
4370         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4371             && ! PL_parser->lex_re_reparsing)
4372         {
4373             const char *const key = PL_lex_inpat ? "qr" : "q";
4374             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4375             const char *type;
4376             STRLEN typelen;
4377
4378             if (PL_lex_inwhat == OP_TRANS) {
4379                 type = "tr";
4380                 typelen = 2;
4381             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4382                 type = "s";
4383                 typelen = 1;
4384             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4385                 type = "q";
4386                 typelen = 1;
4387             } else {
4388                 type = "qq";
4389                 typelen = 2;
4390             }
4391
4392             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4393                                 type, typelen, NULL);
4394         }
4395         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4396     }
4397     LEAVE_with_name("scan_const");
4398     return s;
4399 }
4400
4401 /* S_intuit_more
4402  * Returns TRUE if there's more to the expression (e.g., a subscript),
4403  * FALSE otherwise.
4404  *
4405  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4406  *
4407  * ->[ and ->{ return TRUE
4408  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4409  * { and [ outside a pattern are always subscripts, so return TRUE
4410  * if we're outside a pattern and it's not { or [, then return FALSE
4411  * if we're in a pattern and the first char is a {
4412  *   {4,5} (any digits around the comma) returns FALSE
4413  * if we're in a pattern and the first char is a [
4414  *   [] returns FALSE
4415  *   [SOMETHING] has a funky algorithm to decide whether it's a
4416  *      character class or not.  It has to deal with things like
4417  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4418  * anything else returns TRUE
4419  */
4420
4421 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4422
4423 STATIC int
4424 S_intuit_more(pTHX_ char *s, char *e)
4425 {
4426     PERL_ARGS_ASSERT_INTUIT_MORE;
4427
4428     if (PL_lex_brackets)
4429         return TRUE;
4430     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4431         return TRUE;
4432     if (*s == '-' && s[1] == '>'
4433      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4434      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4435         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4436         return TRUE;
4437     if (*s != '{' && *s != '[')
4438         return FALSE;
4439     PL_parser->sub_no_recover = TRUE;
4440     if (!PL_lex_inpat)
4441         return TRUE;
4442
4443     /* In a pattern, so maybe we have {n,m}. */
4444     if (*s == '{') {
4445         if (regcurly(s, e, NULL)) {
4446             return FALSE;
4447         }
4448         return TRUE;
4449     }
4450
4451     /* On the other hand, maybe we have a character class */
4452
4453     s++;
4454     if (*s == ']' || *s == '^')
4455         return FALSE;
4456     else {
4457         /* this is terrifying, and it works */
4458         int weight;
4459         char seen[256];
4460         const char * const send = (char *) memchr(s, ']', e - s);
4461         unsigned char un_char, last_un_char;
4462         char tmpbuf[sizeof PL_tokenbuf * 4];
4463
4464         if (!send)              /* has to be an expression */
4465             return TRUE;
4466         weight = 2;             /* let's weigh the evidence */
4467
4468         if (*s == '$')
4469             weight -= 3;
4470         else if (isDIGIT(*s)) {
4471             if (s[1] != ']') {
4472                 if (isDIGIT(s[1]) && s[2] == ']')
4473                     weight -= 10;
4474             }
4475             else
4476                 weight -= 100;
4477         }
4478         Zero(seen,256,char);
4479         un_char = 255;
4480         for (; s < send; s++) {
4481             last_un_char = un_char;
4482             un_char = (unsigned char)*s;
4483             switch (*s) {
4484             case '@':
4485             case '&':
4486             case '$':
4487                 weight -= seen[un_char] * 10;
4488                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4489                     int len;
4490                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4491                     len = (int)strlen(tmpbuf);
4492                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4493                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4494                         weight -= 100;
4495                     else
4496                         weight -= 10;
4497                 }
4498                 else if (*s == '$'
4499                          && s[1]
4500                          && memCHRs("[#!%*<>()-=",s[1]))
4501                 {
4502                     if (/*{*/ memCHRs("])} =",s[2]))
4503                         weight -= 10;
4504                     else
4505                         weight -= 1;
4506                 }
4507                 break;
4508             case '\\':
4509                 un_char = 254;
4510                 if (s[1]) {
4511                     if (memCHRs("wds]",s[1]))
4512                         weight += 100;
4513                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4514                         weight += 1;
4515                     else if (memCHRs("rnftbxcav",s[1]))
4516                         weight += 40;
4517                     else if (isDIGIT(s[1])) {
4518                         weight += 40;
4519                         while (s[1] && isDIGIT(s[1]))
4520                             s++;
4521                     }
4522                 }
4523                 else
4524                     weight += 100;
4525                 break;
4526             case '-':
4527                 if (s[1] == '\\')
4528                     weight += 50;
4529                 if (memCHRs("aA01! ",last_un_char))
4530                     weight += 30;
4531                 if (memCHRs("zZ79~",s[1]))
4532                     weight += 30;
4533                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4534                     weight -= 5;        /* cope with negative subscript */
4535                 break;
4536             default:
4537                 if (!isWORDCHAR(last_un_char)
4538                     && !(last_un_char == '$' || last_un_char == '@'
4539                          || last_un_char == '&')
4540                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4541                     char *d = s;
4542                     while (isALPHA(*s))
4543                         s++;
4544                     if (keyword(d, s - d, 0))
4545                         weight -= 150;
4546                 }
4547                 if (un_char == last_un_char + 1)
4548                     weight += 5;
4549                 weight -= seen[un_char];
4550                 break;
4551             }
4552             seen[un_char]++;
4553         }
4554         if (weight >= 0)        /* probably a character class */
4555             return FALSE;
4556     }
4557
4558     return TRUE;
4559 }
4560
4561 /*
4562  * S_intuit_method
4563  *
4564  * Does all the checking to disambiguate
4565  *   foo bar
4566  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4567  * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4568  *
4569  * First argument is the stuff after the first token, e.g. "bar".
4570  *
4571  * Not a method if foo is a filehandle.
4572  * Not a method if foo is a subroutine prototyped to take a filehandle.
4573  * Not a method if it's really "Foo $bar"
4574  * Method if it's "foo $bar"
4575  * Not a method if it's really "print foo $bar"
4576  * Method if it's really "foo package::" (interpreted as package->foo)
4577  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4578  * Not a method if bar is a filehandle or package, but is quoted with
4579  *   =>
4580  */
4581
4582 STATIC int
4583 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4584 {
4585     char *s = start + (*start == '$');
4586     char tmpbuf[sizeof PL_tokenbuf];
4587     STRLEN len;
4588     GV* indirgv;
4589         /* Mustn't actually add anything to a symbol table.
4590            But also don't want to "initialise" any placeholder
4591            constants that might already be there into full
4592            blown PVGVs with attached PVCV.  */
4593     GV * const gv =
4594         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4595
4596     PERL_ARGS_ASSERT_INTUIT_METHOD;
4597
4598     if (!FEATURE_INDIRECT_IS_ENABLED)
4599         return 0;
4600
4601     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4602             return 0;
4603     if (cv && SvPOK(cv)) {
4604         const char *proto = CvPROTO(cv);
4605         if (proto) {
4606             while (*proto && (isSPACE(*proto) || *proto == ';'))
4607                 proto++;
4608             if (*proto == '*')
4609                 return 0;
4610         }
4611     }
4612
4613     if (*start == '$') {
4614         SSize_t start_off = start - SvPVX(PL_linestr);
4615         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4616             || isUPPER(*PL_tokenbuf))
4617             return 0;
4618         /* this could be $# */
4619         if (isSPACE(*s))
4620             s = skipspace(s);
4621         PL_bufptr = SvPVX(PL_linestr) + start_off;
4622         PL_expect = XREF;
4623         return *s == '(' ? METHCALL : METHCALL0;
4624     }
4625
4626     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4627     /* start is the beginning of the possible filehandle/object,
4628      * and s is the end of it
4629      * tmpbuf is a copy of it (but with single quotes as double colons)
4630      */
4631
4632     if (!keyword(tmpbuf, len, 0)) {
4633         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4634             len -= 2;
4635             tmpbuf[len] = '\0';
4636             goto bare_package;
4637         }
4638         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4639                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4640                                     SVt_PVCV);
4641         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4642          && (!isGV(indirgv) || GvCVu(indirgv)))
4643             return 0;
4644         /* filehandle or package name makes it a method */
4645         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4646             s = skipspace(s);
4647             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4648                 return 0;       /* no assumptions -- "=>" quotes bareword */
4649       bare_package:
4650             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4651                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4652             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4653             PL_expect = XTERM;
4654             force_next(BAREWORD);
4655             PL_bufptr = s;
4656             return *s == '(' ? METHCALL : METHCALL0;
4657         }
4658     }
4659     return 0;
4660 }
4661
4662 /* Encoded script support. filter_add() effectively inserts a
4663  * 'pre-processing' function into the current source input stream.
4664  * Note that the filter function only applies to the current source file
4665  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4666  *
4667  * The datasv parameter (which may be NULL) can be used to pass
4668  * private data to this instance of the filter. The filter function
4669  * can recover the SV using the FILTER_DATA macro and use it to
4670  * store private buffers and state information.
4671  *
4672  * The supplied datasv parameter is upgraded to a PVIO type
4673  * and the IoDIRP/IoANY field is used to store the function pointer,
4674  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4675  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4676  * private use must be set using malloc'd pointers.
4677  */
4678
4679 SV *
4680 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4681 {
4682     if (!funcp)
4683         return NULL;
4684
4685     if (!PL_parser)
4686         return NULL;
4687
4688     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4689         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4690
4691     if (!PL_rsfp_filters)
4692         PL_rsfp_filters = newAV();
4693     if (!datasv)
4694         datasv = newSV(0);
4695     SvUPGRADE(datasv, SVt_PVIO);
4696     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4697     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4698     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4699                           FPTR2DPTR(void *, IoANY(datasv)),
4700                           SvPV_nolen(datasv)));
4701     av_unshift(PL_rsfp_filters, 1);
4702     av_store(PL_rsfp_filters, 0, datasv) ;
4703     if (
4704         !PL_parser->filtered
4705      && PL_parser->lex_flags & LEX_EVALBYTES
4706      && PL_bufptr < PL_bufend
4707     ) {
4708         const char *s = PL_bufptr;
4709         while (s < PL_bufend) {
4710             if (*s == '\n') {
4711                 SV *linestr = PL_parser->linestr;
4712                 char *buf = SvPVX(linestr);
4713                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4714                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4715                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4716                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4717                 STRLEN const last_uni_pos =
4718                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4719                 STRLEN const last_lop_pos =
4720                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4721                 av_push(PL_rsfp_filters, linestr);
4722                 PL_parser->linestr =
4723                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4724                 buf = SvPVX(PL_parser->linestr);
4725                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4726                 PL_parser->bufptr = buf + bufptr_pos;
4727                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4728                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4729                 PL_parser->linestart = buf + linestart_pos;
4730                 if (PL_parser->last_uni)
4731                     PL_parser->last_uni = buf + last_uni_pos;
4732                 if (PL_parser->last_lop)
4733                     PL_parser->last_lop = buf + last_lop_pos;
4734                 SvLEN_set(linestr, SvCUR(linestr));
4735                 SvCUR_set(linestr, s - SvPVX(linestr));
4736                 PL_parser->filtered = 1;
4737                 break;
4738             }
4739             s++;
4740         }
4741     }
4742     return(datasv);
4743 }
4744
4745 /*
4746 =for apidoc_section $filters
4747 =for apidoc filter_del
4748
4749 Delete most recently added instance of the filter function argument
4750
4751 =cut
4752 */
4753
4754 void
4755 Perl_filter_del(pTHX_ filter_t funcp)
4756 {
4757     SV *datasv;
4758
4759     PERL_ARGS_ASSERT_FILTER_DEL;
4760
4761 #ifdef DEBUGGING
4762     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4763                           FPTR2DPTR(void*, funcp)));
4764 #endif
4765     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4766         return;
4767     /* if filter is on top of stack (usual case) just pop it off */
4768     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4769     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4770         sv_free(av_pop(PL_rsfp_filters));
4771
4772         return;
4773     }
4774     /* we need to search for the correct entry and clear it     */
4775     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4776 }
4777
4778
4779 /* Invoke the idxth filter function for the current rsfp.        */
4780 /* maxlen 0 = read one text line */
4781 I32
4782 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4783 {
4784     filter_t funcp;
4785     I32 ret;
4786     SV *datasv = NULL;
4787     /* This API is bad. It should have been using unsigned int for maxlen.
4788        Not sure if we want to change the API, but if not we should sanity
4789        check the value here.  */
4790     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4791
4792     PERL_ARGS_ASSERT_FILTER_READ;
4793
4794     if (!PL_parser || !PL_rsfp_filters)
4795         return -1;
4796     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4797         /* Provide a default input filter to make life easy.    */
4798         /* Note that we append to the line. This is handy.      */
4799         DEBUG_P(PerlIO_printf(Perl_debug_log,
4800                               "filter_read %d: from rsfp\n", idx));
4801         if (correct_length) {
4802             /* Want a block */
4803             int len ;
4804             const int old_len = SvCUR(buf_sv);
4805
4806             /* ensure buf_sv is large enough */
4807             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4808             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4809                                    correct_length)) <= 0) {
4810                 if (PerlIO_error(PL_rsfp))
4811                     return -1;          /* error */
4812                 else
4813                     return 0 ;          /* end of file */
4814             }
4815             SvCUR_set(buf_sv, old_len + len) ;
4816             SvPVX(buf_sv)[old_len + len] = '\0';
4817         } else {
4818             /* Want a line */
4819             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4820                 if (PerlIO_error(PL_rsfp))
4821                     return -1;          /* error */
4822                 else
4823                     return 0 ;          /* end of file */
4824             }
4825         }
4826         return SvCUR(buf_sv);
4827     }
4828     /* Skip this filter slot if filter has been deleted */
4829     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4830         DEBUG_P(PerlIO_printf(Perl_debug_log,
4831                               "filter_read %d: skipped (filter deleted)\n",
4832                               idx));
4833         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4834     }
4835     if (SvTYPE(datasv) != SVt_PVIO) {
4836         if (correct_length) {
4837             /* Want a block */
4838             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4839             if (!remainder) return 0; /* eof */
4840             if (correct_length > remainder) correct_length = remainder;
4841             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4842             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4843         } else {
4844             /* Want a line */
4845             const char *s = SvEND(datasv);
4846             const char *send = SvPVX(datasv) + SvLEN(datasv);
4847             while (s < send) {
4848                 if (*s == '\n') {
4849                     s++;
4850                     break;
4851                 }
4852                 s++;
4853             }
4854             if (s == send) return 0; /* eof */
4855             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4856             SvCUR_set(datasv, s-SvPVX(datasv));
4857         }
4858         return SvCUR(buf_sv);
4859     }
4860     /* Get function pointer hidden within datasv        */
4861     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4862     DEBUG_P(PerlIO_printf(Perl_debug_log,
4863                           "filter_read %d: via function %p (%s)\n",
4864                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4865     /* Call function. The function is expected to       */
4866     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4867     /* Return: <0:error, =0:eof, >0:not eof             */
4868     ENTER;
4869     save_scalar(PL_errgv);
4870     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4871     LEAVE;
4872     return ret;
4873 }
4874
4875 STATIC char *
4876 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4877 {
4878     PERL_ARGS_ASSERT_FILTER_GETS;
4879
4880 #ifdef PERL_CR_FILTER
4881     if (!PL_rsfp_filters) {
4882         filter_add(S_cr_textfilter,NULL);
4883     }
4884 #endif
4885     if (PL_rsfp_filters) {
4886         if (!append)
4887             SvCUR_set(sv, 0);   /* start with empty line        */
4888         if (FILTER_READ(0, sv, 0) > 0)
4889             return ( SvPVX(sv) ) ;
4890         else
4891             return NULL ;
4892     }
4893     else
4894         return (sv_gets(sv, PL_rsfp, append));
4895 }
4896
4897 STATIC HV *
4898 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4899 {
4900     GV *gv;
4901
4902     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4903
4904     if (memEQs(pkgname, len, "__PACKAGE__"))
4905         return PL_curstash;
4906
4907     if (len > 2
4908         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4909         && (gv = gv_fetchpvn_flags(pkgname,
4910                                    len,
4911                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4912     {
4913         return GvHV(gv);                        /* Foo:: */
4914     }
4915
4916     /* use constant CLASS => 'MyClass' */
4917     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4918     if (gv && GvCV(gv)) {
4919         SV * const sv = cv_const_sv(GvCV(gv));
4920         if (sv)
4921             return gv_stashsv(sv, 0);
4922     }
4923
4924     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4925 }
4926
4927
4928 STATIC char *
4929 S_tokenize_use(pTHX_ int is_use, char *s) {
4930     PERL_ARGS_ASSERT_TOKENIZE_USE;
4931
4932     if (PL_expect != XSTATE)
4933         /* diag_listed_as: "use" not allowed in expression */
4934         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4935                     is_use ? "use" : "no"));
4936     PL_expect = XTERM;
4937     s = skipspace(s);
4938     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4939         s = force_version(s, TRUE);
4940         if (*s == ';' || *s == '}'
4941                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4942             NEXTVAL_NEXTTOKE.opval = NULL;
4943             force_next(BAREWORD);
4944         }
4945         else if (*s == 'v') {
4946             s = force_word(s,BAREWORD,FALSE,TRUE);
4947             s = force_version(s, FALSE);
4948         }
4949     }
4950     else {
4951         s = force_word(s,BAREWORD,FALSE,TRUE);
4952         s = force_version(s, FALSE);
4953     }
4954     pl_yylval.ival = is_use;
4955     return s;
4956 }
4957 #ifdef DEBUGGING
4958     static const char* const exp_name[] =
4959         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4960           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4961           "SIGVAR", "TERMORDORDOR"
4962         };
4963 #endif
4964
4965 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4966 STATIC bool
4967 S_word_takes_any_delimiter(char *p, STRLEN len)
4968 {
4969     return (len == 1 && memCHRs("msyq", p[0]))
4970             || (len == 2
4971                 && ((p[0] == 't' && p[1] == 'r')
4972                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4973 }
4974
4975 static void
4976 S_check_scalar_slice(pTHX_ char *s)
4977 {
4978     s++;
4979     while (SPACE_OR_TAB(*s)) s++;
4980     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4981                                                              PL_bufend,
4982                                                              UTF))
4983     {
4984         return;
4985     }
4986     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4987            || (*s && memCHRs(" \t$#+-'\"", *s)))
4988     {
4989         s += UTF ? UTF8SKIP(s) : 1;
4990     }
4991     if (*s == '}' || *s == ']')
4992         pl_yylval.ival = OPpSLICEWARNING;
4993 }
4994
4995 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4996 static void
4997 S_lex_token_boundary(pTHX)
4998 {
4999     PL_oldoldbufptr = PL_oldbufptr;
5000     PL_oldbufptr = PL_bufptr;
5001 }
5002
5003 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5004 static char *
5005 S_vcs_conflict_marker(pTHX_ char *s)
5006 {
5007     lex_token_boundary();
5008     PL_bufptr = s;
5009     yyerror("Version control conflict marker");
5010     while (s < PL_bufend && *s != '\n')
5011         s++;
5012     return s;
5013 }
5014
5015 static int
5016 yyl_sigvar(pTHX_ char *s)
5017 {
5018     /* we expect the sigil and optional var name part of a
5019      * signature element here. Since a '$' is not necessarily
5020      * followed by a var name, handle it specially here; the general
5021      * yylex code would otherwise try to interpret whatever follows
5022      * as a var; e.g. ($, ...) would be seen as the var '$,'
5023      */
5024
5025     U8 sigil;
5026
5027     s = skipspace(s);
5028     sigil = *s++;
5029     PL_bufptr = s; /* for error reporting */
5030     switch (sigil) {
5031     case '$':
5032     case '@':
5033     case '%':
5034         /* spot stuff that looks like an prototype */
5035         if (memCHRs("$:@%&*;\\[]", *s)) {
5036             yyerror("Illegal character following sigil in a subroutine signature");
5037             break;
5038         }
5039         /* '$#' is banned, while '$ # comment' isn't */
5040         if (*s == '#') {
5041             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5042             break;
5043         }
5044         s = skipspace(s);
5045         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5046             char *dest = PL_tokenbuf + 1;
5047             /* read var name, including sigil, into PL_tokenbuf */
5048             PL_tokenbuf[0] = sigil;
5049             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5050                 0, cBOOL(UTF), FALSE, FALSE);
5051             *dest = '\0';
5052             assert(PL_tokenbuf[1]); /* we have a variable name */
5053         }
5054         else {
5055             *PL_tokenbuf = 0;
5056             PL_in_my = 0;
5057         }
5058
5059         s = skipspace(s);
5060         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5061          * as the ASSIGNOP, and exclude other tokens that start with =
5062          */
5063         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5064             /* save now to report with the same context as we did when
5065              * all ASSIGNOPS were accepted */
5066             PL_oldbufptr = s;
5067
5068             ++s;
5069             NEXTVAL_NEXTTOKE.ival = 0;
5070             force_next(ASSIGNOP);
5071             PL_expect = XTERM;
5072         }
5073         else if (*s == ',' || *s == ')') {
5074             PL_expect = XOPERATOR;
5075         }
5076         else {
5077             /* make sure the context shows the unexpected character and
5078              * hopefully a bit more */
5079             if (*s) ++s;
5080             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5081                 s++;
5082             PL_bufptr = s; /* for error reporting */
5083             yyerror("Illegal operator following parameter in a subroutine signature");
5084             PL_in_my = 0;
5085         }
5086         if (*PL_tokenbuf) {
5087             NEXTVAL_NEXTTOKE.ival = sigil;
5088             force_next('p'); /* force a signature pending identifier */
5089         }
5090         break;
5091
5092     case ')':
5093         PL_expect = XBLOCK;
5094         break;
5095     case ',': /* handle ($a,,$b) */
5096         break;
5097
5098     default:
5099         PL_in_my = 0;
5100         yyerror("A signature parameter must start with '$', '@' or '%'");
5101         /* very crude error recovery: skip to likely next signature
5102          * element */
5103         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5104             s++;
5105         break;
5106     }
5107
5108     switch (sigil) {
5109         case ',': TOKEN (PERLY_COMMA);
5110         case '$': TOKEN (PERLY_DOLLAR);
5111         case '@': TOKEN (PERLY_SNAIL);
5112         case '%': TOKEN (PERLY_PERCENT_SIGN);
5113         case ')': TOKEN (PERLY_PAREN_CLOSE);
5114         default:  TOKEN (sigil);
5115     }
5116 }
5117
5118 static int
5119 yyl_dollar(pTHX_ char *s)
5120 {
5121     CLINE;
5122
5123     if (PL_expect == XPOSTDEREF) {
5124         if (s[1] == '#') {
5125             s++;
5126             POSTDEREF(DOLSHARP);
5127         }
5128         POSTDEREF(PERLY_DOLLAR);
5129     }
5130
5131     if (   s[1] == '#'
5132         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5133             || memCHRs("{$:+-@", s[2])))
5134     {
5135         PL_tokenbuf[0] = '@';
5136         s = scan_ident(s + 1, PL_tokenbuf + 1,
5137                        sizeof PL_tokenbuf - 1, FALSE);
5138         if (PL_expect == XOPERATOR) {
5139             char *d = s;
5140             if (PL_bufptr > s) {
5141                 d = PL_bufptr-1;
5142                 PL_bufptr = PL_oldbufptr;
5143             }
5144             no_op("Array length", d);
5145         }
5146         if (!PL_tokenbuf[1])
5147             PREREF(DOLSHARP);
5148         PL_expect = XOPERATOR;
5149         force_ident_maybe_lex('#');
5150         TOKEN(DOLSHARP);
5151     }
5152
5153     PL_tokenbuf[0] = '$';
5154     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5155     if (PL_expect == XOPERATOR) {
5156         char *d = s;
5157         if (PL_bufptr > s) {
5158             d = PL_bufptr-1;
5159             PL_bufptr = PL_oldbufptr;
5160         }
5161         no_op("Scalar", d);
5162     }
5163     if (!PL_tokenbuf[1]) {
5164         if (s == PL_bufend)
5165             yyerror("Final $ should be \\$ or $name");
5166         PREREF(PERLY_DOLLAR);
5167     }
5168
5169     {
5170         const char tmp = *s;
5171         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5172             s = skipspace(s);
5173
5174         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5175             && intuit_more(s, PL_bufend)) {
5176             if (*s == '[') {
5177                 PL_tokenbuf[0] = '@';
5178                 if (ckWARN(WARN_SYNTAX)) {
5179                     char *t = s+1;
5180
5181                     while ( t < PL_bufend ) {
5182                         if (isSPACE(*t)) {
5183                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5184                             /* consumed one or more space chars */
5185                         } else if (*t == '$' || *t == '@') {
5186                             /* could be more than one '$' like $$ref or @$ref */
5187                             do { t++; } while (t < PL_bufend && *t == '$');
5188
5189                             /* could be an abigail style identifier like $ foo */
5190                             while (t < PL_bufend && *t == ' ') t++;
5191
5192                             /* strip off the name of the var */
5193                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5194                                 t += UTF ? UTF8SKIP(t) : 1;
5195                             /* consumed a varname */
5196                         } else if (isDIGIT(*t)) {
5197                             /* deal with hex constants like 0x11 */
5198                             if (t[0] == '0' && t[1] == 'x') {
5199                                 t += 2;
5200                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5201                             } else {
5202                                 /* deal with decimal/octal constants like 1 and 0123 */
5203                                 do { t++; } while (isDIGIT(*t));
5204                                 if (t<PL_bufend && *t == '.') {
5205                                     do { t++; } while (isDIGIT(*t));
5206                                 }
5207                             }
5208                             /* consumed a number */
5209                         } else {
5210                             /* not a var nor a space nor a number */
5211                             break;
5212                         }
5213                     }
5214                     if (t < PL_bufend && *t++ == ',') {
5215                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5216                         while (t < PL_bufend && *t != ']')
5217                             t++;
5218                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5219                                     "Multidimensional syntax %" UTF8f " not supported",
5220                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5221                     }
5222                 }
5223             }
5224             else if (*s == '{') {
5225                 char *t;
5226                 PL_tokenbuf[0] = '%';
5227                 if (    strEQ(PL_tokenbuf+1, "SIG")
5228                     && ckWARN(WARN_SYNTAX)
5229                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5230                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5231                 {
5232                     char tmpbuf[sizeof PL_tokenbuf];
5233                     do {
5234                         t++;
5235                     } while (isSPACE(*t));
5236                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5237                         STRLEN len;
5238                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5239                                         &len);
5240                         while (isSPACE(*t))
5241                             t++;
5242                         if (  *t == ';'
5243                             && get_cvn_flags(tmpbuf, len, UTF
5244                                                             ? SVf_UTF8
5245                                                             : 0))
5246                         {
5247                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5248                                 "You need to quote \"%" UTF8f "\"",
5249                                     UTF8fARG(UTF, len, tmpbuf));
5250                         }
5251                     }
5252                 }
5253             }
5254         }
5255
5256         PL_expect = XOPERATOR;
5257         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5258             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5259             if (!islop || PL_last_lop_op == OP_GREPSTART)
5260                 PL_expect = XOPERATOR;
5261             else if (memCHRs("$@\"'`q", *s))
5262                 PL_expect = XTERM;              /* e.g. print $fh "foo" */
5263             else if (   memCHRs("&*<%", *s)
5264                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5265             {
5266                 PL_expect = XTERM;              /* e.g. print $fh &sub */
5267             }
5268             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5269                 char tmpbuf[sizeof PL_tokenbuf];
5270                 int t2;
5271                 STRLEN len;
5272                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5273                 if ((t2 = keyword(tmpbuf, len, 0))) {
5274                     /* binary operators exclude handle interpretations */
5275                     switch (t2) {
5276                     case -KEY_x:
5277                     case -KEY_eq:
5278                     case -KEY_ne:
5279                     case -KEY_gt:
5280                     case -KEY_lt:
5281                     case -KEY_ge:
5282                     case -KEY_le:
5283                     case -KEY_cmp:
5284                         break;
5285                     default:
5286                         PL_expect = XTERM;      /* e.g. print $fh length() */
5287                         break;
5288                     }
5289                 }
5290                 else {
5291                     PL_expect = XTERM;  /* e.g. print $fh subr() */
5292                 }
5293             }
5294             else if (isDIGIT(*s))
5295                 PL_expect = XTERM;              /* e.g. print $fh 3 */
5296             else if (*s == '.' && isDIGIT(s[1]))
5297                 PL_expect = XTERM;              /* e.g. print $fh .3 */
5298             else if ((*s == '?' || *s == '-' || *s == '+')
5299                 && !isSPACE(s[1]) && s[1] != '=')
5300                 PL_expect = XTERM;              /* e.g. print $fh -1 */
5301             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5302                      && s[1] != '/')
5303                 PL_expect = XTERM;              /* e.g. print $fh /.../
5304                                                XXX except DORDOR operator
5305                                             */
5306             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5307                      && s[2] != '=')
5308                 PL_expect = XTERM;              /* print $fh <<"EOF" */
5309         }
5310     }
5311     force_ident_maybe_lex('$');
5312     TOKEN(PERLY_DOLLAR);
5313 }
5314
5315 static int
5316 yyl_sub(pTHX_ char *s, const int key)
5317 {
5318     char * const tmpbuf = PL_tokenbuf + 1;
5319     bool have_name, have_proto;
5320     STRLEN len;
5321     SV *format_name = NULL;
5322     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5323
5324     SSize_t off = s-SvPVX(PL_linestr);
5325     char *d;
5326
5327     s = skipspace(s); /* can move PL_linestr */
5328
5329     d = SvPVX(PL_linestr)+off;
5330
5331     SAVEBOOL(PL_parser->sig_seen);
5332     PL_parser->sig_seen = FALSE;
5333
5334     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5335         || *s == '\''
5336         || (*s == ':' && s[1] == ':'))
5337     {
5338
5339         PL_expect = XATTRBLOCK;
5340         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5341                       &len);
5342         if (key == KEY_format)
5343             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5344         *PL_tokenbuf = '&';
5345         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5346          || pad_findmy_pvn(
5347                 PL_tokenbuf, len + 1, 0
5348             ) != NOT_IN_PAD)
5349             sv_setpvn(PL_subname, tmpbuf, len);
5350         else {
5351             sv_setsv(PL_subname,PL_curstname);
5352             sv_catpvs(PL_subname,"::");
5353             sv_catpvn(PL_subname,tmpbuf,len);
5354         }
5355         if (SvUTF8(PL_linestr))
5356             SvUTF8_on(PL_subname);
5357         have_name = TRUE;
5358
5359         s = skipspace(d);
5360     }
5361     else {
5362         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5363             *d = '\0';
5364             /* diag_listed_as: Missing name in "%s sub" */
5365             Perl_croak(aTHX_
5366                       "Missing name in \"%s\"", PL_bufptr);
5367         }
5368         PL_expect = XATTRTERM;
5369         sv_setpvs(PL_subname,"?");
5370         have_name = FALSE;
5371     }
5372
5373     if (key == KEY_format) {
5374         if (format_name) {
5375             NEXTVAL_NEXTTOKE.opval
5376                 = newSVOP(OP_CONST,0, format_name);
5377             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5378             force_next(BAREWORD);
5379         }
5380         PREBLOCK(KW_FORMAT);
5381     }
5382
5383     /* Look for a prototype */
5384     if (*s == '(' && !is_sigsub) {
5385         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5386         if (!s)
5387             Perl_croak(aTHX_ "Prototype not terminated");
5388         COPLINE_SET_FROM_MULTI_END;
5389         (void)validate_proto(PL_subname, PL_lex_stuff,
5390                              ckWARN(WARN_ILLEGALPROTO), 0);
5391         have_proto = TRUE;
5392
5393         s = skipspace(s);
5394     }
5395     else
5396         have_proto = FALSE;
5397
5398     if (  !(*s == ':' && s[1] != ':')
5399         && (*s != '{' && *s != '(') && key != KEY_format)
5400     {
5401         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5402                key == KEY_DESTROY || key == KEY_BEGIN ||
5403                key == KEY_UNITCHECK || key == KEY_CHECK ||
5404                key == KEY_INIT || key == KEY_END ||
5405                key == KEY_my || key == KEY_state ||
5406                key == KEY_our);
5407         if (!have_name)
5408             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5409         else if (*s != ';' && *s != '}')
5410             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5411     }
5412
5413     if (have_proto) {
5414         NEXTVAL_NEXTTOKE.opval =
5415             newSVOP(OP_CONST, 0, PL_lex_stuff);
5416         PL_lex_stuff = NULL;
5417         force_next(THING);
5418     }
5419     if (!have_name) {
5420         if (PL_curstash)
5421             sv_setpvs(PL_subname, "__ANON__");
5422         else
5423             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5424         if (is_sigsub)
5425             TOKEN(KW_SUB_anon_sig);
5426         else
5427             TOKEN(KW_SUB_anon);
5428     }
5429     force_ident_maybe_lex('&');
5430     if (is_sigsub)
5431         TOKEN(KW_SUB_named_sig);
5432     else
5433         TOKEN(KW_SUB_named);
5434 }
5435
5436 static int
5437 yyl_interpcasemod(pTHX_ char *s)
5438 {
5439 #ifdef DEBUGGING
5440     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5441         Perl_croak(aTHX_
5442                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5443                    PL_bufptr, PL_bufend, *PL_bufptr);
5444 #endif
5445
5446     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5447         /* if at a \E */
5448         if (PL_lex_casemods) {
5449             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5450             PL_lex_casestack[PL_lex_casemods] = '\0';
5451
5452             if (PL_bufptr != PL_bufend
5453                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5454                     || oldmod == 'F')) {
5455                 PL_bufptr += 2;
5456                 PL_lex_state = LEX_INTERPCONCAT;
5457             }
5458             PL_lex_allbrackets--;
5459             return REPORT(PERLY_PAREN_CLOSE);
5460         }
5461         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5462            /* Got an unpaired \E */
5463            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5464                     "Useless use of \\E");
5465         }
5466         if (PL_bufptr != PL_bufend)
5467             PL_bufptr += 2;
5468         PL_lex_state = LEX_INTERPCONCAT;
5469         return yylex();
5470     }
5471     else {
5472         DEBUG_T({
5473             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5474         });
5475         s = PL_bufptr + 1;
5476         if (s[1] == '\\' && s[2] == 'E') {
5477             PL_bufptr = s + 3;
5478             PL_lex_state = LEX_INTERPCONCAT;
5479             return yylex();
5480         }
5481         else {
5482             I32 tmp;
5483             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5484                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5485             {
5486                 tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
5487             }
5488             if ((*s == 'L' || *s == 'U' || *s == 'F')
5489                 && (strpbrk(PL_lex_casestack, "LUF")))
5490             {
5491                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5492                 PL_lex_allbrackets--;
5493                 return REPORT(PERLY_PAREN_CLOSE);
5494             }
5495             if (PL_lex_casemods > 10)
5496                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5497             PL_lex_casestack[PL_lex_casemods++] = *s;
5498             PL_lex_casestack[PL_lex_casemods] = '\0';
5499             PL_lex_state = LEX_INTERPCONCAT;
5500             NEXTVAL_NEXTTOKE.ival = 0;
5501             force_next((2<<24)|PERLY_PAREN_OPEN);
5502             if (*s == 'l')
5503                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5504             else if (*s == 'u')
5505                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5506             else if (*s == 'L')
5507                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5508             else if (*s == 'U')
5509                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5510             else if (*s == 'Q')
5511                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5512             else if (*s == 'F')
5513                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5514             else
5515                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5516             PL_bufptr = s + 1;
5517         }
5518         force_next(FUNC);
5519         if (PL_lex_starts) {
5520             s = PL_bufptr;
5521             PL_lex_starts = 0;
5522             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5523             if (PL_lex_casemods == 1 && PL_lex_inpat)
5524                 TOKEN(PERLY_COMMA);
5525             else
5526                 AopNOASSIGN(OP_CONCAT);
5527         }
5528         else
5529             return yylex();
5530     }
5531 }
5532
5533 static int
5534 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5535                         GV **pgv, GV ***pgvp)
5536 {
5537     GV *ogv = NULL;     /* override (winner) */
5538     GV *hgv = NULL;     /* hidden (loser) */
5539     GV *gv = *pgv;
5540
5541     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5542         CV *cv;
5543         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5544                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5545                                     SVt_PVCV))
5546             && (cv = GvCVu(gv)))
5547         {
5548             if (GvIMPORTED_CV(gv))
5549                 ogv = gv;
5550             else if (! CvNOWARN_AMBIGUOUS(cv))
5551                 hgv = gv;
5552         }
5553         if (!ogv
5554             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5555             && (gv = **pgvp)
5556             && (isGV_with_GP(gv)
5557                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5558                 :   SvPCS_IMPORTED(gv)
5559                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5560                                                          len, 0), 1)))
5561         {
5562             ogv = gv;
5563         }
5564     }
5565
5566     *pgv = gv;
5567
5568     if (ogv) {
5569         *orig_keyword = key;
5570         return 0;               /* overridden by import or by GLOBAL */
5571     }
5572     else if (gv && !*pgvp
5573              && -key==KEY_lock  /* XXX generalizable kludge */
5574              && GvCVu(gv))
5575     {
5576         return 0;               /* any sub overrides "weak" keyword */
5577     }
5578     else {                      /* no override */
5579         key = -key;
5580         if (key == KEY_dump) {
5581             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5582         }
5583         *pgv = NULL;
5584         *pgvp = 0;
5585         if (hgv && key != KEY_x)        /* never ambiguous */
5586             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5587                            "Ambiguous call resolved as CORE::%s(), "
5588                            "qualify as such or use &",
5589                            GvENAME(hgv));
5590         return key;
5591     }
5592 }
5593
5594 static int
5595 yyl_qw(pTHX_ char *s, STRLEN len)
5596 {
5597     OP *words = NULL;
5598
5599     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5600     if (!s)
5601         missingterm(NULL, 0);
5602
5603     COPLINE_SET_FROM_MULTI_END;
5604     PL_expect = XOPERATOR;
5605     if (SvCUR(PL_lex_stuff)) {
5606         int warned_comma = !ckWARN(WARN_QW);
5607         int warned_comment = warned_comma;
5608         char *d = SvPV_force(PL_lex_stuff, len);
5609         while (len) {
5610             for (; isSPACE(*d) && len; --len, ++d)
5611                 /**/;
5612             if (len) {
5613                 SV *sv;
5614                 const char *b = d;
5615                 if (!warned_comma || !warned_comment) {
5616                     for (; !isSPACE(*d) && len; --len, ++d) {
5617                         if (!warned_comma && *d == ',') {
5618                             Perl_warner(aTHX_ packWARN(WARN_QW),
5619                                 "Possible attempt to separate words with commas");
5620                             ++warned_comma;
5621                         }
5622                         else if (!warned_comment && *d == '#') {
5623                             Perl_warner(aTHX_ packWARN(WARN_QW),
5624                                 "Possible attempt to put comments in qw() list");
5625                             ++warned_comment;
5626                         }
5627                     }
5628                 }
5629                 else {
5630                     for (; !isSPACE(*d) && len; --len, ++d)
5631                         /**/;
5632                 }
5633                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5634                 words = op_append_elem(OP_LIST, words,
5635                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5636             }
5637         }
5638     }
5639     if (!words)
5640         words = newNULLLIST();
5641     SvREFCNT_dec_NN(PL_lex_stuff);
5642     PL_lex_stuff = NULL;
5643     PL_expect = XOPERATOR;
5644     pl_yylval.opval = sawparens(words);
5645     TOKEN(QWLIST);
5646 }
5647
5648 static int
5649 yyl_hyphen(pTHX_ char *s)
5650 {
5651     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5652         I32 ftst = 0;
5653         char tmp;
5654
5655         s++;
5656         PL_bufptr = s;
5657         tmp = *s++;
5658
5659         while (s < PL_bufend && SPACE_OR_TAB(*s))
5660             s++;
5661
5662         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5663             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5664             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5665             OPERATOR(PERLY_MINUS);              /* unary minus */
5666         }
5667         switch (tmp) {
5668         case 'r': ftst = OP_FTEREAD;    break;
5669         case 'w': ftst = OP_FTEWRITE;   break;
5670         case 'x': ftst = OP_FTEEXEC;    break;
5671         case 'o': ftst = OP_FTEOWNED;   break;
5672         case 'R': ftst = OP_FTRREAD;    break;
5673         case 'W': ftst = OP_FTRWRITE;   break;
5674         case 'X': ftst = OP_FTREXEC;    break;
5675         case 'O': ftst = OP_FTROWNED;   break;
5676         case 'e': ftst = OP_FTIS;       break;
5677         case 'z': ftst = OP_FTZERO;     break;
5678         case 's': ftst = OP_FTSIZE;     break;
5679         case 'f': ftst = OP_FTFILE;     break;
5680         case 'd': ftst = OP_FTDIR;      break;
5681         case 'l': ftst = OP_FTLINK;     break;
5682         case 'p': ftst = OP_FTPIPE;     break;
5683         case 'S': ftst = OP_FTSOCK;     break;
5684         case 'u': ftst = OP_FTSUID;     break;
5685         case 'g': ftst = OP_FTSGID;     break;
5686         case 'k': ftst = OP_FTSVTX;     break;
5687         case 'b': ftst = OP_FTBLK;      break;
5688         case 'c': ftst = OP_FTCHR;      break;
5689         case 't': ftst = OP_FTTTY;      break;
5690         case 'T': ftst = OP_FTTEXT;     break;
5691         case 'B': ftst = OP_FTBINARY;   break;
5692         case 'M': case 'A': case 'C':
5693             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5694             switch (tmp) {
5695             case 'M': ftst = OP_FTMTIME; break;
5696             case 'A': ftst = OP_FTATIME; break;
5697             case 'C': ftst = OP_FTCTIME; break;
5698             default:                     break;
5699             }
5700             break;
5701         default:
5702             break;
5703         }
5704         if (ftst) {
5705             PL_last_uni = PL_oldbufptr;
5706             PL_last_lop_op = (OPCODE)ftst;
5707             DEBUG_T( {
5708                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5709             } );
5710             FTST(ftst);
5711         }
5712         else {
5713             /* Assume it was a minus followed by a one-letter named
5714              * subroutine call (or a -bareword), then. */
5715             DEBUG_T( {
5716                 PerlIO_printf(Perl_debug_log,
5717                     "### '-%c' looked like a file test but was not\n",
5718                     (int) tmp);
5719             } );
5720             s = --PL_bufptr;
5721         }
5722     }
5723     {
5724         const char tmp = *s++;
5725         if (*s == tmp) {
5726             s++;
5727             if (PL_expect == XOPERATOR)
5728                 TERM(POSTDEC);
5729             else
5730                 OPERATOR(PREDEC);
5731         }
5732         else if (*s == '>') {
5733             s++;
5734             s = skipspace(s);
5735             if (((*s == '$' || *s == '&') && s[1] == '*')
5736               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5737               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5738               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5739              )
5740             {
5741                 PL_expect = XPOSTDEREF;
5742                 TOKEN(ARROW);
5743             }
5744             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5745                 s = force_word(s,METHCALL0,FALSE,TRUE);
5746                 TOKEN(ARROW);
5747             }
5748             else if (*s == '$')
5749                 OPERATOR(ARROW);
5750             else
5751                 TERM(ARROW);
5752         }
5753         if (PL_expect == XOPERATOR) {
5754             if (*s == '='
5755                 && !PL_lex_allbrackets
5756                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5757             {
5758                 s--;
5759                 TOKEN(0);
5760             }
5761             Aop(OP_SUBTRACT);
5762         }
5763         else {
5764             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5765                 check_uni();
5766             OPERATOR(PERLY_MINUS);              /* unary minus */
5767         }
5768     }
5769 }
5770
5771 static int
5772 yyl_plus(pTHX_ char *s)
5773 {
5774     const char tmp = *s++;
5775     if (*s == tmp) {
5776         s++;
5777         if (PL_expect == XOPERATOR)
5778             TERM(POSTINC);
5779         else
5780             OPERATOR(PREINC);
5781     }
5782     if (PL_expect == XOPERATOR) {
5783         if (*s == '='
5784             && !PL_lex_allbrackets
5785             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5786         {
5787             s--;
5788             TOKEN(0);
5789         }
5790         Aop(OP_ADD);
5791     }
5792     else {
5793         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5794             check_uni();
5795         OPERATOR(PERLY_PLUS);
5796     }
5797 }
5798
5799 static int
5800 yyl_star(pTHX_ char *s)
5801 {
5802     if (PL_expect == XPOSTDEREF)
5803         POSTDEREF(PERLY_STAR);
5804
5805     if (PL_expect != XOPERATOR) {
5806         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5807         PL_expect = XOPERATOR;
5808         force_ident(PL_tokenbuf, PERLY_STAR);
5809         if (!*PL_tokenbuf)
5810             PREREF(PERLY_STAR);
5811         TERM(PERLY_STAR);
5812     }
5813
5814     s++;
5815     if (*s == '*') {
5816         s++;
5817         if (*s == '=' && !PL_lex_allbrackets
5818             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5819         {
5820             s -= 2;
5821             TOKEN(0);
5822         }
5823         PWop(OP_POW);
5824     }
5825
5826     if (*s == '='
5827         && !PL_lex_allbrackets
5828         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5829     {
5830         s--;
5831         TOKEN(0);
5832     }
5833
5834     Mop(OP_MULTIPLY);
5835 }
5836
5837 static int
5838 yyl_percent(pTHX_ char *s)
5839 {
5840     if (PL_expect == XOPERATOR) {
5841         if (s[1] == '='
5842             && !PL_lex_allbrackets
5843             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5844         {
5845             TOKEN(0);
5846         }
5847         ++s;
5848         Mop(OP_MODULO);
5849     }
5850     else if (PL_expect == XPOSTDEREF)
5851         POSTDEREF(PERLY_PERCENT_SIGN);
5852
5853     PL_tokenbuf[0] = '%';
5854     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5855     pl_yylval.ival = 0;
5856     if (!PL_tokenbuf[1]) {
5857         PREREF(PERLY_PERCENT_SIGN);
5858     }
5859     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5860         && intuit_more(s, PL_bufend)) {
5861         if (*s == '[')
5862             PL_tokenbuf[0] = '@';
5863     }
5864     PL_expect = XOPERATOR;
5865     force_ident_maybe_lex('%');
5866     TERM(PERLY_PERCENT_SIGN);
5867 }
5868
5869 static int
5870 yyl_caret(pTHX_ char *s)
5871 {
5872     char *d = s;
5873     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5874     if (bof && s[1] == '.')
5875         s++;
5876     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5877             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5878     {
5879         s = d;
5880         TOKEN(0);
5881     }
5882     s++;
5883     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5884 }
5885
5886 static int
5887 yyl_colon(pTHX_ char *s)
5888 {
5889     OP *attrs;
5890
5891     switch (PL_expect) {
5892     case XOPERATOR:
5893         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5894             break;
5895         PL_bufptr = s;  /* update in case we back off */
5896         if (*s == '=') {
5897             Perl_croak(aTHX_
5898                        "Use of := for an empty attribute list is not allowed");
5899         }
5900         goto grabattrs;
5901     case XATTRBLOCK:
5902         PL_expect = XBLOCK;
5903         goto grabattrs;
5904     case XATTRTERM:
5905         PL_expect = XTERMBLOCK;
5906      grabattrs:
5907         /* NB: as well as parsing normal attributes, we also end up
5908          * here if there is something looking like attributes
5909          * following a signature (which is illegal, but used to be
5910          * legal in 5.20..5.26). If the latter, we still parse the
5911          * attributes so that error messages(s) are less confusing,
5912          * but ignore them (parser->sig_seen).
5913          */
5914         s = skipspace(s);
5915         attrs = NULL;
5916         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5917             bool sig = PL_parser->sig_seen;
5918             I32 tmp;
5919             SV *sv;
5920             STRLEN len;
5921             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5922             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5923                 if (tmp < 0) tmp = -tmp;
5924                 switch (tmp) {
5925                 case KEY_or:
5926                 case KEY_and:
5927                 case KEY_for:
5928                 case KEY_foreach:
5929                 case KEY_unless:
5930                 case KEY_if:
5931                 case KEY_while:
5932                 case KEY_until:
5933                     goto got_attrs;
5934                 default:
5935                     break;
5936                 }
5937             }
5938             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5939             if (*d == '(') {
5940                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5941                 if (!d) {
5942                     if (attrs)
5943                         op_free(attrs);
5944                     sv_free(sv);
5945                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5946                 }
5947                 COPLINE_SET_FROM_MULTI_END;
5948             }
5949             if (PL_lex_stuff) {
5950                 sv_catsv(sv, PL_lex_stuff);
5951                 attrs = op_append_elem(OP_LIST, attrs,
5952                                     newSVOP(OP_CONST, 0, sv));
5953                 SvREFCNT_dec_NN(PL_lex_stuff);
5954                 PL_lex_stuff = NULL;
5955             }
5956             else {
5957                 /* NOTE: any CV attrs applied here need to be part of
5958                    the CVf_BUILTIN_ATTRS define in cv.h! */
5959                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5960                     sv_free(sv);
5961                     if (!sig)
5962                         CvLVALUE_on(PL_compcv);
5963                 }
5964                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5965                     sv_free(sv);
5966                     if (!sig)
5967                         CvNOWARN_AMBIGUOUS_on(PL_compcv);
5968                 }
5969                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5970                     sv_free(sv);
5971                     if (!sig) {
5972                         Perl_ck_warner_d(aTHX_
5973                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5974                            ":const is experimental"
5975                         );
5976                         CvANONCONST_on(PL_compcv);
5977                         if (!CvANON(PL_compcv))
5978                             yyerror(":const is not permitted on named "
5979                                     "subroutines");
5980                     }
5981                 }
5982                 /* After we've set the flags, it could be argued that
5983                    we don't need to do the attributes.pm-based setting
5984                    process, and shouldn't bother appending recognized
5985                    flags.  To experiment with that, uncomment the
5986                    following "else".  (Note that's already been
5987                    uncommented.  That keeps the above-applied built-in
5988                    attributes from being intercepted (and possibly
5989                    rejected) by a package's attribute routines, but is
5990                    justified by the performance win for the common case
5991                    of applying only built-in attributes.) */
5992                 else
5993                     attrs = op_append_elem(OP_LIST, attrs,
5994                                         newSVOP(OP_CONST, 0,
5995                                                 sv));
5996             }
5997             s = skipspace(d);
5998             if (*s == ':' && s[1] != ':')
5999                 s = skipspace(s+1);
6000             else if (s == d)
6001                 break;  /* require real whitespace or :'s */
6002             /* XXX losing whitespace on sequential attributes here */
6003         }
6004
6005         if (*s != ';'
6006             && *s != '}'
6007             && !(PL_expect == XOPERATOR
6008                  ? (*s == '=' ||  *s == ')')
6009                  : (*s == '{' ||  *s == '(')))
6010         {
6011             const char q = ((*s == '\'') ? '"' : '\'');
6012             /* If here for an expression, and parsed no attrs, back off. */
6013             if (PL_expect == XOPERATOR && !attrs) {
6014                 s = PL_bufptr;
6015                 break;
6016             }
6017             /* MUST advance bufptr here to avoid bogus "at end of line"
6018                context messages from yyerror().
6019             */
6020             PL_bufptr = s;
6021             yyerror( (const char *)
6022                      (*s
6023                       ? Perl_form(aTHX_ "Invalid separator character "
6024                                   "%c%c%c in attribute list", q, *s, q)
6025                       : "Unterminated attribute list" ) );
6026             if (attrs)
6027                 op_free(attrs);
6028             OPERATOR(PERLY_COLON);
6029         }
6030
6031     got_attrs:
6032         if (PL_parser->sig_seen) {
6033             /* see comment about about sig_seen and parser error
6034              * handling */
6035             if (attrs)
6036                 op_free(attrs);
6037             Perl_croak(aTHX_ "Subroutine attributes must come "
6038                              "before the signature");
6039         }
6040         if (attrs) {
6041             NEXTVAL_NEXTTOKE.opval = attrs;
6042             force_next(THING);
6043         }
6044         TOKEN(COLONATTR);
6045     }
6046
6047     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6048         s--;
6049         TOKEN(0);
6050     }
6051
6052     PL_lex_allbrackets--;
6053     OPERATOR(PERLY_COLON);
6054 }
6055
6056 static int
6057 yyl_subproto(pTHX_ char *s, CV *cv)
6058 {
6059     STRLEN protolen = CvPROTOLEN(cv);
6060     const char *proto = CvPROTO(cv);
6061     bool optional;
6062
6063     proto = S_strip_spaces(aTHX_ proto, &protolen);
6064     if (!protolen)
6065         TERM(FUNC0SUB);
6066     if ((optional = *proto == ';')) {
6067         do {
6068             proto++;
6069         } while (*proto == ';');
6070     }
6071
6072     if (
6073         (
6074             (
6075                 *proto == '$' || *proto == '_'
6076              || *proto == '*' || *proto == '+'
6077             )
6078          && proto[1] == '\0'
6079         )
6080      || (
6081          *proto == '\\' && proto[1] && proto[2] == '\0'
6082         )
6083     ) {
6084         UNIPROTO(UNIOPSUB,optional);
6085     }
6086
6087     if (*proto == '\\' && proto[1] == '[') {
6088         const char *p = proto + 2;
6089         while(*p && *p != ']')
6090             ++p;
6091         if(*p == ']' && !p[1])
6092             UNIPROTO(UNIOPSUB,optional);
6093     }
6094
6095     if (*proto == '&' && *s == '{') {
6096         if (PL_curstash)
6097             sv_setpvs(PL_subname, "__ANON__");
6098         else
6099             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6100         if (!PL_lex_allbrackets
6101             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6102         {
6103             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6104         }
6105         PREBLOCK(LSTOPSUB);
6106     }
6107
6108     return KEY_NULL;
6109 }
6110
6111 static int
6112 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6113 {
6114     char *d;
6115     if (PL_lex_brackets > 100) {
6116         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6117     }
6118
6119     switch (PL_expect) {
6120     case XTERM:
6121     case XTERMORDORDOR:
6122         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6123         PL_lex_allbrackets++;
6124         OPERATOR(HASHBRACK);
6125     case XOPERATOR:
6126         while (s < PL_bufend && SPACE_OR_TAB(*s))
6127             s++;
6128         d = s;
6129         PL_tokenbuf[0] = '\0';
6130         if (d < PL_bufend && *d == '-') {
6131             PL_tokenbuf[0] = '-';
6132             d++;
6133             while (d < PL_bufend && SPACE_OR_TAB(*d))
6134                 d++;
6135         }
6136         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6137             STRLEN len;
6138             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6139                           FALSE, &len);
6140             while (d < PL_bufend && SPACE_OR_TAB(*d))
6141                 d++;
6142             if (*d == '}') {
6143                 const char minus = (PL_tokenbuf[0] == '-');
6144                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6145                 if (minus)
6146                     force_next(PERLY_MINUS);
6147             }
6148         }
6149         /* FALLTHROUGH */
6150     case XATTRTERM:
6151     case XTERMBLOCK:
6152         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153         PL_lex_allbrackets++;
6154         PL_expect = XSTATE;
6155         break;
6156     case XATTRBLOCK:
6157     case XBLOCK:
6158         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6159         PL_lex_allbrackets++;
6160         PL_expect = XSTATE;
6161         break;
6162     case XBLOCKTERM:
6163         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6164         PL_lex_allbrackets++;
6165         PL_expect = XSTATE;
6166         break;
6167     default: {
6168             const char *t;
6169             if (PL_oldoldbufptr == PL_last_lop)
6170                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6171             else
6172                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6173             PL_lex_allbrackets++;
6174             s = skipspace(s);
6175             if (*s == '}') {
6176                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6177                     PL_expect = XTERM;
6178                     /* This hack is to get the ${} in the message. */
6179                     PL_bufptr = s+1;
6180                     yyerror("syntax error");
6181                     break;
6182                 }
6183                 OPERATOR(HASHBRACK);
6184             }
6185             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6186                 /* ${...} or @{...} etc., but not print {...}
6187                  * Skip the disambiguation and treat this as a block.
6188                  */
6189                 goto block_expectation;
6190             }
6191             /* This hack serves to disambiguate a pair of curlies
6192              * as being a block or an anon hash.  Normally, expectation
6193              * determines that, but in cases where we're not in a
6194              * position to expect anything in particular (like inside
6195              * eval"") we have to resolve the ambiguity.  This code
6196              * covers the case where the first term in the curlies is a
6197              * quoted string.  Most other cases need to be explicitly
6198              * disambiguated by prepending a "+" before the opening
6199              * curly in order to force resolution as an anon hash.
6200              *
6201              * XXX should probably propagate the outer expectation
6202              * into eval"" to rely less on this hack, but that could
6203              * potentially break current behavior of eval"".
6204              * GSAR 97-07-21
6205              */
6206             t = s;
6207             if (*s == '\'' || *s == '"' || *s == '`') {
6208                 /* common case: get past first string, handling escapes */
6209                 for (t++; t < PL_bufend && *t != *s;)
6210                     if (*t++ == '\\')
6211                         t++;
6212                 t++;
6213             }
6214             else if (*s == 'q') {
6215                 if (++t < PL_bufend
6216                     && (!isWORDCHAR(*t)
6217                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6218                             && !isWORDCHAR(*t))))
6219                 {
6220                     /* skip q//-like construct */
6221                     const char *tmps;
6222                     char open, close, term;
6223                     I32 brackets = 1;
6224
6225                     while (t < PL_bufend && isSPACE(*t))
6226                         t++;
6227                     /* check for q => */
6228                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6229                         OPERATOR(HASHBRACK);
6230                     }
6231                     term = *t;
6232                     open = term;
6233                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6234                         term = tmps[5];
6235                     close = term;
6236                     if (open == close)
6237                         for (t++; t < PL_bufend; t++) {
6238                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6239                                 t++;
6240                             else if (*t == open)
6241                                 break;
6242                         }
6243                     else {
6244                         for (t++; t < PL_bufend; t++) {
6245                             if (*t == '\\' && t+1 < PL_bufend)
6246                                 t++;
6247                             else if (*t == close && --brackets <= 0)
6248                                 break;
6249                             else if (*t == open)
6250                                 brackets++;
6251                         }
6252                     }
6253                     t++;
6254                 }
6255                 else
6256                     /* skip plain q word */
6257                     while (   t < PL_bufend
6258                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6259                     {
6260                         t += UTF ? UTF8SKIP(t) : 1;
6261                     }
6262             }
6263             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6264                 t += UTF ? UTF8SKIP(t) : 1;
6265                 while (   t < PL_bufend
6266                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6267                 {
6268                     t += UTF ? UTF8SKIP(t) : 1;
6269                 }
6270             }
6271             while (t < PL_bufend && isSPACE(*t))
6272                 t++;
6273             /* if comma follows first term, call it an anon hash */
6274             /* XXX it could be a comma expression with loop modifiers */
6275             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6276                                || (*t == '=' && t[1] == '>')))
6277                 OPERATOR(HASHBRACK);
6278             if (PL_expect == XREF) {
6279               block_expectation:
6280                 /* If there is an opening brace or 'sub:', treat it
6281                    as a term to make ${{...}}{k} and &{sub:attr...}
6282                    dwim.  Otherwise, treat it as a statement, so
6283                    map {no strict; ...} works.
6284                  */
6285                 s = skipspace(s);
6286                 if (*s == '{') {
6287                     PL_expect = XTERM;
6288                     break;
6289                 }
6290                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6291                     PL_bufptr = s;
6292                     d = s + 3;
6293                     d = skipspace(d);
6294                     s = PL_bufptr;
6295                     if (*d == ':') {
6296                         PL_expect = XTERM;
6297                         break;
6298                     }
6299                 }
6300                 PL_expect = XSTATE;
6301             }
6302             else {
6303                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6304                 PL_expect = XSTATE;
6305             }
6306         }
6307         break;
6308     }
6309
6310     pl_yylval.ival = CopLINE(PL_curcop);
6311     PL_copline = NOLINE;   /* invalidate current command line number */
6312     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6313 }
6314
6315 static int
6316 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6317 {
6318     assert(s != PL_bufend);
6319     s++;
6320
6321     if (PL_lex_brackets <= 0)
6322         /* diag_listed_as: Unmatched right %s bracket */
6323         yyerror("Unmatched right curly bracket");
6324     else
6325         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6326
6327     PL_lex_allbrackets--;
6328
6329     if (PL_lex_state == LEX_INTERPNORMAL) {
6330         if (PL_lex_brackets == 0) {
6331             if (PL_expect & XFAKEBRACK) {
6332                 PL_expect &= XENUMMASK;
6333                 PL_lex_state = LEX_INTERPEND;
6334                 PL_bufptr = s;
6335                 return yylex(); /* ignore fake brackets */
6336             }
6337             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6338              && SvEVALED(PL_lex_repl))
6339                 PL_lex_state = LEX_INTERPEND;
6340             else if (*s == '-' && s[1] == '>')
6341                 PL_lex_state = LEX_INTERPENDMAYBE;
6342             else if (*s != '[' && *s != '{')
6343                 PL_lex_state = LEX_INTERPEND;
6344         }
6345     }
6346
6347     if (PL_expect & XFAKEBRACK) {
6348         PL_expect &= XENUMMASK;
6349         PL_bufptr = s;
6350         return yylex();         /* ignore fake brackets */
6351     }
6352
6353     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6354     if (formbrack) LEAVE_with_name("lex_format");
6355     if (formbrack == 2) { /* means . where arguments were expected */
6356         force_next(PERLY_SEMICOLON);
6357         TOKEN(FORMRBRACK);
6358     }
6359
6360     TOKEN(PERLY_SEMICOLON);
6361 }
6362
6363 static int
6364 yyl_ampersand(pTHX_ char *s)
6365 {
6366     if (PL_expect == XPOSTDEREF)
6367         POSTDEREF(PERLY_AMPERSAND);
6368
6369     s++;
6370     if (*s++ == '&') {
6371         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6372                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6373             s -= 2;
6374             TOKEN(0);
6375         }
6376         AOPERATOR(ANDAND);
6377     }
6378     s--;
6379
6380     if (PL_expect == XOPERATOR) {
6381         char *d;
6382         bool bof;
6383         if (   PL_bufptr == PL_linestart
6384             && ckWARN(WARN_SEMICOLON)
6385             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6386         {
6387             CopLINE_dec(PL_curcop);
6388             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6389             CopLINE_inc(PL_curcop);
6390         }
6391         d = s;
6392         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6393             s++;
6394         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6396             s = d;
6397             s--;
6398             TOKEN(0);
6399         }
6400         if (d == s)
6401             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6402         else
6403             BAop(OP_SBIT_AND);
6404     }
6405
6406     PL_tokenbuf[0] = '&';
6407     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6408     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6409
6410     if (PL_tokenbuf[1])
6411         force_ident_maybe_lex('&');
6412     else
6413         PREREF(PERLY_AMPERSAND);
6414
6415     TERM(PERLY_AMPERSAND);
6416 }
6417
6418 static int
6419 yyl_verticalbar(pTHX_ char *s)
6420 {
6421     char *d;
6422     bool bof;
6423
6424     s++;
6425     if (*s++ == '|') {
6426         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6427                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6428             s -= 2;
6429             TOKEN(0);
6430         }
6431         AOPERATOR(OROR);
6432     }
6433
6434     s--;
6435     d = s;
6436     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6437         s++;
6438
6439     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6440             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6441         s = d - 1;
6442         TOKEN(0);
6443     }
6444
6445     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6446 }
6447
6448 static int
6449 yyl_bang(pTHX_ char *s)
6450 {
6451     const char tmp = *s++;
6452     if (tmp == '=') {
6453         /* was this !=~ where !~ was meant?
6454          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6455
6456         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6457             const char *t = s+1;
6458
6459             while (t < PL_bufend && isSPACE(*t))
6460                 ++t;
6461
6462             if (*t == '/' || *t == '?'
6463                 || ((*t == 'm' || *t == 's' || *t == 'y')
6464                     && !isWORDCHAR(t[1]))
6465                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6466                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6467                             "!=~ should be !~");
6468         }
6469
6470         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6471             s -= 2;
6472             TOKEN(0);
6473         }
6474
6475         ChEop(OP_NE);
6476     }
6477
6478     if (tmp == '~')
6479         PMop(OP_NOT);
6480
6481     s--;
6482     OPERATOR(PERLY_EXCLAMATION_MARK);
6483 }
6484
6485 static int
6486 yyl_snail(pTHX_ char *s)
6487 {
6488     if (PL_expect == XPOSTDEREF)
6489         POSTDEREF(PERLY_SNAIL);
6490     PL_tokenbuf[0] = '@';
6491     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6492     if (PL_expect == XOPERATOR) {
6493         char *d = s;
6494         if (PL_bufptr > s) {
6495             d = PL_bufptr-1;
6496             PL_bufptr = PL_oldbufptr;
6497         }
6498         no_op("Array", d);
6499     }
6500     pl_yylval.ival = 0;
6501     if (!PL_tokenbuf[1]) {
6502         PREREF(PERLY_SNAIL);
6503     }
6504     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6505         s = skipspace(s);
6506     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6507         && intuit_more(s, PL_bufend))
6508     {
6509         if (*s == '{')
6510             PL_tokenbuf[0] = '%';
6511
6512         /* Warn about @ where they meant $. */
6513         if (*s == '[' || *s == '{') {
6514             if (ckWARN(WARN_SYNTAX)) {
6515                 S_check_scalar_slice(aTHX_ s);
6516             }
6517         }
6518     }
6519     PL_expect = XOPERATOR;
6520     force_ident_maybe_lex('@');
6521     TERM(PERLY_SNAIL);
6522 }
6523
6524 static int
6525 yyl_slash(pTHX_ char *s)
6526 {
6527     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6528         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6529                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6530             TOKEN(0);
6531         s += 2;
6532         AOPERATOR(DORDOR);
6533     }
6534     else if (PL_expect == XOPERATOR) {
6535         s++;
6536         if (*s == '=' && !PL_lex_allbrackets
6537             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6538         {
6539             s--;
6540             TOKEN(0);
6541         }
6542         Mop(OP_DIVIDE);
6543     }
6544     else {
6545         /* Disable warning on "study /blah/" */
6546         if (    PL_oldoldbufptr == PL_last_uni
6547             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6548                 || memNE(PL_last_uni, "study", 5)
6549                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6550          ))
6551             check_uni();
6552         s = scan_pat(s,OP_MATCH);
6553         TERM(sublex_start());
6554     }
6555 }
6556
6557 static int
6558 yyl_leftsquare(pTHX_ char *s)
6559 {
6560     if (PL_lex_brackets > 100)
6561         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6562     PL_lex_brackstack[PL_lex_brackets++] = 0;
6563     PL_lex_allbrackets++;
6564     s++;
6565     OPERATOR(PERLY_BRACKET_OPEN);
6566 }
6567
6568 static int
6569 yyl_rightsquare(pTHX_ char *s)
6570 {
6571     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6572         TOKEN(0);
6573     s++;
6574     if (PL_lex_brackets <= 0)
6575         /* diag_listed_as: Unmatched right %s bracket */
6576         yyerror("Unmatched right square bracket");
6577     else
6578         --PL_lex_brackets;
6579     PL_lex_allbrackets--;
6580     if (PL_lex_state == LEX_INTERPNORMAL) {
6581         if (PL_lex_brackets == 0) {
6582             if (*s == '-' && s[1] == '>')
6583                 PL_lex_state = LEX_INTERPENDMAYBE;
6584             else if (*s != '[' && *s != '{')
6585                 PL_lex_state = LEX_INTERPEND;
6586         }
6587     }
6588     TERM(PERLY_BRACKET_CLOSE);
6589 }
6590
6591 static int
6592 yyl_tilde(pTHX_ char *s)
6593 {
6594     bool bof;
6595     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6596         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6597             TOKEN(0);
6598         s += 2;
6599         Perl_ck_warner_d(aTHX_
6600             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6601             "Smartmatch is experimental");
6602         NCEop(OP_SMARTMATCH);
6603     }
6604     s++;
6605     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6606         s++;
6607         BCop(OP_SCOMPLEMENT);
6608     }
6609     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6610 }
6611
6612 static int
6613 yyl_leftparen(pTHX_ char *s)
6614 {
6615     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6616         PL_oldbufptr = PL_oldoldbufptr;         /* allow print(STDOUT 123) */
6617     else
6618         PL_expect = XTERM;
6619     s = skipspace(s);
6620     PL_lex_allbrackets++;
6621     TOKEN(PERLY_PAREN_OPEN);
6622 }
6623
6624 static int
6625 yyl_rightparen(pTHX_ char *s)
6626 {
6627     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6628         TOKEN(0);
6629     s++;
6630     PL_lex_allbrackets--;
6631     s = skipspace(s);
6632     if (*s == '{')
6633         PREBLOCK(PERLY_PAREN_CLOSE);
6634     TERM(PERLY_PAREN_CLOSE);
6635 }
6636
6637 static int
6638 yyl_leftpointy(pTHX_ char *s)
6639 {
6640     char tmp;
6641
6642     if (PL_expect != XOPERATOR) {
6643         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6644             check_uni();
6645         if (s[1] == '<' && s[2] != '>')
6646             s = scan_heredoc(s);
6647         else
6648             s = scan_inputsymbol(s);
6649         PL_expect = XOPERATOR;
6650         TOKEN(sublex_start());
6651     }
6652
6653     s++;
6654
6655     tmp = *s++;
6656     if (tmp == '<') {
6657         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6658             s -= 2;
6659             TOKEN(0);
6660         }
6661         SHop(OP_LEFT_SHIFT);
6662     }
6663     if (tmp == '=') {
6664         tmp = *s++;
6665         if (tmp == '>') {
6666             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6667                 s -= 3;
6668                 TOKEN(0);
6669             }
6670             NCEop(OP_NCMP);
6671         }
6672         s--;
6673         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6674             s -= 2;
6675             TOKEN(0);
6676         }
6677         ChRop(OP_LE);
6678     }
6679
6680     s--;
6681     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6682         s--;
6683         TOKEN(0);
6684     }
6685
6686     ChRop(OP_LT);
6687 }
6688
6689 static int
6690 yyl_rightpointy(pTHX_ char *s)
6691 {
6692     const char tmp = *s++;
6693
6694     if (tmp == '>') {
6695         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6696             s -= 2;
6697             TOKEN(0);
6698         }
6699         SHop(OP_RIGHT_SHIFT);
6700     }
6701     else if (tmp == '=') {
6702         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6703             s -= 2;
6704             TOKEN(0);
6705         }
6706         ChRop(OP_GE);
6707     }
6708
6709     s--;
6710     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6711         s--;
6712         TOKEN(0);
6713     }
6714
6715     ChRop(OP_GT);
6716 }
6717
6718 static int
6719 yyl_sglquote(pTHX_ char *s)
6720 {
6721     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6722     if (!s)
6723         missingterm(NULL, 0);
6724     COPLINE_SET_FROM_MULTI_END;
6725     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6726     if (PL_expect == XOPERATOR) {
6727         no_op("String",s);
6728     }
6729     pl_yylval.ival = OP_CONST;
6730     TERM(sublex_start());
6731 }
6732
6733 static int
6734 yyl_dblquote(pTHX_ char *s)
6735 {
6736     char *d;
6737     STRLEN len;
6738     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6739     DEBUG_T( {
6740         if (s)
6741             printbuf("### Saw string before %s\n", s);
6742         else
6743             PerlIO_printf(Perl_debug_log,
6744                          "### Saw unterminated string\n");
6745     } );
6746     if (PL_expect == XOPERATOR) {
6747             no_op("String",s);
6748     }
6749     if (!s)
6750         missingterm(NULL, 0);
6751     pl_yylval.ival = OP_CONST;
6752     /* FIXME. I think that this can be const if char *d is replaced by
6753        more localised variables.  */
6754     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6755         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6756             pl_yylval.ival = OP_STRINGIFY;
6757             break;
6758         }
6759     }
6760     if (pl_yylval.ival == OP_CONST)
6761         COPLINE_SET_FROM_MULTI_END;
6762     TERM(sublex_start());
6763 }
6764
6765 static int
6766 yyl_backtick(pTHX_ char *s)
6767 {
6768     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6769     DEBUG_T( {
6770         if (s)
6771             printbuf("### Saw backtick string before %s\n", s);
6772         else
6773             PerlIO_printf(Perl_debug_log,
6774                          "### Saw unterminated backtick string\n");
6775     } );
6776     if (PL_expect == XOPERATOR)
6777         no_op("Backticks",s);
6778     if (!s)
6779         missingterm(NULL, 0);
6780     pl_yylval.ival = OP_BACKTICK;
6781     TERM(sublex_start());
6782 }
6783
6784 static int
6785 yyl_backslash(pTHX_ char *s)
6786 {
6787     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6788         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6789                        *s, *s);
6790     if (PL_expect == XOPERATOR)
6791         no_op("Backslash",s);
6792     OPERATOR(REFGEN);
6793 }
6794
6795 static void
6796 yyl_data_handle(pTHX)
6797 {
6798     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6799                             ? PL_curstash
6800                             : PL_defstash;
6801     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6802
6803     if (!isGV(gv))
6804         gv_init(gv,stash,"DATA",4,0);
6805
6806     GvMULTI_on(gv);
6807     if (!GvIO(gv))
6808         GvIOp(gv) = newIO();
6809     IoIFP(GvIOp(gv)) = PL_rsfp;
6810
6811     /* Mark this internal pseudo-handle as clean */
6812     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6813     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6814         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6815     else
6816         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6817
6818 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6819     /* if the script was opened in binmode, we need to revert
6820      * it to text mode for compatibility; but only iff it has CRs
6821      * XXX this is a questionable hack at best. */
6822     if (PL_bufend-PL_bufptr > 2
6823         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6824     {
6825         Off_t loc = 0;
6826         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6827             loc = PerlIO_tell(PL_rsfp);
6828             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6829         }
6830         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6831             if (loc > 0)
6832                 PerlIO_seek(PL_rsfp, loc, 0);
6833         }
6834     }
6835 #endif
6836
6837 #ifdef PERLIO_LAYERS
6838     if (!IN_BYTES) {
6839         if (UTF)
6840             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6841     }
6842 #endif
6843
6844     PL_rsfp = NULL;
6845 }
6846
6847 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6848     __attribute__noreturn__;
6849
6850 PERL_STATIC_NO_RET void
6851 yyl_croak_unrecognised(pTHX_ char *s)
6852 {
6853     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6854     const char *c;
6855     char *d;
6856     STRLEN len;
6857
6858     if (UTF) {
6859         STRLEN skiplen = UTF8SKIP(s);
6860         STRLEN stravail = PL_bufend - s;
6861         c = sv_uni_display(dsv, newSVpvn_flags(s,
6862                                                skiplen > stravail ? stravail : skiplen,
6863                                                SVs_TEMP | SVf_UTF8),
6864                            10, UNI_DISPLAY_ISPRINT);
6865     }
6866     else {
6867         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6868     }
6869
6870     if (s >= PL_linestart) {
6871         d = PL_linestart;
6872     }
6873     else {
6874         /* somehow (probably due to a parse failure), PL_linestart has advanced
6875          * pass PL_bufptr, get a reasonable beginning of line
6876          */
6877         d = s;
6878         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6879             --d;
6880     }
6881     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6882     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6883         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6884     }
6885
6886     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6887                       UTF8fARG(UTF, (s - d), d),
6888                      (int) len + 1);
6889 }
6890
6891 static int
6892 yyl_require(pTHX_ char *s, I32 orig_keyword)
6893 {
6894     s = skipspace(s);
6895     if (isDIGIT(*s)) {
6896         s = force_version(s, FALSE);
6897     }
6898     else if (*s != 'v' || !isDIGIT(s[1])
6899             || (s = force_version(s, TRUE), *s == 'v'))
6900     {
6901         *PL_tokenbuf = '\0';
6902         s = force_word(s,BAREWORD,TRUE,TRUE);
6903         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6904                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6905                                    UTF))
6906         {
6907             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6908                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6909         }
6910         else if (*s == '<')
6911             yyerror("<> at require-statement should be quotes");
6912     }
6913
6914     if (orig_keyword == KEY_require)
6915         pl_yylval.ival = 1;
6916     else
6917         pl_yylval.ival = 0;
6918
6919     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6920     PL_bufptr = s;
6921     PL_last_uni = PL_oldbufptr;
6922     PL_last_lop_op = OP_REQUIRE;
6923     s = skipspace(s);
6924     return REPORT( (int)KW_REQUIRE );
6925 }
6926
6927 static int
6928 yyl_foreach(pTHX_ char *s)
6929 {
6930     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6931         return REPORT(0);
6932     pl_yylval.ival = CopLINE(PL_curcop);
6933     s = skipspace(s);
6934     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6935         char *p = s;
6936         SSize_t s_off = s - SvPVX(PL_linestr);
6937         bool paren_is_valid = FALSE;
6938         bool maybe_package = FALSE;
6939         bool saw_core = FALSE;
6940         bool core_valid = FALSE;
6941
6942         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6943             saw_core = TRUE;
6944             p += 6;
6945         }
6946         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6947             core_valid = TRUE;
6948             paren_is_valid = TRUE;
6949             if (isSPACE(p[2])) {
6950                 p = skipspace(p + 3);
6951                 maybe_package = TRUE;
6952             }
6953             else {
6954                 p += 2;
6955             }
6956         }
6957         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6958             core_valid = TRUE;
6959             if (isSPACE(p[3])) {
6960                 p = skipspace(p + 4);
6961                 maybe_package = TRUE;
6962             }
6963             else {
6964                 p += 3;
6965             }
6966         }
6967         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6968             core_valid = TRUE;
6969             if (isSPACE(p[5])) {
6970                 p = skipspace(p + 6);
6971             }
6972             else {
6973                 p += 5;
6974             }
6975         }
6976         if (saw_core && !core_valid) {
6977             Perl_croak(aTHX_ "Missing $ on loop variable");
6978         }
6979
6980         if (maybe_package && !saw_core) {
6981             /* skip optional package name, as in "for my abc $x (..)" */
6982             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6983                 STRLEN len;
6984                 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6985                 p = skipspace(p);
6986                 paren_is_valid = FALSE;
6987             }
6988         }
6989
6990         if (UNLIKELY(paren_is_valid && *p == '(')) {
6991             Perl_ck_warner_d(aTHX_
6992                              packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6993                              "for my (...) is experimental");
6994         }
6995         else if (UNLIKELY(*p != '$' && *p != '\\')) {
6996             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6997             Perl_croak(aTHX_ "Missing $ on loop variable");
6998         }
6999         /* The buffer may have been reallocated, update s */
7000         s = SvPVX(PL_linestr) + s_off;
7001     }
7002     OPERATOR(KW_FOR);
7003 }
7004
7005 static int
7006 yyl_do(pTHX_ char *s, I32 orig_keyword)
7007 {
7008     s = skipspace(s);
7009     if (*s == '{')
7010         PRETERMBLOCK(KW_DO);
7011     if (*s != '\'') {
7012         char *d;
7013         STRLEN len;
7014         *PL_tokenbuf = '&';
7015         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7016                       1, &len);
7017         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7018          && !keyword(PL_tokenbuf + 1, len, 0)) {
7019             SSize_t off = s-SvPVX(PL_linestr);
7020             d = skipspace(d);
7021             s = SvPVX(PL_linestr)+off;
7022             if (*d == '(') {
7023                 force_ident_maybe_lex('&');
7024                 s = d;
7025             }
7026         }
7027     }
7028     if (orig_keyword == KEY_do)
7029         pl_yylval.ival = 1;
7030     else
7031         pl_yylval.ival = 0;
7032     OPERATOR(KW_DO);
7033 }
7034
7035 static int
7036 yyl_my(pTHX_ char *s, I32 my)
7037 {
7038     if (PL_in_my) {
7039         PL_bufptr = s;
7040         yyerror(Perl_form(aTHX_
7041                           "Can't redeclare \"%s\" in \"%s\"",
7042                            my       == KEY_my    ? "my" :
7043                            my       == KEY_state ? "state" : "our",
7044                            PL_in_my == KEY_my    ? "my" :
7045                            PL_in_my == KEY_state ? "state" : "our"));
7046     }
7047     PL_in_my = (U16)my;
7048     s = skipspace(s);
7049     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7050         STRLEN len;
7051         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7052         if (memEQs(PL_tokenbuf, len, "sub"))
7053             return yyl_sub(aTHX_ s, my);
7054         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7055         if (!PL_in_my_stash) {
7056             char tmpbuf[1024];
7057             int i;
7058             PL_bufptr = s;
7059             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7060             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7061             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7062         }
7063     }
7064     else if (*s == '\\') {
7065         if (!FEATURE_MYREF_IS_ENABLED)
7066             Perl_croak(aTHX_ "The experimental declared_refs "
7067                              "feature is not enabled");
7068         Perl_ck_warner_d(aTHX_
7069              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7070             "Declaring references is experimental");
7071     }
7072     OPERATOR(KW_MY);
7073 }
7074
7075 static int yyl_try(pTHX_ char*);
7076
7077 static bool
7078 yyl_eol_needs_semicolon(pTHX_ char **ps)
7079 {
7080     char *s = *ps;
7081     if (PL_lex_state != LEX_NORMAL
7082         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7083     {
7084         const bool in_comment = *s == '#';
7085         char *d;
7086         if (*s == '#' && s == PL_linestart && PL_in_eval
7087          && !PL_rsfp && !PL_parser->filtered) {
7088             /* handle eval qq[#line 1 "foo"\n ...] */
7089             CopLINE_dec(PL_curcop);
7090             incline(s, PL_bufend);
7091         }
7092         d = s;
7093         while (d < PL_bufend && *d != '\n')
7094             d++;
7095         if (d < PL_bufend)
7096             d++;
7097         s = d;
7098         if (in_comment && d == PL_bufend
7099             && PL_lex_state == LEX_INTERPNORMAL
7100             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7101             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7102         else
7103             incline(s, PL_bufend);
7104         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7105             PL_lex_state = LEX_FORMLINE;
7106             force_next(FORMRBRACK);
7107             *ps = s;
7108             return TRUE;
7109         }
7110     }
7111     else {
7112         while (s < PL_bufend && *s != '\n')
7113             s++;
7114         if (s < PL_bufend) {
7115             s++;
7116             if (s < PL_bufend)
7117                 incline(s, PL_bufend);
7118         }
7119     }
7120     *ps = s;
7121     return FALSE;
7122 }
7123
7124 static int
7125 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7126 {
7127     char *d;
7128
7129     goto start;
7130
7131     do {
7132         fake_eof = 0;
7133         bof = cBOOL(PL_rsfp);
7134       start:
7135
7136         PL_bufptr = PL_bufend;
7137         COPLINE_INC_WITH_HERELINES;
7138         if (!lex_next_chunk(fake_eof)) {
7139             CopLINE_dec(PL_curcop);
7140             s = PL_bufptr;
7141             TOKEN(PERLY_SEMICOLON);     /* not infinite loop because rsfp is NULL now */
7142         }
7143         CopLINE_dec(PL_curcop);
7144         s = PL_bufptr;
7145         /* If it looks like the start of a BOM or raw UTF-16,
7146          * check if it in fact is. */
7147         if (bof && PL_rsfp
7148             && (   *s == 0
7149                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7150                 || *(U8*)s >= 0xFE
7151                 || s[1] == 0))
7152         {
7153             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7154             bof = (offset == (Off_t)SvCUR(PL_linestr));
7155 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7156             /* offset may include swallowed CR */
7157             if (!bof)
7158                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7159 #endif
7160             if (bof) {
7161                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7162                 s = swallow_bom((U8*)s);
7163             }
7164         }
7165         if (PL_parser->in_pod) {
7166             /* Incest with pod. */
7167             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7168                 && !isALPHA(s[4]))
7169             {
7170                 SvPVCLEAR(PL_linestr);
7171                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7172                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7173                 PL_last_lop = PL_last_uni = NULL;
7174                 PL_parser->in_pod = 0;
7175             }
7176         }
7177         if (PL_rsfp || PL_parser->filtered)
7178             incline(s, PL_bufend);
7179     } while (PL_parser->in_pod);
7180
7181     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7182     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7183     PL_last_lop = PL_last_uni = NULL;
7184     if (CopLINE(PL_curcop) == 1) {
7185         while (s < PL_bufend && isSPACE(*s))
7186             s++;
7187         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7188             s++;
7189         d = NULL;
7190         if (!PL_in_eval) {
7191             if (*s == '#' && *(s+1) == '!')
7192                 d = s + 2;
7193 #ifdef ALTERNATE_SHEBANG
7194             else {
7195                 static char const as[] = ALTERNATE_SHEBANG;
7196                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7197                     d = s + (sizeof(as) - 1);
7198             }
7199 #endif /* ALTERNATE_SHEBANG */
7200         }
7201         if (d) {
7202             char *ipath;
7203             char *ipathend;
7204
7205             while (isSPACE(*d))
7206                 d++;
7207             ipath = d;
7208             while (*d && !isSPACE(*d))
7209                 d++;
7210             ipathend = d;
7211
7212 #ifdef ARG_ZERO_IS_SCRIPT
7213             if (ipathend > ipath) {
7214                 /*
7215                  * HP-UX (at least) sets argv[0] to the script name,
7216                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7217                  * at least, set argv[0] to the basename of the Perl
7218                  * interpreter. So, having found "#!", we'll set it right.
7219                  */
7220                 SV* copfilesv = CopFILESV(PL_curcop);
7221                 if (copfilesv) {
7222                     SV * const x =
7223                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7224                                          SVt_PV)); /* $^X */
7225                     assert(SvPOK(x) || SvGMAGICAL(x));
7226                     if (sv_eq(x, copfilesv)) {
7227                         sv_setpvn(x, ipath, ipathend - ipath);
7228                         SvSETMAGIC(x);
7229                     }
7230                     else {
7231                         STRLEN blen;
7232                         STRLEN llen;
7233                         const char *bstart = SvPV_const(copfilesv, blen);
7234                         const char * const lstart = SvPV_const(x, llen);
7235                         if (llen < blen) {
7236                             bstart += blen - llen;
7237                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7238                                 sv_setpvn(x, ipath, ipathend - ipath);
7239                                 SvSETMAGIC(x);
7240                             }
7241                         }
7242                     }
7243                 }
7244                 else {
7245                     /* Anything to do if no copfilesv? */
7246                 }
7247                 TAINT_NOT;      /* $^X is always tainted, but that's OK */
7248             }
7249 #endif /* ARG_ZERO_IS_SCRIPT */
7250
7251             /*
7252              * Look for options.
7253              */
7254             d = instr(s,"perl -");
7255             if (!d) {
7256                 d = instr(s,"perl");
7257 #if defined(DOSISH)
7258                 /* avoid getting into infinite loops when shebang
7259                  * line contains "Perl" rather than "perl" */
7260                 if (!d) {
7261                     for (d = ipathend-4; d >= ipath; --d) {
7262                         if (isALPHA_FOLD_EQ(*d, 'p')
7263                             && !ibcmp(d, "perl", 4))
7264                         {
7265                             break;
7266                         }
7267                     }
7268                     if (d < ipath)
7269                         d = NULL;
7270                 }
7271 #endif
7272             }
7273 #ifdef ALTERNATE_SHEBANG
7274             /*
7275              * If the ALTERNATE_SHEBANG on this system starts with a
7276              * character that can be part of a Perl expression, then if
7277              * we see it but not "perl", we're probably looking at the
7278              * start of Perl code, not a request to hand off to some
7279              * other interpreter.  Similarly, if "perl" is there, but
7280              * not in the first 'word' of the line, we assume the line
7281              * contains the start of the Perl program.
7282              */
7283             if (d && *s != '#') {
7284                 const char *c = ipath;
7285                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7286                     c++;
7287                 if (c < d)
7288                     d = NULL;   /* "perl" not in first word; ignore */
7289                 else
7290                     *s = '#';   /* Don't try to parse shebang line */
7291             }
7292 #endif /* ALTERNATE_SHEBANG */
7293             if (!d
7294                 && *s == '#'
7295                 && ipathend > ipath
7296                 && !PL_minus_c
7297                 && !instr(s,"indir")
7298                 && instr(PL_origargv[0],"perl"))
7299             {
7300                 char **newargv;
7301
7302                 *ipathend = '\0';
7303                 s = ipathend + 1;
7304                 while (s < PL_bufend && isSPACE(*s))
7305                     s++;
7306                 if (s < PL_bufend) {
7307                     Newx(newargv,PL_origargc+3,char*);
7308                     newargv[1] = s;
7309                     while (s < PL_bufend && !isSPACE(*s))
7310                         s++;
7311                     *s = '\0';
7312                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7313                 }
7314                 else
7315                     newargv = PL_origargv;
7316                 newargv[0] = ipath;
7317                 PERL_FPU_PRE_EXEC
7318                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7319                 PERL_FPU_POST_EXEC
7320                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7321             }
7322             if (d) {
7323                 while (*d && !isSPACE(*d))
7324                     d++;
7325                 while (SPACE_OR_TAB(*d))
7326                     d++;
7327
7328                 if (*d++ == '-') {
7329                     const bool switches_done = PL_doswitches;
7330                     const U32 oldpdb = PL_perldb;
7331                     const bool oldn = PL_minus_n;
7332                     const bool oldp = PL_minus_p;
7333                     const char *d1 = d;
7334
7335                     do {
7336                         bool baduni = FALSE;
7337                         if (*d1 == 'C') {
7338                             const char *d2 = d1 + 1;
7339                             if (parse_unicode_opts((const char **)&d2)
7340                                 != PL_unicode)
7341                                 baduni = TRUE;
7342                         }
7343                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7344                             const char * const m = d1;
7345                             while (*d1 && !isSPACE(*d1))
7346                                 d1++;
7347                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7348                                   (int)(d1 - m), m);
7349                         }
7350                         d1 = moreswitches(d1);
7351                     } while (d1);
7352                     if (PL_doswitches && !switches_done) {
7353                         int argc = PL_origargc;
7354                         char **argv = PL_origargv;
7355                         do {
7356                             argc--,argv++;
7357                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7358                         init_argv_symbols(argc,argv);
7359                     }
7360                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7361                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7362                           /* if we have already added "LINE: while (<>) {",
7363                              we must not do it again */
7364                     {
7365                         SvPVCLEAR(PL_linestr);
7366                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7367                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7368                         PL_last_lop = PL_last_uni = NULL;
7369                         PL_preambled = FALSE;
7370                         if (PERLDB_LINE_OR_SAVESRC)
7371                             (void)gv_fetchfile(PL_origfilename);
7372                         return YYL_RETRY;
7373                     }
7374                 }
7375             }
7376         }
7377     }
7378
7379     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7380         PL_lex_state = LEX_FORMLINE;
7381         force_next(FORMRBRACK);
7382         TOKEN(PERLY_SEMICOLON);
7383     }
7384
7385     PL_bufptr = s;
7386     return YYL_RETRY;
7387 }
7388
7389 static int
7390 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7391 {
7392     CLINE;
7393     pl_yylval.opval
7394         = newSVOP(OP_CONST, 0,
7395                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7396     pl_yylval.opval->op_private = OPpCONST_BARE;
7397     TERM(BAREWORD);
7398 }
7399
7400 static int
7401 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7402 {
7403     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7404         && PL_parser->saw_infix_sigil)
7405     {
7406         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7407                          "Operator or semicolon missing before %c%" UTF8f,
7408                          lastchar,
7409                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7410                                   PL_tokenbuf));
7411         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7412                          "Ambiguous use of %c resolved as operator %c",
7413                          lastchar, lastchar);
7414     }
7415     TOKEN(BAREWORD);
7416 }
7417
7418 static int
7419 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7420 {
7421     if (sv) {
7422         op_free(rv2cv_op);
7423         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7424         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7425         if (SvTYPE(sv) == SVt_PVAV)
7426             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7427                                       pl_yylval.opval);
7428         else {
7429             pl_yylval.opval->op_private = 0;
7430             pl_yylval.opval->op_folded = 1;
7431             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7432         }
7433         TOKEN(BAREWORD);
7434     }
7435
7436     op_free(pl_yylval.opval);
7437     pl_yylval.opval =
7438         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7439     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7440     PL_last_lop = PL_oldbufptr;
7441     PL_last_lop_op = OP_ENTERSUB;
7442
7443     /* Is there a prototype? */
7444     if (SvPOK(cv)) {
7445         int k = yyl_subproto(aTHX_ s, cv);
7446         if (k != KEY_NULL)
7447             return k;
7448     }
7449
7450     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7451     PL_expect = XTERM;
7452     force_next(off ? PRIVATEREF : BAREWORD);
7453     if (!PL_lex_allbrackets
7454         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7455     {
7456         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7457     }
7458
7459     TOKEN(NOAMP);
7460 }
7461
7462 /* Honour "reserved word" warnings, and enforce strict subs */
7463 static void
7464 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7465 {
7466     /* after "print" and similar functions (corresponding to
7467      * "F? L" in opcode.pl), whatever wasn't already parsed as
7468      * a filehandle should be subject to "strict subs".
7469      * Likewise for the optional indirect-object argument to system
7470      * or exec, which can't be a bareword */
7471     if ((PL_last_lop_op == OP_PRINT
7472             || PL_last_lop_op == OP_PRTF
7473             || PL_last_lop_op == OP_SAY
7474             || PL_last_lop_op == OP_SYSTEM
7475             || PL_last_lop_op == OP_EXEC)
7476         && (PL_hints & HINT_STRICT_SUBS))
7477     {
7478         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7479     }
7480
7481     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7482         char *d = PL_tokenbuf;
7483         while (isLOWER(*d))
7484             d++;
7485         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7486             /* PL_warn_reserved is constant */
7487             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7488             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7489                         PL_tokenbuf);
7490             GCC_DIAG_RESTORE_STMT;
7491         }
7492     }
7493 }
7494
7495 static int
7496 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7497 {
7498     int pkgname = 0;
7499     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7500     bool safebw;
7501     bool no_op_error = FALSE;
7502     /* Use this var to track whether intuit_method has been
7503        called.  intuit_method returns 0 or > 255.  */
7504     int key = 1;
7505
7506     if (PL_expect == XOPERATOR) {
7507         if (PL_bufptr == PL_linestart) {
7508             CopLINE_dec(PL_curcop);
7509             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7510             CopLINE_inc(PL_curcop);
7511         }
7512         else
7513             /* We want to call no_op with s pointing after the
7514                bareword, so defer it.  But we want it to come
7515                before the Bad name croak.  */
7516             no_op_error = TRUE;
7517     }
7518
7519     /* Get the rest if it looks like a package qualifier */
7520
7521     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7522         STRLEN morelen;
7523         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7524                       TRUE, &morelen);
7525         if (no_op_error) {
7526             no_op("Bareword",s);
7527             no_op_error = FALSE;
7528         }
7529         if (!morelen)
7530             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7531                     UTF8fARG(UTF, len, PL_tokenbuf),
7532                     *s == '\'' ? "'" : "::");
7533         len += morelen;
7534         pkgname = 1;
7535     }
7536
7537     if (no_op_error)
7538         no_op("Bareword",s);
7539
7540     /* See if the name is "Foo::",
7541        in which case Foo is a bareword
7542        (and a package name). */
7543
7544     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7545         if (ckWARN(WARN_BAREWORD)
7546             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7547             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7548                         "Bareword \"%" UTF8f
7549                         "\" refers to nonexistent package",
7550                         UTF8fARG(UTF, len, PL_tokenbuf));
7551         len -= 2;
7552         PL_tokenbuf[len] = '\0';
7553         c.gv = NULL;
7554         c.gvp = 0;
7555         safebw = TRUE;
7556     }
7557     else {
7558         safebw = FALSE;
7559     }
7560
7561     /* if we saw a global override before, get the right name */
7562
7563     if (!c.sv)
7564         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7565     if (c.gvp) {
7566         SV *sv = newSVpvs("CORE::GLOBAL::");
7567         sv_catsv(sv, c.sv);
7568         SvREFCNT_dec(c.sv);
7569         c.sv = sv;
7570     }
7571
7572     /* Presume this is going to be a bareword of some sort. */
7573     CLINE;
7574     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7575     pl_yylval.opval->op_private = OPpCONST_BARE;
7576
7577     /* And if "Foo::", then that's what it certainly is. */
7578     if (safebw)
7579         return yyl_safe_bareword(aTHX_ s, lastchar);
7580
7581     if (!c.off) {
7582         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7583         const_op->op_private = OPpCONST_BARE;
7584         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7585         c.cv = c.lex
7586             ? isGV(c.gv)
7587                 ? GvCV(c.gv)
7588                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7589                     ? (CV *)SvRV(c.gv)
7590                     : ((CV *)c.gv)
7591             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7592     }
7593
7594     /* See if it's the indirect object for a list operator. */
7595
7596     if (PL_oldoldbufptr
7597         && PL_oldoldbufptr < PL_bufptr
7598         && (PL_oldoldbufptr == PL_last_lop
7599             || PL_oldoldbufptr == PL_last_uni)
7600         && /* NO SKIPSPACE BEFORE HERE! */
7601            (PL_expect == XREF
7602             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7603                                                    == OA_FILEREF))
7604     {
7605         bool immediate_paren = *s == '(';
7606         SSize_t s_off;
7607
7608         /* (Now we can afford to cross potential line boundary.) */
7609         s = skipspace(s);
7610
7611         /* intuit_method() can indirectly call lex_next_chunk(),
7612          * invalidating s
7613          */
7614         s_off = s - SvPVX(PL_linestr);
7615         /* Two barewords in a row may indicate method call. */
7616         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7617                 || *s == '$')
7618             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7619         {
7620             /* the code at method: doesn't use s */
7621             goto method;
7622         }
7623         s = SvPVX(PL_linestr) + s_off;
7624
7625         /* If not a declared subroutine, it's an indirect object. */
7626         /* (But it's an indir obj regardless for sort.) */
7627         /* Also, if "_" follows a filetest operator, it's a bareword */
7628
7629         if (
7630             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7631              || (!c.cv
7632                  && (PL_last_lop_op != OP_MAPSTART
7633                      && PL_last_lop_op != OP_GREPSTART))))
7634            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7635                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7636                                                 == OA_FILESTATOP))
7637            )
7638         {
7639             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7640             yyl_strictwarn_bareword(aTHX_ lastchar);
7641             op_free(c.rv2cv_op);
7642             return yyl_safe_bareword(aTHX_ s, lastchar);
7643         }
7644     }
7645
7646     PL_expect = XOPERATOR;
7647     s = skipspace(s);
7648
7649     /* Is this a word before a => operator? */
7650     if (*s == '=' && s[1] == '>' && !pkgname) {
7651         op_free(c.rv2cv_op);
7652         CLINE;
7653         if (c.gvp || (c.lex && !c.off)) {
7654             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7655             /* This is our own scalar, created a few lines
7656                above, so this is safe. */
7657             SvREADONLY_off(c.sv);
7658             sv_setpv(c.sv, PL_tokenbuf);
7659             if (UTF && !IN_BYTES
7660              && is_utf8_string((U8*)PL_tokenbuf, len))
7661                   SvUTF8_on(c.sv);
7662             SvREADONLY_on(c.sv);
7663         }
7664         TERM(BAREWORD);
7665     }
7666
7667     /* If followed by a paren, it's certainly a subroutine. */
7668     if (*s == '(') {
7669         CLINE;
7670         if (c.cv) {
7671             char *d = s + 1;
7672             while (SPACE_OR_TAB(*d))
7673                 d++;
7674             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7675                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7676         }
7677         NEXTVAL_NEXTTOKE.opval =
7678             c.off ? c.rv2cv_op : pl_yylval.opval;
7679         if (c.off)
7680              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7681         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7682         pl_yylval.ival = 0;
7683         TOKEN(PERLY_AMPERSAND);
7684     }
7685
7686     /* If followed by var or block, call it a method (unless sub) */
7687
7688     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7689         op_free(c.rv2cv_op);
7690         PL_last_lop = PL_oldbufptr;
7691         PL_last_lop_op = OP_METHOD;
7692         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7693             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7694         PL_expect = XBLOCKTERM;
7695         PL_bufptr = s;
7696         return REPORT(METHCALL0);
7697     }
7698
7699     /* If followed by a bareword, see if it looks like indir obj. */
7700
7701     if (   key == 1
7702         && !orig_keyword
7703         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7704         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7705     {
7706       method:
7707         if (c.lex && !c.off) {
7708             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7709             SvREADONLY_off(c.sv);
7710             sv_setpvn(c.sv, PL_tokenbuf, len);
7711             if (UTF && !IN_BYTES
7712              && is_utf8_string((U8*)PL_tokenbuf, len))
7713                 SvUTF8_on(c.sv);
7714             else SvUTF8_off(c.sv);
7715         }
7716         op_free(c.rv2cv_op);
7717         if (key == METHCALL0 && !PL_lex_allbrackets
7718             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7719         {
7720             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7721         }
7722         return REPORT(key);
7723     }
7724
7725     /* Not a method, so call it a subroutine (if defined) */
7726
7727     if (c.cv) {
7728         /* Check for a constant sub */
7729         c.sv = cv_const_sv_or_av(c.cv);
7730         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7731     }
7732
7733     /* Call it a bare word */
7734
7735     if (PL_hints & HINT_STRICT_SUBS)
7736         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7737     else
7738         yyl_strictwarn_bareword(aTHX_ lastchar);
7739
7740     op_free(c.rv2cv_op);
7741
7742     return yyl_safe_bareword(aTHX_ s, lastchar);
7743 }
7744
7745 static int
7746 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7747 {
7748     switch (key) {
7749     default:                    /* not a keyword */
7750         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7751
7752     case KEY___FILE__:
7753         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7754
7755     case KEY___LINE__:
7756         FUN0OP(
7757             newSVOP(OP_CONST, 0,
7758                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7759         );
7760
7761     case KEY___PACKAGE__:
7762         FUN0OP(
7763             newSVOP(OP_CONST, 0, (PL_curstash
7764                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7765                                      : &PL_sv_undef))
7766         );
7767
7768     case KEY___DATA__:
7769     case KEY___END__:
7770         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7771             yyl_data_handle(aTHX);
7772         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7773
7774     case KEY___SUB__:
7775         /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7776          * OP_CONST. We need to make it big enough to allow room for that if
7777          * so */
7778         FUN0OP(CvCLONE(PL_compcv)
7779                     ? newOP(OP_RUNCV, 0)
7780                     : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7781
7782     case KEY_AUTOLOAD:
7783     case KEY_DESTROY:
7784     case KEY_BEGIN:
7785     case KEY_UNITCHECK:
7786     case KEY_CHECK:
7787     case KEY_INIT:
7788     case KEY_END:
7789         if (PL_expect == XSTATE)
7790             return yyl_sub(aTHX_ PL_bufptr, key);
7791         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7792
7793     case KEY_abs:
7794         UNI(OP_ABS);
7795
7796     case KEY_alarm:
7797         UNI(OP_ALARM);
7798
7799     case KEY_accept:
7800         LOP(OP_ACCEPT,XTERM);
7801
7802     case KEY_and:
7803         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7804             return REPORT(0);
7805         OPERATOR(ANDOP);
7806
7807     case KEY_atan2:
7808         LOP(OP_ATAN2,XTERM);
7809
7810     case KEY_bind:
7811         LOP(OP_BIND,XTERM);
7812
7813     case KEY_binmode:
7814         LOP(OP_BINMODE,XTERM);
7815
7816     case KEY_bless:
7817         LOP(OP_BLESS,XTERM);
7818
7819     case KEY_break:
7820         FUN0(OP_BREAK);
7821
7822     case KEY_catch:
7823         Perl_ck_warner_d(aTHX_
7824             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7825         PREBLOCK(KW_CATCH);
7826
7827     case KEY_chop:
7828         UNI(OP_CHOP);
7829
7830     case KEY_continue:
7831         /* We have to disambiguate the two senses of
7832           "continue". If the next token is a '{' then
7833           treat it as the start of a continue block;
7834           otherwise treat it as a control operator.
7835          */
7836         s = skipspace(s);
7837         if (*s == '{')
7838             PREBLOCK(KW_CONTINUE);
7839         else
7840             FUN0(OP_CONTINUE);
7841
7842     case KEY_chdir:
7843         /* may use HOME */
7844         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7845         UNI(OP_CHDIR);
7846
7847     case KEY_close:
7848         UNI(OP_CLOSE);
7849
7850     case KEY_closedir:
7851         UNI(OP_CLOSEDIR);
7852
7853     case KEY_cmp:
7854         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7855             return REPORT(0);
7856         NCEop(OP_SCMP);
7857
7858     case KEY_caller:
7859         UNI(OP_CALLER);
7860
7861     case KEY_crypt:
7862
7863         LOP(OP_CRYPT,XTERM);
7864
7865     case KEY_chmod:
7866         LOP(OP_CHMOD,XTERM);
7867
7868     case KEY_chown:
7869         LOP(OP_CHOWN,XTERM);
7870
7871     case KEY_connect:
7872         LOP(OP_CONNECT,XTERM);
7873
7874     case KEY_chr:
7875         UNI(OP_CHR);
7876
7877     case KEY_cos:
7878         UNI(OP_COS);
7879
7880     case KEY_chroot:
7881         UNI(OP_CHROOT);
7882
7883     case KEY_default:
7884         PREBLOCK(KW_DEFAULT);
7885
7886     case KEY_defer:
7887         Perl_ck_warner_d(aTHX_
7888             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7889         PREBLOCK(KW_DEFER);
7890
7891     case KEY_do:
7892         return yyl_do(aTHX_ s, orig_keyword);
7893
7894     case KEY_die:
7895         PL_hints |= HINT_BLOCK_SCOPE;
7896         LOP(OP_DIE,XTERM);
7897
7898     case KEY_defined:
7899         UNI(OP_DEFINED);
7900
7901     case KEY_delete:
7902         UNI(OP_DELETE);
7903
7904     case KEY_dbmopen:
7905         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7906                           STR_WITH_LEN("NDBM_File::"),
7907                           STR_WITH_LEN("DB_File::"),
7908                           STR_WITH_LEN("GDBM_File::"),
7909                           STR_WITH_LEN("SDBM_File::"),
7910                           STR_WITH_LEN("ODBM_File::"),
7911                           NULL);
7912         LOP(OP_DBMOPEN,XTERM);
7913
7914     case KEY_dbmclose:
7915         UNI(OP_DBMCLOSE);
7916
7917     case KEY_dump:
7918         LOOPX(OP_DUMP);
7919
7920     case KEY_else:
7921         PREBLOCK(KW_ELSE);
7922
7923     case KEY_elsif:
7924         pl_yylval.ival = CopLINE(PL_curcop);
7925         OPERATOR(KW_ELSIF);
7926
7927     case KEY_eq:
7928         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7929             return REPORT(0);
7930         ChEop(OP_SEQ);
7931
7932     case KEY_exists:
7933         UNI(OP_EXISTS);
7934
7935     case KEY_exit:
7936         UNI(OP_EXIT);
7937
7938     case KEY_eval:
7939         s = skipspace(s);
7940         if (*s == '{') { /* block eval */
7941             PL_expect = XTERMBLOCK;
7942             UNIBRACK(OP_ENTERTRY);
7943         }
7944         else { /* string eval */
7945             PL_expect = XTERM;
7946             UNIBRACK(OP_ENTEREVAL);
7947         }
7948
7949     case KEY_evalbytes:
7950         PL_expect = XTERM;
7951         UNIBRACK(-OP_ENTEREVAL);
7952
7953     case KEY_eof:
7954         UNI(OP_EOF);
7955
7956     case KEY_exp:
7957         UNI(OP_EXP);
7958
7959     case KEY_each:
7960         UNI(OP_EACH);
7961
7962     case KEY_exec:
7963         LOP(OP_EXEC,XREF);
7964
7965     case KEY_endhostent:
7966         FUN0(OP_EHOSTENT);
7967
7968     case KEY_endnetent:
7969         FUN0(OP_ENETENT);
7970
7971     case KEY_endservent:
7972         FUN0(OP_ESERVENT);
7973
7974     case KEY_endprotoent:
7975         FUN0(OP_EPROTOENT);
7976
7977     case KEY_endpwent:
7978         FUN0(OP_EPWENT);
7979
7980     case KEY_endgrent:
7981         FUN0(OP_EGRENT);
7982
7983     case KEY_finally:
7984         Perl_ck_warner_d(aTHX_
7985             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7986         PREBLOCK(KW_FINALLY);
7987
7988     case KEY_for:
7989     case KEY_foreach:
7990         return yyl_foreach(aTHX_ s);
7991
7992     case KEY_formline:
7993         LOP(OP_FORMLINE,XTERM);
7994
7995     case KEY_fork:
7996         FUN0(OP_FORK);
7997
7998     case KEY_fc:
7999         UNI(OP_FC);
8000
8001     case KEY_fcntl:
8002         LOP(OP_FCNTL,XTERM);
8003
8004     case KEY_fileno:
8005         UNI(OP_FILENO);
8006
8007     case KEY_flock:
8008         LOP(OP_FLOCK,XTERM);
8009
8010     case KEY_gt:
8011         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8012             return REPORT(0);
8013         ChRop(OP_SGT);
8014
8015     case KEY_ge:
8016         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8017             return REPORT(0);
8018         ChRop(OP_SGE);
8019
8020     case KEY_grep:
8021         LOP(OP_GREPSTART, XREF);
8022
8023     case KEY_goto:
8024         LOOPX(OP_GOTO);
8025
8026     case KEY_gmtime:
8027         UNI(OP_GMTIME);
8028
8029     case KEY_getc:
8030         UNIDOR(OP_GETC);
8031
8032     case KEY_getppid:
8033         FUN0(OP_GETPPID);
8034
8035     case KEY_getpgrp:
8036         UNI(OP_GETPGRP);
8037
8038     case KEY_getpriority:
8039         LOP(OP_GETPRIORITY,XTERM);
8040
8041     case KEY_getprotobyname:
8042         UNI(OP_GPBYNAME);
8043
8044     case KEY_getprotobynumber:
8045         LOP(OP_GPBYNUMBER,XTERM);
8046
8047     case KEY_getprotoent:
8048         FUN0(OP_GPROTOENT);
8049
8050     case KEY_getpwent:
8051         FUN0(OP_GPWENT);
8052
8053     case KEY_getpwnam:
8054         UNI(OP_GPWNAM);
8055
8056     case KEY_getpwuid:
8057         UNI(OP_GPWUID);
8058
8059     case KEY_getpeername:
8060         UNI(OP_GETPEERNAME);
8061
8062     case KEY_gethostbyname:
8063         UNI(OP_GHBYNAME);
8064
8065     case KEY_gethostbyaddr:
8066         LOP(OP_GHBYADDR,XTERM);
8067
8068     case KEY_gethostent:
8069         FUN0(OP_GHOSTENT);
8070
8071     case KEY_getnetbyname:
8072         UNI(OP_GNBYNAME);
8073
8074     case KEY_getnetbyaddr:
8075         LOP(OP_GNBYADDR,XTERM);
8076
8077     case KEY_getnetent:
8078         FUN0(OP_GNETENT);
8079
8080     case KEY_getservbyname:
8081         LOP(OP_GSBYNAME,XTERM);
8082
8083     case KEY_getservbyport:
8084         LOP(OP_GSBYPORT,XTERM);
8085
8086     case KEY_getservent:
8087         FUN0(OP_GSERVENT);
8088
8089     case KEY_getsockname:
8090         UNI(OP_GETSOCKNAME);
8091
8092     case KEY_getsockopt:
8093         LOP(OP_GSOCKOPT,XTERM);
8094
8095     case KEY_getgrent:
8096         FUN0(OP_GGRENT);
8097
8098     case KEY_getgrnam:
8099         UNI(OP_GGRNAM);
8100
8101     case KEY_getgrgid:
8102         UNI(OP_GGRGID);
8103
8104     case KEY_getlogin:
8105         FUN0(OP_GETLOGIN);
8106
8107     case KEY_given:
8108         pl_yylval.ival = CopLINE(PL_curcop);
8109         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8110                          "given is experimental");
8111         OPERATOR(KW_GIVEN);
8112
8113     case KEY_glob:
8114         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8115
8116     case KEY_hex:
8117         UNI(OP_HEX);
8118
8119     case KEY_if:
8120         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8121             return REPORT(0);
8122         pl_yylval.ival = CopLINE(PL_curcop);
8123         OPERATOR(KW_IF);
8124
8125     case KEY_index:
8126         LOP(OP_INDEX,XTERM);
8127
8128     case KEY_int:
8129         UNI(OP_INT);
8130
8131     case KEY_ioctl:
8132         LOP(OP_IOCTL,XTERM);
8133
8134     case KEY_isa:
8135         NCRop(OP_ISA);
8136
8137     case KEY_join:
8138         LOP(OP_JOIN,XTERM);
8139
8140     case KEY_keys:
8141         UNI(OP_KEYS);
8142
8143     case KEY_kill:
8144         LOP(OP_KILL,XTERM);
8145
8146     case KEY_last:
8147         LOOPX(OP_LAST);
8148
8149     case KEY_lc:
8150         UNI(OP_LC);
8151
8152     case KEY_lcfirst:
8153         UNI(OP_LCFIRST);
8154
8155     case KEY_local:
8156         OPERATOR(KW_LOCAL);
8157
8158     case KEY_length:
8159         UNI(OP_LENGTH);
8160
8161     case KEY_lt:
8162         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8163             return REPORT(0);
8164         ChRop(OP_SLT);
8165
8166     case KEY_le:
8167         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8168             return REPORT(0);
8169         ChRop(OP_SLE);
8170
8171     case KEY_localtime:
8172         UNI(OP_LOCALTIME);
8173
8174     case KEY_log:
8175         UNI(OP_LOG);
8176
8177     case KEY_link:
8178         LOP(OP_LINK,XTERM);
8179
8180     case KEY_listen:
8181         LOP(OP_LISTEN,XTERM);
8182
8183     case KEY_lock:
8184         UNI(OP_LOCK);
8185
8186     case KEY_lstat:
8187         UNI(OP_LSTAT);
8188
8189     case KEY_m:
8190         s = scan_pat(s,OP_MATCH);
8191         TERM(sublex_start());
8192
8193     case KEY_map:
8194         LOP(OP_MAPSTART, XREF);
8195
8196     case KEY_mkdir:
8197         LOP(OP_MKDIR,XTERM);
8198
8199     case KEY_msgctl:
8200         LOP(OP_MSGCTL,XTERM);
8201
8202     case KEY_msgget:
8203         LOP(OP_MSGGET,XTERM);
8204
8205     case KEY_msgrcv:
8206         LOP(OP_MSGRCV,XTERM);
8207
8208     case KEY_msgsnd:
8209         LOP(OP_MSGSND,XTERM);
8210
8211     case KEY_our:
8212     case KEY_my:
8213     case KEY_state:
8214         return yyl_my(aTHX_ s, key);
8215
8216     case KEY_next:
8217         LOOPX(OP_NEXT);
8218
8219     case KEY_ne:
8220         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8221             return REPORT(0);
8222         ChEop(OP_SNE);
8223
8224     case KEY_no:
8225         s = tokenize_use(0, s);
8226         TOKEN(KW_USE_or_NO);
8227
8228     case KEY_not:
8229         if (*s == '(' || (s = skipspace(s), *s == '('))
8230             FUN1(OP_NOT);
8231         else {
8232             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8233                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8234             OPERATOR(NOTOP);
8235         }
8236
8237     case KEY_open:
8238         s = skipspace(s);
8239         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8240             const char *t;
8241             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8242             for (t=d; isSPACE(*t);)
8243                 t++;
8244             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8245                 /* [perl #16184] */
8246                 && !(t[0] == '=' && t[1] == '>')
8247                 && !(t[0] == ':' && t[1] == ':')
8248                 && !keyword(s, d-s, 0)
8249             ) {
8250                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8251                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8252                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8253             }
8254         }
8255         LOP(OP_OPEN,XTERM);
8256
8257     case KEY_or:
8258         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8259             return REPORT(0);
8260         pl_yylval.ival = OP_OR;
8261         OPERATOR(OROP);
8262
8263     case KEY_ord:
8264         UNI(OP_ORD);
8265
8266     case KEY_oct:
8267         UNI(OP_OCT);
8268
8269     case KEY_opendir:
8270         LOP(OP_OPEN_DIR,XTERM);
8271
8272     case KEY_print:
8273         checkcomma(s,PL_tokenbuf,"filehandle");
8274         LOP(OP_PRINT,XREF);
8275
8276     case KEY_printf:
8277         checkcomma(s,PL_tokenbuf,"filehandle");
8278         LOP(OP_PRTF,XREF);
8279
8280     case KEY_prototype:
8281         UNI(OP_PROTOTYPE);
8282
8283     case KEY_push:
8284         LOP(OP_PUSH,XTERM);
8285
8286     case KEY_pop:
8287         UNIDOR(OP_POP);
8288
8289     case KEY_pos:
8290         UNIDOR(OP_POS);
8291
8292     case KEY_pack:
8293         LOP(OP_PACK,XTERM);
8294
8295     case KEY_package:
8296         s = force_word(s,BAREWORD,FALSE,TRUE);
8297         s = skipspace(s);
8298         s = force_strict_version(s);
8299         PREBLOCK(KW_PACKAGE);
8300
8301     case KEY_pipe:
8302         LOP(OP_PIPE_OP,XTERM);
8303
8304     case KEY_q:
8305         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8306         if (!s)
8307             missingterm(NULL, 0);
8308         COPLINE_SET_FROM_MULTI_END;
8309         pl_yylval.ival = OP_CONST;
8310         TERM(sublex_start());
8311
8312     case KEY_quotemeta:
8313         UNI(OP_QUOTEMETA);
8314
8315     case KEY_qw:
8316         return yyl_qw(aTHX_ s, len);
8317
8318     case KEY_qq:
8319         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8320         if (!s)
8321             missingterm(NULL, 0);
8322         pl_yylval.ival = OP_STRINGIFY;
8323         if (SvIVX(PL_lex_stuff) == '\'')
8324             SvIV_set(PL_lex_stuff, 0);  /* qq'$foo' should interpolate */
8325         TERM(sublex_start());
8326
8327     case KEY_qr:
8328         s = scan_pat(s,OP_QR);
8329         TERM(sublex_start());
8330
8331     case KEY_qx:
8332         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8333         if (!s)
8334             missingterm(NULL, 0);
8335         pl_yylval.ival = OP_BACKTICK;
8336         TERM(sublex_start());
8337
8338     case KEY_return:
8339         OLDLOP(OP_RETURN);
8340
8341     case KEY_require:
8342         return yyl_require(aTHX_ s, orig_keyword);
8343
8344     case KEY_reset:
8345         UNI(OP_RESET);
8346
8347     case KEY_redo:
8348         LOOPX(OP_REDO);
8349
8350     case KEY_rename:
8351         LOP(OP_RENAME,XTERM);
8352
8353     case KEY_rand:
8354         UNI(OP_RAND);
8355
8356     case KEY_rmdir:
8357         UNI(OP_RMDIR);
8358
8359     case KEY_rindex:
8360         LOP(OP_RINDEX,XTERM);
8361
8362     case KEY_read:
8363         LOP(OP_READ,XTERM);
8364
8365     case KEY_readdir:
8366         UNI(OP_READDIR);
8367
8368     case KEY_readline:
8369         UNIDOR(OP_READLINE);
8370
8371     case KEY_readpipe:
8372         UNIDOR(OP_BACKTICK);
8373
8374     case KEY_rewinddir:
8375         UNI(OP_REWINDDIR);
8376
8377     case KEY_recv:
8378         LOP(OP_RECV,XTERM);
8379
8380     case KEY_reverse:
8381         LOP(OP_REVERSE,XTERM);
8382
8383     case KEY_readlink:
8384         UNIDOR(OP_READLINK);
8385
8386     case KEY_ref:
8387         UNI(OP_REF);
8388
8389     case KEY_s:
8390         s = scan_subst(s);
8391         if (pl_yylval.opval)
8392             TERM(sublex_start());
8393         else
8394             TOKEN(1);   /* force error */
8395
8396     case KEY_say:
8397         checkcomma(s,PL_tokenbuf,"filehandle");
8398         LOP(OP_SAY,XREF);
8399
8400     case KEY_chomp:
8401         UNI(OP_CHOMP);
8402
8403     case KEY_scalar:
8404         UNI(OP_SCALAR);
8405
8406     case KEY_select:
8407         LOP(OP_SELECT,XTERM);
8408
8409     case KEY_seek:
8410         LOP(OP_SEEK,XTERM);
8411
8412     case KEY_semctl:
8413         LOP(OP_SEMCTL,XTERM);
8414
8415     case KEY_semget:
8416         LOP(OP_SEMGET,XTERM);
8417
8418     case KEY_semop:
8419         LOP(OP_SEMOP,XTERM);
8420
8421     case KEY_send:
8422         LOP(OP_SEND,XTERM);
8423
8424     case KEY_setpgrp:
8425         LOP(OP_SETPGRP,XTERM);
8426
8427     case KEY_setpriority:
8428         LOP(OP_SETPRIORITY,XTERM);
8429
8430     case KEY_sethostent:
8431         UNI(OP_SHOSTENT);
8432
8433     case KEY_setnetent:
8434         UNI(OP_SNETENT);
8435
8436     case KEY_setservent:
8437         UNI(OP_SSERVENT);
8438
8439     case KEY_setprotoent:
8440         UNI(OP_SPROTOENT);
8441
8442     case KEY_setpwent:
8443         FUN0(OP_SPWENT);
8444
8445     case KEY_setgrent:
8446         FUN0(OP_SGRENT);
8447
8448     case KEY_seekdir:
8449         LOP(OP_SEEKDIR,XTERM);
8450
8451     case KEY_setsockopt:
8452         LOP(OP_SSOCKOPT,XTERM);
8453
8454     case KEY_shift:
8455         UNIDOR(OP_SHIFT);
8456
8457     case KEY_shmctl:
8458         LOP(OP_SHMCTL,XTERM);
8459
8460     case KEY_shmget:
8461         LOP(OP_SHMGET,XTERM);
8462
8463     case KEY_shmread:
8464         LOP(OP_SHMREAD,XTERM);
8465
8466     case KEY_shmwrite:
8467         LOP(OP_SHMWRITE,XTERM);
8468
8469     case KEY_shutdown:
8470         LOP(OP_SHUTDOWN,XTERM);
8471
8472     case KEY_sin:
8473         UNI(OP_SIN);
8474
8475     case KEY_sleep:
8476         UNI(OP_SLEEP);
8477
8478     case KEY_socket:
8479         LOP(OP_SOCKET,XTERM);
8480
8481     case KEY_socketpair:
8482         LOP(OP_SOCKPAIR,XTERM);
8483
8484     case KEY_sort:
8485         checkcomma(s,PL_tokenbuf,"subroutine name");
8486         s = skipspace(s);
8487         PL_expect = XTERM;
8488         s = force_word(s,BAREWORD,TRUE,TRUE);
8489         LOP(OP_SORT,XREF);
8490
8491     case KEY_split:
8492         LOP(OP_SPLIT,XTERM);
8493
8494     case KEY_sprintf:
8495         LOP(OP_SPRINTF,XTERM);
8496
8497     case KEY_splice:
8498         LOP(OP_SPLICE,XTERM);
8499
8500     case KEY_sqrt:
8501         UNI(OP_SQRT);
8502
8503     case KEY_srand:
8504         UNI(OP_SRAND);
8505
8506     case KEY_stat:
8507         UNI(OP_STAT);
8508
8509     case KEY_study:
8510         UNI(OP_STUDY);
8511
8512     case KEY_substr:
8513         LOP(OP_SUBSTR,XTERM);
8514
8515     case KEY_format:
8516     case KEY_sub:
8517         return yyl_sub(aTHX_ s, key);
8518
8519     case KEY_system:
8520         LOP(OP_SYSTEM,XREF);
8521
8522     case KEY_symlink:
8523         LOP(OP_SYMLINK,XTERM);
8524
8525     case KEY_syscall:
8526         LOP(OP_SYSCALL,XTERM);
8527
8528     case KEY_sysopen:
8529         LOP(OP_SYSOPEN,XTERM);
8530
8531     case KEY_sysseek:
8532         LOP(OP_SYSSEEK,XTERM);
8533
8534     case KEY_sysread:
8535         LOP(OP_SYSREAD,XTERM);
8536
8537     case KEY_syswrite:
8538         LOP(OP_SYSWRITE,XTERM);
8539
8540     case KEY_tr:
8541     case KEY_y:
8542         s = scan_trans(s);
8543         TERM(sublex_start());
8544
8545     case KEY_tell:
8546         UNI(OP_TELL);
8547
8548     case KEY_telldir:
8549         UNI(OP_TELLDIR);
8550
8551     case KEY_tie:
8552         LOP(OP_TIE,XTERM);
8553
8554     case KEY_tied:
8555         UNI(OP_TIED);
8556
8557     case KEY_time:
8558         FUN0(OP_TIME);
8559
8560     case KEY_times:
8561         FUN0(OP_TMS);
8562
8563     case KEY_truncate:
8564         LOP(OP_TRUNCATE,XTERM);
8565
8566     case KEY_try:
8567         pl_yylval.ival = CopLINE(PL_curcop);
8568         Perl_ck_warner_d(aTHX_
8569             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8570         PREBLOCK(KW_TRY);
8571
8572     case KEY_uc:
8573         UNI(OP_UC);
8574
8575     case KEY_ucfirst:
8576         UNI(OP_UCFIRST);
8577
8578     case KEY_untie:
8579         UNI(OP_UNTIE);
8580
8581     case KEY_until:
8582         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8583             return REPORT(0);
8584         pl_yylval.ival = CopLINE(PL_curcop);
8585         OPERATOR(KW_UNTIL);
8586
8587     case KEY_unless:
8588         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8589             return REPORT(0);
8590         pl_yylval.ival = CopLINE(PL_curcop);
8591         OPERATOR(KW_UNLESS);
8592
8593     case KEY_unlink:
8594         LOP(OP_UNLINK,XTERM);
8595
8596     case KEY_undef:
8597         UNIDOR(OP_UNDEF);
8598
8599     case KEY_unpack:
8600         LOP(OP_UNPACK,XTERM);
8601
8602     case KEY_utime:
8603         LOP(OP_UTIME,XTERM);
8604
8605     case KEY_umask:
8606         UNIDOR(OP_UMASK);
8607
8608     case KEY_unshift:
8609         LOP(OP_UNSHIFT,XTERM);
8610
8611     case KEY_use:
8612         s = tokenize_use(1, s);
8613         TOKEN(KW_USE_or_NO);
8614
8615     case KEY_values:
8616         UNI(OP_VALUES);
8617
8618     case KEY_vec:
8619         LOP(OP_VEC,XTERM);
8620
8621     case KEY_when:
8622         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8623             return REPORT(0);
8624         pl_yylval.ival = CopLINE(PL_curcop);
8625         Perl_ck_warner_d(aTHX_
8626             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8627             "when is experimental");
8628         OPERATOR(KW_WHEN);
8629
8630     case KEY_while:
8631         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8632             return REPORT(0);
8633         pl_yylval.ival = CopLINE(PL_curcop);
8634         OPERATOR(KW_WHILE);
8635
8636     case KEY_warn:
8637         PL_hints |= HINT_BLOCK_SCOPE;
8638         LOP(OP_WARN,XTERM);
8639
8640     case KEY_wait:
8641         FUN0(OP_WAIT);
8642
8643     case KEY_waitpid:
8644         LOP(OP_WAITPID,XTERM);
8645
8646     case KEY_wantarray:
8647         FUN0(OP_WANTARRAY);
8648
8649     case KEY_write:
8650         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8651          * we use the same number on EBCDIC */
8652         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8653         UNI(OP_ENTERWRITE);
8654
8655     case KEY_x:
8656         if (PL_expect == XOPERATOR) {
8657             if (*s == '=' && !PL_lex_allbrackets
8658                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8659             {
8660                 return REPORT(0);
8661             }
8662             Mop(OP_REPEAT);
8663         }
8664         check_uni();
8665         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8666
8667     case KEY_xor:
8668         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8669             return REPORT(0);
8670         pl_yylval.ival = OP_XOR;
8671         OPERATOR(OROP);
8672     }
8673 }
8674
8675 static int
8676 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8677 {
8678     I32 key = 0;
8679     I32 orig_keyword = 0;
8680     STRLEN olen = len;
8681     char *d = s;
8682     s += 2;
8683     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8684     if ((*s == ':' && s[1] == ':')
8685         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8686     {
8687         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8688         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8689     }
8690     if (!key)
8691         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8692                           UTF8fARG(UTF, len, PL_tokenbuf));
8693     if (key < 0)
8694         key = -key;
8695     else if (key == KEY_require || key == KEY_do
8696           || key == KEY_glob)
8697         /* that's a way to remember we saw "CORE::" */
8698         orig_keyword = key;
8699
8700     /* Known to be a reserved word at this point */
8701     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8702 }
8703
8704 static int
8705 yyl_keylookup(pTHX_ char *s, GV *gv)
8706 {
8707     STRLEN len;
8708     bool anydelim;
8709     I32 key;
8710     struct code c = no_code;
8711     I32 orig_keyword = 0;
8712     char *d;
8713
8714     c.gv = gv;
8715
8716     PL_bufptr = s;
8717     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8718
8719     /* Some keywords can be followed by any delimiter, including ':' */
8720     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8721
8722     /* x::* is just a word, unless x is "CORE" */
8723     if (!anydelim && *s == ':' && s[1] == ':') {
8724         if (memEQs(PL_tokenbuf, len, "CORE"))
8725             return yyl_key_core(aTHX_ s, len, c);
8726         return yyl_just_a_word(aTHX_ s, len, 0, c);
8727     }
8728
8729     d = s;
8730     while (d < PL_bufend && isSPACE(*d))
8731             d++;        /* no comments skipped here, or s### is misparsed */
8732
8733     /* Is this a word before a => operator? */
8734     if (*d == '=' && d[1] == '>') {
8735         return yyl_fatcomma(aTHX_ s, len);
8736     }
8737
8738     /* Check for plugged-in keyword */
8739     {
8740         OP *o;
8741         int result;
8742         char *saved_bufptr = PL_bufptr;
8743         PL_bufptr = s;
8744         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8745         s = PL_bufptr;
8746         if (result == KEYWORD_PLUGIN_DECLINE) {
8747             /* not a plugged-in keyword */
8748             PL_bufptr = saved_bufptr;
8749         } else if (result == KEYWORD_PLUGIN_STMT) {
8750             pl_yylval.opval = o;
8751             CLINE;
8752             if (!PL_nexttoke) PL_expect = XSTATE;
8753             return REPORT(PLUGSTMT);
8754         } else if (result == KEYWORD_PLUGIN_EXPR) {
8755             pl_yylval.opval = o;
8756             CLINE;
8757             if (!PL_nexttoke) PL_expect = XOPERATOR;
8758             return REPORT(PLUGEXPR);
8759         } else {
8760             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8761         }
8762     }
8763
8764     /* Is this a label? */
8765     if (!anydelim && PL_expect == XSTATE
8766           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8767         s = d + 1;
8768         pl_yylval.opval =
8769             newSVOP(OP_CONST, 0,
8770                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8771         CLINE;
8772         TOKEN(LABEL);
8773     }
8774
8775     /* Check for lexical sub */
8776     if (PL_expect != XOPERATOR) {
8777         char tmpbuf[sizeof PL_tokenbuf + 1];
8778         *tmpbuf = '&';
8779         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8780         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8781         if (c.off != NOT_IN_PAD) {
8782             assert(c.off); /* we assume this is boolean-true below */
8783             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8784                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8785                 HEK * const stashname = HvNAME_HEK(stash);
8786                 c.sv = newSVhek(stashname);
8787                 sv_catpvs(c.sv, "::");
8788                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8789                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8790                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8791                                   SVt_PVCV);
8792                 c.off = 0;
8793                 if (!c.gv) {
8794                     sv_free(c.sv);
8795                     c.sv = NULL;
8796                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8797                 }
8798             }
8799             else {
8800                 c.rv2cv_op = newOP(OP_PADANY, 0);
8801                 c.rv2cv_op->op_targ = c.off;
8802                 c.cv = find_lexical_cv(c.off);
8803             }
8804             c.lex = TRUE;
8805             return yyl_just_a_word(aTHX_ s, len, 0, c);
8806         }
8807         c.off = 0;
8808     }
8809
8810     /* Check for built-in keyword */
8811     key = keyword(PL_tokenbuf, len, 0);
8812
8813     if (key < 0)
8814         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8815
8816     if (key && key != KEY___DATA__ && key != KEY___END__
8817      && (!anydelim || *s != '#')) {
8818         /* no override, and not s### either; skipspace is safe here
8819          * check for => on following line */
8820         bool arrow;
8821         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8822         STRLEN   soff = s         - SvPVX(PL_linestr);
8823         s = peekspace(s);
8824         arrow = *s == '=' && s[1] == '>';
8825         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8826         s         = SvPVX(PL_linestr) +   soff;
8827         if (arrow)
8828             return yyl_fatcomma(aTHX_ s, len);
8829     }
8830
8831     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8832 }
8833
8834 static int
8835 yyl_try(pTHX_ char *s)
8836 {
8837     char *d;
8838     GV *gv = NULL;
8839     int tok;
8840
8841   retry:
8842     switch (*s) {
8843     default:
8844         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8845             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8846                 return tok;
8847             goto retry_bufptr;
8848         }
8849         yyl_croak_unrecognised(aTHX_ s);
8850
8851     case 4:
8852     case 26:
8853         /* emulate EOF on ^D or ^Z */
8854         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8855             return tok;
8856     retry_bufptr:
8857         s = PL_bufptr;
8858         goto retry;
8859
8860     case 0:
8861         if ((!PL_rsfp || PL_lex_inwhat)
8862          && (!PL_parser->filtered || s+1 < PL_bufend)) {
8863             PL_last_uni = 0;
8864             PL_last_lop = 0;
8865             if (PL_lex_brackets
8866                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8867             {
8868                 yyerror((const char *)
8869                         (PL_lex_formbrack
8870                          ? "Format not terminated"
8871                          : "Missing right curly or square bracket"));
8872             }
8873             DEBUG_T({
8874                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8875             });
8876             TOKEN(0);
8877         }
8878         if (s++ < PL_bufend)
8879             goto retry;  /* ignore stray nulls */
8880         PL_last_uni = 0;
8881         PL_last_lop = 0;
8882         if (!PL_in_eval && !PL_preambled) {
8883             PL_preambled = TRUE;
8884             if (PL_perldb) {
8885                 /* Generate a string of Perl code to load the debugger.
8886                  * If PERL5DB is set, it will return the contents of that,
8887                  * otherwise a compile-time require of perl5db.pl.  */
8888
8889                 const char * const pdb = PerlEnv_getenv("PERL5DB");
8890
8891                 if (pdb) {
8892                     sv_setpv(PL_linestr, pdb);
8893                     sv_catpvs(PL_linestr,";");
8894                 } else {
8895                     SETERRNO(0,SS_NORMAL);
8896                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8897                 }
8898                 PL_parser->preambling = CopLINE(PL_curcop);
8899             } else
8900                 SvPVCLEAR(PL_linestr);
8901             if (PL_preambleav) {
8902                 SV **svp = AvARRAY(PL_preambleav);
8903                 SV **const end = svp + AvFILLp(PL_preambleav);
8904                 while(svp <= end) {
8905                     sv_catsv(PL_linestr, *svp);
8906                     ++svp;
8907                     sv_catpvs(PL_linestr, ";");
8908                 }
8909                 sv_free(MUTABLE_SV(PL_preambleav));
8910                 PL_preambleav = NULL;
8911             }
8912             if (PL_minus_E)
8913                 sv_catpvs(PL_linestr,
8914                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8915             if (PL_minus_n || PL_minus_p) {
8916                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8917                 if (PL_minus_l)
8918                     sv_catpvs(PL_linestr,"chomp;");
8919                 if (PL_minus_a) {
8920                     if (PL_minus_F) {
8921                         if (   (   *PL_splitstr == '/'
8922                                 || *PL_splitstr == '\''
8923                                 || *PL_splitstr == '"')
8924                             && strchr(PL_splitstr + 1, *PL_splitstr))
8925                         {
8926                             /* strchr is ok, because -F pattern can't contain
8927                              * embeddded NULs */
8928                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8929                         }
8930                         else {
8931                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8932                                bytes can be used as quoting characters.  :-) */
8933                             const char *splits = PL_splitstr;
8934                             sv_catpvs(PL_linestr, "our @F=split(q\0");
8935                             do {
8936                                 /* Need to \ \s  */
8937                                 if (*splits == '\\')
8938                                     sv_catpvn(PL_linestr, splits, 1);
8939                                 sv_catpvn(PL_linestr, splits, 1);
8940                             } while (*splits++);
8941                             /* This loop will embed the trailing NUL of
8942                                PL_linestr as the last thing it does before
8943                                terminating.  */
8944                             sv_catpvs(PL_linestr, ");");
8945                         }
8946                     }
8947                     else
8948                         sv_catpvs(PL_linestr,"our @F=split(' ');");
8949                 }
8950             }
8951             sv_catpvs(PL_linestr, "\n");
8952             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8953             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8954             PL_last_lop = PL_last_uni = NULL;
8955             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8956                 update_debugger_info(PL_linestr, NULL, 0);
8957             goto retry;
8958         }
8959         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8960             return tok;
8961         goto retry_bufptr;
8962
8963     case '\r':
8964 #ifdef PERL_STRICT_CR
8965         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8966         Perl_croak(aTHX_
8967       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8968 #endif
8969     case ' ': case '\t': case '\f': case '\v':
8970         s++;
8971         goto retry;
8972
8973     case '#':
8974     case '\n': {
8975         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8976         if (needs_semicolon)
8977             TOKEN(PERLY_SEMICOLON);
8978         else
8979             goto retry;
8980     }
8981
8982     case '-':
8983         return yyl_hyphen(aTHX_ s);
8984
8985     case '+':
8986         return yyl_plus(aTHX_ s);
8987
8988     case '*':
8989         return yyl_star(aTHX_ s);
8990
8991     case '%':
8992         return yyl_percent(aTHX_ s);
8993
8994     case '^':
8995         return yyl_caret(aTHX_ s);
8996
8997     case '[':
8998         return yyl_leftsquare(aTHX_ s);
8999
9000     case '~':
9001         return yyl_tilde(aTHX_ s);
9002
9003     case ',':
9004         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9005             TOKEN(0);
9006         s++;
9007         OPERATOR(PERLY_COMMA);
9008     case ':':
9009         if (s[1] == ':')
9010             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9011         return yyl_colon(aTHX_ s + 1);
9012
9013     case '(':
9014         return yyl_leftparen(aTHX_ s + 1);
9015
9016     case ';':
9017         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9018             TOKEN(0);
9019         CLINE;
9020         s++;
9021         PL_expect = XSTATE;
9022         TOKEN(PERLY_SEMICOLON);
9023
9024     case ')':
9025         return yyl_rightparen(aTHX_ s);
9026
9027     case ']':
9028         return yyl_rightsquare(aTHX_ s);
9029
9030     case '{':
9031         return yyl_leftcurly(aTHX_ s + 1, 0);
9032
9033     case '}':
9034         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9035             TOKEN(0);
9036         return yyl_rightcurly(aTHX_ s, 0);
9037
9038     case '&':
9039         return yyl_ampersand(aTHX_ s);
9040
9041     case '|':
9042         return yyl_verticalbar(aTHX_ s);
9043
9044     case '=':
9045         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9046             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9047         {
9048             s = vcs_conflict_marker(s + 7);
9049             goto retry;
9050         }
9051
9052         s++;
9053         {
9054             const char tmp = *s++;
9055             if (tmp == '=') {
9056                 if (!PL_lex_allbrackets
9057                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9058                 {
9059                     s -= 2;
9060                     TOKEN(0);
9061                 }
9062                 ChEop(OP_EQ);
9063             }
9064             if (tmp == '>') {
9065                 if (!PL_lex_allbrackets
9066                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9067                 {
9068                     s -= 2;
9069                     TOKEN(0);
9070                 }
9071                 OPERATOR(PERLY_COMMA);
9072             }
9073             if (tmp == '~')
9074                 PMop(OP_MATCH);
9075             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9076                 && memCHRs("+-*/%.^&|<",tmp))
9077                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9078                             "Reversed %c= operator",(int)tmp);
9079             s--;
9080             if (PL_expect == XSTATE
9081                 && isALPHA(tmp)
9082                 && (s == PL_linestart+1 || s[-2] == '\n') )
9083             {
9084                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9085                     || PL_lex_state != LEX_NORMAL)
9086                 {
9087                     d = PL_bufend;
9088                     while (s < d) {
9089                         if (*s++ == '\n') {
9090                             incline(s, PL_bufend);
9091                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9092                             {
9093                                 s = (char *) memchr(s,'\n', d - s);
9094                                 if (s)
9095                                     s++;
9096                                 else
9097                                     s = d;
9098                                 incline(s, PL_bufend);
9099                                 goto retry;
9100                             }
9101                         }
9102                     }
9103                     goto retry;
9104                 }
9105                 s = PL_bufend;
9106                 PL_parser->in_pod = 1;
9107                 goto retry;
9108             }
9109         }
9110         if (PL_expect == XBLOCK) {
9111             const char *t = s;
9112 #ifdef PERL_STRICT_CR
9113             while (SPACE_OR_TAB(*t))
9114 #else
9115             while (SPACE_OR_TAB(*t) || *t == '\r')
9116 #endif
9117                 t++;
9118             if (*t == '\n' || *t == '#') {
9119                 ENTER_with_name("lex_format");
9120                 SAVEI8(PL_parser->form_lex_state);
9121                 SAVEI32(PL_lex_formbrack);
9122                 PL_parser->form_lex_state = PL_lex_state;
9123                 PL_lex_formbrack = PL_lex_brackets + 1;
9124                 PL_parser->sub_error_count = PL_error_count;
9125                 return yyl_leftcurly(aTHX_ s, 1);
9126             }
9127         }
9128         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9129             s--;
9130             TOKEN(0);
9131         }
9132         pl_yylval.ival = 0;
9133         OPERATOR(ASSIGNOP);
9134
9135         case '!':
9136         return yyl_bang(aTHX_ s + 1);
9137
9138     case '<':
9139         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9140             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9141         {
9142             s = vcs_conflict_marker(s + 7);
9143             goto retry;
9144         }
9145         return yyl_leftpointy(aTHX_ s);
9146
9147     case '>':
9148         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9149             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9150         {
9151             s = vcs_conflict_marker(s + 7);
9152             goto retry;
9153         }
9154         return yyl_rightpointy(aTHX_ s + 1);
9155
9156     case '$':
9157         return yyl_dollar(aTHX_ s);
9158
9159     case '@':
9160         return yyl_snail(aTHX_ s);
9161
9162     case '/':                   /* may be division, defined-or, or pattern */
9163         return yyl_slash(aTHX_ s);
9164
9165      case '?':                  /* conditional */
9166         s++;
9167         if (!PL_lex_allbrackets
9168             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9169         {
9170             s--;
9171             TOKEN(0);
9172         }
9173         PL_lex_allbrackets++;
9174         OPERATOR(PERLY_QUESTION_MARK);
9175
9176     case '.':
9177         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9178 #ifdef PERL_STRICT_CR
9179             && s[1] == '\n'
9180 #else
9181             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9182 #endif
9183             && (s == PL_linestart || s[-1] == '\n') )
9184         {
9185             PL_expect = XSTATE;
9186             /* formbrack==2 means dot seen where arguments expected */
9187             return yyl_rightcurly(aTHX_ s, 2);
9188         }
9189         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9190             s += 3;
9191             OPERATOR(YADAYADA);
9192         }
9193         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9194             char tmp = *s++;
9195             if (*s == tmp) {
9196                 if (!PL_lex_allbrackets
9197                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9198                 {
9199                     s--;
9200                     TOKEN(0);
9201                 }
9202                 s++;
9203                 if (*s == tmp) {
9204                     s++;
9205                     pl_yylval.ival = OPf_SPECIAL;
9206                 }
9207                 else
9208                     pl_yylval.ival = 0;
9209                 OPERATOR(DOTDOT);
9210             }
9211             if (*s == '=' && !PL_lex_allbrackets
9212                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9213             {
9214                 s--;
9215                 TOKEN(0);
9216             }
9217             Aop(OP_CONCAT);
9218         }
9219         /* FALLTHROUGH */
9220     case '0': case '1': case '2': case '3': case '4':
9221     case '5': case '6': case '7': case '8': case '9':
9222         s = scan_num(s, &pl_yylval);
9223         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9224         if (PL_expect == XOPERATOR)
9225             no_op("Number",s);
9226         TERM(THING);
9227
9228     case '\'':
9229         return yyl_sglquote(aTHX_ s);
9230
9231     case '"':
9232         return yyl_dblquote(aTHX_ s);
9233
9234     case '`':
9235         return yyl_backtick(aTHX_ s);
9236
9237     case '\\':
9238         return yyl_backslash(aTHX_ s + 1);
9239
9240     case 'v':
9241         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9242             char *start = s + 2;
9243             while (isDIGIT(*start) || *start == '_')
9244                 start++;
9245             if (*start == '.' && isDIGIT(start[1])) {
9246                 s = scan_num(s, &pl_yylval);
9247                 TERM(THING);
9248             }
9249             else if ((*start == ':' && start[1] == ':')
9250                      || (PL_expect == XSTATE && *start == ':')) {
9251                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9252                     return tok;
9253                 goto retry_bufptr;
9254             }
9255             else if (PL_expect == XSTATE) {
9256                 d = start;
9257                 while (d < PL_bufend && isSPACE(*d)) d++;
9258                 if (*d == ':') {
9259                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9260                         return tok;
9261                     goto retry_bufptr;
9262                 }
9263             }
9264             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9265             if (!isALPHA(*start) && (PL_expect == XTERM
9266                         || PL_expect == XREF || PL_expect == XSTATE
9267                         || PL_expect == XTERMORDORDOR)) {
9268                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9269                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9270                 if (!gv) {
9271                     s = scan_num(s, &pl_yylval);
9272                     TERM(THING);
9273                 }
9274             }
9275         }
9276         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9277             return tok;
9278         goto retry_bufptr;
9279
9280     case 'x':
9281         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9282             s++;
9283             Mop(OP_REPEAT);
9284         }
9285         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9286             return tok;
9287         goto retry_bufptr;
9288
9289     case '_':
9290     case 'a': case 'A':
9291     case 'b': case 'B':
9292     case 'c': case 'C':
9293     case 'd': case 'D':
9294     case 'e': case 'E':
9295     case 'f': case 'F':
9296     case 'g': case 'G':
9297     case 'h': case 'H':
9298     case 'i': case 'I':
9299     case 'j': case 'J':
9300     case 'k': case 'K':
9301     case 'l': case 'L':
9302     case 'm': case 'M':
9303     case 'n': case 'N':
9304     case 'o': case 'O':
9305     case 'p': case 'P':
9306     case 'q': case 'Q':
9307     case 'r': case 'R':
9308     case 's': case 'S':
9309     case 't': case 'T':
9310     case 'u': case 'U':
9311               case 'V':
9312     case 'w': case 'W':
9313               case 'X':
9314     case 'y': case 'Y':
9315     case 'z': case 'Z':
9316         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9317             return tok;
9318         goto retry_bufptr;
9319     }
9320 }
9321
9322
9323 /*
9324   yylex
9325
9326   Works out what to call the token just pulled out of the input
9327   stream.  The yacc parser takes care of taking the ops we return and
9328   stitching them into a tree.
9329
9330   Returns:
9331     The type of the next token
9332
9333   Structure:
9334       Check if we have already built the token; if so, use it.
9335       Switch based on the current state:
9336           - if we have a case modifier in a string, deal with that
9337           - handle other cases of interpolation inside a string
9338           - scan the next line if we are inside a format
9339       In the normal state, switch on the next character:
9340           - default:
9341             if alphabetic, go to key lookup
9342             unrecognized character - croak
9343           - 0/4/26: handle end-of-line or EOF
9344           - cases for whitespace
9345           - \n and #: handle comments and line numbers
9346           - various operators, brackets and sigils
9347           - numbers
9348           - quotes
9349           - 'v': vstrings (or go to key lookup)
9350           - 'x' repetition operator (or go to key lookup)
9351           - other ASCII alphanumerics (key lookup begins here):
9352               word before => ?
9353               keyword plugin
9354               scan built-in keyword (but do nothing with it yet)
9355               check for statement label
9356               check for lexical subs
9357                   return yyl_just_a_word if there is one
9358               see whether built-in keyword is overridden
9359               switch on keyword number:
9360                   - default: return yyl_just_a_word:
9361                       not a built-in keyword; handle bareword lookup
9362                       disambiguate between method and sub call
9363                       fall back to bareword
9364                   - cases for built-in keywords
9365 */
9366
9367 int
9368 Perl_yylex(pTHX)
9369 {
9370     char *s = PL_bufptr;
9371
9372     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9373         const U8* first_bad_char_loc;
9374         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9375                                                         PL_bufend - PL_bufptr,
9376                                                         &first_bad_char_loc)))
9377         {
9378             _force_out_malformed_utf8_message(first_bad_char_loc,
9379                                               (U8 *) PL_bufend,
9380                                               0,
9381                                               1 /* 1 means die */ );
9382             NOT_REACHED; /* NOTREACHED */
9383         }
9384         PL_parser->recheck_utf8_validity = FALSE;
9385     }
9386     DEBUG_T( {
9387         SV* tmp = newSVpvs("");
9388         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9389             (IV)CopLINE(PL_curcop),
9390             lex_state_names[PL_lex_state],
9391             exp_name[PL_expect],
9392             pv_display(tmp, s, strlen(s), 0, 60));
9393         SvREFCNT_dec(tmp);
9394     } );
9395
9396     /* when we've already built the next token, just pull it out of the queue */
9397     if (PL_nexttoke) {
9398         PL_nexttoke--;
9399         pl_yylval = PL_nextval[PL_nexttoke];
9400         {
9401             I32 next_type;
9402             next_type = PL_nexttype[PL_nexttoke];
9403             if (next_type & (7<<24)) {
9404                 if (next_type & (1<<24)) {
9405                     if (PL_lex_brackets > 100)
9406                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9407                     PL_lex_brackstack[PL_lex_brackets++] =
9408                         (char) ((U8) (next_type >> 16));
9409                 }
9410                 if (next_type & (2<<24))
9411                     PL_lex_allbrackets++;
9412                 if (next_type & (4<<24))
9413                     PL_lex_allbrackets--;
9414                 next_type &= 0xffff;
9415             }
9416             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9417         }
9418     }
9419
9420     switch (PL_lex_state) {
9421     case LEX_NORMAL:
9422     case LEX_INTERPNORMAL:
9423         break;
9424
9425     /* interpolated case modifiers like \L \U, including \Q and \E.
9426        when we get here, PL_bufptr is at the \
9427     */
9428     case LEX_INTERPCASEMOD:
9429         /* handle \E or end of string */
9430         return yyl_interpcasemod(aTHX_ s);
9431
9432     case LEX_INTERPPUSH:
9433         return REPORT(sublex_push());
9434
9435     case LEX_INTERPSTART:
9436         if (PL_bufptr == PL_bufend)
9437             return REPORT(sublex_done());
9438         DEBUG_T({
9439             if(*PL_bufptr != '(')
9440                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9441         });
9442         PL_expect = XTERM;
9443         /* for /@a/, we leave the joining for the regex engine to do
9444          * (unless we're within \Q etc) */
9445         PL_lex_dojoin = (*PL_bufptr == '@'
9446                             && (!PL_lex_inpat || PL_lex_casemods));
9447         PL_lex_state = LEX_INTERPNORMAL;
9448         if (PL_lex_dojoin) {
9449             NEXTVAL_NEXTTOKE.ival = 0;
9450             force_next(PERLY_COMMA);
9451             force_ident("\"", PERLY_DOLLAR);
9452             NEXTVAL_NEXTTOKE.ival = 0;
9453             force_next(PERLY_DOLLAR);
9454             NEXTVAL_NEXTTOKE.ival = 0;
9455             force_next((2<<24)|PERLY_PAREN_OPEN);
9456             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
9457             force_next(FUNC);
9458         }
9459         /* Convert (?{...}) and friends to 'do {...}' */
9460         if (PL_lex_inpat && *PL_bufptr == '(') {
9461             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9462             PL_bufptr += 2;
9463             if (*PL_bufptr != '{')
9464                 PL_bufptr++;
9465             PL_expect = XTERMBLOCK;
9466             force_next(KW_DO);
9467         }
9468
9469         if (PL_lex_starts++) {
9470             s = PL_bufptr;
9471             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9472             if (!PL_lex_casemods && PL_lex_inpat)
9473                 TOKEN(PERLY_COMMA);
9474             else
9475                 AopNOASSIGN(OP_CONCAT);
9476         }
9477         return yylex();
9478
9479     case LEX_INTERPENDMAYBE:
9480         if (intuit_more(PL_bufptr, PL_bufend)) {
9481             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
9482             break;
9483         }
9484         /* FALLTHROUGH */
9485
9486     case LEX_INTERPEND:
9487         if (PL_lex_dojoin) {
9488             const U8 dojoin_was = PL_lex_dojoin;
9489             PL_lex_dojoin = FALSE;
9490             PL_lex_state = LEX_INTERPCONCAT;
9491             PL_lex_allbrackets--;
9492             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9493         }
9494         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9495             && SvEVALED(PL_lex_repl))
9496         {
9497             if (PL_bufptr != PL_bufend)
9498                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9499             PL_lex_repl = NULL;
9500         }
9501         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9502            re_eval_str.  If the here-doc body's length equals the previous
9503            value of re_eval_start, re_eval_start will now be null.  So
9504            check re_eval_str as well. */
9505         if (PL_parser->lex_shared->re_eval_start
9506          || PL_parser->lex_shared->re_eval_str) {
9507             SV *sv;
9508             if (*PL_bufptr != ')')
9509                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9510             PL_bufptr++;
9511             /* having compiled a (?{..}) expression, return the original
9512              * text too, as a const */
9513             if (PL_parser->lex_shared->re_eval_str) {
9514                 sv = PL_parser->lex_shared->re_eval_str;
9515                 PL_parser->lex_shared->re_eval_str = NULL;
9516                 SvCUR_set(sv,
9517                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9518                 SvPV_shrink_to_cur(sv);
9519             }
9520             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9521                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9522             NEXTVAL_NEXTTOKE.opval =
9523                     newSVOP(OP_CONST, 0,
9524                                  sv);
9525             force_next(THING);
9526             PL_parser->lex_shared->re_eval_start = NULL;
9527             PL_expect = XTERM;
9528             return REPORT(PERLY_COMMA);
9529         }
9530
9531         /* FALLTHROUGH */
9532     case LEX_INTERPCONCAT:
9533 #ifdef DEBUGGING
9534         if (PL_lex_brackets)
9535             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9536                        (long) PL_lex_brackets);
9537 #endif
9538         if (PL_bufptr == PL_bufend)
9539             return REPORT(sublex_done());
9540
9541         /* m'foo' still needs to be parsed for possible (?{...}) */
9542         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9543             SV *sv = newSVsv(PL_linestr);
9544             sv = tokeq(sv);
9545             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9546             s = PL_bufend;
9547         }
9548         else {
9549             int save_error_count = PL_error_count;
9550
9551             s = scan_const(PL_bufptr);
9552
9553             /* Set flag if this was a pattern and there were errors.  op.c will
9554              * refuse to compile a pattern with this flag set.  Otherwise, we
9555              * could get segfaults, etc. */
9556             if (PL_lex_inpat && PL_error_count > save_error_count) {
9557                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9558             }
9559             if (*s == '\\')
9560                 PL_lex_state = LEX_INTERPCASEMOD;
9561             else
9562                 PL_lex_state = LEX_INTERPSTART;
9563         }
9564
9565         if (s != PL_bufptr) {
9566             NEXTVAL_NEXTTOKE = pl_yylval;
9567             PL_expect = XTERM;
9568             force_next(THING);
9569             if (PL_lex_starts++) {
9570                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9571                 if (!PL_lex_casemods && PL_lex_inpat)
9572                     TOKEN(PERLY_COMMA);
9573                 else
9574                     AopNOASSIGN(OP_CONCAT);
9575             }
9576             else {
9577                 PL_bufptr = s;
9578                 return yylex();
9579             }
9580         }
9581
9582         return yylex();
9583     case LEX_FORMLINE:
9584         if (PL_parser->sub_error_count != PL_error_count) {
9585             /* There was an error parsing a formline, which tends to
9586                mess up the parser.
9587                Unlike interpolated sub-parsing, we can't treat any of
9588                these as recoverable, so no need to check sub_no_recover.
9589             */
9590             yyquit();
9591         }
9592         assert(PL_lex_formbrack);
9593         s = scan_formline(PL_bufptr);
9594         if (!PL_lex_formbrack)
9595             return yyl_rightcurly(aTHX_ s, 1);
9596         PL_bufptr = s;
9597         return yylex();
9598     }
9599
9600     /* We really do *not* want PL_linestr ever becoming a COW. */
9601     assert (!SvIsCOW(PL_linestr));
9602     s = PL_bufptr;
9603     PL_oldoldbufptr = PL_oldbufptr;
9604     PL_oldbufptr = s;
9605
9606     if (PL_in_my == KEY_sigvar) {
9607         PL_parser->saw_infix_sigil = 0;
9608         return yyl_sigvar(aTHX_ s);
9609     }
9610
9611     {
9612         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9613            On its return, we then need to set it to indicate whether the token
9614            we just encountered was an infix operator that (if we hadn't been
9615            expecting an operator) have been a sigil.
9616         */
9617         bool expected_operator = (PL_expect == XOPERATOR);
9618         int ret = yyl_try(aTHX_ s);
9619         switch (pl_yylval.ival) {
9620         case OP_BIT_AND:
9621         case OP_MODULO:
9622         case OP_MULTIPLY:
9623         case OP_NBIT_AND:
9624             if (expected_operator) {
9625                 PL_parser->saw_infix_sigil = 1;
9626                 break;
9627             }
9628             /* FALLTHROUGH */
9629         default:
9630             PL_parser->saw_infix_sigil = 0;
9631         }
9632         return ret;
9633     }
9634 }
9635
9636
9637 /*
9638   S_pending_ident
9639
9640   Looks up an identifier in the pad or in a package
9641
9642   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9643   rather than a plain pad var.
9644
9645   Returns:
9646     PRIVATEREF if this is a lexical name.
9647     BAREWORD   if this belongs to a package.
9648
9649   Structure:
9650       if we're in a my declaration
9651           croak if they tried to say my($foo::bar)
9652           build the ops for a my() declaration
9653       if it's an access to a my() variable
9654           build ops for access to a my() variable
9655       if in a dq string, and they've said @foo and we can't find @foo
9656           warn
9657       build ops for a bareword
9658 */
9659
9660 static int
9661 S_pending_ident(pTHX)
9662 {
9663     PADOFFSET tmp = 0;
9664     const char pit = (char)pl_yylval.ival;
9665     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9666     /* All routes through this function want to know if there is a colon.  */
9667     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9668
9669     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9670           "### Pending identifier '%s'\n", PL_tokenbuf); });
9671     assert(tokenbuf_len >= 2);
9672
9673     /* if we're in a my(), we can't allow dynamics here.
9674        $foo'bar has already been turned into $foo::bar, so
9675        just check for colons.
9676
9677        if it's a legal name, the OP is a PADANY.
9678     */
9679     if (PL_in_my) {
9680         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9681             if (has_colon)
9682                 /* diag_listed_as: No package name allowed for variable %s
9683                                    in "our" */
9684                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9685                                   "%s %s in \"our\"",
9686                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9687                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9688             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9689         }
9690         else {
9691             OP *o;
9692             if (has_colon) {
9693                 /* "my" variable %s can't be in a package */
9694                 /* PL_no_myglob is constant */
9695                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9696                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9697                             PL_in_my == KEY_my ? "my" : "state",
9698                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9699                             PL_tokenbuf),
9700                             UTF ? SVf_UTF8 : 0);
9701                 GCC_DIAG_RESTORE_STMT;
9702             }
9703
9704             if (PL_in_my == KEY_sigvar) {
9705                 /* A signature 'padop' needs in addition, an op_first to
9706                  * point to a child sigdefelem, and an extra field to hold
9707                  * the signature index. We can achieve both by using an
9708                  * UNOP_AUX and (ab)using the op_aux field to hold the
9709                  * index. If we ever need more fields, use a real malloced
9710                  * aux strut instead.
9711                  */
9712                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9713                                     INT2PTR(UNOP_AUX_item *,
9714                                         (PL_parser->sig_elems)));
9715                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9716                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9717                                   :                         OPpARGELEM_HV);
9718             }
9719             else
9720                 o = newOP(OP_PADANY, 0);
9721             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9722                                                         UTF ? SVf_UTF8 : 0);
9723             if (PL_in_my == KEY_sigvar)
9724                 PL_in_my = 0;
9725
9726             pl_yylval.opval = o;
9727             return PRIVATEREF;
9728         }
9729     }
9730
9731     /*
9732        build the ops for accesses to a my() variable.
9733     */
9734
9735     if (!has_colon) {
9736         if (!PL_in_my)
9737             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9738                                  0);
9739         if (tmp != NOT_IN_PAD) {
9740             /* might be an "our" variable" */
9741             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9742                 /* build ops for a bareword */
9743                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9744                 HEK * const stashname = HvNAME_HEK(stash);
9745                 SV *  const sym = newSVhek(stashname);
9746                 sv_catpvs(sym, "::");
9747                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9748                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9749                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9750                 if (pit != '&')
9751                   gv_fetchsv(sym,
9752                     GV_ADDMULTI,
9753                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9754                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9755                      : SVt_PVHV));
9756                 return BAREWORD;
9757             }
9758
9759             pl_yylval.opval = newOP(OP_PADANY, 0);
9760             pl_yylval.opval->op_targ = tmp;
9761             return PRIVATEREF;
9762         }
9763     }
9764
9765     /*
9766        Whine if they've said @foo or @foo{key} in a doublequoted string,
9767        and @foo (or %foo) isn't a variable we can find in the symbol
9768        table.
9769     */
9770     if (ckWARN(WARN_AMBIGUOUS)
9771         && pit == '@'
9772         && PL_lex_state != LEX_NORMAL
9773         && !PL_lex_brackets)
9774     {
9775         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9776                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9777                                          SVt_PVAV);
9778         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9779            )
9780         {
9781             /* Downgraded from fatal to warning 20000522 mjd */
9782             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9783                         "Possible unintended interpolation of %" UTF8f
9784                         " in string",
9785                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9786         }
9787     }
9788
9789     /* build ops for a bareword */
9790     pl_yylval.opval = newSVOP(OP_CONST, 0,
9791                                    newSVpvn_flags(PL_tokenbuf + 1,
9792                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9793                                                       UTF ? SVf_UTF8 : 0 ));
9794     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9795     if (pit != '&')
9796         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9797                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9798                      | ( UTF ? SVf_UTF8 : 0 ),
9799                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9800                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9801                       : SVt_PVHV));
9802     return BAREWORD;
9803 }
9804
9805 STATIC void
9806 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9807 {
9808     PERL_ARGS_ASSERT_CHECKCOMMA;
9809
9810     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9811         if (ckWARN(WARN_SYNTAX)) {
9812             int level = 1;
9813             const char *w;
9814             for (w = s+2; *w && level; w++) {
9815                 if (*w == '(')
9816                     ++level;
9817                 else if (*w == ')')
9818                     --level;
9819             }
9820             while (isSPACE(*w))
9821                 ++w;
9822             /* the list of chars below is for end of statements or
9823              * block / parens, boolean operators (&&, ||, //) and branch
9824              * constructs (or, and, if, until, unless, while, err, for).
9825              * Not a very solid hack... */
9826             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9827                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9828                             "%s (...) interpreted as function",name);
9829         }
9830     }
9831     while (s < PL_bufend && isSPACE(*s))
9832         s++;
9833     if (*s == '(')
9834         s++;
9835     while (s < PL_bufend && isSPACE(*s))
9836         s++;
9837     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9838         const char * const w = s;
9839         s += UTF ? UTF8SKIP(s) : 1;
9840         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9841             s += UTF ? UTF8SKIP(s) : 1;
9842         while (s < PL_bufend && isSPACE(*s))
9843             s++;
9844         if (*s == ',') {
9845             GV* gv;
9846             if (keyword(w, s - w, 0))
9847                 return;
9848
9849             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9850             if (gv && GvCVu(gv))
9851                 return;
9852             if (s - w <= 254) {
9853                 PADOFFSET off;
9854                 char tmpbuf[256];
9855                 Copy(w, tmpbuf+1, s - w, char);
9856                 *tmpbuf = '&';
9857                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9858                 if (off != NOT_IN_PAD) return;
9859             }
9860             Perl_croak(aTHX_ "No comma allowed after %s", what);
9861         }
9862     }
9863 }
9864
9865 /* S_new_constant(): do any overload::constant lookup.
9866
9867    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9868    Best used as sv=new_constant(..., sv, ...).
9869    If s, pv are NULL, calls subroutine with one argument,
9870    and <type> is used with error messages only.
9871    <type> is assumed to be well formed UTF-8.
9872
9873    If error_msg is not NULL, *error_msg will be set to any error encountered.
9874    Otherwise yyerror() will be used to output it */
9875
9876 STATIC SV *
9877 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9878                SV *sv, SV *pv, const char *type, STRLEN typelen,
9879                const char ** error_msg)
9880 {
9881     dSP;
9882     HV * table = GvHV(PL_hintgv);                /* ^H */
9883     SV *res;
9884     SV *errsv = NULL;
9885     SV **cvp;
9886     SV *cv, *typesv;
9887     const char *why1 = "", *why2 = "", *why3 = "";
9888     const char * optional_colon = ":";  /* Only some messages have a colon */
9889     char *msg;
9890
9891     PERL_ARGS_ASSERT_NEW_CONSTANT;
9892     /* We assume that this is true: */
9893     assert(type || s);
9894
9895     sv_2mortal(sv);                     /* Parent created it permanently */
9896
9897     if (   ! table
9898         || ! (PL_hints & HINT_LOCALIZE_HH))
9899     {
9900         why1 = "unknown";
9901         optional_colon = "";
9902         goto report;
9903     }
9904
9905     cvp = hv_fetch(table, key, keylen, FALSE);
9906     if (!cvp || !SvOK(*cvp)) {
9907         why1 = "$^H{";
9908         why2 = key;
9909         why3 = "} is not defined";
9910         goto report;
9911     }
9912
9913     cv = *cvp;
9914     if (!pv && s)
9915         pv = newSVpvn_flags(s, len, SVs_TEMP);
9916     if (type && pv)
9917         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9918     else
9919         typesv = &PL_sv_undef;
9920
9921     PUSHSTACKi(PERLSI_OVERLOAD);
9922     ENTER ;
9923     SAVETMPS;
9924
9925     PUSHMARK(SP) ;
9926     EXTEND(sp, 3);
9927     if (pv)
9928         PUSHs(pv);
9929     PUSHs(sv);
9930     if (pv)
9931         PUSHs(typesv);
9932     PUTBACK;
9933     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9934
9935     SPAGAIN ;
9936
9937     /* Check the eval first */
9938     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9939         STRLEN errlen;
9940         const char * errstr;
9941         sv_catpvs(errsv, "Propagated");
9942         errstr = SvPV_const(errsv, errlen);
9943         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9944         (void)POPs;
9945         res = SvREFCNT_inc_simple_NN(sv);
9946     }
9947     else {
9948         res = POPs;
9949         SvREFCNT_inc_simple_void_NN(res);
9950     }
9951
9952     PUTBACK ;
9953     FREETMPS ;
9954     LEAVE ;
9955     POPSTACK;
9956
9957     if (SvOK(res)) {
9958         return res;
9959     }
9960
9961     sv = res;
9962     (void)sv_2mortal(sv);
9963
9964     why1 = "Call to &{$^H{";
9965     why2 = key;
9966     why3 = "}} did not return a defined value";
9967
9968   report:
9969
9970     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9971                         (int)(type ? typelen : len),
9972                         (type ? type: s),
9973                         optional_colon,
9974                         why1, why2, why3);
9975     if (error_msg) {
9976         *error_msg = msg;
9977     }
9978     else {
9979         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9980     }
9981     return SvREFCNT_inc_simple_NN(sv);
9982 }
9983
9984 PERL_STATIC_INLINE void
9985 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9986                     bool is_utf8, bool check_dollar, bool tick_warn)
9987 {
9988     int saw_tick = 0;
9989     const char *olds = *s;
9990     PERL_ARGS_ASSERT_PARSE_IDENT;
9991
9992     while (*s < PL_bufend) {
9993         if (*d >= e)
9994             Perl_croak(aTHX_ "%s", ident_too_long);
9995         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9996              /* The UTF-8 case must come first, otherwise things
9997              * like c\N{COMBINING TILDE} would start failing, as the
9998              * isWORDCHAR_A case below would gobble the 'c' up.
9999              */
10000
10001             char *t = *s + UTF8SKIP(*s);
10002             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10003                 t += UTF8SKIP(t);
10004             }
10005             if (*d + (t - *s) > e)
10006                 Perl_croak(aTHX_ "%s", ident_too_long);
10007             Copy(*s, *d, t - *s, char);
10008             *d += t - *s;
10009             *s = t;
10010         }
10011         else if ( isWORDCHAR_A(**s) ) {
10012             do {
10013                 *(*d)++ = *(*s)++;
10014             } while (isWORDCHAR_A(**s) && *d < e);
10015         }
10016         else if (   allow_package
10017                  && **s == '\''
10018                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10019         {
10020             *(*d)++ = ':';
10021             *(*d)++ = ':';
10022             (*s)++;
10023             saw_tick++;
10024         }
10025         else if (allow_package && **s == ':' && (*s)[1] == ':'
10026            /* Disallow things like Foo::$bar. For the curious, this is
10027             * the code path that triggers the "Bad name after" warning
10028             * when looking for barewords.
10029             */
10030            && !(check_dollar && (*s)[2] == '$')) {
10031             *(*d)++ = *(*s)++;
10032             *(*d)++ = *(*s)++;
10033         }
10034         else
10035             break;
10036     }
10037     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10038               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10039         char *this_d;
10040         char *d2;
10041         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10042         d2 = this_d;
10043         SAVEFREEPV(this_d);
10044         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10045                          "Old package separator used in string");
10046         if (olds[-1] == '#')
10047             *d2++ = olds[-2];
10048         *d2++ = olds[-1];
10049         while (olds < *s) {
10050             if (*olds == '\'') {
10051                 *d2++ = '\\';
10052                 *d2++ = *olds++;
10053             }
10054             else
10055                 *d2++ = *olds++;
10056         }
10057         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10058                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10059                           UTF8fARG(is_utf8, d2-this_d, this_d));
10060     }
10061     return;
10062 }
10063
10064 /* Returns a NUL terminated string, with the length of the string written to
10065    *slp
10066    */
10067 char *
10068 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10069 {
10070     char *d = dest;
10071     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10072     bool is_utf8 = cBOOL(UTF);
10073
10074     PERL_ARGS_ASSERT_SCAN_WORD;
10075
10076     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10077     *d = '\0';
10078     *slp = d - dest;
10079     return s;
10080 }
10081
10082
10083 /* scan s and extract an identifier ($var) from it if possible
10084  * into dest.
10085  * XXX: This function has subtle implications on parsing, and
10086  * changing how it behaves can cause a variable to change from
10087  * being a run time rv2sv call or a compile time binding to a
10088  * specific variable name.
10089  */
10090 STATIC char *
10091 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10092 {
10093     I32 herelines = PL_parser->herelines;
10094     SSize_t bracket = -1;
10095     char funny = *s++;
10096     char *d = dest;
10097     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10098     bool is_utf8 = cBOOL(UTF);
10099     I32 orig_copline = 0, tmp_copline = 0;
10100
10101     PERL_ARGS_ASSERT_SCAN_IDENT;
10102
10103     if (isSPACE(*s) || !*s)
10104         s = skipspace(s);
10105     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10106         bool is_zero= *s == '0' ? TRUE : FALSE;
10107         char *digit_start= d;
10108         *d++ = *s++;
10109         while (s < PL_bufend && isDIGIT(*s)) {
10110             if (d >= e)
10111                 Perl_croak(aTHX_ "%s", ident_too_long);
10112             *d++ = *s++;
10113         }
10114         if (is_zero && d - digit_start > 1)
10115             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10116     }
10117     else {  /* See if it is a "normal" identifier */
10118         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10119     }
10120     *d = '\0';
10121     d = dest;
10122     if (*d) {
10123         /* Either a digit variable, or parse_ident() found an identifier
10124            (anything valid as a bareword), so job done and return.  */
10125         if (PL_lex_state != LEX_NORMAL)
10126             PL_lex_state = LEX_INTERPENDMAYBE;
10127         return s;
10128     }
10129
10130     /* Here, it is not a run-of-the-mill identifier name */
10131
10132     if (*s == '$' && s[1]
10133         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10134             || isDIGIT_A((U8)s[1])
10135             || s[1] == '$'
10136             || s[1] == '{'
10137             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10138     {
10139         /* Dereferencing a value in a scalar variable.
10140            The alternatives are different syntaxes for a scalar variable.
10141            Using ' as a leading package separator isn't allowed. :: is.   */
10142         return s;
10143     }
10144     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10145     if (*s == '{') {
10146         bracket = s - SvPVX(PL_linestr);
10147         s++;
10148         orig_copline = CopLINE(PL_curcop);
10149         if (s < PL_bufend && isSPACE(*s)) {
10150             s = skipspace(s);
10151         }
10152     }
10153
10154
10155     /* Extract the first character of the variable name from 's' and
10156      * copy it, null terminated into 'd'. Note that this does not
10157      * involve checking for just IDFIRST characters, as it allows the
10158      * '^' for ${^FOO} type variable names, and it allows all the
10159      * characters that are legal in a single character variable name.
10160      *
10161      * The legal ones are any of:
10162      *  a) all ASCII characters except:
10163      *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10164      *          2) '{'
10165      *     The final case currently doesn't get this far in the program, so we
10166      *     don't test for it.  If that were to change, it would be ok to allow it.
10167      *  b) When not under Unicode rules, any upper Latin1 character
10168      *  c) Otherwise, when unicode rules are used, all XIDS characters.
10169      *
10170      *      Because all ASCII characters have the same representation whether
10171      *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10172      *      '{' without knowing if is UTF-8 or not. */
10173
10174     if ((s <= PL_bufend - ((is_utf8)
10175                           ? UTF8SKIP(s)
10176                           : 1))
10177         && (
10178             isGRAPH_A(*s)
10179             ||
10180             ( is_utf8
10181               ? isIDFIRST_utf8_safe(s, PL_bufend)
10182               : (isGRAPH_L1(*s)
10183                  && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10184                 )
10185             )
10186         )
10187     ){
10188         if (is_utf8) {
10189             const STRLEN skip = UTF8SKIP(s);
10190             STRLEN i;
10191             d[skip] = '\0';
10192             for ( i = 0; i < skip; i++ )
10193                 d[i] = *s++;
10194         }
10195         else {
10196             *d = *s++;
10197             d[1] = '\0';
10198         }
10199     }
10200
10201     /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10202     if (isDIGIT(*d)) {
10203         bool is_zero= *d == '0' ? TRUE : FALSE;
10204         char *digit_start= d;
10205         while (s < PL_bufend && isDIGIT(*s)) {
10206             d++;
10207             if (d >= e)
10208                 Perl_croak(aTHX_ "%s", ident_too_long);
10209             *d= *s++;
10210         }
10211         if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10212             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10213         d[1] = '\0';
10214     }
10215
10216     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10217     else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10218         *d = toCTRL(*s);
10219         s++;
10220     }
10221     /* Warn about ambiguous code after unary operators if {...} notation isn't
10222        used.  There's no difference in ambiguity; it's merely a heuristic
10223        about when not to warn.  */
10224     else if (ck_uni && bracket == -1)
10225         check_uni();
10226
10227     if (bracket != -1) {
10228         bool skip;
10229         char *s2;
10230         /* If we were processing {...} notation then...  */
10231         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10232             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10233                  && isWORDCHAR(*s))
10234         ) {
10235             /* note we have to check for a normal identifier first,
10236              * as it handles utf8 symbols, and only after that has
10237              * been ruled out can we look at the caret words */
10238             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10239                 /* if it starts as a valid identifier, assume that it is one.
10240                    (the later check for } being at the expected point will trap
10241                    cases where this doesn't pan out.)  */
10242                 d += is_utf8 ? UTF8SKIP(d) : 1;
10243                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10244                 *d = '\0';
10245             }
10246             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10247                 d++;
10248                 while (isWORDCHAR(*s) && d < e) {
10249                     *d++ = *s++;
10250                 }
10251                 if (d >= e)
10252                     Perl_croak(aTHX_ "%s", ident_too_long);
10253                 *d = '\0';
10254             }
10255             tmp_copline = CopLINE(PL_curcop);
10256             if (s < PL_bufend && isSPACE(*s)) {
10257                 s = skipspace(s);
10258             }
10259             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10260                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10261                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10262                     const char * const brack =
10263                         (const char *)
10264                         ((*s == '[') ? "[...]" : "{...}");
10265                     orig_copline = CopLINE(PL_curcop);
10266                     CopLINE_set(PL_curcop, tmp_copline);
10267    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10268                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10269                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10270                         funny, dest, brack, funny, dest, brack);
10271                     CopLINE_set(PL_curcop, orig_copline);
10272                 }
10273                 bracket++;
10274                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10275                 PL_lex_allbrackets++;
10276                 return s;
10277             }
10278         }
10279
10280         if ( !tmp_copline )
10281             tmp_copline = CopLINE(PL_curcop);
10282         if ((skip = s < PL_bufend && isSPACE(*s))) {
10283             /* Avoid incrementing line numbers or resetting PL_linestart,
10284                in case we have to back up.  */
10285             STRLEN s_off = s - SvPVX(PL_linestr);
10286             s2 = peekspace(s);
10287             s = SvPVX(PL_linestr) + s_off;
10288         }
10289         else
10290             s2 = s;
10291
10292         /* Expect to find a closing } after consuming any trailing whitespace.
10293          */
10294         if (*s2 == '}') {
10295             /* Now increment line numbers if applicable.  */
10296             if (skip)
10297                 s = skipspace(s);
10298             s++;
10299             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10300                 PL_lex_state = LEX_INTERPEND;
10301                 PL_expect = XREF;
10302             }
10303             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10304                 if (ckWARN(WARN_AMBIGUOUS)
10305                     && (keyword(dest, d - dest, 0)
10306                         || get_cvn_flags(dest, d - dest, is_utf8
10307                            ? SVf_UTF8
10308                            : 0)))
10309                 {
10310                     SV *tmp = newSVpvn_flags( dest, d - dest,
10311                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10312                     if (funny == '#')
10313                         funny = '@';
10314                     orig_copline = CopLINE(PL_curcop);
10315                     CopLINE_set(PL_curcop, tmp_copline);
10316                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10317                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10318                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10319                     CopLINE_set(PL_curcop, orig_copline);
10320                 }
10321             }
10322         }
10323         else {
10324             /* Didn't find the closing } at the point we expected, so restore
10325                state such that the next thing to process is the opening { and */
10326             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10327             CopLINE_set(PL_curcop, orig_copline);
10328             PL_parser->herelines = herelines;
10329             *dest = '\0';
10330             PL_parser->sub_no_recover = TRUE;
10331         }
10332     }
10333     else if (   PL_lex_state == LEX_INTERPNORMAL
10334              && !PL_lex_brackets
10335              && !intuit_more(s, PL_bufend))
10336         PL_lex_state = LEX_INTERPEND;
10337     return s;
10338 }
10339
10340 static bool
10341 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10342
10343     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10344      * found in the parse starting at 's', based on the subset that are valid
10345      * in this context input to this routine in 'valid_flags'. Advances s.
10346      * Returns TRUE if the input should be treated as a valid flag, so the next
10347      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10348      * upon first call on the current regex.  This routine will set it to any
10349      * charset modifier found.  The caller shouldn't change it.  This way,
10350      * another charset modifier encountered in the parse can be detected as an
10351      * error, as we have decided to allow only one */
10352
10353     const char c = **s;
10354     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10355
10356     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10357         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10358             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10359                        UTF ? SVf_UTF8 : 0);
10360             (*s) += charlen;
10361             /* Pretend that it worked, so will continue processing before
10362              * dieing */
10363             return TRUE;
10364         }
10365         return FALSE;
10366     }
10367
10368     switch (c) {
10369
10370         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10371         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10372         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10373         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10374         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10375         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10376         case LOCALE_PAT_MOD:
10377             if (*charset) {
10378                 goto multiple_charsets;
10379             }
10380             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10381             *charset = c;
10382             break;
10383         case UNICODE_PAT_MOD:
10384             if (*charset) {
10385                 goto multiple_charsets;
10386             }
10387             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10388             *charset = c;
10389             break;
10390         case ASCII_RESTRICT_PAT_MOD:
10391             if (! *charset) {
10392                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10393             }
10394             else {
10395
10396                 /* Error if previous modifier wasn't an 'a', but if it was, see
10397                  * if, and accept, a second occurrence (only) */
10398                 if (*charset != 'a'
10399                     || get_regex_charset(*pmfl)
10400                         != REGEX_ASCII_RESTRICTED_CHARSET)
10401                 {
10402                         goto multiple_charsets;
10403                 }
10404                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10405             }
10406             *charset = c;
10407             break;
10408         case DEPENDS_PAT_MOD:
10409             if (*charset) {
10410                 goto multiple_charsets;
10411             }
10412             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10413             *charset = c;
10414             break;
10415     }
10416
10417     (*s)++;
10418     return TRUE;
10419
10420     multiple_charsets:
10421         if (*charset != c) {
10422             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10423         }
10424         else if (c == 'a') {
10425   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10426             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10427         }
10428         else {
10429             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10430         }
10431
10432         /* Pretend that it worked, so will continue processing before dieing */
10433         (*s)++;
10434         return TRUE;
10435 }
10436
10437 STATIC char *
10438 S_scan_pat(pTHX_ char *start, I32 type)
10439 {
10440     PMOP *pm;
10441     char *s;
10442     const char * const valid_flags =
10443         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10444     char charset = '\0';    /* character set modifier */
10445     unsigned int x_mod_count = 0;
10446
10447     PERL_ARGS_ASSERT_SCAN_PAT;
10448
10449     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10450     if (!s)
10451         Perl_croak(aTHX_ "Search pattern not terminated");
10452
10453     pm = (PMOP*)newPMOP(type, 0);
10454     if (PL_multi_open == '?') {
10455         /* This is the only point in the code that sets PMf_ONCE:  */
10456         pm->op_pmflags |= PMf_ONCE;
10457
10458         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10459            allows us to restrict the list needed by reset to just the ??
10460            matches.  */
10461         assert(type != OP_TRANS);
10462         if (PL_curstash) {
10463             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10464             U32 elements;
10465             if (!mg) {
10466                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10467                                  0);
10468             }
10469             elements = mg->mg_len / sizeof(PMOP**);
10470             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10471             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10472             mg->mg_len = elements * sizeof(PMOP**);
10473             PmopSTASH_set(pm,PL_curstash);
10474         }
10475     }
10476
10477     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10478      * anon CV. False positives like qr/[(?{]/ are harmless */
10479
10480     if (type == OP_QR) {
10481         STRLEN len;
10482         char *e, *p = SvPV(PL_lex_stuff, len);
10483         e = p + len;
10484         for (; p < e; p++) {
10485             if (p[0] == '(' && p[1] == '?'
10486                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10487             {
10488                 pm->op_pmflags |= PMf_HAS_CV;
10489                 break;
10490             }
10491         }
10492         pm->op_pmflags |= PMf_IS_QR;
10493     }
10494
10495     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10496                                 &s, &charset, &x_mod_count))
10497     {};
10498     /* issue a warning if /c is specified,but /g is not */
10499     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10500     {
10501         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10502                        "Use of /c modifier is meaningless without /g" );
10503     }
10504
10505     PL_lex_op = (OP*)pm;
10506     pl_yylval.ival = OP_MATCH;
10507     return s;
10508 }
10509
10510 STATIC char *
10511 S_scan_subst(pTHX_ char *start)
10512 {
10513     char *s;
10514     PMOP *pm;
10515     I32 first_start;
10516     line_t first_line;
10517     line_t linediff = 0;
10518     I32 es = 0;
10519     char charset = '\0';    /* character set modifier */
10520     unsigned int x_mod_count = 0;
10521     char *t;
10522
10523     PERL_ARGS_ASSERT_SCAN_SUBST;
10524
10525     pl_yylval.ival = OP_NULL;
10526
10527     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10528
10529     if (!s)
10530         Perl_croak(aTHX_ "Substitution pattern not terminated");
10531
10532     s = t;
10533
10534     first_start = PL_multi_start;
10535     first_line = CopLINE(PL_curcop);
10536     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10537     if (!s) {
10538         SvREFCNT_dec_NN(PL_lex_stuff);
10539         PL_lex_stuff = NULL;
10540         Perl_croak(aTHX_ "Substitution replacement not terminated");
10541     }
10542     PL_multi_start = first_start;       /* so whole substitution is taken together */
10543
10544     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10545
10546
10547     while (*s) {
10548         if (*s == EXEC_PAT_MOD) {
10549             s++;
10550             es++;
10551         }
10552         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10553                                   &s, &charset, &x_mod_count))
10554         {
10555             break;
10556         }
10557     }
10558
10559     if ((pm->op_pmflags & PMf_CONTINUE)) {
10560         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10561     }
10562
10563     if (es) {
10564         SV * const repl = newSVpvs("");
10565
10566         PL_multi_end = 0;
10567         pm->op_pmflags |= PMf_EVAL;
10568         for (; es > 1; es--) {
10569             sv_catpvs(repl, "eval ");
10570         }
10571         sv_catpvs(repl, "do {");
10572         sv_catsv(repl, PL_parser->lex_sub_repl);
10573         sv_catpvs(repl, "}");
10574         SvREFCNT_dec(PL_parser->lex_sub_repl);
10575         PL_parser->lex_sub_repl = repl;
10576     }
10577
10578
10579     linediff = CopLINE(PL_curcop) - first_line;
10580     if (linediff)
10581         CopLINE_set(PL_curcop, first_line);
10582
10583     if (linediff || es) {
10584         /* the IVX field indicates that the replacement string is a s///e;
10585          * the NVX field indicates how many src code lines the replacement
10586          * spreads over */
10587         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10588         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10589         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10590                                                                     cBOOL(es);
10591     }
10592
10593     PL_lex_op = (OP*)pm;
10594     pl_yylval.ival = OP_SUBST;
10595     return s;
10596 }
10597
10598 STATIC char *
10599 S_scan_trans(pTHX_ char *start)
10600 {
10601     char* s;
10602     OP *o;
10603     U8 squash;
10604     U8 del;
10605     U8 complement;
10606     bool nondestruct = 0;
10607     char *t;
10608
10609     PERL_ARGS_ASSERT_SCAN_TRANS;
10610
10611     pl_yylval.ival = OP_NULL;
10612
10613     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10614     if (!s)
10615         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10616
10617     s = t;
10618
10619     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10620     if (!s) {
10621         SvREFCNT_dec_NN(PL_lex_stuff);
10622         PL_lex_stuff = NULL;
10623         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10624     }
10625
10626     complement = del = squash = 0;
10627     while (1) {
10628         switch (*s) {
10629         case 'c':
10630             complement = OPpTRANS_COMPLEMENT;
10631             break;
10632         case 'd':
10633             del = OPpTRANS_DELETE;
10634             break;
10635         case 's':
10636             squash = OPpTRANS_SQUASH;
10637             break;
10638         case 'r':
10639             nondestruct = 1;
10640             break;
10641         default:
10642             goto no_more;
10643         }
10644         s++;
10645     }
10646   no_more:
10647
10648     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10649     o->op_private &= ~OPpTRANS_ALL;
10650     o->op_private |= del|squash|complement;
10651
10652     PL_lex_op = o;
10653     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10654
10655
10656     return s;
10657 }
10658
10659 /* scan_heredoc
10660    Takes a pointer to the first < in <<FOO.
10661    Returns a pointer to the byte following <<FOO.
10662
10663    This function scans a heredoc, which involves different methods
10664    depending on whether we are in a string eval, quoted construct, etc.
10665    This is because PL_linestr could containing a single line of input, or
10666    a whole string being evalled, or the contents of the current quote-
10667    like operator.
10668
10669    The two basic methods are:
10670     - Steal lines from the input stream
10671     - Scan the heredoc in PL_linestr and remove it therefrom
10672
10673    In a file scope or filtered eval, the first method is used; in a
10674    string eval, the second.
10675
10676    In a quote-like operator, we have to choose between the two,
10677    depending on where we can find a newline.  We peek into outer lex-
10678    ing scopes until we find one with a newline in it.  If we reach the
10679    outermost lexing scope and it is a file, we use the stream method.
10680    Otherwise it is treated as an eval.
10681 */
10682
10683 STATIC char *
10684 S_scan_heredoc(pTHX_ char *s)
10685 {
10686     I32 op_type = OP_SCALAR;
10687     I32 len;
10688     SV *tmpstr;
10689     char term;
10690     char *d;
10691     char *e;
10692     char *peek;
10693     char *indent = 0;
10694     I32 indent_len = 0;
10695     bool indented = FALSE;
10696     const bool infile = PL_rsfp || PL_parser->filtered;
10697     const line_t origline = CopLINE(PL_curcop);
10698     LEXSHARED *shared = PL_parser->lex_shared;
10699
10700     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10701
10702     s += 2;
10703     d = PL_tokenbuf + 1;
10704     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10705     *PL_tokenbuf = '\n';
10706     peek = s;
10707
10708     if (*peek == '~') {
10709         indented = TRUE;
10710         peek++; s++;
10711     }
10712
10713     while (SPACE_OR_TAB(*peek))
10714         peek++;
10715
10716     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10717         s = peek;
10718         term = *s++;
10719         s = delimcpy(d, e, s, PL_bufend, term, &len);
10720         if (s == PL_bufend)
10721             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10722         d += len;
10723         s++;
10724     }
10725     else {
10726         if (*s == '\\')
10727             /* <<\FOO is equivalent to <<'FOO' */
10728             s++, term = '\'';
10729         else
10730             term = '"';
10731
10732         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10733             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10734
10735         peek = s;
10736
10737         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10738             peek += UTF ? UTF8SKIP(peek) : 1;
10739         }
10740
10741         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10742         Copy(s, d, len, char);
10743         s += len;
10744         d += len;
10745     }
10746
10747     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10748         Perl_croak(aTHX_ "Delimiter for here document is too long");
10749
10750     *d++ = '\n';
10751     *d = '\0';
10752     len = d - PL_tokenbuf;
10753
10754 #ifndef PERL_STRICT_CR
10755     d = (char *) memchr(s, '\r', PL_bufend - s);
10756     if (d) {
10757         char * const olds = s;
10758         s = d;
10759         while (s < PL_bufend) {
10760             if (*s == '\r') {
10761                 *d++ = '\n';
10762                 if (*++s == '\n')
10763                     s++;
10764             }
10765             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10766                 *d++ = *s++;
10767                 s++;
10768             }
10769             else
10770                 *d++ = *s++;
10771         }
10772         *d = '\0';
10773         PL_bufend = d;
10774         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10775         s = olds;
10776     }
10777 #endif
10778
10779     tmpstr = newSV_type(SVt_PVIV);
10780     SvGROW(tmpstr, 80);
10781     if (term == '\'') {
10782         op_type = OP_CONST;
10783         SvIV_set(tmpstr, -1);
10784     }
10785     else if (term == '`') {
10786         op_type = OP_BACKTICK;
10787         SvIV_set(tmpstr, '\\');
10788     }
10789
10790     PL_multi_start = origline + 1 + PL_parser->herelines;
10791     PL_multi_open = PL_multi_close = '<';
10792
10793     /* inside a string eval or quote-like operator */
10794     if (!infile || PL_lex_inwhat) {
10795         SV *linestr;
10796         char *bufend;
10797         char * const olds = s;
10798         PERL_CONTEXT * const cx = CX_CUR();
10799         /* These two fields are not set until an inner lexing scope is
10800            entered.  But we need them set here. */
10801         shared->ls_bufptr  = s;
10802         shared->ls_linestr = PL_linestr;
10803
10804         if (PL_lex_inwhat) {
10805             /* Look for a newline.  If the current buffer does not have one,
10806              peek into the line buffer of the parent lexing scope, going
10807              up as many levels as necessary to find one with a newline
10808              after bufptr.
10809             */
10810             while (!(s = (char *)memchr(
10811                                 (void *)shared->ls_bufptr, '\n',
10812                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10813                 )))
10814             {
10815                 shared = shared->ls_prev;
10816                 /* shared is only null if we have gone beyond the outermost
10817                    lexing scope.  In a file, we will have broken out of the
10818                    loop in the previous iteration.  In an eval, the string buf-
10819                    fer ends with "\n;", so the while condition above will have
10820                    evaluated to false.  So shared can never be null.  Or so you
10821                    might think.  Odd syntax errors like s;@{<<; can gobble up
10822                    the implicit semicolon at the end of a flie, causing the
10823                    file handle to be closed even when we are not in a string
10824                    eval.  So shared may be null in that case.
10825                    (Closing '>>}' here to balance the earlier open brace for
10826                    editors that look for matched pairs.) */
10827                 if (UNLIKELY(!shared))
10828                     goto interminable;
10829                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10830                    most lexing scope.  In a file, shared->ls_linestr at that
10831                    level is just one line, so there is no body to steal. */
10832                 if (infile && !shared->ls_prev) {
10833                     s = olds;
10834                     goto streaming;
10835                 }
10836             }
10837         }
10838         else {  /* eval or we've already hit EOF */
10839             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10840             if (!s)
10841                 goto interminable;
10842         }
10843
10844         linestr = shared->ls_linestr;
10845         bufend = SvEND(linestr);
10846         d = s;
10847         if (indented) {
10848             char *myolds = s;
10849
10850             while (s < bufend - len + 1) {
10851                 if (*s++ == '\n')
10852                     ++PL_parser->herelines;
10853
10854                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10855                     char *backup = s;
10856                     indent_len = 0;
10857
10858                     /* Only valid if it's preceded by whitespace only */
10859                     while (backup != myolds && --backup >= myolds) {
10860                         if (! SPACE_OR_TAB(*backup)) {
10861                             break;
10862                         }
10863                         indent_len++;
10864                     }
10865
10866                     /* No whitespace or all! */
10867                     if (backup == s || *backup == '\n') {
10868                         Newx(indent, indent_len + 1, char);
10869                         memcpy(indent, backup + 1, indent_len);
10870                         indent[indent_len] = 0;
10871                         s--; /* before our delimiter */
10872                         PL_parser->herelines--; /* this line doesn't count */
10873                         break;
10874                     }
10875                 }
10876             }
10877         }
10878         else {
10879             while (s < bufend - len + 1
10880                    && memNE(s,PL_tokenbuf,len) )
10881             {
10882                 if (*s++ == '\n')
10883                     ++PL_parser->herelines;
10884             }
10885         }
10886
10887         if (s >= bufend - len + 1) {
10888             goto interminable;
10889         }
10890
10891         sv_setpvn(tmpstr,d+1,s-d);
10892         s += len - 1;
10893         /* the preceding stmt passes a newline */
10894         PL_parser->herelines++;
10895
10896         /* s now points to the newline after the heredoc terminator.
10897            d points to the newline before the body of the heredoc.
10898          */
10899
10900         /* We are going to modify linestr in place here, so set
10901            aside copies of the string if necessary for re-evals or
10902            (caller $n)[6]. */
10903         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10904            check shared->re_eval_str. */
10905         if (shared->re_eval_start || shared->re_eval_str) {
10906             /* Set aside the rest of the regexp */
10907             if (!shared->re_eval_str)
10908                 shared->re_eval_str =
10909                        newSVpvn(shared->re_eval_start,
10910                                 bufend - shared->re_eval_start);
10911             shared->re_eval_start -= s-d;
10912         }
10913
10914         if (cxstack_ix >= 0
10915             && CxTYPE(cx) == CXt_EVAL
10916             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10917             && cx->blk_eval.cur_text == linestr)
10918         {
10919             cx->blk_eval.cur_text = newSVsv(linestr);
10920             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10921         }
10922
10923         /* Copy everything from s onwards back to d. */
10924         Move(s,d,bufend-s + 1,char);
10925         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10926         /* Setting PL_bufend only applies when we have not dug deeper
10927            into other scopes, because sublex_done sets PL_bufend to
10928            SvEND(PL_linestr). */
10929         if (shared == PL_parser->lex_shared)
10930             PL_bufend = SvEND(linestr);
10931         s = olds;
10932     }
10933     else {
10934         SV *linestr_save;
10935         char *oldbufptr_save;
10936         char *oldoldbufptr_save;
10937       streaming:
10938         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10939         term = PL_tokenbuf[1];
10940         len--;
10941         linestr_save = PL_linestr; /* must restore this afterwards */
10942         d = s;                   /* and this */
10943         oldbufptr_save = PL_oldbufptr;
10944         oldoldbufptr_save = PL_oldoldbufptr;
10945         PL_linestr = newSVpvs("");
10946         PL_bufend = SvPVX(PL_linestr);
10947
10948         while (1) {
10949             PL_bufptr = PL_bufend;
10950             CopLINE_set(PL_curcop,
10951                         origline + 1 + PL_parser->herelines);
10952
10953             if (   !lex_next_chunk(LEX_NO_TERM)
10954                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10955             {
10956                 /* Simply freeing linestr_save might seem simpler here, as it
10957                    does not matter what PL_linestr points to, since we are
10958                    about to croak; but in a quote-like op, linestr_save
10959                    will have been prospectively freed already, via
10960                    SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
10961                    restore PL_linestr. */
10962                 SvREFCNT_dec_NN(PL_linestr);
10963                 PL_linestr = linestr_save;
10964                 PL_oldbufptr = oldbufptr_save;
10965                 PL_oldoldbufptr = oldoldbufptr_save;
10966                 goto interminable;
10967             }
10968
10969             CopLINE_set(PL_curcop, origline);
10970
10971             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10972                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10973                 /* ^That should be enough to avoid this needing to grow:  */
10974                 sv_catpvs(PL_linestr, "\n\0");
10975                 assert(s == SvPVX(PL_linestr));
10976                 PL_bufend = SvEND(PL_linestr);
10977             }
10978
10979             s = PL_bufptr;
10980             PL_parser->herelines++;
10981             PL_last_lop = PL_last_uni = NULL;
10982
10983 #ifndef PERL_STRICT_CR
10984             if (PL_bufend - PL_linestart >= 2) {
10985                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10986                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10987                 {
10988                     PL_bufend[-2] = '\n';
10989                     PL_bufend--;
10990                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10991                 }
10992                 else if (PL_bufend[-1] == '\r')
10993                     PL_bufend[-1] = '\n';
10994             }
10995             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10996                 PL_bufend[-1] = '\n';
10997 #endif
10998
10999             if (indented && (PL_bufend-s) >= len) {
11000                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11001
11002                 if (found) {
11003                     char *backup = found;
11004                     indent_len = 0;
11005
11006                     /* Only valid if it's preceded by whitespace only */
11007                     while (backup != s && --backup >= s) {
11008                         if (! SPACE_OR_TAB(*backup)) {
11009                             break;
11010                         }
11011                         indent_len++;
11012                     }
11013
11014                     /* All whitespace or none! */
11015                     if (backup == found || SPACE_OR_TAB(*backup)) {
11016                         Newx(indent, indent_len + 1, char);
11017                         memcpy(indent, backup, indent_len);
11018                         indent[indent_len] = 0;
11019                         SvREFCNT_dec(PL_linestr);
11020                         PL_linestr = linestr_save;
11021                         PL_linestart = SvPVX(linestr_save);
11022                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11023                         PL_oldbufptr = oldbufptr_save;
11024                         PL_oldoldbufptr = oldoldbufptr_save;
11025                         s = d;
11026                         break;
11027                     }
11028                 }
11029
11030                 /* Didn't find it */
11031                 sv_catsv(tmpstr,PL_linestr);
11032             }
11033             else {
11034                 if (*s == term && PL_bufend-s >= len
11035                     && memEQ(s,PL_tokenbuf + 1,len))
11036                 {
11037                     SvREFCNT_dec(PL_linestr);
11038                     PL_linestr = linestr_save;
11039                     PL_linestart = SvPVX(linestr_save);
11040                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11041                     PL_oldbufptr = oldbufptr_save;
11042                     PL_oldoldbufptr = oldoldbufptr_save;
11043                     s = d;
11044                     break;
11045                 }
11046                 else {
11047                     sv_catsv(tmpstr,PL_linestr);
11048                 }
11049             }
11050         } /* while (1) */
11051     }
11052
11053     PL_multi_end = origline + PL_parser->herelines;
11054
11055     if (indented && indent) {
11056         STRLEN linecount = 1;
11057         STRLEN herelen = SvCUR(tmpstr);
11058         char *ss = SvPVX(tmpstr);
11059         char *se = ss + herelen;
11060         SV *newstr = newSV(herelen+1);
11061         SvPOK_on(newstr);
11062
11063         /* Trim leading whitespace */
11064         while (ss < se) {
11065             /* newline only? Copy and move on */
11066             if (*ss == '\n') {
11067                 sv_catpvs(newstr,"\n");
11068                 ss++;
11069                 linecount++;
11070
11071             /* Found our indentation? Strip it */
11072             }
11073             else if (se - ss >= indent_len
11074                        && memEQ(ss, indent, indent_len))
11075             {
11076                 STRLEN le = 0;
11077                 ss += indent_len;
11078
11079                 while ((ss + le) < se && *(ss + le) != '\n')
11080                     le++;
11081
11082                 sv_catpvn(newstr, ss, le);
11083                 ss += le;
11084
11085             /* Line doesn't begin with our indentation? Croak */
11086             }
11087             else {
11088                 Safefree(indent);
11089                 Perl_croak(aTHX_
11090                     "Indentation on line %d of here-doc doesn't match delimiter",
11091                     (int)linecount
11092                 );
11093             }
11094         } /* while */
11095
11096         /* avoid sv_setsv() as we dont wan't to COW here */
11097         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11098         Safefree(indent);
11099         SvREFCNT_dec_NN(newstr);
11100     }
11101
11102     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11103         SvPV_shrink_to_cur(tmpstr);
11104     }
11105
11106     if (!IN_BYTES) {
11107         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11108             SvUTF8_on(tmpstr);
11109     }
11110
11111     PL_lex_stuff = tmpstr;
11112     pl_yylval.ival = op_type;
11113     return s;
11114
11115   interminable:
11116     if (indent)
11117         Safefree(indent);
11118     SvREFCNT_dec(tmpstr);
11119     CopLINE_set(PL_curcop, origline);
11120     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11121 }
11122
11123
11124 /* scan_inputsymbol
11125    takes: position of first '<' in input buffer
11126    returns: position of first char following the matching '>' in
11127             input buffer
11128    side-effects: pl_yylval and lex_op are set.
11129
11130    This code handles:
11131
11132    <>           read from ARGV
11133    <<>>         read from ARGV without magic open
11134    <FH>         read from filehandle
11135    <pkg::FH>    read from package qualified filehandle
11136    <pkg'FH>     read from package qualified filehandle
11137    <$fh>        read from filehandle in $fh
11138    <*.h>        filename glob
11139
11140 */
11141
11142 STATIC char *
11143 S_scan_inputsymbol(pTHX_ char *start)
11144 {
11145     char *s = start;            /* current position in buffer */
11146     char *end;
11147     I32 len;
11148     bool nomagicopen = FALSE;
11149     char *d = PL_tokenbuf;                                      /* start of temp holding space */
11150     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
11151
11152     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11153
11154     end = (char *) memchr(s, '\n', PL_bufend - s);
11155     if (!end)
11156         end = PL_bufend;
11157     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11158         nomagicopen = TRUE;
11159         *d = '\0';
11160         len = 0;
11161         s += 3;
11162     }
11163     else
11164         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
11165
11166     /* die if we didn't have space for the contents of the <>,
11167        or if it didn't end, or if we see a newline
11168     */
11169
11170     if (len >= (I32)sizeof PL_tokenbuf)
11171         Perl_croak(aTHX_ "Excessively long <> operator");
11172     if (s >= end)
11173         Perl_croak(aTHX_ "Unterminated <> operator");
11174
11175     s++;
11176
11177     /* check for <$fh>
11178        Remember, only scalar variables are interpreted as filehandles by
11179        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11180        treated as a glob() call.
11181        This code makes use of the fact that except for the $ at the front,
11182        a scalar variable and a filehandle look the same.
11183     */
11184     if (*d == '$' && d[1]) d++;
11185
11186     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11187     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11188         d += UTF ? UTF8SKIP(d) : 1;
11189     }
11190
11191     /* If we've tried to read what we allow filehandles to look like, and
11192        there's still text left, then it must be a glob() and not a getline.
11193        Use scan_str to pull out the stuff between the <> and treat it
11194        as nothing more than a string.
11195     */
11196
11197     if (d - PL_tokenbuf != len) {
11198         pl_yylval.ival = OP_GLOB;
11199         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11200         if (!s)
11201            Perl_croak(aTHX_ "Glob not terminated");
11202         return s;
11203     }
11204     else {
11205         bool readline_overriden = FALSE;
11206         GV *gv_readline;
11207         /* we're in a filehandle read situation */
11208         d = PL_tokenbuf;
11209
11210         /* turn <> into <ARGV> */
11211         if (!len)
11212             Copy("ARGV",d,5,char);
11213
11214         /* Check whether readline() is overriden */
11215         if ((gv_readline = gv_override("readline",8)))
11216             readline_overriden = TRUE;
11217
11218         /* if <$fh>, create the ops to turn the variable into a
11219            filehandle
11220         */
11221         if (*d == '$') {
11222             /* try to find it in the pad for this block, otherwise find
11223                add symbol table ops
11224             */
11225             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11226             if (tmp != NOT_IN_PAD) {
11227                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11228                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11229                     HEK * const stashname = HvNAME_HEK(stash);
11230                     SV * const sym = newSVhek_mortal(stashname);
11231                     sv_catpvs(sym, "::");
11232                     sv_catpv(sym, d+1);
11233                     d = SvPVX(sym);
11234                     goto intro_sym;
11235                 }
11236                 else {
11237                     OP * const o = newOP(OP_PADSV, 0);
11238                     o->op_targ = tmp;
11239                     PL_lex_op = readline_overriden
11240                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11241                                 op_append_elem(OP_LIST, o,
11242                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11243                         : newUNOP(OP_READLINE, 0, o);
11244                 }
11245             }
11246             else {
11247                 GV *gv;
11248                 ++d;
11249               intro_sym:
11250                 gv = gv_fetchpv(d,
11251                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11252                                 SVt_PV);
11253                 PL_lex_op = readline_overriden
11254                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11255                             op_append_elem(OP_LIST,
11256                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11257                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11258                     : newUNOP(OP_READLINE, 0,
11259                             newUNOP(OP_RV2SV, 0,
11260                                 newGVOP(OP_GV, 0, gv)));
11261             }
11262             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11263             pl_yylval.ival = OP_NULL;
11264         }
11265
11266         /* If it's none of the above, it must be a literal filehandle
11267            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11268         else {
11269             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11270             PL_lex_op = readline_overriden
11271                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11272                         op_append_elem(OP_LIST,
11273                             newGVOP(OP_GV, 0, gv),
11274                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11275                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11276             pl_yylval.ival = OP_NULL;
11277
11278             /* leave the token generation above to avoid confusing the parser */
11279             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11280                 no_bareword_filehandle(d);
11281             }
11282         }
11283     }
11284
11285     return s;
11286 }
11287
11288
11289 /* scan_str
11290    takes:
11291         start                   position in buffer
11292         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11293                                 only if they are of the open/close form
11294         keep_delims             preserve the delimiters around the string
11295         re_reparse              compiling a run-time /(?{})/:
11296                                    collapse // to /,  and skip encoding src
11297         delimp                  if non-null, this is set to the position of
11298                                 the closing delimiter, or just after it if
11299                                 the closing and opening delimiters differ
11300                                 (i.e., the opening delimiter of a substitu-
11301                                 tion replacement)
11302    returns: position to continue reading from buffer
11303    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11304         updates the read buffer.
11305
11306    This subroutine pulls a string out of the input.  It is called for:
11307         q               single quotes           q(literal text)
11308         '               single quotes           'literal text'
11309         qq              double quotes           qq(interpolate $here please)
11310         "               double quotes           "interpolate $here please"
11311         qx              backticks               qx(/bin/ls -l)
11312         `               backticks               `/bin/ls -l`
11313         qw              quote words             @EXPORT_OK = qw( func() $spam )
11314         m//             regexp match            m/this/
11315         s///            regexp substitute       s/this/that/
11316         tr///           string transliterate    tr/this/that/
11317         y///            string transliterate    y/this/that/
11318         ($*@)           sub prototypes          sub foo ($)
11319         (stuff)         sub attr parameters     sub foo : attr(stuff)
11320         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11321
11322    In most of these cases (all but <>, patterns and transliterate)
11323    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11324    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11325    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11326    calls scan_str().
11327
11328    It skips whitespace before the string starts, and treats the first
11329    character as the delimiter.  If the delimiter is one of ([{< then
11330    the corresponding "close" character )]}> is used as the closing
11331    delimiter.  It allows quoting of delimiters, and if the string has
11332    balanced delimiters ([{<>}]) it allows nesting.
11333
11334    On success, the SV with the resulting string is put into lex_stuff or,
11335    if that is already non-NULL, into lex_repl. The second case occurs only
11336    when parsing the RHS of the special constructs s/// and tr/// (y///).
11337    For convenience, the terminating delimiter character is stuffed into
11338    SvIVX of the SV.
11339 */
11340
11341 char *
11342 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11343                  char **delimp
11344     )
11345 {
11346     SV *sv;                     /* scalar value: string */
11347     char *s = start;            /* current position in the buffer */
11348     char *to;                   /* current position in the sv's data */
11349     int brackets = 1;           /* bracket nesting level */
11350     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
11351     UV open_delim_code;         /* code point */
11352     char open_delim_str[UTF8_MAXBYTES+1];
11353     STRLEN delim_byte_len;      /* each delimiter currently is the same number
11354                                    of bytes */
11355     line_t herelines;
11356
11357     /* The only non-UTF character that isn't a stand alone grapheme is
11358      * white-space, hence can't be a delimiter. */
11359     const char * non_grapheme_msg = "Use of unassigned code point or"
11360                                     " non-standalone grapheme for a delimiter"
11361                                     " is not allowed";
11362     PERL_ARGS_ASSERT_SCAN_STR;
11363
11364     /* skip space before the delimiter */
11365     if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11366                            'start' also has to change */
11367         s = start = skipspace(s);
11368     }
11369
11370     /* mark where we are, in case we need to report errors */
11371     CLINE;
11372
11373     /* after skipping whitespace, the next character is the delimiter */
11374     if (! UTF || UTF8_IS_INVARIANT(*s)) {
11375         open_delim_code   = (U8) *s;
11376         open_delim_str[0] =      *s;
11377         delim_byte_len = 1;
11378     }
11379     else {
11380         open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11381                                             &delim_byte_len);
11382         if (UNLIKELY(! is_grapheme((U8 *) start,
11383                                    (U8 *) s,
11384                                    (U8 *) PL_bufend,
11385                                    open_delim_code)))
11386         {
11387             yyerror(non_grapheme_msg);
11388         }
11389
11390         Copy(s, open_delim_str, delim_byte_len, char);
11391     }
11392     open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11393
11394
11395     /* mark where we are */
11396     PL_multi_start = CopLINE(PL_curcop);
11397     PL_multi_open = open_delim_code;
11398     herelines = PL_parser->herelines;
11399
11400     const char * legal_paired_opening_delims;
11401     const char * legal_paired_closing_delims;
11402     const char * deprecated_opening_delims;
11403     if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11404         if (UTF) {
11405             legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11406             legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11407
11408             /* We are deprecating using a closing delimiter as the opening, in
11409              * case we want in the future to accept them reversed.  The string
11410              * may include ones that are legal, but the code below won't look
11411              * at this string unless it didn't find a legal opening one */
11412             deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11413         }
11414         else {
11415             legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11416             legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11417             deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11418         }
11419     }
11420     else {
11421         legal_paired_opening_delims = "([{<";
11422         legal_paired_closing_delims = ")]}>";
11423         deprecated_opening_delims = (UTF)
11424                                     ? DEPRECATED_OPENING_UTF8_BRACKETS
11425                                     : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11426     }
11427
11428     const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11429                                           + strlen(legal_paired_opening_delims);
11430     const char * deprecated_delims_end = deprecated_opening_delims
11431                                 + strlen(deprecated_opening_delims);
11432
11433     const char * close_delim_str = open_delim_str;
11434     UV close_delim_code = open_delim_code;
11435
11436     /* If the delimiter has a mirror-image closing one, get it */
11437     const char *tmps = ninstr(legal_paired_opening_delims,
11438                               legal_paired_opening_delims_end,
11439                               open_delim_str, open_delim_str + delim_byte_len);
11440     if (tmps) {
11441         /* Here, there is a paired delimiter, and tmps points to its position
11442            in the string of the accepted opening paired delimiters.  The
11443            corresponding position in the string of closing ones is the
11444            beginning of the paired mate.  Both contain the same number of
11445            bytes. */
11446         close_delim_str = legal_paired_closing_delims
11447                         + (tmps - legal_paired_opening_delims);
11448
11449         /* The list of paired delimiters contains all the ASCII ones that have
11450          * always been legal, and no other ASCIIs.  Don't raise a message if
11451          * using one of these */
11452         if (! isASCII(open_delim_code)) {
11453             Perl_ck_warner_d(aTHX_
11454                              packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11455                              "Use of '%" UTF8f "' is experimental as a string delimiter",
11456                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11457         }
11458
11459         close_delim_code = (UTF)
11460                            ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11461                            : * (U8 *) close_delim_str;
11462     }
11463     else {  /* Here, the delimiter isn't paired, hence the close is the same as
11464                the open; and has aready been set up.  But make sure it isn't
11465                deprecated to use this particular delimiter, as we plan
11466                eventually to make it paired. */
11467         if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11468                    open_delim_str, open_delim_str + delim_byte_len))
11469         {
11470             Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11471                              "Use of '%" UTF8f "' is deprecated as a string delimiter",
11472                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11473         }
11474
11475         /* Note that a NUL may be used as a delimiter, and this happens when
11476          * delimitting an empty string, and no special handling for it is
11477          * needed, as ninstr() calls are used */
11478     }
11479
11480     PL_multi_close = close_delim_code;
11481
11482     if (PL_multi_open == PL_multi_close) {
11483         keep_bracketed_quoted = FALSE;
11484     }
11485
11486     /* create a new SV to hold the contents.  79 is the SV's initial length.
11487        What a random number. */
11488     sv = newSV_type(SVt_PVIV);
11489     SvGROW(sv, 79);
11490     SvIV_set(sv, close_delim_code);
11491     (void)SvPOK_only(sv);               /* validate pointer */
11492
11493     /* move past delimiter and try to read a complete string */
11494     if (keep_delims)
11495         sv_catpvn(sv, s, delim_byte_len);
11496     s += delim_byte_len;
11497     for (;;) {
11498         /* extend sv if need be */
11499         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11500         /* set 'to' to the next character in the sv's string */
11501         to = SvPVX(sv)+SvCUR(sv);
11502
11503         /* read until we run out of string, or we find the closing delimiter */
11504         while (s < PL_bufend) {
11505             /* embedded newlines increment the line count */
11506             if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11507                 COPLINE_INC_WITH_HERELINES;
11508
11509             /* backslashes can escape the closing delimiter */
11510             if (   *s == '\\' && s < PL_bufend - delim_byte_len
11511
11512                    /* ... but not if the delimiter itself is a backslash */
11513                 && close_delim_code != '\\')
11514             {
11515                 /* Here, we have an escaping backslash.  If we're supposed to
11516                  * discard those that escape the closing delimiter, just
11517                  * discard this one */
11518                 if (   !  keep_bracketed_quoted
11519                     &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11520                           ||  (   PL_multi_open == PL_multi_close
11521                                && re_reparse && s[1] == '\\')
11522                           ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11523                 {
11524                     s++;
11525                 }
11526                 else /* any other escapes are simply copied straight through */
11527                     *to++ = *s++;
11528             }
11529             else if (   s < PL_bufend - (delim_byte_len - 1)
11530                      && memEQ(s, close_delim_str, delim_byte_len)
11531                      && --brackets <= 0)
11532             {
11533                 /* Found unescaped closing delimiter, unnested if we care about
11534                  * that; so are done.
11535                  *
11536                  * In the case of the opening and closing delimiters being
11537                  * different, we have to deal with nesting; the conditional
11538                  * above makes sure we don't get here until the nesting level,
11539                  * 'brackets', is back down to zero.  In the other case,
11540                  * nesting isn't an issue, and 'brackets' never can get
11541                  * incremented above 0, so will come here at the first closing
11542                  * delimiter.
11543                  *
11544                  * Only grapheme delimiters are legal. */
11545                 if (   UTF  /* All Non-UTF-8's are graphemes */
11546                     && UNLIKELY(! is_grapheme((U8 *) start,
11547                                               (U8 *) s,
11548                                               (U8 *) PL_bufend,
11549                                               close_delim_code)))
11550                 {
11551                     yyerror(non_grapheme_msg);
11552                 }
11553
11554                 break;
11555             }
11556                         /* No nesting if open eq close */
11557             else if (   PL_multi_open != PL_multi_close
11558                      && s < PL_bufend - (delim_byte_len - 1)
11559                      && memEQ(s, open_delim_str, delim_byte_len))
11560             {
11561                 brackets++;
11562             }
11563
11564             /* Here, still in the middle of the string; copy this character */
11565             if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11566                 *to++ = *s++;
11567             }
11568             else {
11569                 size_t this_char_len = UTF8SKIP(s);
11570                 Copy(s, to, this_char_len, char);
11571                 s  += this_char_len;
11572                 to += this_char_len;
11573
11574                 d_is_utf8 = TRUE;
11575             }
11576         } /* End of loop through buffer */
11577
11578         /* Here, found end of the string, OR ran out of buffer: terminate the
11579          * copied string and update the sv's end-of-string */
11580         *to = '\0';
11581         SvCUR_set(sv, to - SvPVX_const(sv));
11582
11583         /*
11584          * this next chunk reads more into the buffer if we're not done yet
11585          */
11586
11587         if (s < PL_bufend)
11588             break;              /* handle case where we are done yet :-) */
11589
11590 #ifndef PERL_STRICT_CR
11591         if (to - SvPVX_const(sv) >= 2) {
11592             if (   (to[-2] == '\r' && to[-1] == '\n')
11593                 || (to[-2] == '\n' && to[-1] == '\r'))
11594             {
11595                 to[-2] = '\n';
11596                 to--;
11597                 SvCUR_set(sv, to - SvPVX_const(sv));
11598             }
11599             else if (to[-1] == '\r')
11600                 to[-1] = '\n';
11601         }
11602         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11603             to[-1] = '\n';
11604 #endif
11605
11606         /* if we're out of file, or a read fails, bail and reset the current
11607            line marker so we can report where the unterminated string began
11608         */
11609         COPLINE_INC_WITH_HERELINES;
11610         PL_bufptr = PL_bufend;
11611         if (!lex_next_chunk(0)) {
11612             sv_free(sv);
11613             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11614             return NULL;
11615         }
11616         s = start = PL_bufptr;
11617     } /* End of infinite loop */
11618
11619     /* at this point, we have successfully read the delimited string */
11620
11621     if (keep_delims)
11622             sv_catpvn(sv, s, delim_byte_len);
11623     s += delim_byte_len;
11624
11625     if (d_is_utf8)
11626         SvUTF8_on(sv);
11627
11628     PL_multi_end = CopLINE(PL_curcop);
11629     CopLINE_set(PL_curcop, PL_multi_start);
11630     PL_parser->herelines = herelines;
11631
11632     /* if we allocated too much space, give some back */
11633     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11634         SvLEN_set(sv, SvCUR(sv) + 1);
11635         SvPV_shrink_to_cur(sv);
11636     }
11637
11638     /* decide whether this is the first or second quoted string we've read
11639        for this op
11640     */
11641
11642     if (PL_lex_stuff)
11643         PL_parser->lex_sub_repl = sv;
11644     else
11645         PL_lex_stuff = sv;
11646     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11647     return s;
11648 }
11649
11650 /*
11651   scan_num
11652   takes: pointer to position in buffer
11653   returns: pointer to new position in buffer
11654   side-effects: builds ops for the constant in pl_yylval.op
11655
11656   Read a number in any of the formats that Perl accepts:
11657
11658   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
11659   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
11660   0b[01](_?[01])*                                       binary integers
11661   0o?[0-7](_?[0-7])*                                    octal integers
11662   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11663   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11664
11665   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11666   thing it reads.
11667
11668   If it reads a number without a decimal point or an exponent, it will
11669   try converting the number to an integer and see if it can do so
11670   without loss of precision.
11671 */
11672
11673 char *
11674 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11675 {
11676     const char *s = start;      /* current position in buffer */
11677     char *d;                    /* destination in temp buffer */
11678     char *e;                    /* end of temp buffer */
11679     NV nv;                              /* number read, as a double */
11680     SV *sv = NULL;                      /* place to put the converted number */
11681     bool floatit;                       /* boolean: int or float? */
11682     const char *lastub = NULL;          /* position of last underbar */
11683     static const char* const number_too_long = "Number too long";
11684     bool warned_about_underscore = 0;
11685     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11686 #define WARN_ABOUT_UNDERSCORE() \
11687         do { \
11688             if (!warned_about_underscore) { \
11689                 warned_about_underscore = 1; \
11690                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11691                                "Misplaced _ in number"); \
11692             } \
11693         } while(0)
11694     /* Hexadecimal floating point.
11695      *
11696      * In many places (where we have quads and NV is IEEE 754 double)
11697      * we can fit the mantissa bits of a NV into an unsigned quad.
11698      * (Note that UVs might not be quads even when we have quads.)
11699      * This will not work everywhere, though (either no quads, or
11700      * using long doubles), in which case we have to resort to NV,
11701      * which will probably mean horrible loss of precision due to
11702      * multiple fp operations. */
11703     bool hexfp = FALSE;
11704     int total_bits = 0;
11705     int significant_bits = 0;
11706 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11707 #  define HEXFP_UQUAD
11708     Uquad_t hexfp_uquad = 0;
11709     int hexfp_frac_bits = 0;
11710 #else
11711 #  define HEXFP_NV
11712     NV hexfp_nv = 0.0;
11713 #endif
11714     NV hexfp_mult = 1.0;
11715     UV high_non_zero = 0; /* highest digit */
11716     int non_zero_integer_digits = 0;
11717     bool new_octal = FALSE;     /* octal with "0o" prefix */
11718
11719     PERL_ARGS_ASSERT_SCAN_NUM;
11720
11721     /* We use the first character to decide what type of number this is */
11722
11723     switch (*s) {
11724     default:
11725         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11726
11727     /* if it starts with a 0, it could be an octal number, a decimal in
11728        0.13 disguise, or a hexadecimal number, or a binary number. */
11729     case '0':
11730         {
11731           /* variables:
11732              u          holds the "number so far"
11733              overflowed was the number more than we can hold?
11734
11735              Shift is used when we add a digit.  It also serves as an "are
11736              we in octal/hex/binary?" indicator to disallow hex characters
11737              when in octal mode.
11738            */
11739             NV n = 0.0;
11740             UV u = 0;
11741             bool overflowed = FALSE;
11742             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11743             bool has_digs = FALSE;
11744             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11745             static const char* const bases[5] =
11746               { "", "binary", "", "octal", "hexadecimal" };
11747             static const char* const Bases[5] =
11748               { "", "Binary", "", "Octal", "Hexadecimal" };
11749             static const char* const maxima[5] =
11750               { "",
11751                 "0b11111111111111111111111111111111",
11752                 "",
11753                 "037777777777",
11754                 "0xffffffff" };
11755
11756             /* check for hex */
11757             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11758                 shift = 4;
11759                 s += 2;
11760                 just_zero = FALSE;
11761             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11762                 shift = 1;
11763                 s += 2;
11764                 just_zero = FALSE;
11765             }
11766             /* check for a decimal in disguise */
11767             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11768                 goto decimal;
11769             /* so it must be octal */
11770             else {
11771                 shift = 3;
11772                 s++;
11773                 if (isALPHA_FOLD_EQ(*s, 'o')) {
11774                     s++;
11775                     just_zero = FALSE;
11776                     new_octal = TRUE;
11777                 }
11778             }
11779
11780             if (*s == '_') {
11781                 WARN_ABOUT_UNDERSCORE();
11782                lastub = s++;
11783             }
11784
11785             /* read the rest of the number */
11786             for (;;) {
11787                 /* x is used in the overflow test,
11788                    b is the digit we're adding on. */
11789                 UV x, b;
11790
11791                 switch (*s) {
11792
11793                 /* if we don't mention it, we're done */
11794                 default:
11795                     goto out;
11796
11797                 /* _ are ignored -- but warned about if consecutive */
11798                 case '_':
11799                     if (lastub && s == lastub + 1)
11800                         WARN_ABOUT_UNDERSCORE();
11801                     lastub = s++;
11802                     break;
11803
11804                 /* 8 and 9 are not octal */
11805                 case '8': case '9':
11806                     if (shift == 3)
11807                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11808                     /* FALLTHROUGH */
11809
11810                 /* octal digits */
11811                 case '2': case '3': case '4':
11812                 case '5': case '6': case '7':
11813                     if (shift == 1)
11814                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11815                     /* FALLTHROUGH */
11816
11817                 case '0': case '1':
11818                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11819                     goto digit;
11820
11821                 /* hex digits */
11822                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11823                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11824                     /* make sure they said 0x */
11825                     if (shift != 4)
11826                         goto out;
11827                     b = (*s++ & 7) + 9;
11828
11829                     /* Prepare to put the digit we have onto the end
11830                        of the number so far.  We check for overflows.
11831                     */
11832
11833                   digit:
11834                     just_zero = FALSE;
11835                     has_digs = TRUE;
11836                     if (!overflowed) {
11837                         assert(shift >= 0);
11838                         x = u << shift; /* make room for the digit */
11839
11840                         total_bits += shift;
11841
11842                         if ((x >> shift) != u
11843                             && !(PL_hints & HINT_NEW_BINARY)) {
11844                             overflowed = TRUE;
11845                             n = (NV) u;
11846                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11847                                              "Integer overflow in %s number",
11848                                              bases[shift]);
11849                         } else
11850                             u = x | b;          /* add the digit to the end */
11851                     }
11852                     if (overflowed) {
11853                         n *= nvshift[shift];
11854                         /* If an NV has not enough bits in its
11855                          * mantissa to represent an UV this summing of
11856                          * small low-order numbers is a waste of time
11857                          * (because the NV cannot preserve the
11858                          * low-order bits anyway): we could just
11859                          * remember when did we overflow and in the
11860                          * end just multiply n by the right
11861                          * amount. */
11862                         n += (NV) b;
11863                     }
11864
11865                     if (high_non_zero == 0 && b > 0)
11866                         high_non_zero = b;
11867
11868                     if (high_non_zero)
11869                         non_zero_integer_digits++;
11870
11871                     /* this could be hexfp, but peek ahead
11872                      * to avoid matching ".." */
11873                     if (UNLIKELY(HEXFP_PEEK(s))) {
11874                         goto out;
11875                     }
11876
11877                     break;
11878                 }
11879             }
11880
11881           /* if we get here, we had success: make a scalar value from
11882              the number.
11883           */
11884           out:
11885
11886             /* final misplaced underbar check */
11887             if (s[-1] == '_')
11888                 WARN_ABOUT_UNDERSCORE();
11889
11890             if (UNLIKELY(HEXFP_PEEK(s))) {
11891                 /* Do sloppy (on the underbars) but quick detection
11892                  * (and value construction) for hexfp, the decimal
11893                  * detection will shortly be more thorough with the
11894                  * underbar checks. */
11895                 const char* h = s;
11896                 significant_bits = non_zero_integer_digits * shift;
11897 #ifdef HEXFP_UQUAD
11898                 hexfp_uquad = u;
11899 #else /* HEXFP_NV */
11900                 hexfp_nv = u;
11901 #endif
11902                 /* Ignore the leading zero bits of
11903                  * the high (first) non-zero digit. */
11904                 if (high_non_zero) {
11905                     if (high_non_zero < 0x8)
11906                         significant_bits--;
11907                     if (high_non_zero < 0x4)
11908                         significant_bits--;
11909                     if (high_non_zero < 0x2)
11910                         significant_bits--;
11911                 }
11912
11913                 if (*h == '.') {
11914 #ifdef HEXFP_NV
11915                     NV nv_mult = 1.0;
11916 #endif
11917                     bool accumulate = TRUE;
11918                     U8 b;
11919                     int lim = 1 << shift;
11920                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11921                                *h == '_'); h++) {
11922                         if (isXDIGIT(*h)) {
11923                             significant_bits += shift;
11924 #ifdef HEXFP_UQUAD
11925                             if (accumulate) {
11926                                 if (significant_bits < NV_MANT_DIG) {
11927                                     /* We are in the long "run" of xdigits,
11928                                      * accumulate the full four bits. */
11929                                     assert(shift >= 0);
11930                                     hexfp_uquad <<= shift;
11931                                     hexfp_uquad |= b;
11932                                     hexfp_frac_bits += shift;
11933                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11934                                     /* We are at a hexdigit either at,
11935                                      * or straddling, the edge of mantissa.
11936                                      * We will try grabbing as many as
11937                                      * possible bits. */
11938                                     int tail =
11939                                       significant_bits - NV_MANT_DIG;
11940                                     if (tail <= 0)
11941                                        tail += shift;
11942                                     assert(tail >= 0);
11943                                     hexfp_uquad <<= tail;
11944                                     assert((shift - tail) >= 0);
11945                                     hexfp_uquad |= b >> (shift - tail);
11946                                     hexfp_frac_bits += tail;
11947
11948                                     /* Ignore the trailing zero bits
11949                                      * of the last non-zero xdigit.
11950                                      *
11951                                      * The assumption here is that if
11952                                      * one has input of e.g. the xdigit
11953                                      * eight (0x8), there is only one
11954                                      * bit being input, not the full
11955                                      * four bits.  Conversely, if one
11956                                      * specifies a zero xdigit, the
11957                                      * assumption is that one really
11958                                      * wants all those bits to be zero. */
11959                                     if (b) {
11960                                         if ((b & 0x1) == 0x0) {
11961                                             significant_bits--;
11962                                             if ((b & 0x2) == 0x0) {
11963                                                 significant_bits--;
11964                                                 if ((b & 0x4) == 0x0) {
11965                                                     significant_bits--;
11966                                                 }
11967                                             }
11968                                         }
11969                                     }
11970
11971                                     accumulate = FALSE;
11972                                 }
11973                             } else {
11974                                 /* Keep skipping the xdigits, and
11975                                  * accumulating the significant bits,
11976                                  * but do not shift the uquad
11977                                  * (which would catastrophically drop
11978                                  * high-order bits) or accumulate the
11979                                  * xdigits anymore. */
11980                             }
11981 #else /* HEXFP_NV */
11982                             if (accumulate) {
11983                                 nv_mult /= nvshift[shift];
11984                                 if (nv_mult > 0.0)
11985                                     hexfp_nv += b * nv_mult;
11986                                 else
11987                                     accumulate = FALSE;
11988                             }
11989 #endif
11990                         }
11991                         if (significant_bits >= NV_MANT_DIG)
11992                             accumulate = FALSE;
11993                     }
11994                 }
11995
11996                 if ((total_bits > 0 || significant_bits > 0) &&
11997                     isALPHA_FOLD_EQ(*h, 'p')) {
11998                     bool negexp = FALSE;
11999                     h++;
12000                     if (*h == '+')
12001                         h++;
12002                     else if (*h == '-') {
12003                         negexp = TRUE;
12004                         h++;
12005                     }
12006                     if (isDIGIT(*h)) {
12007                         I32 hexfp_exp = 0;
12008                         while (isDIGIT(*h) || *h == '_') {
12009                             if (isDIGIT(*h)) {
12010                                 hexfp_exp *= 10;
12011                                 hexfp_exp += *h - '0';
12012 #ifdef NV_MIN_EXP
12013                                 if (negexp
12014                                     && -hexfp_exp < NV_MIN_EXP - 1) {
12015                                     /* NOTE: this means that the exponent
12016                                      * underflow warning happens for
12017                                      * the IEEE 754 subnormals (denormals),
12018                                      * because DBL_MIN_EXP etc are the lowest
12019                                      * possible binary (or, rather, DBL_RADIX-base)
12020                                      * exponent for normals, not subnormals.
12021                                      *
12022                                      * This may or may not be a good thing. */
12023                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12024                                                    "Hexadecimal float: exponent underflow");
12025                                     break;
12026                                 }
12027 #endif
12028 #ifdef NV_MAX_EXP
12029                                 if (!negexp
12030                                     && hexfp_exp > NV_MAX_EXP - 1) {
12031                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12032                                                    "Hexadecimal float: exponent overflow");
12033                                     break;
12034                                 }
12035 #endif
12036                             }
12037                             h++;
12038                         }
12039                         if (negexp)
12040                             hexfp_exp = -hexfp_exp;
12041 #ifdef HEXFP_UQUAD
12042                         hexfp_exp -= hexfp_frac_bits;
12043 #endif
12044                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
12045                         hexfp = TRUE;
12046                         goto decimal;
12047                     }
12048                 }
12049             }
12050
12051             if (!just_zero && !has_digs) {
12052                 /* 0x, 0o or 0b with no digits, treat it as an error.
12053                    Originally this backed up the parse before the b or
12054                    x, but that has the potential for silent changes in
12055                    behaviour, like for: "0x.3" and "0x+$foo".
12056                 */
12057                 const char *d = s;
12058                 char *oldbp = PL_bufptr;
12059                 if (*d) ++d; /* so the user sees the bad non-digit */
12060                 PL_bufptr = (char *)d; /* so yyerror reports the context */
12061                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12062                                   bases[shift]));
12063                 PL_bufptr = oldbp;
12064             }
12065
12066             if (overflowed) {
12067                 if (n > 4294967295.0)
12068                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12069                                    "%s number > %s non-portable",
12070                                    Bases[shift],
12071                                    new_octal ? "0o37777777777" : maxima[shift]);
12072                 sv = newSVnv(n);
12073             }
12074             else {
12075 #if UVSIZE > 4
12076                 if (u > 0xffffffff)
12077                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12078                                    "%s number > %s non-portable",
12079                                    Bases[shift],
12080                                    new_octal ? "0o37777777777" : maxima[shift]);
12081 #endif
12082                 sv = newSVuv(u);
12083             }
12084             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12085                 sv = new_constant(start, s - start, "integer",
12086                                   sv, NULL, NULL, 0, NULL);
12087             else if (PL_hints & HINT_NEW_BINARY)
12088                 sv = new_constant(start, s - start, "binary",
12089                                   sv, NULL, NULL, 0, NULL);
12090         }
12091         break;
12092
12093     /*
12094       handle decimal numbers.
12095       we're also sent here when we read a 0 as the first digit
12096     */
12097     case '1': case '2': case '3': case '4': case '5':
12098     case '6': case '7': case '8': case '9': case '.':
12099       decimal:
12100         d = PL_tokenbuf;
12101         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12102         floatit = FALSE;
12103         if (hexfp) {
12104             floatit = TRUE;
12105             *d++ = '0';
12106             switch (shift) {
12107             case 4:
12108                 *d++ = 'x';
12109                 s = start + 2;
12110                 break;
12111             case 3:
12112                 if (new_octal) {
12113                     *d++ = 'o';
12114                     s = start + 2;
12115                     break;
12116                 }
12117                 s = start + 1;
12118                 break;
12119             case 1:
12120                 *d++ = 'b';
12121                 s = start + 2;
12122                 break;
12123             default:
12124                 NOT_REACHED; /* NOTREACHED */
12125             }
12126         }
12127
12128         /* read next group of digits and _ and copy into d */
12129         while (isDIGIT(*s)
12130                || *s == '_'
12131                || UNLIKELY(hexfp && isXDIGIT(*s)))
12132         {
12133             /* skip underscores, checking for misplaced ones
12134                if -w is on
12135             */
12136             if (*s == '_') {
12137                 if (lastub && s == lastub + 1)
12138                     WARN_ABOUT_UNDERSCORE();
12139                 lastub = s++;
12140             }
12141             else {
12142                 /* check for end of fixed-length buffer */
12143                 if (d >= e)
12144                     Perl_croak(aTHX_ "%s", number_too_long);
12145                 /* if we're ok, copy the character */
12146                 *d++ = *s++;
12147             }
12148         }
12149
12150         /* final misplaced underbar check */
12151         if (lastub && s == lastub + 1)
12152             WARN_ABOUT_UNDERSCORE();
12153
12154         /* read a decimal portion if there is one.  avoid
12155            3..5 being interpreted as the number 3. followed
12156            by .5
12157         */
12158         if (*s == '.' && s[1] != '.') {
12159             floatit = TRUE;
12160             *d++ = *s++;
12161
12162             if (*s == '_') {
12163                 WARN_ABOUT_UNDERSCORE();
12164                 lastub = s;
12165             }
12166
12167             /* copy, ignoring underbars, until we run out of digits.
12168             */
12169             for (; isDIGIT(*s)
12170                    || *s == '_'
12171                    || UNLIKELY(hexfp && isXDIGIT(*s));
12172                  s++)
12173             {
12174                 /* fixed length buffer check */
12175                 if (d >= e)
12176                     Perl_croak(aTHX_ "%s", number_too_long);
12177                 if (*s == '_') {
12178                    if (lastub && s == lastub + 1)
12179                         WARN_ABOUT_UNDERSCORE();
12180                    lastub = s;
12181                 }
12182                 else
12183                     *d++ = *s;
12184             }
12185             /* fractional part ending in underbar? */
12186             if (s[-1] == '_')
12187                 WARN_ABOUT_UNDERSCORE();
12188             if (*s == '.' && isDIGIT(s[1])) {
12189                 /* oops, it's really a v-string, but without the "v" */
12190                 s = start;
12191                 goto vstring;
12192             }
12193         }
12194
12195         /* read exponent part, if present */
12196         if ((isALPHA_FOLD_EQ(*s, 'e')
12197               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12198             && memCHRs("+-0123456789_", s[1]))
12199         {
12200             int exp_digits = 0;
12201             const char *save_s = s;
12202             char * save_d = d;
12203
12204             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12205                ditto for p (hexfloats) */
12206             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12207                 /* At least some Mach atof()s don't grok 'E' */
12208                 *d++ = 'e';
12209             }
12210             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12211                 *d++ = 'p';
12212             }
12213
12214             s++;
12215
12216
12217             /* stray preinitial _ */
12218             if (*s == '_') {
12219                 WARN_ABOUT_UNDERSCORE();
12220                 lastub = s++;
12221             }
12222
12223             /* allow positive or negative exponent */
12224             if (*s == '+' || *s == '-')
12225                 *d++ = *s++;
12226
12227             /* stray initial _ */
12228             if (*s == '_') {
12229                 WARN_ABOUT_UNDERSCORE();
12230                 lastub = s++;
12231             }
12232
12233             /* read digits of exponent */
12234             while (isDIGIT(*s) || *s == '_') {
12235                 if (isDIGIT(*s)) {
12236                     ++exp_digits;
12237                     if (d >= e)
12238                         Perl_croak(aTHX_ "%s", number_too_long);
12239                     *d++ = *s++;
12240                 }
12241                 else {
12242                    if (((lastub && s == lastub + 1)
12243                         || (!isDIGIT(s[1]) && s[1] != '_')))
12244                         WARN_ABOUT_UNDERSCORE();
12245                    lastub = s++;
12246                 }
12247             }
12248
12249             if (!exp_digits) {
12250                 /* no exponent digits, the [eEpP] could be for something else,
12251                  * though in practice we don't get here for p since that's preparsed
12252                  * earlier, and results in only the 0xX being consumed, so behave similarly
12253                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12254                  * next token.
12255                  */
12256                 s = save_s;
12257                 d = save_d;
12258             }
12259             else {
12260                 floatit = TRUE;
12261             }
12262         }
12263
12264
12265         /*
12266            We try to do an integer conversion first if no characters
12267            indicating "float" have been found.
12268          */
12269
12270         if (!floatit) {
12271             UV uv;
12272             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12273
12274             if (flags == IS_NUMBER_IN_UV) {
12275               if (uv <= IV_MAX)
12276                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12277               else
12278                 sv = newSVuv(uv);
12279             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12280               if (uv <= (UV) IV_MIN)
12281                 sv = newSViv(-(IV)uv);
12282               else
12283                 floatit = TRUE;
12284             } else
12285               floatit = TRUE;
12286         }
12287         if (floatit) {
12288             /* terminate the string */
12289             *d = '\0';
12290             if (UNLIKELY(hexfp)) {
12291 #  ifdef NV_MANT_DIG
12292                 if (significant_bits > NV_MANT_DIG)
12293                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12294                                    "Hexadecimal float: mantissa overflow");
12295 #  endif
12296 #ifdef HEXFP_UQUAD
12297                 nv = hexfp_uquad * hexfp_mult;
12298 #else /* HEXFP_NV */
12299                 nv = hexfp_nv * hexfp_mult;
12300 #endif
12301             } else {
12302                 nv = Atof(PL_tokenbuf);
12303             }
12304             sv = newSVnv(nv);
12305         }
12306
12307         if ( floatit
12308              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12309             const char *const key = floatit ? "float" : "integer";
12310             const STRLEN keylen = floatit ? 5 : 7;
12311             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12312                                 key, keylen, sv, NULL, NULL, 0, NULL);
12313         }
12314         break;
12315
12316     /* if it starts with a v, it could be a v-string */
12317     case 'v':
12318     vstring:
12319                 sv = newSV(5); /* preallocate storage space */
12320                 ENTER_with_name("scan_vstring");
12321                 SAVEFREESV(sv);
12322                 s = scan_vstring(s, PL_bufend, sv);
12323                 SvREFCNT_inc_simple_void_NN(sv);
12324                 LEAVE_with_name("scan_vstring");
12325         break;
12326     }
12327
12328     /* make the op for the constant and return */
12329
12330     if (sv)
12331         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12332     else
12333         lvalp->opval = NULL;
12334
12335     return (char *)s;
12336 }
12337
12338 STATIC char *
12339 S_scan_formline(pTHX_ char *s)
12340 {
12341     SV * const stuff = newSVpvs("");
12342     bool needargs = FALSE;
12343     bool eofmt = FALSE;
12344
12345     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12346
12347     while (!needargs) {
12348         char *eol;
12349         if (*s == '.') {
12350             char *t = s+1;
12351 #ifdef PERL_STRICT_CR
12352             while (SPACE_OR_TAB(*t))
12353                 t++;
12354 #else
12355             while (SPACE_OR_TAB(*t) || *t == '\r')
12356                 t++;
12357 #endif
12358             if (*t == '\n' || t == PL_bufend) {
12359                 eofmt = TRUE;
12360                 break;
12361             }
12362         }
12363         eol = (char *) memchr(s,'\n',PL_bufend-s);
12364         if (! eol) {
12365             eol = PL_bufend;
12366         }
12367         else {
12368             eol++;
12369         }
12370         if (*s != '#') {
12371             char *t;
12372             for (t = s; t < eol; t++) {
12373                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12374                     needargs = FALSE;
12375                     goto enough;        /* ~~ must be first line in formline */
12376                 }
12377                 if (*t == '@' || *t == '^')
12378                     needargs = TRUE;
12379             }
12380             if (eol > s) {
12381                 sv_catpvn(stuff, s, eol-s);
12382 #ifndef PERL_STRICT_CR
12383                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12384                     char *end = SvPVX(stuff) + SvCUR(stuff);
12385                     end[-2] = '\n';
12386                     end[-1] = '\0';
12387                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12388                 }
12389 #endif
12390             }
12391             else
12392               break;
12393         }
12394         s = (char*)eol;
12395         if ((PL_rsfp || PL_parser->filtered)
12396          && PL_parser->form_lex_state == LEX_NORMAL) {
12397             bool got_some;
12398             PL_bufptr = PL_bufend;
12399             COPLINE_INC_WITH_HERELINES;
12400             got_some = lex_next_chunk(0);
12401             CopLINE_dec(PL_curcop);
12402             s = PL_bufptr;
12403             if (!got_some)
12404                 break;
12405         }
12406         incline(s, PL_bufend);
12407     }
12408   enough:
12409     if (!SvCUR(stuff) || needargs)
12410         PL_lex_state = PL_parser->form_lex_state;
12411     if (SvCUR(stuff)) {
12412         PL_expect = XSTATE;
12413         if (needargs) {
12414             const char *s2 = s;
12415             while (isSPACE(*s2) && *s2 != '\n')
12416                 s2++;
12417             if (*s2 == '{') {
12418                 PL_expect = XTERMBLOCK;
12419                 NEXTVAL_NEXTTOKE.ival = 0;
12420                 force_next(KW_DO);
12421             }
12422             NEXTVAL_NEXTTOKE.ival = 0;
12423             force_next(FORMLBRACK);
12424         }
12425         if (!IN_BYTES) {
12426             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12427                 SvUTF8_on(stuff);
12428         }
12429         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12430         force_next(THING);
12431     }
12432     else {
12433         SvREFCNT_dec(stuff);
12434         if (eofmt)
12435             PL_lex_formbrack = 0;
12436     }
12437     return s;
12438 }
12439
12440 /*
12441 =for apidoc start_subparse
12442
12443 Set things up for parsing a subroutine.
12444
12445 If C<is_format> is non-zero, the input is to be considered a format sub
12446 (a specialised sub used to implement perl's C<format> feature); else a
12447 normal C<sub>.
12448
12449 C<flags> are added to the flags for C<PL_compcv>.
12450
12451 This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12452 the function;
12453
12454 =cut
12455 */
12456
12457 I32
12458 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12459 {
12460     const I32 oldsavestack_ix = PL_savestack_ix;
12461     CV* const outsidecv = PL_compcv;
12462
12463     SAVEI32(PL_subline);
12464     save_item(PL_subname);
12465     SAVESPTR(PL_compcv);
12466
12467     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12468     CvFLAGS(PL_compcv) |= flags;
12469
12470     PL_subline = CopLINE(PL_curcop);
12471     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12472     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12473     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12474     if (outsidecv && CvPADLIST(outsidecv))
12475         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12476
12477     return oldsavestack_ix;
12478 }
12479
12480
12481 /* Do extra initialisation of a CV (typically one just created by
12482  * start_subparse()) if that CV is for a named sub
12483  */
12484
12485 void
12486 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12487 {
12488     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12489
12490     if (nameop->op_type == OP_CONST) {
12491         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12492         if (   strEQ(name, "BEGIN")
12493             || strEQ(name, "END")
12494             || strEQ(name, "INIT")
12495             || strEQ(name, "CHECK")
12496             || strEQ(name, "UNITCHECK")
12497         )
12498           CvSPECIAL_on(cv);
12499     }
12500     else
12501     /* State subs inside anonymous subs need to be
12502      clonable themselves. */
12503     if (   CvANON(CvOUTSIDE(cv))
12504         || CvCLONE(CvOUTSIDE(cv))
12505         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12506                         CvOUTSIDE(cv)
12507                      ))[nameop->op_targ])
12508     )
12509       CvCLONE_on(cv);
12510 }
12511
12512
12513 static int
12514 S_yywarn(pTHX_ const char *const s, U32 flags)
12515 {
12516     PERL_ARGS_ASSERT_YYWARN;
12517
12518     PL_in_eval |= EVAL_WARNONLY;
12519     yyerror_pv(s, flags);
12520     return 0;
12521 }
12522
12523 void
12524 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12525 {
12526     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12527
12528     if (PL_minus_c)
12529         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12530     else {
12531         Perl_croak(aTHX_
12532                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12533     }
12534     NOT_REACHED; /* NOTREACHED */
12535 }
12536
12537 void
12538 Perl_yyquit(pTHX)
12539 {
12540     /* Called, after at least one error has been found, to abort the parse now,
12541      * instead of trying to forge ahead */
12542
12543     yyerror_pvn(NULL, 0, 0);
12544 }
12545
12546 int
12547 Perl_yyerror(pTHX_ const char *const s)
12548 {
12549     PERL_ARGS_ASSERT_YYERROR;
12550     return yyerror_pvn(s, strlen(s), 0);
12551 }
12552
12553 int
12554 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12555 {
12556     PERL_ARGS_ASSERT_YYERROR_PV;
12557     return yyerror_pvn(s, strlen(s), flags);
12558 }
12559
12560 int
12561 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12562 {
12563     const char *context = NULL;
12564     int contlen = -1;
12565     SV *msg;
12566     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12567     int yychar  = PL_parser->yychar;
12568
12569     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12570      * apply.  If the number of errors found is large enough, it abandons
12571      * parsing.  If 's' is NULL, there is no message, and it abandons
12572      * processing unconditionally */
12573
12574     if (s != NULL) {
12575         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12576             sv_catpvs(where_sv, "at EOF");
12577         else if (   PL_oldoldbufptr
12578                  && PL_bufptr > PL_oldoldbufptr
12579                  && PL_bufptr - PL_oldoldbufptr < 200
12580                  && PL_oldoldbufptr != PL_oldbufptr
12581                  && PL_oldbufptr != PL_bufptr)
12582         {
12583             while (isSPACE(*PL_oldoldbufptr))
12584                 PL_oldoldbufptr++;
12585             context = PL_oldoldbufptr;
12586             contlen = PL_bufptr - PL_oldoldbufptr;
12587         }
12588         else if (  PL_oldbufptr
12589                 && PL_bufptr > PL_oldbufptr
12590                 && PL_bufptr - PL_oldbufptr < 200
12591                 && PL_oldbufptr != PL_bufptr)
12592         {
12593             while (isSPACE(*PL_oldbufptr))
12594                 PL_oldbufptr++;
12595             context = PL_oldbufptr;
12596             contlen = PL_bufptr - PL_oldbufptr;
12597         }
12598         else if (yychar > 255)
12599             sv_catpvs(where_sv, "next token ???");
12600         else if (yychar == YYEMPTY) {
12601             if (PL_lex_state == LEX_NORMAL)
12602                 sv_catpvs(where_sv, "at end of line");
12603             else if (PL_lex_inpat)
12604                 sv_catpvs(where_sv, "within pattern");
12605             else
12606                 sv_catpvs(where_sv, "within string");
12607         }
12608         else {
12609             sv_catpvs(where_sv, "next char ");
12610             if (yychar < 32)
12611                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12612             else if (isPRINT_LC(yychar)) {
12613                 const char string = yychar;
12614                 sv_catpvn(where_sv, &string, 1);
12615             }
12616             else
12617                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12618         }
12619         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12620         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12621             OutCopFILE(PL_curcop),
12622             (IV)(PL_parser->preambling == NOLINE
12623                    ? CopLINE(PL_curcop)
12624                    : PL_parser->preambling));
12625         if (context)
12626             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12627                                  UTF8fARG(UTF, contlen, context));
12628         else
12629             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12630         if (   PL_multi_start < PL_multi_end
12631             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12632         {
12633             Perl_sv_catpvf(aTHX_ msg,
12634             "  (Might be a runaway multi-line %c%c string starting on"
12635             " line %" IVdf ")\n",
12636                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12637             PL_multi_end = 0;
12638         }
12639         if (PL_in_eval & EVAL_WARNONLY) {
12640             PL_in_eval &= ~EVAL_WARNONLY;
12641             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12642         }
12643         else {
12644             qerror(msg);
12645         }
12646     }
12647     if (s == NULL || PL_error_count >= 10) {
12648         const char * msg = "";
12649         const char * const name = OutCopFILE(PL_curcop);
12650
12651         if (PL_in_eval) {
12652             SV * errsv = ERRSV;
12653             if (SvCUR(errsv)) {
12654                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12655             }
12656         }
12657
12658         if (s == NULL) {
12659             abort_execution(msg, name);
12660         }
12661         else {
12662             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12663         }
12664     }
12665     PL_in_my = 0;
12666     PL_in_my_stash = NULL;
12667     return 0;
12668 }
12669
12670 STATIC char*
12671 S_swallow_bom(pTHX_ U8 *s)
12672 {
12673     const STRLEN slen = SvCUR(PL_linestr);
12674
12675     PERL_ARGS_ASSERT_SWALLOW_BOM;
12676
12677     switch (s[0]) {
12678     case 0xFF:
12679         if (s[1] == 0xFE) {
12680             /* UTF-16 little-endian? (or UTF-32LE?) */
12681             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12682                 /* diag_listed_as: Unsupported script encoding %s */
12683                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12684 #ifndef PERL_NO_UTF16_FILTER
12685 #ifdef DEBUGGING
12686             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12687 #endif
12688             s += 2;
12689             if (PL_bufend > (char*)s) {
12690                 s = add_utf16_textfilter(s, TRUE);
12691             }
12692 #else
12693             /* diag_listed_as: Unsupported script encoding %s */
12694             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12695 #endif
12696         }
12697         break;
12698     case 0xFE:
12699         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12700 #ifndef PERL_NO_UTF16_FILTER
12701 #ifdef DEBUGGING
12702             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12703 #endif
12704             s += 2;
12705             if (PL_bufend > (char *)s) {
12706                 s = add_utf16_textfilter(s, FALSE);
12707             }
12708 #else
12709             /* diag_listed_as: Unsupported script encoding %s */
12710             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12711 #endif
12712         }
12713         break;
12714     case BOM_UTF8_FIRST_BYTE: {
12715         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12716 #ifdef DEBUGGING
12717             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12718 #endif
12719             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12720         }
12721         break;
12722     }
12723     case 0:
12724         if (slen > 3) {
12725              if (s[1] == 0) {
12726                   if (s[2] == 0xFE && s[3] == 0xFF) {
12727                        /* UTF-32 big-endian */
12728                        /* diag_listed_as: Unsupported script encoding %s */
12729                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12730                   }
12731              }
12732              else if (s[2] == 0 && s[3] != 0) {
12733                   /* Leading bytes
12734                    * 00 xx 00 xx
12735                    * are a good indicator of UTF-16BE. */
12736 #ifndef PERL_NO_UTF16_FILTER
12737 #ifdef DEBUGGING
12738                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12739 #endif
12740                   s = add_utf16_textfilter(s, FALSE);
12741 #else
12742                   /* diag_listed_as: Unsupported script encoding %s */
12743                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12744 #endif
12745              }
12746         }
12747         break;
12748
12749     default:
12750          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12751                   /* Leading bytes
12752                    * xx 00 xx 00
12753                    * are a good indicator of UTF-16LE. */
12754 #ifndef PERL_NO_UTF16_FILTER
12755 #ifdef DEBUGGING
12756               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12757 #endif
12758               s = add_utf16_textfilter(s, TRUE);
12759 #else
12760               /* diag_listed_as: Unsupported script encoding %s */
12761               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12762 #endif
12763          }
12764     }
12765     return (char*)s;
12766 }
12767
12768
12769 #ifndef PERL_NO_UTF16_FILTER
12770 static I32
12771 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12772 {
12773     SV *const filter = FILTER_DATA(idx);
12774     /* We re-use this each time round, throwing the contents away before we
12775        return.  */
12776     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12777     SV *const utf8_buffer = filter;
12778     IV status = IoPAGE(filter);
12779     const bool reverse = cBOOL(IoLINES(filter));
12780     I32 retval;
12781
12782     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12783
12784     /* As we're automatically added, at the lowest level, and hence only called
12785        from this file, we can be sure that we're not called in block mode. Hence
12786        don't bother writing code to deal with block mode.  */
12787     if (maxlen) {
12788         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12789     }
12790     if (status < 0) {
12791         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12792     }
12793     DEBUG_P(PerlIO_printf(Perl_debug_log,
12794                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12795                           FPTR2DPTR(void *, S_utf16_textfilter),
12796                           reverse ? 'l' : 'b', idx, maxlen, status,
12797                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12798
12799     while (1) {
12800         STRLEN chars;
12801         STRLEN have;
12802         Size_t newlen;
12803         U8 *end;
12804         /* First, look in our buffer of existing UTF-8 data:  */
12805         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12806
12807         if (nl) {
12808             ++nl;
12809         } else if (status == 0) {
12810             /* EOF */
12811             IoPAGE(filter) = 0;
12812             nl = SvEND(utf8_buffer);
12813         }
12814         if (nl) {
12815             STRLEN got = nl - SvPVX(utf8_buffer);
12816             /* Did we have anything to append?  */
12817             retval = got != 0;
12818             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12819             /* Everything else in this code works just fine if SVp_POK isn't
12820                set.  This, however, needs it, and we need it to work, else
12821                we loop infinitely because the buffer is never consumed.  */
12822             sv_chop(utf8_buffer, nl);
12823             break;
12824         }
12825
12826         /* OK, not a complete line there, so need to read some more UTF-16.
12827            Read an extra octect if the buffer currently has an odd number. */
12828         while (1) {
12829             if (status <= 0)
12830                 break;
12831             if (SvCUR(utf16_buffer) >= 2) {
12832                 /* Location of the high octet of the last complete code point.
12833                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12834                    *coupled* with all the benefits of partial reads and
12835                    endianness.  */
12836                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12837                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12838
12839                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12840                     break;
12841                 }
12842
12843                 /* We have the first half of a surrogate. Read more.  */
12844                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12845             }
12846
12847             status = FILTER_READ(idx + 1, utf16_buffer,
12848                                  160 + (SvCUR(utf16_buffer) & 1));
12849             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12850             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12851             if (status < 0) {
12852                 /* Error */
12853                 IoPAGE(filter) = status;
12854                 return status;
12855             }
12856         }
12857
12858         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12859          * require 4 bytes per char */
12860         chars = SvCUR(utf16_buffer) >> 1;
12861         have = SvCUR(utf8_buffer);
12862
12863         /* Assume the worst case size as noted by the functions: twice the
12864          * number of input bytes */
12865         SvGROW(utf8_buffer, have + chars * 4 + 1);
12866
12867         if (reverse) {
12868             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12869                                          (U8*)SvPVX_const(utf8_buffer) + have,
12870                                          chars * 2, &newlen);
12871         } else {
12872             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12873                                 (U8*)SvPVX_const(utf8_buffer) + have,
12874                                 chars * 2, &newlen);
12875         }
12876         SvCUR_set(utf8_buffer, have + newlen);
12877         *end = '\0';
12878
12879         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12880            it's private to us, and utf16_to_utf8{,reversed} take a
12881            (pointer,length) pair, rather than a NUL-terminated string.  */
12882         if(SvCUR(utf16_buffer) & 1) {
12883             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12884             SvCUR_set(utf16_buffer, 1);
12885         } else {
12886             SvCUR_set(utf16_buffer, 0);
12887         }
12888     }
12889     DEBUG_P(PerlIO_printf(Perl_debug_log,
12890                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12891                           status,
12892                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12893     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12894     return retval;
12895 }
12896
12897 static U8 *
12898 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12899 {
12900     SV *filter = filter_add(S_utf16_textfilter, NULL);
12901
12902     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12903
12904     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12905     SvPVCLEAR(filter);
12906     IoLINES(filter) = reversed;
12907     IoPAGE(filter) = 1; /* Not EOF */
12908
12909     /* Sadly, we have to return a valid pointer, come what may, so we have to
12910        ignore any error return from this.  */
12911     SvCUR_set(PL_linestr, 0);
12912     if (FILTER_READ(0, PL_linestr, 0)) {
12913         SvUTF8_on(PL_linestr);
12914     } else {
12915         SvUTF8_on(PL_linestr);
12916     }
12917     PL_bufend = SvEND(PL_linestr);
12918     return (U8*)SvPVX(PL_linestr);
12919 }
12920 #endif
12921
12922 /*
12923 =for apidoc scan_vstring
12924
12925 Returns a pointer to the next character after the parsed
12926 vstring, as well as updating the passed in sv.
12927
12928 Function must be called like
12929
12930         sv = sv_2mortal(newSV(5));
12931         s = scan_vstring(s,e,sv);
12932
12933 where s and e are the start and end of the string.
12934 The sv should already be large enough to store the vstring
12935 passed in, for performance reasons.
12936
12937 This function may croak if fatal warnings are enabled in the
12938 calling scope, hence the sv_2mortal in the example (to prevent
12939 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12940 sv_2mortal.
12941
12942 =cut
12943 */
12944
12945 char *
12946 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12947 {
12948     const char *pos = s;
12949     const char *start = s;
12950
12951     PERL_ARGS_ASSERT_SCAN_VSTRING;
12952
12953     if (*pos == 'v') pos++;  /* get past 'v' */
12954     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12955         pos++;
12956     if ( *pos != '.') {
12957         /* this may not be a v-string if followed by => */
12958         const char *next = pos;
12959         while (next < e && isSPACE(*next))
12960             ++next;
12961         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12962             /* return string not v-string */
12963             sv_setpvn(sv,(char *)s,pos-s);
12964             return (char *)pos;
12965         }
12966     }
12967
12968     if (!isALPHA(*pos)) {
12969         U8 tmpbuf[UTF8_MAXBYTES+1];
12970
12971         if (*s == 'v')
12972             s++;  /* get past 'v' */
12973
12974         SvPVCLEAR(sv);
12975
12976         for (;;) {
12977             /* this is atoi() that tolerates underscores */
12978             U8 *tmpend;
12979             UV rev = 0;
12980             const char *end = pos;
12981             UV mult = 1;
12982             while (--end >= s) {
12983                 if (*end != '_') {
12984                     const UV orev = rev;
12985                     rev += (*end - '0') * mult;
12986                     mult *= 10;
12987                     if (orev > rev)
12988                         /* diag_listed_as: Integer overflow in %s number */
12989                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12990                                          "Integer overflow in decimal number");
12991                 }
12992             }
12993
12994             /* Append native character for the rev point */
12995             tmpend = uvchr_to_utf8(tmpbuf, rev);
12996             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12997             if (!UVCHR_IS_INVARIANT(rev))
12998                  SvUTF8_on(sv);
12999             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13000                  s = ++pos;
13001             else {
13002                  s = pos;
13003                  break;
13004             }
13005             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13006                  pos++;
13007         }
13008         SvPOK_on(sv);
13009         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13010         SvRMAGICAL_on(sv);
13011     }
13012     return (char *)s;
13013 }
13014
13015 int
13016 Perl_keyword_plugin_standard(pTHX_
13017         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13018 {
13019     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13020     PERL_UNUSED_CONTEXT;
13021     PERL_UNUSED_ARG(keyword_ptr);
13022     PERL_UNUSED_ARG(keyword_len);
13023     PERL_UNUSED_ARG(op_ptr);
13024     return KEYWORD_PLUGIN_DECLINE;
13025 }
13026
13027 /*
13028 =for apidoc_section $lexer
13029 =for apidoc wrap_keyword_plugin
13030
13031 Puts a C function into the chain of keyword plugins.  This is the
13032 preferred way to manipulate the L</PL_keyword_plugin> variable.
13033 C<new_plugin> is a pointer to the C function that is to be added to the
13034 keyword plugin chain, and C<old_plugin_p> points to the storage location
13035 where a pointer to the next function in the chain will be stored.  The
13036 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13037 while the value previously stored there is written to C<*old_plugin_p>.
13038
13039 L</PL_keyword_plugin> is global to an entire process, and a module wishing
13040 to hook keyword parsing may find itself invoked more than once per
13041 process, typically in different threads.  To handle that situation, this
13042 function is idempotent.  The location C<*old_plugin_p> must initially
13043 (once per process) contain a null pointer.  A C variable of static
13044 duration (declared at file scope, typically also marked C<static> to give
13045 it internal linkage) will be implicitly initialised appropriately, if it
13046 does not have an explicit initialiser.  This function will only actually
13047 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13048 function is also thread safe on the small scale.  It uses appropriate
13049 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13050
13051 When this function is called, the function referenced by C<new_plugin>
13052 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13053 In a threading situation, C<new_plugin> may be called immediately, even
13054 before this function has returned.  C<*old_plugin_p> will always be
13055 appropriately set before C<new_plugin> is called.  If C<new_plugin>
13056 decides not to do anything special with the identifier that it is given
13057 (which is the usual case for most calls to a keyword plugin), it must
13058 chain the plugin function referenced by C<*old_plugin_p>.
13059
13060 Taken all together, XS code to install a keyword plugin should typically
13061 look something like this:
13062
13063     static Perl_keyword_plugin_t next_keyword_plugin;
13064     static OP *my_keyword_plugin(pTHX_
13065         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13066     {
13067         if (memEQs(keyword_ptr, keyword_len,
13068                    "my_new_keyword")) {
13069             ...
13070         } else {
13071             return next_keyword_plugin(aTHX_
13072                 keyword_ptr, keyword_len, op_ptr);
13073         }
13074     }
13075     BOOT:
13076         wrap_keyword_plugin(my_keyword_plugin,
13077                             &next_keyword_plugin);
13078
13079 Direct access to L</PL_keyword_plugin> should be avoided.
13080
13081 =cut
13082 */
13083
13084 void
13085 Perl_wrap_keyword_plugin(pTHX_
13086     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13087 {
13088
13089     PERL_UNUSED_CONTEXT;
13090     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13091     if (*old_plugin_p) return;
13092     KEYWORD_PLUGIN_MUTEX_LOCK;
13093     if (!*old_plugin_p) {
13094         *old_plugin_p = PL_keyword_plugin;
13095         PL_keyword_plugin = new_plugin;
13096     }
13097     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13098 }
13099
13100 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13101 static void
13102 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13103 {
13104     SAVEI32(PL_lex_brackets);
13105     if (PL_lex_brackets > 100)
13106         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13107     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13108     SAVEI32(PL_lex_allbrackets);
13109     PL_lex_allbrackets = 0;
13110     SAVEI8(PL_lex_fakeeof);
13111     PL_lex_fakeeof = (U8)fakeeof;
13112     if(yyparse(gramtype) && !PL_parser->error_count)
13113         qerror(Perl_mess(aTHX_ "Parse error"));
13114 }
13115
13116 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13117 static OP *
13118 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13119 {
13120     OP *o;
13121     ENTER;
13122     SAVEVPTR(PL_eval_root);
13123     PL_eval_root = NULL;
13124     parse_recdescent(gramtype, fakeeof);
13125     o = PL_eval_root;
13126     LEAVE;
13127     return o;
13128 }
13129
13130 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13131 static OP *
13132 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13133 {
13134     OP *exprop;
13135     if (flags & ~PARSE_OPTIONAL)
13136         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13137     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13138     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13139         if (!PL_parser->error_count)
13140             qerror(Perl_mess(aTHX_ "Parse error"));
13141         exprop = newOP(OP_NULL, 0);
13142     }
13143     return exprop;
13144 }
13145
13146 /*
13147 =for apidoc parse_arithexpr
13148
13149 Parse a Perl arithmetic expression.  This may contain operators of precedence
13150 down to the bit shift operators.  The expression must be followed (and thus
13151 terminated) either by a comparison or lower-precedence operator or by
13152 something that would normally terminate an expression such as semicolon.
13153 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13154 otherwise it is mandatory.  It is up to the caller to ensure that the
13155 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13156 the source of the code to be parsed and the lexical context for the
13157 expression.
13158
13159 The op tree representing the expression is returned.  If an optional
13160 expression is absent, a null pointer is returned, otherwise the pointer
13161 will be non-null.
13162
13163 If an error occurs in parsing or compilation, in most cases a valid op
13164 tree is returned anyway.  The error is reflected in the parser state,
13165 normally resulting in a single exception at the top level of parsing
13166 which covers all the compilation errors that occurred.  Some compilation
13167 errors, however, will throw an exception immediately.
13168
13169 =for apidoc Amnh||PARSE_OPTIONAL
13170
13171 =cut
13172
13173 */
13174
13175 OP *
13176 Perl_parse_arithexpr(pTHX_ U32 flags)
13177 {
13178     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13179 }
13180
13181 /*
13182 =for apidoc parse_termexpr
13183
13184 Parse a Perl term expression.  This may contain operators of precedence
13185 down to the assignment operators.  The expression must be followed (and thus
13186 terminated) either by a comma or lower-precedence operator or by
13187 something that would normally terminate an expression such as semicolon.
13188 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13189 otherwise it is mandatory.  It is up to the caller to ensure that the
13190 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13191 the source of the code to be parsed and the lexical context for the
13192 expression.
13193
13194 The op tree representing the expression is returned.  If an optional
13195 expression is absent, a null pointer is returned, otherwise the pointer
13196 will be non-null.
13197
13198 If an error occurs in parsing or compilation, in most cases a valid op
13199 tree is returned anyway.  The error is reflected in the parser state,
13200 normally resulting in a single exception at the top level of parsing
13201 which covers all the compilation errors that occurred.  Some compilation
13202 errors, however, will throw an exception immediately.
13203
13204 =cut
13205 */
13206
13207 OP *
13208 Perl_parse_termexpr(pTHX_ U32 flags)
13209 {
13210     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13211 }
13212
13213 /*
13214 =for apidoc parse_listexpr
13215
13216 Parse a Perl list expression.  This may contain operators of precedence
13217 down to the comma operator.  The expression must be followed (and thus
13218 terminated) either by a low-precedence logic operator such as C<or> or by
13219 something that would normally terminate an expression such as semicolon.
13220 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13221 otherwise it is mandatory.  It is up to the caller to ensure that the
13222 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13223 the source of the code to be parsed and the lexical context for the
13224 expression.
13225
13226 The op tree representing the expression is returned.  If an optional
13227 expression is absent, a null pointer is returned, otherwise the pointer
13228 will be non-null.
13229
13230 If an error occurs in parsing or compilation, in most cases a valid op
13231 tree is returned anyway.  The error is reflected in the parser state,
13232 normally resulting in a single exception at the top level of parsing
13233 which covers all the compilation errors that occurred.  Some compilation
13234 errors, however, will throw an exception immediately.
13235
13236 =cut
13237 */
13238
13239 OP *
13240 Perl_parse_listexpr(pTHX_ U32 flags)
13241 {
13242     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13243 }
13244
13245 /*
13246 =for apidoc parse_fullexpr
13247
13248 Parse a single complete Perl expression.  This allows the full
13249 expression grammar, including the lowest-precedence operators such
13250 as C<or>.  The expression must be followed (and thus terminated) by a
13251 token that an expression would normally be terminated by: end-of-file,
13252 closing bracketing punctuation, semicolon, or one of the keywords that
13253 signals a postfix expression-statement modifier.  If C<flags> has the
13254 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13255 mandatory.  It is up to the caller to ensure that the dynamic parser
13256 state (L</PL_parser> et al) is correctly set to reflect the source of
13257 the code to be parsed and the lexical context for the expression.
13258
13259 The op tree representing the expression is returned.  If an optional
13260 expression is absent, a null pointer is returned, otherwise the pointer
13261 will be non-null.
13262
13263 If an error occurs in parsing or compilation, in most cases a valid op
13264 tree is returned anyway.  The error is reflected in the parser state,
13265 normally resulting in a single exception at the top level of parsing
13266 which covers all the compilation errors that occurred.  Some compilation
13267 errors, however, will throw an exception immediately.
13268
13269 =cut
13270 */
13271
13272 OP *
13273 Perl_parse_fullexpr(pTHX_ U32 flags)
13274 {
13275     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13276 }
13277
13278 /*
13279 =for apidoc parse_block
13280
13281 Parse a single complete Perl code block.  This consists of an opening
13282 brace, a sequence of statements, and a closing brace.  The block
13283 constitutes a lexical scope, so C<my> variables and various compile-time
13284 effects can be contained within it.  It is up to the caller to ensure
13285 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13286 reflect the source of the code to be parsed and the lexical context for
13287 the statement.
13288
13289 The op tree representing the code block is returned.  This is always a
13290 real op, never a null pointer.  It will normally be a C<lineseq> list,
13291 including C<nextstate> or equivalent ops.  No ops to construct any kind
13292 of runtime scope are included by virtue of it being a block.
13293
13294 If an error occurs in parsing or compilation, in most cases a valid op
13295 tree (most likely null) is returned anyway.  The error is reflected in
13296 the parser state, normally resulting in a single exception at the top
13297 level of parsing which covers all the compilation errors that occurred.
13298 Some compilation errors, however, will throw an exception immediately.
13299
13300 The C<flags> parameter is reserved for future use, and must always
13301 be zero.
13302
13303 =cut
13304 */
13305
13306 OP *
13307 Perl_parse_block(pTHX_ U32 flags)
13308 {
13309     if (flags)
13310         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13311     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13312 }
13313
13314 /*
13315 =for apidoc parse_barestmt
13316
13317 Parse a single unadorned Perl statement.  This may be a normal imperative
13318 statement or a declaration that has compile-time effect.  It does not
13319 include any label or other affixture.  It is up to the caller to ensure
13320 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13321 reflect the source of the code to be parsed and the lexical context for
13322 the statement.
13323
13324 The op tree representing the statement is returned.  This may be a
13325 null pointer if the statement is null, for example if it was actually
13326 a subroutine definition (which has compile-time side effects).  If not
13327 null, it will be ops directly implementing the statement, suitable to
13328 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13329 equivalent op (except for those embedded in a scope contained entirely
13330 within the statement).
13331
13332 If an error occurs in parsing or compilation, in most cases a valid op
13333 tree (most likely null) is returned anyway.  The error is reflected in
13334 the parser state, normally resulting in a single exception at the top
13335 level of parsing which covers all the compilation errors that occurred.
13336 Some compilation errors, however, will throw an exception immediately.
13337
13338 The C<flags> parameter is reserved for future use, and must always
13339 be zero.
13340
13341 =cut
13342 */
13343
13344 OP *
13345 Perl_parse_barestmt(pTHX_ U32 flags)
13346 {
13347     if (flags)
13348         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13349     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13350 }
13351
13352 /*
13353 =for apidoc parse_label
13354
13355 Parse a single label, possibly optional, of the type that may prefix a
13356 Perl statement.  It is up to the caller to ensure that the dynamic parser
13357 state (L</PL_parser> et al) is correctly set to reflect the source of
13358 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13359 label is optional, otherwise it is mandatory.
13360
13361 The name of the label is returned in the form of a fresh scalar.  If an
13362 optional label is absent, a null pointer is returned.
13363
13364 If an error occurs in parsing, which can only occur if the label is
13365 mandatory, a valid label is returned anyway.  The error is reflected in
13366 the parser state, normally resulting in a single exception at the top
13367 level of parsing which covers all the compilation errors that occurred.
13368
13369 =cut
13370 */
13371
13372 SV *
13373 Perl_parse_label(pTHX_ U32 flags)
13374 {
13375     if (flags & ~PARSE_OPTIONAL)
13376         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13377     if (PL_nexttoke) {
13378         PL_parser->yychar = yylex();
13379         if (PL_parser->yychar == LABEL) {
13380             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13381             PL_parser->yychar = YYEMPTY;
13382             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13383             op_free(pl_yylval.opval);
13384             return labelsv;
13385         } else {
13386             yyunlex();
13387             goto no_label;
13388         }
13389     } else {
13390         char *s, *t;
13391         STRLEN wlen, bufptr_pos;
13392         lex_read_space(0);
13393         t = s = PL_bufptr;
13394         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13395             goto no_label;
13396         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13397         if (word_takes_any_delimiter(s, wlen))
13398             goto no_label;
13399         bufptr_pos = s - SvPVX(PL_linestr);
13400         PL_bufptr = t;
13401         lex_read_space(LEX_KEEP_PREVIOUS);
13402         t = PL_bufptr;
13403         s = SvPVX(PL_linestr) + bufptr_pos;
13404         if (t[0] == ':' && t[1] != ':') {
13405             PL_oldoldbufptr = PL_oldbufptr;
13406             PL_oldbufptr = s;
13407             PL_bufptr = t+1;
13408             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13409         } else {
13410             PL_bufptr = s;
13411             no_label:
13412             if (flags & PARSE_OPTIONAL) {
13413                 return NULL;
13414             } else {
13415                 qerror(Perl_mess(aTHX_ "Parse error"));
13416                 return newSVpvs("x");
13417             }
13418         }
13419     }
13420 }
13421
13422 /*
13423 =for apidoc parse_fullstmt
13424
13425 Parse a single complete Perl statement.  This may be a normal imperative
13426 statement or a declaration that has compile-time effect, and may include
13427 optional labels.  It is up to the caller to ensure that the dynamic
13428 parser state (L</PL_parser> et al) is correctly set to reflect the source
13429 of the code to be parsed and the lexical context for the statement.
13430
13431 The op tree representing the statement is returned.  This may be a
13432 null pointer if the statement is null, for example if it was actually
13433 a subroutine definition (which has compile-time side effects).  If not
13434 null, it will be the result of a L</newSTATEOP> call, normally including
13435 a C<nextstate> or equivalent op.
13436
13437 If an error occurs in parsing or compilation, in most cases a valid op
13438 tree (most likely null) is returned anyway.  The error is reflected in
13439 the parser state, normally resulting in a single exception at the top
13440 level of parsing which covers all the compilation errors that occurred.
13441 Some compilation errors, however, will throw an exception immediately.
13442
13443 The C<flags> parameter is reserved for future use, and must always
13444 be zero.
13445
13446 =cut
13447 */
13448
13449 OP *
13450 Perl_parse_fullstmt(pTHX_ U32 flags)
13451 {
13452     if (flags)
13453         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13454     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13455 }
13456
13457 /*
13458 =for apidoc parse_stmtseq
13459
13460 Parse a sequence of zero or more Perl statements.  These may be normal
13461 imperative statements, including optional labels, or declarations
13462 that have compile-time effect, or any mixture thereof.  The statement
13463 sequence ends when a closing brace or end-of-file is encountered in a
13464 place where a new statement could have validly started.  It is up to
13465 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13466 is correctly set to reflect the source of the code to be parsed and the
13467 lexical context for the statements.
13468
13469 The op tree representing the statement sequence is returned.  This may
13470 be a null pointer if the statements were all null, for example if there
13471 were no statements or if there were only subroutine definitions (which
13472 have compile-time side effects).  If not null, it will be a C<lineseq>
13473 list, normally including C<nextstate> or equivalent ops.
13474
13475 If an error occurs in parsing or compilation, in most cases a valid op
13476 tree is returned anyway.  The error is reflected in the parser state,
13477 normally resulting in a single exception at the top level of parsing
13478 which covers all the compilation errors that occurred.  Some compilation
13479 errors, however, will throw an exception immediately.
13480
13481 The C<flags> parameter is reserved for future use, and must always
13482 be zero.
13483
13484 =cut
13485 */
13486
13487 OP *
13488 Perl_parse_stmtseq(pTHX_ U32 flags)
13489 {
13490     OP *stmtseqop;
13491     I32 c;
13492     if (flags)
13493         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13494     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13495     c = lex_peek_unichar(0);
13496     if (c != -1 && c != /*{*/'}')
13497         qerror(Perl_mess(aTHX_ "Parse error"));
13498     return stmtseqop;
13499 }
13500
13501 /*
13502 =for apidoc parse_subsignature
13503
13504 Parse a subroutine signature declaration. This is the contents of the
13505 parentheses following a named or anonymous subroutine declaration when the
13506 C<signatures> feature is enabled. Note that this function neither expects
13507 nor consumes the opening and closing parentheses around the signature; it
13508 is the caller's job to handle these.
13509
13510 This function must only be called during parsing of a subroutine; after
13511 L</start_subparse> has been called. It might allocate lexical variables on
13512 the pad for the current subroutine.
13513
13514 The op tree to unpack the arguments from the stack at runtime is returned.
13515 This op tree should appear at the beginning of the compiled function. The
13516 caller may wish to use L</op_append_list> to build their function body
13517 after it, or splice it together with the body before calling L</newATTRSUB>.
13518
13519 The C<flags> parameter is reserved for future use, and must always
13520 be zero.
13521
13522 =cut
13523 */
13524
13525 OP *
13526 Perl_parse_subsignature(pTHX_ U32 flags)
13527 {
13528     if (flags)
13529         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13530     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13531 }
13532
13533 /*
13534  * ex: set ts=8 sts=4 sw=4 et:
13535  */