#include "invlist_inline.h"
#define new_constant(a,b,c,d,e,f,g, h) \
- S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
#define pl_yylval (PL_parser->yylval)
(SvTYPE(sv) >= SVt_PVNV \
&& ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
-static const char* const ident_too_long = "Identifier too long";
-static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
+static const char ident_too_long[] = "Identifier too long";
+static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
* 1999-02-27 mjd-perl-patch@plover.com */
#define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
+/* Non-identifier plugin infix operators are allowed any printing character
+ * except spaces, digits, or identifier chars
+ */
+#define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
+/* Plugin infix operators may not begin with a quote symbol */
+#define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
+
+#define PLUGINFIX_IS_ENABLED UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
+
#define SPACE_OR_TAB(c) isBLANK_A(c)
#define HEXFP_PEEK(s) \
#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
#define LEX_INTERPSTART 6 /* expecting the start of a $var */
- /* at end of code, eg "$x" followed by: */
+ /* at end of code, eg "$x" followed by: */
#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
- string or after \E, $foo, etc */
+ string or after \E, $foo, etc */
#define LEX_INTERPCONST 2 /* NOT USED */
#define LEX_FORMLINE 1 /* expecting a format line */
#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
+#define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
- pl_yylval.ival=f, \
- PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
- REPORT((int)LOOPEX))
+ pl_yylval.ival=f, \
+ PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
+ REPORT((int)LOOPEX))
#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
- REPORT(PERLY_TILDE)
+ REPORT(PERLY_TILDE)
#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
* operator (such as C<shift // 0>).
*/
#define UNI3(f,x,have_x) { \
- pl_yylval.ival = f; \
- if (have_x) PL_expect = x; \
- PL_bufptr = s; \
- PL_last_uni = PL_oldbufptr; \
- PL_last_lop_op = (f) < 0 ? -(f) : (f); \
- if (*s == '(') \
- return REPORT( (int)FUNC1 ); \
- s = skipspace(s); \
- return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
- }
+ pl_yylval.ival = f; \
+ if (have_x) PL_expect = x; \
+ PL_bufptr = s; \
+ PL_last_uni = PL_oldbufptr; \
+ PL_last_lop_op = (f) < 0 ? -(f) : (f); \
+ if (*s == '(') \
+ return REPORT( (int)FUNC1 ); \
+ s = skipspace(s); \
+ return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
+ }
#define UNI(f) UNI3(f,XTERM,1)
#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
#define UNIPROTO(f,optional) { \
- if (optional) PL_last_uni = PL_oldbufptr; \
- OPERATOR(f); \
- }
+ if (optional) PL_last_uni = PL_oldbufptr; \
+ OPERATOR(f); \
+ }
#define UNIBRACK(f) UNI3(f,0,0)
-/* grandfather return to old style */
+/* return has special case parsing.
+ *
+ * List operators have low precedence. Functions have high precedence.
+ * Every built in, *except return*, if written with () around its arguments, is
+ * parsed as a function. Hence every other list built in:
+ *
+ * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
+ * 429
+ * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
+ * 639
+ * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
+ * Useless use of a constant (2) in void context at -e line 1.
+ * Useless use of a constant (4) in void context at -e line 1.
+ *
+ * $
+ *
+ * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
+ * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
+ *
+ * Whereas return:
+ *
+ * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
+ * 2
+ * 4
+ * 9
+ * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
+ * Useless use of a constant (2) in void context at -e line 1.
+ * Useless use of a constant (4) in void context at -e line 1.
+ * 9
+ * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
+ * Useless use of a constant (2) in void context at -e line 1.
+ * Useless use of a constant (4) in void context at -e line 1.
+ * 9
+ * $
+ *
+ * and:
+ * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
+ * 2
+ * 4
+ * 6
+ *
+ * This last example is what we expect, but it's clearly inconsistent with how
+ * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
+ * followed.
+ *
+ *
+ * Perl 3 attempted to be consistent:
+ *
+ * The rules are more consistent about where parens are needed and
+ * where they are not. In particular, unary operators and list operators now
+ * behave like functions if they're called like functions.
+ *
+ * However, the behaviour for return was reverted to the "old" parsing with
+ * patches 9-12:
+ *
+ * The construct
+ * return (1,2,3);
+ * did not do what was expected, since return was swallowing the
+ * parens in order to consider itself a function. The solution,
+ * since return never wants any trailing expression such as
+ * return (1,2,3) + 2;
+ * is to simply make return an exception to the paren-makes-a-function
+ * rule, and treat it the way it always was, so that it doesn't
+ * strip the parens.
+ *
+ * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
+ * LOP(OP_RETURN, XTERM);
+ *
+ * and constructs such as
+ *
+ * return (Internals::V())[2]
+ *
+ * turn into syntax errors
+ */
+
#define OLDLOP(f) \
- do { \
- if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
- PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
- pl_yylval.ival = (f); \
- PL_expect = XTERM; \
- PL_bufptr = s; \
- return (int)LSTOP; \
- } while(0)
+ do { \
+ if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
+ PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
+ pl_yylval.ival = (f); \
+ PL_expect = XTERM; \
+ PL_bufptr = s; \
+ return (int)LSTOP; \
+ } while(0)
#define COPLINE_INC_WITH_HERELINES \
STMT_START { \
- CopLINE_inc(PL_curcop); \
- if (PL_parser->herelines) \
- CopLINE(PL_curcop) += PL_parser->herelines, \
- PL_parser->herelines = 0; \
+ CopLINE_inc(PL_curcop); \
+ if (PL_parser->herelines) \
+ CopLINE(PL_curcop) += PL_parser->herelines, \
+ PL_parser->herelines = 0; \
} STMT_END
/* Called after scan_str to update CopLINE(PL_curcop), but only when there
* is no sublex_push to follow. */
#define COPLINE_SET_FROM_MULTI_END \
STMT_START { \
- CopLINE_set(PL_curcop, PL_multi_end); \
- if (PL_multi_end != PL_multi_start) \
- PL_parser->herelines = 0; \
+ CopLINE_set(PL_curcop, PL_multi_end); \
+ if (PL_multi_end != PL_multi_start) \
+ PL_parser->herelines = 0; \
} STMT_END
const char *name;
} const debug_tokens[] =
{
- { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
- { ANDAND, TOKENTYPE_NONE, "ANDAND" },
- { ANDOP, TOKENTYPE_NONE, "ANDOP" },
- { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
- { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
- { ARROW, TOKENTYPE_NONE, "ARROW" },
- { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
- { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
- { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
- { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" },
- { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" },
- { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
- { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
- { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
- { DO, TOKENTYPE_NONE, "DO" },
- { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
- { DORDOR, TOKENTYPE_NONE, "DORDOR" },
- { DOROP, TOKENTYPE_OPNUM, "DOROP" },
- { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
- { ELSE, TOKENTYPE_NONE, "ELSE" },
- { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
- { FOR, TOKENTYPE_IVAL, "FOR" },
- { FORMAT, TOKENTYPE_NONE, "FORMAT" },
- { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
- { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
- { FUNC, TOKENTYPE_OPNUM, "FUNC" },
- { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
- { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
- { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
- { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
- { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
- { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
- { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
- { IF, TOKENTYPE_IVAL, "IF" },
- { LABEL, TOKENTYPE_OPVAL, "LABEL" },
- { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
- { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
- { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
- { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
- { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
- { METHOD, TOKENTYPE_OPVAL, "METHOD" },
- { MULOP, TOKENTYPE_OPNUM, "MULOP" },
- { MY, TOKENTYPE_IVAL, "MY" },
- { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" },
- { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" },
- { NOAMP, TOKENTYPE_NONE, "NOAMP" },
- { NOTOP, TOKENTYPE_NONE, "NOTOP" },
- { OROP, TOKENTYPE_IVAL, "OROP" },
- { OROR, TOKENTYPE_NONE, "OROR" },
- { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
- DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
- DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
- DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
- DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
- DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
- DEBUG_TOKEN (IVAL, PERLY_COLON),
- DEBUG_TOKEN (IVAL, PERLY_COMMA),
- DEBUG_TOKEN (IVAL, PERLY_DOT),
- DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
- DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
- DEBUG_TOKEN (IVAL, PERLY_MINUS),
- DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
- DEBUG_TOKEN (IVAL, PERLY_PLUS),
- DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
- DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
- DEBUG_TOKEN (IVAL, PERLY_SNAIL),
- DEBUG_TOKEN (IVAL, PERLY_TILDE),
- { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
- { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
- { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
- { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
- { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
- { POSTINC, TOKENTYPE_NONE, "POSTINC" },
- { POWOP, TOKENTYPE_OPNUM, "POWOP" },
- { PREDEC, TOKENTYPE_NONE, "PREDEC" },
- { PREINC, TOKENTYPE_NONE, "PREINC" },
- { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
- { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
- { REFGEN, TOKENTYPE_NONE, "REFGEN" },
- { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
- { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
- { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
- { SUB, TOKENTYPE_NONE, "SUB" },
- { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
- { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
- { THING, TOKENTYPE_OPVAL, "THING" },
- { UMINUS, TOKENTYPE_NONE, "UMINUS" },
- { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
- { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
- { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
- { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
- { USE, TOKENTYPE_IVAL, "USE" },
- { WHEN, TOKENTYPE_IVAL, "WHEN" },
- { WHILE, TOKENTYPE_IVAL, "WHILE" },
- { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
- { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
+ DEBUG_TOKEN (OPNUM, ADDOP),
+ DEBUG_TOKEN (NONE, ANDAND),
+ DEBUG_TOKEN (NONE, ANDOP),
+ DEBUG_TOKEN (NONE, ARROW),
+ DEBUG_TOKEN (OPNUM, ASSIGNOP),
+ DEBUG_TOKEN (OPNUM, BITANDOP),
+ DEBUG_TOKEN (OPNUM, BITOROP),
+ DEBUG_TOKEN (OPNUM, CHEQOP),
+ DEBUG_TOKEN (OPNUM, CHRELOP),
+ DEBUG_TOKEN (NONE, COLONATTR),
+ DEBUG_TOKEN (NONE, DOLSHARP),
+ DEBUG_TOKEN (NONE, DORDOR),
+ DEBUG_TOKEN (IVAL, DOTDOT),
+ DEBUG_TOKEN (NONE, FORMLBRACK),
+ DEBUG_TOKEN (NONE, FORMRBRACK),
+ DEBUG_TOKEN (OPNUM, FUNC),
+ DEBUG_TOKEN (OPNUM, FUNC0),
+ DEBUG_TOKEN (OPVAL, FUNC0OP),
+ DEBUG_TOKEN (OPVAL, FUNC0SUB),
+ DEBUG_TOKEN (OPNUM, FUNC1),
+ DEBUG_TOKEN (NONE, HASHBRACK),
+ DEBUG_TOKEN (IVAL, KW_CATCH),
+ DEBUG_TOKEN (IVAL, KW_CLASS),
+ DEBUG_TOKEN (IVAL, KW_CONTINUE),
+ DEBUG_TOKEN (IVAL, KW_DEFAULT),
+ DEBUG_TOKEN (IVAL, KW_DO),
+ DEBUG_TOKEN (IVAL, KW_ELSE),
+ DEBUG_TOKEN (IVAL, KW_ELSIF),
+ DEBUG_TOKEN (IVAL, KW_FIELD),
+ DEBUG_TOKEN (IVAL, KW_GIVEN),
+ DEBUG_TOKEN (IVAL, KW_FOR),
+ DEBUG_TOKEN (IVAL, KW_FORMAT),
+ DEBUG_TOKEN (IVAL, KW_IF),
+ DEBUG_TOKEN (IVAL, KW_LOCAL),
+ DEBUG_TOKEN (IVAL, KW_METHOD_anon),
+ DEBUG_TOKEN (IVAL, KW_METHOD_named),
+ DEBUG_TOKEN (IVAL, KW_MY),
+ DEBUG_TOKEN (IVAL, KW_PACKAGE),
+ DEBUG_TOKEN (IVAL, KW_REQUIRE),
+ DEBUG_TOKEN (IVAL, KW_SUB_anon),
+ DEBUG_TOKEN (IVAL, KW_SUB_anon_sig),
+ DEBUG_TOKEN (IVAL, KW_SUB_named),
+ DEBUG_TOKEN (IVAL, KW_SUB_named_sig),
+ DEBUG_TOKEN (IVAL, KW_TRY),
+ DEBUG_TOKEN (IVAL, KW_USE_or_NO),
+ DEBUG_TOKEN (IVAL, KW_UNLESS),
+ DEBUG_TOKEN (IVAL, KW_UNTIL),
+ DEBUG_TOKEN (IVAL, KW_WHEN),
+ DEBUG_TOKEN (IVAL, KW_WHILE),
+ DEBUG_TOKEN (OPVAL, LABEL),
+ DEBUG_TOKEN (OPNUM, LOOPEX),
+ DEBUG_TOKEN (OPNUM, LSTOP),
+ DEBUG_TOKEN (OPVAL, LSTOPSUB),
+ DEBUG_TOKEN (OPNUM, MATCHOP),
+ DEBUG_TOKEN (OPVAL, METHCALL),
+ DEBUG_TOKEN (OPVAL, METHCALL0),
+ DEBUG_TOKEN (OPNUM, MULOP),
+ DEBUG_TOKEN (OPNUM, NCEQOP),
+ DEBUG_TOKEN (OPNUM, NCRELOP),
+ DEBUG_TOKEN (NONE, NOAMP),
+ DEBUG_TOKEN (NONE, NOTOP),
+ DEBUG_TOKEN (IVAL, OROP),
+ DEBUG_TOKEN (NONE, OROR),
+ DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
+ DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
+ DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
+ DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
+ DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
+ DEBUG_TOKEN (IVAL, PERLY_COLON),
+ DEBUG_TOKEN (IVAL, PERLY_COMMA),
+ DEBUG_TOKEN (IVAL, PERLY_DOT),
+ DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
+ DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
+ DEBUG_TOKEN (IVAL, PERLY_MINUS),
+ DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
+ DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
+ DEBUG_TOKEN (IVAL, PERLY_PLUS),
+ DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
+ DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
+ DEBUG_TOKEN (IVAL, PERLY_SLASH),
+ DEBUG_TOKEN (IVAL, PERLY_SNAIL),
+ DEBUG_TOKEN (IVAL, PERLY_STAR),
+ DEBUG_TOKEN (IVAL, PERLY_TILDE),
+ DEBUG_TOKEN (OPVAL, PLUGEXPR),
+ DEBUG_TOKEN (OPVAL, PLUGSTMT),
+ DEBUG_TOKEN (PVAL, PLUGIN_ADD_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_ASSIGN_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_HIGH_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_LOW_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_LOW_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_LOW_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_MUL_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_POW_OP),
+ DEBUG_TOKEN (PVAL, PLUGIN_REL_OP),
+ DEBUG_TOKEN (OPVAL, PMFUNC),
+ DEBUG_TOKEN (NONE, POSTJOIN),
+ DEBUG_TOKEN (NONE, POSTDEC),
+ DEBUG_TOKEN (NONE, POSTINC),
+ DEBUG_TOKEN (OPNUM, POWOP),
+ DEBUG_TOKEN (NONE, PREDEC),
+ DEBUG_TOKEN (NONE, PREINC),
+ DEBUG_TOKEN (OPVAL, PRIVATEREF),
+ DEBUG_TOKEN (OPVAL, QWLIST),
+ DEBUG_TOKEN (NONE, REFGEN),
+ DEBUG_TOKEN (OPNUM, SHIFTOP),
+ DEBUG_TOKEN (NONE, SUBLEXEND),
+ DEBUG_TOKEN (NONE, SUBLEXSTART),
+ DEBUG_TOKEN (OPVAL, THING),
+ DEBUG_TOKEN (NONE, UMINUS),
+ DEBUG_TOKEN (OPNUM, UNIOP),
+ DEBUG_TOKEN (OPVAL, UNIOPSUB),
+ DEBUG_TOKEN (OPVAL, BAREWORD),
+ DEBUG_TOKEN (IVAL, YADAYADA),
{ 0, TOKENTYPE_NONE, NULL }
};
PERL_ARGS_ASSERT_TOKEREPORT;
if (DEBUG_T_TEST) {
- const char *name = NULL;
- enum token_type type = TOKENTYPE_NONE;
- const struct debug_tokens *p;
- SV* const report = newSVpvs("<== ");
-
- for (p = debug_tokens; p->token; p++) {
- if (p->token == (int)rv) {
- name = p->name;
- type = p->type;
- break;
- }
- }
- if (name)
- Perl_sv_catpv(aTHX_ report, name);
- else if (isGRAPH(rv))
- {
- Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
- if ((char)rv == 'p')
- sv_catpvs(report, " (pending identifier)");
- }
- else if (!rv)
- sv_catpvs(report, "EOF");
- else
- Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
- switch (type) {
- case TOKENTYPE_NONE:
- break;
- case TOKENTYPE_IVAL:
- Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
- break;
- case TOKENTYPE_OPNUM:
- Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
- PL_op_name[lvalp->ival]);
- break;
- case TOKENTYPE_PVAL:
- Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
- break;
- case TOKENTYPE_OPVAL:
- if (lvalp->opval) {
- Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
- PL_op_name[lvalp->opval->op_type]);
- if (lvalp->opval->op_type == OP_CONST) {
- Perl_sv_catpvf(aTHX_ report, " %s",
- SvPEEK(cSVOPx_sv(lvalp->opval)));
- }
-
- }
- else
- sv_catpvs(report, "(opval=null)");
- break;
- }
+ const char *name = NULL;
+ enum token_type type = TOKENTYPE_NONE;
+ const struct debug_tokens *p;
+ SV* const report = newSVpvs("<== ");
+
+ for (p = debug_tokens; p->token; p++) {
+ if (p->token == (int)rv) {
+ name = p->name;
+ type = p->type;
+ break;
+ }
+ }
+ if (name)
+ Perl_sv_catpv(aTHX_ report, name);
+ else if (isGRAPH(rv))
+ {
+ Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+ if ((char)rv == 'p')
+ sv_catpvs(report, " (pending identifier)");
+ }
+ else if (!rv)
+ sv_catpvs(report, "EOF");
+ else
+ Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
+ switch (type) {
+ case TOKENTYPE_NONE:
+ break;
+ case TOKENTYPE_IVAL:
+ Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
+ break;
+ case TOKENTYPE_OPNUM:
+ Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
+ PL_op_name[lvalp->ival]);
+ break;
+ case TOKENTYPE_PVAL:
+ Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
+ break;
+ case TOKENTYPE_OPVAL:
+ if (lvalp->opval) {
+ Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
+ PL_op_name[lvalp->opval->op_type]);
+ if (lvalp->opval->op_type == OP_CONST) {
+ Perl_sv_catpvf(aTHX_ report, " %s",
+ SvPEEK(cSVOPx_sv(lvalp->opval)));
+ }
+
+ }
+ else
+ sv_catpvs(report, "(opval=null)");
+ break;
+ }
PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
};
return (int)rv;
S_ao(pTHX_ int toketype)
{
if (*PL_bufptr == '=') {
- PL_bufptr++;
- if (toketype == ANDAND)
- pl_yylval.ival = OP_ANDASSIGN;
- else if (toketype == OROR)
- pl_yylval.ival = OP_ORASSIGN;
- else if (toketype == DORDOR)
- pl_yylval.ival = OP_DORASSIGN;
- toketype = ASSIGNOP;
+ PL_bufptr++;
+
+ switch (toketype) {
+ case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
+ case OROR: pl_yylval.ival = OP_ORASSIGN; break;
+ case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
+ }
+
+ toketype = ASSIGNOP;
}
return REPORT(toketype);
}
{
char * const oldbp = PL_bufptr;
const bool is_first = (PL_oldbufptr == PL_linestart);
+ SV *message = sv_2mortal( newSVpvf(
+ PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
+ what
+ ) );
PERL_ARGS_ASSERT_NO_OP;
if (!s)
- s = oldbp;
+ s = oldbp;
else
- PL_bufptr = s;
- yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
+ PL_bufptr = s;
+
if (ckWARN_d(WARN_SYNTAX)) {
- if (is_first)
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Missing semicolon on previous line?)\n");
- else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
- PL_bufend,
- UTF))
- {
- const char *t;
- for (t = PL_oldoldbufptr;
- (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
+ bool has_more = FALSE;
+ if (is_first) {
+ has_more = TRUE;
+ sv_catpvs(message,
+ " (Missing semicolon on previous line?)");
+ }
+ else if (PL_oldoldbufptr) {
+ /* yyerror (via yywarn) would do this itself, so we should too */
+ const char *t;
+ for (t = PL_oldoldbufptr;
+ t < PL_bufptr && isSPACE(*t);
t += UTF ? UTF8SKIP(t) : 1)
{
- NOOP;
- }
- if (t < PL_bufptr && isSPACE(*t))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Do you need to predeclare %" UTF8f "?)\n",
- UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
- }
- else {
- assert(s >= oldbp);
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Missing operator before %" UTF8f "?)\n",
- UTF8fARG(UTF, s - oldbp, oldbp));
- }
+ NOOP;
+ }
+ /* see if we can identify the cause of the warning */
+ if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
+ {
+ const char *t_start= t;
+ for ( ;
+ (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
+ t += UTF ? UTF8SKIP(t) : 1)
+ {
+ NOOP;
+ }
+ if (t < PL_bufptr && isSPACE(*t)) {
+ has_more = TRUE;
+ sv_catpvf( message,
+ " (Do you need to predeclare \"%" UTF8f "\"?)",
+ UTF8fARG(UTF, t - t_start, t_start));
+ }
+ }
+ }
+ if (!has_more) {
+ const char *t= oldbp;
+ assert(s >= oldbp);
+ while (t < s && isSPACE(*t)) {
+ t += UTF ? UTF8SKIP(t) : 1;
+ }
+
+ sv_catpvf(message,
+ " (Missing operator before \"%" UTF8f "\"?)",
+ UTF8fARG(UTF, s - t, t));
+ }
}
+ yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
PL_bufptr = oldbp;
}
char tmpbuf[UTF8_MAXBYTES + 1];
char q;
bool uni = FALSE;
- SV *sv;
if (s) {
- char * const nl = (char *) my_memrchr(s, '\n', len);
+ char * const nl = (char *) my_memrchr(s, '\n', len);
if (nl) {
*nl = '\0';
len = nl - s;
}
- uni = UTF;
+ uni = UTF;
}
else if (PL_multi_close < 32) {
- *tmpbuf = '^';
- tmpbuf[1] = (char)toCTRL(PL_multi_close);
- tmpbuf[2] = '\0';
- s = tmpbuf;
+ *tmpbuf = '^';
+ tmpbuf[1] = (char)toCTRL(PL_multi_close);
+ tmpbuf[2] = '\0';
+ s = tmpbuf;
len = 2;
}
else {
- if (LIKELY(PL_multi_close < 256)) {
- *tmpbuf = (char)PL_multi_close;
- tmpbuf[1] = '\0';
+ if (! UTF && LIKELY(PL_multi_close < 256)) {
+ *tmpbuf = (char)PL_multi_close;
+ tmpbuf[1] = '\0';
len = 1;
- }
- else {
+ }
+ else {
char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
*end = '\0';
len = end - tmpbuf;
- uni = TRUE;
- }
- s = tmpbuf;
+ uni = TRUE;
+ }
+ s = tmpbuf;
}
q = memchr(s, '"', len) ? '\'' : '"';
- sv = sv_2mortal(newSVpvn(s, len));
- if (uni)
- SvUTF8_on(sv);
- Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
- " anywhere before EOF", q, SVfARG(sv), q);
+ Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
+ " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
}
#include "feature.h"
/* outer loop optimized to do nothing if there are no CR-LFs */
while (s < e) {
- if (*s++ == '\r' && *s == '\n') {
- /* hit a CR-LF, need to copy the rest */
- char *d = s - 1;
- *d++ = *s++;
- while (s < e) {
- if (*s == '\r' && s[1] == '\n')
- s++;
- *d++ = *s++;
- }
- SvCUR(sv) -= s - d;
- return;
- }
+ if (*s++ == '\r' && *s == '\n') {
+ /* hit a CR-LF, need to copy the rest */
+ char *d = s - 1;
+ *d++ = *s++;
+ while (s < e) {
+ if (*s == '\r' && s[1] == '\n')
+ s++;
+ *d++ = *s++;
+ }
+ SvCUR(sv) -= s - d;
+ return;
+ }
}
}
{
const I32 count = FILTER_READ(idx+1, sv, maxlen);
if (count > 0 && !maxlen)
- strip_return(sv);
+ strip_return(sv);
return count;
}
#endif
yy_parser *parser, *oparser;
if (flags && flags & ~LEX_START_FLAGS)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
/* create and initialise a parser */
Newxz(parser->lex_shared, 1, LEXSHARED);
if (line) {
- STRLEN len;
+ Size_t len;
const U8* first_bad_char_loc;
- s = SvPV_const(line, len);
+ s = SvPV_const(line, len);
if ( SvUTF8(line)
&& UNLIKELY(! is_utf8_string_loc((U8 *) s,
NOT_REACHED; /* NOTREACHED */
}
- parser->linestr = flags & LEX_START_COPIED
- ? SvREFCNT_inc_simple_NN(line)
- : newSVpvn_flags(s, len, SvUTF8(line));
- if (!rsfp)
- sv_catpvs(parser->linestr, "\n;");
+ parser->linestr = flags & LEX_START_COPIED
+ ? SvREFCNT_inc_simple_NN(line)
+ : newSVpvn_flags(s, len, SvUTF8(line));
+ if (!rsfp)
+ sv_catpvs(parser->linestr, "\n;");
} else {
- parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
+ parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
}
parser->oldoldbufptr =
- parser->oldbufptr =
- parser->bufptr =
- parser->linestart = SvPVX(parser->linestr);
+ parser->oldbufptr =
+ parser->bufptr =
+ parser->linestart = SvPVX(parser->linestr);
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
SvREFCNT_dec(parser->linestr);
if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
- PerlIO_clearerr(parser->rsfp);
+ PerlIO_clearerr(parser->rsfp);
else if (parser->rsfp && (!parser->old_parser
|| (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
- PerlIO_close(parser->rsfp);
+ PerlIO_close(parser->rsfp);
SvREFCNT_dec(parser->rsfp_filters);
SvREFCNT_dec(parser->lex_stuff);
SvREFCNT_dec(parser->lex_sub_repl);
I32 nexttoke = parser->nexttoke;
PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
while (nexttoke--) {
- if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
- && parser->nextval[nexttoke].opval
- && parser->nextval[nexttoke].opval->op_slabbed
- && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
- op_free(parser->nextval[nexttoke].opval);
- parser->nextval[nexttoke].opval = NULL;
- }
+ if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
+ && parser->nextval[nexttoke].opval
+ && parser->nextval[nexttoke].opval->op_slabbed
+ && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
+ op_free(parser->nextval[nexttoke].opval);
+ parser->nextval[nexttoke].opval = NULL;
+ }
}
}
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
if (len <= SvLEN(linestr))
- return buf;
+ return buf;
/* Is the lex_shared linestr SV the same as the current linestr SV?
* Only in this case does re_eval_start need adjusting, since it
PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
PL_parser->linestart = buf + linestart_pos;
if (PL_parser->last_uni)
- PL_parser->last_uni = buf + last_uni_pos;
+ PL_parser->last_uni = buf + last_uni_pos;
if (PL_parser->last_lop)
- PL_parser->last_lop = buf + last_lop_pos;
+ PL_parser->last_lop = buf + last_lop_pos;
if (current && PL_parser->lex_shared->re_eval_start)
PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
return buf;
char *bufptr;
PERL_ARGS_ASSERT_LEX_STUFF_PVN;
if (flags & ~(LEX_STUFF_UTF8))
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
if (UTF) {
- if (flags & LEX_STUFF_UTF8) {
- goto plain_copy;
- } else {
- STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
+ if (flags & LEX_STUFF_UTF8) {
+ goto plain_copy;
+ } else {
+ STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
(U8 *) pv + len);
const char *p, *e = pv+len;;
- if (!highhalf)
- goto plain_copy;
- lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
- bufptr = PL_parser->bufptr;
- Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
- SvCUR_set(PL_parser->linestr,
- SvCUR(PL_parser->linestr) + len+highhalf);
- PL_parser->bufend += len+highhalf;
- for (p = pv; p != e; p++) {
+ if (!highhalf)
+ goto plain_copy;
+ lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
+ bufptr = PL_parser->bufptr;
+ Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr,
+ SvCUR(PL_parser->linestr) + len+highhalf);
+ PL_parser->bufend += len+highhalf;
+ for (p = pv; p != e; p++) {
append_utf8_from_native_byte(*p, (U8 **) &bufptr);
- }
- }
+ }
+ }
} else {
- if (flags & LEX_STUFF_UTF8) {
- STRLEN highhalf = 0;
- const char *p, *e = pv+len;
- for (p = pv; p != e; p++) {
- U8 c = (U8)*p;
- if (UTF8_IS_ABOVE_LATIN1(c)) {
- Perl_croak(aTHX_ "Lexing code attempted to stuff "
- "non-Latin-1 character into Latin-1 input");
- } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
- p++;
- highhalf++;
+ if (flags & LEX_STUFF_UTF8) {
+ STRLEN highhalf = 0;
+ const char *p, *e = pv+len;
+ for (p = pv; p != e; p++) {
+ U8 c = (U8)*p;
+ if (UTF8_IS_ABOVE_LATIN1(c)) {
+ Perl_croak(aTHX_ "Lexing code attempted to stuff "
+ "non-Latin-1 character into Latin-1 input");
+ } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
+ p++;
+ highhalf++;
} else assert(UTF8_IS_INVARIANT(c));
- }
- if (!highhalf)
- goto plain_copy;
- lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
- bufptr = PL_parser->bufptr;
- Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
- SvCUR_set(PL_parser->linestr,
- SvCUR(PL_parser->linestr) + len-highhalf);
- PL_parser->bufend += len-highhalf;
- p = pv;
- while (p < e) {
- if (UTF8_IS_INVARIANT(*p)) {
- *bufptr++ = *p;
+ }
+ if (!highhalf)
+ goto plain_copy;
+ lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
+ bufptr = PL_parser->bufptr;
+ Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr,
+ SvCUR(PL_parser->linestr) + len-highhalf);
+ PL_parser->bufend += len-highhalf;
+ p = pv;
+ while (p < e) {
+ if (UTF8_IS_INVARIANT(*p)) {
+ *bufptr++ = *p;
p++;
- }
- else {
+ }
+ else {
assert(p < e -1 );
- *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
- p += 2;
+ *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
+ p += 2;
}
- }
- } else {
- plain_copy:
- lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
- bufptr = PL_parser->bufptr;
- Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
- SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
- PL_parser->bufend += len;
- Copy(pv, bufptr, len, char);
- }
+ }
+ } else {
+ plain_copy:
+ lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
+ bufptr = PL_parser->bufptr;
+ Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
+ PL_parser->bufend += len;
+ Copy(pv, bufptr, len, char);
+ }
}
}
STRLEN len;
PERL_ARGS_ASSERT_LEX_STUFF_SV;
if (flags)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
pv = SvPV(sv, len);
lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
}
PERL_ARGS_ASSERT_LEX_UNSTUFF;
buf = PL_parser->bufptr;
if (ptr < buf)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
if (ptr == buf)
- return;
+ return;
bufend = PL_parser->bufend;
if (ptr > bufend)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
unstuff_len = ptr - buf;
Move(ptr, buf, bufend+1-ptr, char);
SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
PERL_ARGS_ASSERT_LEX_READ_TO;
s = PL_parser->bufptr;
if (ptr < s || ptr > PL_parser->bufend)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
for (; s != ptr; s++)
- if (*s == '\n') {
- COPLINE_INC_WITH_HERELINES;
- PL_parser->linestart = s+1;
- }
+ if (*s == '\n') {
+ COPLINE_INC_WITH_HERELINES;
+ PL_parser->linestart = s+1;
+ }
PL_parser->bufptr = ptr;
}
PERL_ARGS_ASSERT_LEX_DISCARD_TO;
buf = SvPVX(PL_parser->linestr);
if (ptr < buf)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
if (ptr == buf)
- return;
+ return;
if (ptr > PL_parser->bufptr)
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
discard_len = ptr - buf;
if (PL_parser->oldbufptr < ptr)
- PL_parser->oldbufptr = ptr;
+ PL_parser->oldbufptr = ptr;
if (PL_parser->oldoldbufptr < ptr)
- PL_parser->oldoldbufptr = ptr;
+ PL_parser->oldoldbufptr = ptr;
if (PL_parser->last_uni && PL_parser->last_uni < ptr)
- PL_parser->last_uni = NULL;
+ PL_parser->last_uni = NULL;
if (PL_parser->last_lop && PL_parser->last_lop < ptr)
- PL_parser->last_lop = NULL;
+ PL_parser->last_lop = NULL;
Move(ptr, buf, PL_parser->bufend+1-ptr, char);
SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
PL_parser->bufend -= discard_len;
PL_parser->oldbufptr -= discard_len;
PL_parser->oldoldbufptr -= discard_len;
if (PL_parser->last_uni)
- PL_parser->last_uni -= discard_len;
+ PL_parser->last_uni -= discard_len;
if (PL_parser->last_lop)
- PL_parser->last_lop -= discard_len;
+ PL_parser->last_lop -= discard_len;
}
void
bool got_some;
if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
- return FALSE;
+ return FALSE;
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
if (!(flags & LEX_KEEP_PREVIOUS)
&& PL_parser->bufptr == PL_parser->bufend)
{
- old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
- linestart_pos = 0;
- if (PL_parser->last_uni != PL_parser->bufend)
- PL_parser->last_uni = NULL;
- if (PL_parser->last_lop != PL_parser->bufend)
- PL_parser->last_lop = NULL;
- last_uni_pos = last_lop_pos = 0;
- *buf = 0;
- SvCUR_set(linestr, 0);
+ old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
+ linestart_pos = 0;
+ if (PL_parser->last_uni != PL_parser->bufend)
+ PL_parser->last_uni = NULL;
+ if (PL_parser->last_lop != PL_parser->bufend)
+ PL_parser->last_lop = NULL;
+ last_uni_pos = last_lop_pos = 0;
+ *buf = 0;
+ SvCUR_set(linestr, 0);
} else {
- old_bufend_pos = PL_parser->bufend - buf;
- bufptr_pos = PL_parser->bufptr - buf;
- oldbufptr_pos = PL_parser->oldbufptr - buf;
- oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
- linestart_pos = PL_parser->linestart - buf;
- last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
- last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+ old_bufend_pos = PL_parser->bufend - buf;
+ bufptr_pos = PL_parser->bufptr - buf;
+ oldbufptr_pos = PL_parser->oldbufptr - buf;
+ oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+ linestart_pos = PL_parser->linestart - buf;
+ last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+ last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
}
if (flags & LEX_FAKE_EOF) {
- goto eof;
+ goto eof;
} else if (!PL_parser->rsfp && !PL_parser->filtered) {
- got_some = 0;
+ got_some = 0;
} else if (filter_gets(linestr, old_bufend_pos)) {
- got_some = 1;
- got_some_for_debugger = 1;
+ got_some = 1;
+ got_some_for_debugger = 1;
} else if (flags & LEX_NO_TERM) {
- got_some = 0;
+ got_some = 0;
} else {
- if (!SvPOK(linestr)) /* can get undefined by filter_gets */
+ if (!SvPOK(linestr)) /* can get undefined by filter_gets */
SvPVCLEAR(linestr);
- eof:
- /* End of real input. Close filehandle (unless it was STDIN),
- * then add implicit termination.
- */
- if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
- PerlIO_clearerr(PL_parser->rsfp);
- else if (PL_parser->rsfp)
- (void)PerlIO_close(PL_parser->rsfp);
- PL_parser->rsfp = NULL;
- PL_parser->in_pod = PL_parser->filtered = 0;
- if (!PL_in_eval && PL_minus_p) {
- sv_catpvs(linestr,
- /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
- PL_minus_n = PL_minus_p = 0;
- } else if (!PL_in_eval && PL_minus_n) {
- sv_catpvs(linestr, /*{*/";}");
- PL_minus_n = 0;
- } else
- sv_catpvs(linestr, ";");
- got_some = 1;
+ eof:
+ /* End of real input. Close filehandle (unless it was STDIN),
+ * then add implicit termination.
+ */
+ if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
+ PerlIO_clearerr(PL_parser->rsfp);
+ else if (PL_parser->rsfp)
+ (void)PerlIO_close(PL_parser->rsfp);
+ PL_parser->rsfp = NULL;
+ PL_parser->in_pod = PL_parser->filtered = 0;
+ if (!PL_in_eval && PL_minus_p) {
+ sv_catpvs(linestr,
+ /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
+ PL_minus_n = PL_minus_p = 0;
+ } else if (!PL_in_eval && PL_minus_n) {
+ sv_catpvs(linestr, /*{*/";}");
+ PL_minus_n = 0;
+ } else
+ sv_catpvs(linestr, ";");
+ got_some = 1;
}
buf = SvPVX(linestr);
new_bufend_pos = SvCUR(linestr);
PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
PL_parser->linestart = buf + linestart_pos;
if (PL_parser->last_uni)
- PL_parser->last_uni = buf + last_uni_pos;
+ PL_parser->last_uni = buf + last_uni_pos;
if (PL_parser->last_lop)
- PL_parser->last_lop = buf + last_lop_pos;
+ PL_parser->last_lop = buf + last_lop_pos;
if (PL_parser->preambling != NOLINE) {
- CopLINE_set(PL_curcop, PL_parser->preambling + 1);
- PL_parser->preambling = NOLINE;
+ CopLINE_set(PL_curcop, PL_parser->preambling + 1);
+ PL_parser->preambling = NOLINE;
}
if ( got_some_for_debugger
&& PERLDB_LINE_OR_SAVESRC
&& PL_curstash != PL_debstash)
{
- /* debugger active and we're not compiling the debugger code,
- * so store the line into the debugger's array of lines
- */
- update_debugger_info(NULL, buf+old_bufend_pos,
- new_bufend_pos-old_bufend_pos);
+ /* debugger active and we're not compiling the debugger code,
+ * so store the line into the debugger's array of lines
+ */
+ update_debugger_info(NULL, buf+old_bufend_pos,
+ new_bufend_pos-old_bufend_pos);
}
return got_some;
}
{
char *s, *bufend;
if (flags & ~(LEX_KEEP_PREVIOUS))
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
s = PL_parser->bufptr;
bufend = PL_parser->bufend;
if (UTF) {
- U8 head;
- I32 unichar;
- STRLEN len, retlen;
- if (s == bufend) {
- if (!lex_next_chunk(flags))
- return -1;
- s = PL_parser->bufptr;
- bufend = PL_parser->bufend;
- }
- head = (U8)*s;
- if (UTF8_IS_INVARIANT(head))
- return head;
- if (UTF8_IS_START(head)) {
- len = UTF8SKIP(&head);
- while ((STRLEN)(bufend-s) < len) {
- if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
- break;
- s = PL_parser->bufptr;
- bufend = PL_parser->bufend;
- }
- }
- unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
- if (retlen == (STRLEN)-1) {
+ U8 head;
+ I32 unichar;
+ STRLEN len, retlen;
+ if (s == bufend) {
+ if (!lex_next_chunk(flags))
+ return -1;
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ }
+ head = (U8)*s;
+ if (UTF8_IS_INVARIANT(head))
+ return head;
+ if (UTF8_IS_START(head)) {
+ len = UTF8SKIP(&head);
+ while ((STRLEN)(bufend-s) < len) {
+ if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
+ break;
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ }
+ }
+ unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+ if (retlen == (STRLEN)-1) {
_force_out_malformed_utf8_message((U8 *) s,
(U8 *) bufend,
0,
1 /* 1 means die */ );
NOT_REACHED; /* NOTREACHED */
- }
- return unichar;
+ }
+ return unichar;
} else {
- if (s == bufend) {
- if (!lex_next_chunk(flags))
- return -1;
- s = PL_parser->bufptr;
- }
- return (U8)*s;
+ if (s == bufend) {
+ if (!lex_next_chunk(flags))
+ return -1;
+ s = PL_parser->bufptr;
+ }
+ return (U8)*s;
}
}
{
I32 c;
if (flags & ~(LEX_KEEP_PREVIOUS))
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
c = lex_peek_unichar(flags);
if (c != -1) {
- if (c == '\n')
- COPLINE_INC_WITH_HERELINES;
- if (UTF)
- PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
- else
- ++(PL_parser->bufptr);
+ if (c == '\n')
+ COPLINE_INC_WITH_HERELINES;
+ if (UTF)
+ PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+ else
+ ++(PL_parser->bufptr);
}
return c;
}
const bool can_incline = !(flags & LEX_NO_INCLINE);
bool need_incline = 0;
if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
- Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
s = PL_parser->bufptr;
bufend = PL_parser->bufend;
while (1) {
- char c = *s;
- if (c == '#') {
- do {
- c = *++s;
- } while (!(c == '\n' || (c == 0 && s == bufend)));
- } else if (c == '\n') {
- s++;
- if (can_incline) {
- PL_parser->linestart = s;
- if (s == bufend)
- need_incline = 1;
- else
- incline(s, bufend);
- }
- } else if (isSPACE(c)) {
- s++;
- } else if (c == 0 && s == bufend) {
- bool got_more;
- line_t l;
- if (flags & LEX_NO_NEXT_CHUNK)
- break;
- PL_parser->bufptr = s;
- l = CopLINE(PL_curcop);
- CopLINE(PL_curcop) += PL_parser->herelines + 1;
- got_more = lex_next_chunk(flags);
- CopLINE_set(PL_curcop, l);
- s = PL_parser->bufptr;
- bufend = PL_parser->bufend;
- if (!got_more)
- break;
- if (can_incline && need_incline && PL_parser->rsfp) {
- incline(s, bufend);
- need_incline = 0;
- }
- } else if (!c) {
- s++;
- } else {
- break;
- }
+ char c = *s;
+ if (c == '#') {
+ do {
+ c = *++s;
+ } while (!(c == '\n' || (c == 0 && s == bufend)));
+ } else if (c == '\n') {
+ s++;
+ if (can_incline) {
+ PL_parser->linestart = s;
+ if (s == bufend)
+ need_incline = 1;
+ else
+ incline(s, bufend);
+ }
+ } else if (isSPACE(c)) {
+ s++;
+ } else if (c == 0 && s == bufend) {
+ bool got_more;
+ line_t l;
+ if (flags & LEX_NO_NEXT_CHUNK)
+ break;
+ PL_parser->bufptr = s;
+ l = CopLINE(PL_curcop);
+ CopLINE(PL_curcop) += PL_parser->herelines + 1;
+ got_more = lex_next_chunk(flags);
+ CopLINE_set(PL_curcop, l);
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ if (!got_more)
+ break;
+ if (can_incline && need_incline && PL_parser->rsfp) {
+ incline(s, bufend);
+ need_incline = 0;
+ }
+ } else if (!c) {
+ s++;
+ } else {
+ break;
+ }
}
PL_parser->bufptr = s;
}
PERL_ARGS_ASSERT_VALIDATE_PROTO;
if (!proto)
- return TRUE;
+ return TRUE;
p = SvPV(proto, len);
origlen = len;
for (; len--; p++) {
- if (!isSPACE(*p)) {
- if (must_be_last)
- proto_after_greedy_proto = TRUE;
- if (underscore) {
- if (!memCHRs(";@%", *p))
- bad_proto_after_underscore = TRUE;
- underscore = FALSE;
- }
- if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
- bad_proto = TRUE;
- }
- else {
- if (*p == '[')
- in_brackets = TRUE;
- else if (*p == ']')
- in_brackets = FALSE;
- else if ((*p == '@' || *p == '%')
+ if (!isSPACE(*p)) {
+ if (must_be_last)
+ proto_after_greedy_proto = TRUE;
+ if (underscore) {
+ if (!memCHRs(";@%", *p))
+ bad_proto_after_underscore = TRUE;
+ underscore = FALSE;
+ }
+ if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
+ bad_proto = TRUE;
+ }
+ else {
+ if (*p == '[')
+ in_brackets = TRUE;
+ else if (*p == ']')
+ in_brackets = FALSE;
+ else if ((*p == '@' || *p == '%')
&& !after_slash
&& !in_brackets )
{
- must_be_last = TRUE;
- greedy_proto = *p;
- }
- else if (*p == '_')
- underscore = TRUE;
- }
- if (*p == '\\')
- after_slash = TRUE;
- else
- after_slash = FALSE;
- }
+ must_be_last = TRUE;
+ greedy_proto = *p;
+ }
+ else if (*p == '_')
+ underscore = TRUE;
+ }
+ if (*p == '\\')
+ after_slash = TRUE;
+ else
+ after_slash = FALSE;
+ }
}
if (warn) {
- SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
- p -= origlen;
- p = SvUTF8(proto)
- ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
- origlen, UNI_DISPLAY_ISPRINT)
- : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
-
- if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
- SV *name2 = sv_2mortal(newSVsv(PL_curstname));
- sv_catpvs(name2, "::");
- sv_catsv(name2, (SV *)name);
- name = name2;
- }
-
- if (proto_after_greedy_proto)
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Prototype after '%c' for %" SVf " : %s",
- greedy_proto, SVfARG(name), p);
- if (in_brackets)
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Missing ']' in prototype for %" SVf " : %s",
- SVfARG(name), p);
- if (bad_proto)
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Illegal character in prototype for %" SVf " : %s",
- SVfARG(name), p);
- if (bad_proto_after_underscore)
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Illegal character after '_' in prototype for %" SVf " : %s",
- SVfARG(name), p);
+ SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
+ p -= origlen;
+ p = SvUTF8(proto)
+ ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
+ origlen, UNI_DISPLAY_ISPRINT)
+ : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
+
+ if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
+ SV *name2 = sv_2mortal(newSVsv(PL_curstname));
+ sv_catpvs(name2, "::");
+ sv_catsv(name2, (SV *)name);
+ name = name2;
+ }
+
+ if (proto_after_greedy_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Prototype after '%c' for %" SVf " : %s",
+ greedy_proto, SVfARG(name), p);
+ if (in_brackets)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Missing ']' in prototype for %" SVf " : %s",
+ SVfARG(name), p);
+ if (bad_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character in prototype for %" SVf " : %s",
+ SVfARG(name), p);
+ if (bad_proto_after_underscore)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character after '_' in prototype for %" SVf " : %s",
+ SVfARG(name), p);
}
return (! (proto_after_greedy_proto || bad_proto) );
COPLINE_INC_WITH_HERELINES;
if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
&& s+1 == PL_bufend && *s == ';') {
- /* fake newline in string eval */
- CopLINE_dec(PL_curcop);
- return;
+ /* fake newline in string eval */
+ CopLINE_dec(PL_curcop);
+ return;
}
if (*s++ != '#')
- return;
+ return;
while (SPACE_OR_TAB(*s))
- s++;
+ s++;
if (memBEGINs(s, (STRLEN) (end - s), "line"))
- s += sizeof("line") - 1;
+ s += sizeof("line") - 1;
else
- return;
+ return;
if (SPACE_OR_TAB(*s))
- s++;
+ s++;
else
- return;
+ return;
while (SPACE_OR_TAB(*s))
- s++;
+ s++;
if (!isDIGIT(*s))
- return;
+ return;
n = s;
while (isDIGIT(*s))
- s++;
+ s++;
if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
- return;
+ return;
while (SPACE_OR_TAB(*s))
- s++;
+ s++;
if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
- s++;
- e = t + 1;
+ s++;
+ e = t + 1;
}
else {
- t = s;
- while (*t && !isSPACE(*t))
- t++;
- e = t;
+ t = s;
+ while (*t && !isSPACE(*t))
+ t++;
+ e = t;
}
while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
- e++;
+ e++;
if (*e != '\n' && *e != '\0')
- return; /* false alarm */
+ return; /* false alarm */
if (!grok_atoUV(n, &uv, &e))
return;
line_num = ((line_t)uv) - 1;
if (t - s > 0) {
- const STRLEN len = t - s;
-
- if (!PL_rsfp && !PL_parser->filtered) {
- /* must copy *{"::_<(eval N)[oldfilename:L]"}
- * to *{"::_<newfilename"} */
- /* However, the long form of evals is only turned on by the
- debugger - usually they're "(eval %lu)" */
- GV * const cfgv = CopFILEGV(PL_curcop);
- if (cfgv) {
- char smallbuf[128];
- STRLEN tmplen2 = len;
- char *tmpbuf2;
- GV *gv2;
-
- if (tmplen2 + 2 <= sizeof smallbuf)
- tmpbuf2 = smallbuf;
- else
- Newx(tmpbuf2, tmplen2 + 2, char);
-
- tmpbuf2[0] = '_';
- tmpbuf2[1] = '<';
-
- memcpy(tmpbuf2 + 2, s, tmplen2);
- tmplen2 += 2;
-
- gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
- if (!isGV(gv2)) {
- gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
- /* adjust ${"::_<newfilename"} to store the new file name */
- GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
- /* The line number may differ. If that is the case,
- alias the saved lines that are in the array.
- Otherwise alias the whole array. */
- if (CopLINE(PL_curcop) == line_num) {
- GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
- GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
- }
- else if (GvAV(cfgv)) {
- AV * const av = GvAV(cfgv);
- const line_t start = CopLINE(PL_curcop)+1;
- SSize_t items = AvFILLp(av) - start;
- if (items > 0) {
- AV * const av2 = GvAVn(gv2);
- SV **svp = AvARRAY(av) + start;
- Size_t l = line_num+1;
- while (items-- && l < SSize_t_MAX && l == (line_t)l)
- av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
- }
- }
- }
-
- if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
- }
- }
- CopFILE_free(PL_curcop);
- CopFILE_setn(PL_curcop, s, len);
+ const STRLEN len = t - s;
+
+ if (!PL_rsfp && !PL_parser->filtered) {
+ /* must copy *{"::_<(eval N)[oldfilename:L]"}
+ * to *{"::_<newfilename"} */
+ /* However, the long form of evals is only turned on by the
+ debugger - usually they're "(eval %lu)" */
+ GV * const cfgv = CopFILEGV(PL_curcop);
+ if (cfgv) {
+ char smallbuf[128];
+ STRLEN tmplen2 = len;
+ char *tmpbuf2;
+ GV *gv2;
+
+ if (tmplen2 + 2 <= sizeof smallbuf)
+ tmpbuf2 = smallbuf;
+ else
+ Newx(tmpbuf2, tmplen2 + 2, char);
+
+ tmpbuf2[0] = '_';
+ tmpbuf2[1] = '<';
+
+ memcpy(tmpbuf2 + 2, s, tmplen2);
+ tmplen2 += 2;
+
+ gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
+ if (!isGV(gv2)) {
+ gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
+ /* adjust ${"::_<newfilename"} to store the new file name */
+ GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
+ /* The line number may differ. If that is the case,
+ alias the saved lines that are in the array.
+ Otherwise alias the whole array. */
+ if (CopLINE(PL_curcop) == line_num) {
+ GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+ GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
+ }
+ else if (GvAV(cfgv)) {
+ AV * const av = GvAV(cfgv);
+ const line_t start = CopLINE(PL_curcop)+1;
+ SSize_t items = AvFILLp(av) - start;
+ if (items > 0) {
+ AV * const av2 = GvAVn(gv2);
+ SV **svp = AvARRAY(av) + start;
+ Size_t l = line_num+1;
+ while (items-- && l < SSize_t_MAX && l == (line_t)l)
+ av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
+ }
+ }
+ }
+
+ if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
+ }
+ }
+ CopFILE_free(PL_curcop);
+ CopFILE_setn(PL_curcop, s, len);
}
CopLINE_set(PL_curcop, line_num);
}
{
AV *av = CopFILEAVx(PL_curcop);
if (av) {
- SV * sv;
- if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
- else {
- sv = *av_fetch(av, 0, 1);
- SvUPGRADE(sv, SVt_PVMG);
- }
+ SV * sv;
+ if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
+ else {
+ sv = *av_fetch(av, 0, 1);
+ SvUPGRADE(sv, SVt_PVMG);
+ }
if (!SvPOK(sv)) SvPVCLEAR(sv);
- if (orig_sv)
- sv_catsv(sv, orig_sv);
- else
- sv_catpvn(sv, buf, len);
- if (!SvIOK(sv)) {
- (void)SvIOK_on(sv);
- SvIV_set(sv, 0);
- }
- if (PL_parser->preambling == NOLINE)
- av_store(av, CopLINE(PL_curcop), sv);
+ if (orig_sv)
+ sv_catsv(sv, orig_sv);
+ else
+ sv_catpvn(sv, buf, len);
+ if (!SvIOK(sv)) {
+ (void)SvIOK_on(sv);
+ SvIV_set(sv, 0);
+ }
+ if (PL_parser->preambling == NOLINE)
+ av_store(av, CopLINE(PL_curcop), sv);
}
}
{
PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
- while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
- s++;
+ while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
+ s++;
} else {
- STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
- PL_bufptr = s;
- lex_read_space(flags | LEX_KEEP_PREVIOUS |
- (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
- LEX_NO_NEXT_CHUNK : 0));
- s = PL_bufptr;
- PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
- if (PL_linestart > PL_bufptr)
- PL_bufptr = PL_linestart;
- return s;
+ STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
+ PL_bufptr = s;
+ lex_read_space(flags | LEX_KEEP_PREVIOUS |
+ (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
+ LEX_NO_NEXT_CHUNK : 0));
+ s = PL_bufptr;
+ PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
+ if (PL_linestart > PL_bufptr)
+ PL_bufptr = PL_linestart;
+ return s;
}
return s;
}
const char *s;
if (PL_oldoldbufptr != PL_last_uni)
- return;
+ return;
while (isSPACE(*PL_last_uni))
- PL_last_uni++;
+ PL_last_uni++;
s = PL_last_uni;
while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
- s += UTF ? UTF8SKIP(s) : 1;
+ s += UTF ? UTF8SKIP(s) : 1;
if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
- return;
+ return;
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
- UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
+ "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
+ UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
}
/*
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = (OPCODE)f;
if (PL_nexttoke)
- goto lstop;
+ goto lstop;
PL_expect = x;
if (*s == '(')
- return REPORT(FUNC);
+ return REPORT(FUNC);
s = skipspace(s);
if (*s == '(')
- return REPORT(FUNC);
+ return REPORT(FUNC);
else {
- lstop:
- if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
- PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
- return REPORT(LSTOP);
+ lstop:
+ if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+ return REPORT(LSTOP);
}
}
#ifdef DEBUGGING
if (DEBUG_T_TEST) {
PerlIO_printf(Perl_debug_log, "### forced token:\n");
- tokereport(type, &NEXTVAL_NEXTTOKE);
+ tokereport(type, &NEXTVAL_NEXTTOKE);
}
#endif
assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
S_postderef(pTHX_ int const funny, char const next)
{
assert(funny == DOLSHARP
- || memCHRs("$@%&*", funny)
+ || funny == PERLY_DOLLAR
|| funny == PERLY_SNAIL
|| funny == PERLY_PERCENT_SIGN
|| funny == PERLY_AMPERSAND
+ || funny == PERLY_STAR
);
if (next == '*') {
- PL_expect = XOPERATOR;
- if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
- assert(PERLY_SNAIL == funny || '$' == funny || DOLSHARP == funny);
- PL_lex_state = LEX_INTERPEND;
- if (PERLY_SNAIL == funny)
- force_next(POSTJOIN);
- }
- force_next(next);
- PL_bufptr+=2;
+ PL_expect = XOPERATOR;
+ if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
+ assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
+ PL_lex_state = LEX_INTERPEND;
+ if (PERLY_SNAIL == funny)
+ force_next(POSTJOIN);
+ }
+ force_next(PERLY_STAR);
+ PL_bufptr+=2;
}
else {
- if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
- && !PL_lex_brackets)
- PL_lex_dojoin = 2;
- PL_expect = XOPERATOR;
- PL_bufptr++;
+ if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
+ && !PL_lex_brackets)
+ PL_lex_dojoin = 2;
+ PL_expect = XOPERATOR;
+ PL_bufptr++;
}
return funny;
}
{
int yyc = PL_parser->yychar;
if (yyc != YYEMPTY) {
- if (yyc) {
- NEXTVAL_NEXTTOKE = PL_parser->yylval;
- if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
- PL_lex_allbrackets--;
- PL_lex_brackets--;
- yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
- } else if (yyc == '('/*)*/) {
- PL_lex_allbrackets--;
- yyc |= (2<<24);
- }
- force_next(yyc);
- }
- PL_parser->yychar = YYEMPTY;
+ if (yyc) {
+ NEXTVAL_NEXTTOKE = PL_parser->yylval;
+ if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
+ PL_lex_allbrackets--;
+ PL_lex_brackets--;
+ yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
+ } else if (yyc == PERLY_PAREN_OPEN) {
+ PL_lex_allbrackets--;
+ yyc |= (2<<24);
+ }
+ force_next(yyc);
+ }
+ PL_parser->yychar = YYEMPTY;
}
}
* Arguments:
* char *start : buffer position (must be within PL_linestr)
* int token : PL_next* will be this type of bare word
- * (e.g., METHOD,BAREWORD)
+ * (e.g., METHCALL0,BAREWORD)
* int check_keyword : if true, Perl checks to make sure the word isn't
* a keyword (do this if the word is a label, e.g. goto FOO)
* int allow_pack : if true, : characters will also be allowed (require,
if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| (allow_pack && *s == ':' && s[1] == ':') )
{
- s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
- if (check_keyword) {
- char *s2 = PL_tokenbuf;
- STRLEN len2 = len;
- if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
- s2 += sizeof("CORE::") - 1;
+ s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
+ if (check_keyword) {
+ char *s2 = PL_tokenbuf;
+ STRLEN len2 = len;
+ if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
+ s2 += sizeof("CORE::") - 1;
len2 -= sizeof("CORE::") - 1;
}
- if (keyword(s2, len2, 0))
- return start;
- }
- if (token == METHOD) {
- s = skipspace(s);
- if (*s == '(')
- PL_expect = XTERM;
- else {
- PL_expect = XOPERATOR;
- }
- }
- NEXTVAL_NEXTTOKE.opval
+ if (keyword(s2, len2, 0))
+ return start;
+ }
+ if (token == METHCALL0) {
+ s = skipspace(s);
+ if (*s == '(')
+ PL_expect = XTERM;
+ else {
+ PL_expect = XOPERATOR;
+ }
+ }
+ NEXTVAL_NEXTTOKE.opval
= newSVOP(OP_CONST,0,
- S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
- NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
- force_next(token);
+ S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
+ NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
+ force_next(token);
}
return s;
}
PERL_ARGS_ASSERT_FORCE_IDENT;
if (s[0]) {
- const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
+ const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
UTF ? SVf_UTF8 : 0));
- NEXTVAL_NEXTTOKE.opval = o;
- force_next(BAREWORD);
- if (kind) {
- o->op_private = OPpCONST_ENTERED;
- /* XXX see note in pp_entereval() for why we forgo typo
- warnings if the symbol must be introduced in an eval.
- GSAR 96-10-12 */
- gv_fetchpvn_flags(s, len,
- (PL_in_eval ? GV_ADDMULTI
- : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
- kind == '$' ? SVt_PV :
- kind == PERLY_SNAIL ? SVt_PVAV :
- kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
- SVt_PVGV
- );
- }
+ NEXTVAL_NEXTTOKE.opval = o;
+ force_next(BAREWORD);
+ if (kind) {
+ o->op_private = OPpCONST_ENTERED;
+ /* XXX see note in pp_entereval() for why we forgo typo
+ warnings if the symbol must be introduced in an eval.
+ GSAR 96-10-12 */
+ gv_fetchpvn_flags(s, len,
+ (PL_in_eval ? GV_ADDMULTI
+ : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
+ kind == PERLY_DOLLAR ? SVt_PV :
+ kind == PERLY_SNAIL ? SVt_PVAV :
+ kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
+ SVt_PVGV
+ );
+ }
}
}
PERL_ARGS_ASSERT_STR_TO_VERSION;
while (start < end) {
- STRLEN skip;
- UV n;
- if (utf)
- n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
- else {
- n = *(U8*)start;
- skip = 1;
- }
- retval += ((NV)n)/nshift;
- start += skip;
- nshift *= 1000;
+ STRLEN skip;
+ UV n;
+ if (utf)
+ n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
+ else {
+ n = *(U8*)start;
+ skip = 1;
+ }
+ retval += ((NV)n)/nshift;
+ start += skip;
+ nshift *= 1000;
}
return retval;
}
d = s;
if (*d == 'v')
- d++;
+ d++;
if (isDIGIT(*d)) {
- while (isDIGIT(*d) || *d == '_' || *d == '.')
- d++;
+ while (isDIGIT(*d) || *d == '_' || *d == '.')
+ d++;
if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
- SV *ver;
+ SV *ver;
s = scan_num(s, &pl_yylval);
version = pl_yylval.opval;
- ver = cSVOPx(version)->op_sv;
- if (SvPOK(ver) && !SvNIOK(ver)) {
- SvUPGRADE(ver, SVt_PVNV);
- SvNV_set(ver, str_to_version(ver));
- SvNOK_on(ver); /* hint that it is a version */
- }
+ ver = cSVOPx(version)->op_sv;
+ if (SvPOK(ver) && !SvNIOK(ver)) {
+ SvUPGRADE(ver, SVt_PVNV);
+ SvNV_set(ver, str_to_version(ver));
+ SvNOK_on(ver); /* hint that it is a version */
+ }
+ }
+ else if (guessing) {
+ return s;
}
- else if (guessing) {
- return s;
- }
}
/* NOTE: The parser sees the package name and the VERSION swapped */
PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
while (isSPACE(*s)) /* leading whitespace */
- s++;
+ s++;
if (is_STRICT_VERSION(s,&errstr)) {
- SV *ver = newSV(0);
- s = (char *)scan_version(s, ver, 0);
- version = newSVOP(OP_CONST, 0, ver);
+ SV *ver = newSV_type(SVt_NULL);
+ s = (char *)scan_version(s, ver, 0);
+ version = newSVOP(OP_CONST, 0, ver);
}
- else if ((*s != ';' && *s != '{' && *s != '}' )
- && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
+ else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
+ && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
{
- PL_bufptr = s;
- if (errstr)
- yyerror(errstr); /* version required */
- return s;
+ PL_bufptr = s;
+ if (errstr)
+ yyerror(errstr); /* version required */
+ return s;
}
/* NOTE: The parser sees the package name and the VERSION swapped */
assert (SvLEN(sv));
assert (!SvIsCOW(sv));
if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
- goto finish;
+ goto finish;
s = SvPVX(sv);
send = SvEND(sv);
/* This is relying on the SV being "well formed" with a trailing '\0' */
while (s < send && !(*s == '\\' && s[1] == '\\'))
- s++;
+ s++;
if (s == send)
- goto finish;
+ goto finish;
d = s;
if ( PL_hints & HINT_NEW_STRING ) {
- pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
- SVs_TEMP | SvUTF8(sv));
+ pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
+ SVs_TEMP | SvUTF8(sv));
}
while (s < send) {
- if (*s == '\\') {
- if (s + 1 < send && (s[1] == '\\'))
- s++; /* all that, just for this */
- }
- *d++ = *s++;
+ if (*s == '\\') {
+ if (s + 1 < send && (s[1] == '\\'))
+ s++; /* all that, just for this */
+ }
+ *d++ = *s++;
}
*d = '\0';
SvCUR_set(sv, d - SvPVX_const(sv));
const I32 op_type = pl_yylval.ival;
if (op_type == OP_NULL) {
- pl_yylval.opval = PL_lex_op;
- PL_lex_op = NULL;
- return THING;
+ pl_yylval.opval = PL_lex_op;
+ PL_lex_op = NULL;
+ return THING;
}
if (op_type == OP_CONST) {
- SV *sv = PL_lex_stuff;
- PL_lex_stuff = NULL;
- sv = tokeq(sv);
-
- if (SvTYPE(sv) == SVt_PVIV) {
- /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
- STRLEN len;
- const char * const p = SvPV_const(sv, len);
- SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
- SvREFCNT_dec(sv);
- sv = nsv;
- }
+ SV *sv = PL_lex_stuff;
+ PL_lex_stuff = NULL;
+ sv = tokeq(sv);
+
+ if (SvTYPE(sv) == SVt_PVIV) {
+ /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
+ STRLEN len;
+ const char * const p = SvPV_const(sv, len);
+ SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
+ SvREFCNT_dec(sv);
+ sv = nsv;
+ }
pl_yylval.opval = newSVOP(op_type, 0, sv);
- return THING;
+ return THING;
}
PL_parser->lex_super_state = PL_lex_state;
PL_expect = XTERM;
if (PL_lex_op) {
- pl_yylval.opval = PL_lex_op;
- PL_lex_op = NULL;
- return PMFUNC;
+ pl_yylval.opval = PL_lex_op;
+ PL_lex_op = NULL;
+ return PMFUNC;
}
else
- return FUNC;
+ return FUNC;
}
/*
SAVEI16(PL_lex_inwhat);
if (is_heredoc)
{
- SAVECOPLINE(PL_curcop);
- SAVEI32(PL_multi_end);
- SAVEI32(PL_parser->herelines);
- PL_parser->herelines = 0;
+ SAVECOPLINE(PL_curcop);
+ SAVEI32(PL_multi_end);
+ SAVEI32(PL_parser->herelines);
+ PL_parser->herelines = 0;
}
SAVEIV(PL_multi_close);
SAVEPPTR(PL_bufptr);
/* The here-doc parser needs to be able to peek into outer lexing
scopes to find the body of the here-doc. So we put PL_linestr and
- PL_bufptr into lex_shared, to ‘share’ those values.
+ PL_bufptr into lex_shared, to 'share' those values.
*/
PL_parser->lex_shared->ls_linestr = PL_linestr;
PL_parser->lex_shared->ls_bufptr = PL_bufptr;
SAVEGENERICSV(PL_parser->lex_sub_repl);
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
- = SvPVX(PL_linestr);
+ = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
SAVEFREESV(PL_linestr);
PL_lex_starts = 0;
PL_lex_state = LEX_INTERPCONCAT;
if (is_heredoc)
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+ CopLINE_set(PL_curcop, (line_t)PL_multi_start);
PL_copline = NOLINE;
Newxz(shared, 1, LEXSHARED);
PL_lex_inwhat = PL_parser->lex_sub_inwhat;
if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
- PL_lex_inpat = PL_parser->lex_sub_op;
+ PL_lex_inpat = PL_parser->lex_sub_op;
else
- PL_lex_inpat = NULL;
+ PL_lex_inpat = NULL;
PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
PL_in_eval &= ~EVAL_RE_REPARSING;
S_sublex_done(pTHX)
{
if (!PL_lex_starts++) {
- SV * const sv = newSVpvs("");
- if (SvUTF8(PL_linestr))
- SvUTF8_on(sv);
- PL_expect = XOPERATOR;
+ SV * const sv = newSVpvs("");
+ if (SvUTF8(PL_linestr))
+ SvUTF8_on(sv);
+ PL_expect = XOPERATOR;
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
- return THING;
+ return THING;
}
if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
- PL_lex_state = LEX_INTERPCASEMOD;
- return yylex();
+ PL_lex_state = LEX_INTERPCASEMOD;
+ return yylex();
}
/* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
assert(PL_lex_inwhat != OP_TRANSR);
if (PL_lex_repl) {
- assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
- PL_linestr = PL_lex_repl;
- PL_lex_inpat = 0;
- PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
- PL_bufend += SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
- PL_lex_dojoin = FALSE;
- PL_lex_brackets = 0;
- PL_lex_allbrackets = 0;
- PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
- PL_lex_casemods = 0;
- *PL_lex_casestack = '\0';
- PL_lex_starts = 0;
- if (SvEVALED(PL_lex_repl)) {
- PL_lex_state = LEX_INTERPNORMAL;
- PL_lex_starts++;
- /* we don't clear PL_lex_repl here, so that we can check later
- whether this is an evalled subst; that means we rely on the
- logic to ensure sublex_done() is called again only via the
- branch (in yylex()) that clears PL_lex_repl, else we'll loop */
- }
- else {
- PL_lex_state = LEX_INTERPCONCAT;
- PL_lex_repl = NULL;
- }
- if (SvTYPE(PL_linestr) >= SVt_PVNV) {
- CopLINE(PL_curcop) +=
- ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
- + PL_parser->herelines;
- PL_parser->herelines = 0;
- }
- return '/';
+ assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
+ PL_linestr = PL_lex_repl;
+ PL_lex_inpat = 0;
+ PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
+ PL_bufend += SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = NULL;
+ PL_lex_dojoin = FALSE;
+ PL_lex_brackets = 0;
+ PL_lex_allbrackets = 0;
+ PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
+ PL_lex_casemods = 0;
+ *PL_lex_casestack = '\0';
+ PL_lex_starts = 0;
+ if (SvEVALED(PL_lex_repl)) {
+ PL_lex_state = LEX_INTERPNORMAL;
+ PL_lex_starts++;
+ /* we don't clear PL_lex_repl here, so that we can check later
+ whether this is an evalled subst; that means we rely on the
+ logic to ensure sublex_done() is called again only via the
+ branch (in yylex()) that clears PL_lex_repl, else we'll loop */
+ }
+ else {
+ PL_lex_state = LEX_INTERPCONCAT;
+ PL_lex_repl = NULL;
+ }
+ if (SvTYPE(PL_linestr) >= SVt_PVNV) {
+ CopLINE(PL_curcop) +=
+ ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
+ + PL_parser->herelines;
+ PL_parser->herelines = 0;
+ }
+ return PERLY_SLASH;
}
else {
- const line_t l = CopLINE(PL_curcop);
- LEAVE;
+ const line_t l = CopLINE(PL_curcop);
+ LEAVE;
if (PL_parser->sub_error_count != PL_error_count) {
if (PL_parser->sub_no_recover) {
yyquit();
NOT_REACHED;
}
}
- if (PL_multi_close == '<')
- PL_parser->herelines += l - PL_multi_end;
- PL_bufend = SvPVX(PL_linestr);
- PL_bufend += SvCUR(PL_linestr);
- PL_expect = XOPERATOR;
- return SUBLEXEND;
+ if (PL_multi_close == '<')
+ PL_parser->herelines += l - PL_multi_end;
+ PL_bufend = SvPVX(PL_linestr);
+ PL_bufend += SvCUR(PL_linestr);
+ PL_expect = XOPERATOR;
+ return SUBLEXEND;
}
}
/* charnames doesn't work well if there have been errors found */
if (PL_error_count > 0) {
- return NULL;
+ return NULL;
}
result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
SV*
Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
- const char* const e,
+ const char* e,
const bool is_utf8,
const char ** error_msg)
{
assert(e >= s);
assert(s > (char *) 3);
+ while (s < e && isBLANK(*s)) {
+ s++;
+ }
+
+ while (s < e && isBLANK(*(e - 1))) {
+ e--;
+ }
+
char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
if (!SvCUR(char_name)) {
if (! isCHARNAME_CONT(*s)) {
goto bad_charname;
}
- if (*s == ' ' && *(s-1) == ' ') {
+ if (*s == ' ' && *(s-1) == ' ') {
goto multi_spaces;
}
s++;
(if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
pass through:
- all other \-char, including \N and \N{ apart from \N{ABC}
+ all other \-char, including \N and \N{ apart from \N{ABC}
stops on:
- @ and $ where it appears to be a var, but not for $ as tail anchor
+ @ and $ where it appears to be a var, but not for $ as tail anchor
\l \L \u \U \Q \E
- (?{ or (??{
+ (?{ or (??{ or (*{
In transliterations:
characters are VERY literal, except for - not at the start or end
The structure of the code is
while (there's a character to process) {
- handle transliteration ranges
- skip regexp comments /(?#comment)/ and codes /(?{code})/
- skip #-initiated comments in //x patterns
- check for embedded arrays
- check for embedded scalars
- if (backslash) {
- deprecate \1 in substitution replacements
- handle string-changing backslashes \l \U \Q \E, etc.
- switch (what was escaped) {
- handle \- in a transliteration (becomes a literal -)
- if a pattern and not \N{, go treat as regular character
- handle \132 (octal characters)
- handle \x15 and \x{1234} (hex characters)
- handle \N{name} (named characters, also \N{3,5} in a pattern)
- handle \cV (control characters)
- handle printf-style backslashes (\f, \r, \n, etc)
- } (end switch)
- continue
- } (end if backslash)
+ handle transliteration ranges
+ skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
+ skip #-initiated comments in //x patterns
+ check for embedded arrays
+ check for embedded scalars
+ if (backslash) {
+ deprecate \1 in substitution replacements
+ handle string-changing backslashes \l \U \Q \E, etc.
+ switch (what was escaped) {
+ handle \- in a transliteration (becomes a literal -)
+ if a pattern and not \N{, go treat as regular character
+ handle \132 (octal characters)
+ handle \x15 and \x{1234} (hex characters)
+ handle \N{name} (named characters, also \N{3,5} in a pattern)
+ handle \cV (control characters)
+ handle printf-style backslashes (\f, \r, \n, etc)
+ } (end switch)
+ continue
+ } (end if backslash)
handle regular character
} (end while character to read)
STATIC char *
S_scan_const(pTHX_ char *start)
{
- char *send = PL_bufend; /* end of the constant */
+ const char * const send = PL_bufend;/* end of the constant */
SV *sv = newSV(send - start); /* sv for the constant. See note below
on sizing. */
char *s = start; /* start of the constant */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
bool in_charclass = FALSE; /* within /[...]/ */
- bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
+ const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
UTF8? But, this can show as true
when the source isn't utf8, as for
example when it is entirely composed
) {
/* get transliterations out of the way (they're most literal) */
- if (PL_lex_inwhat == OP_TRANS) {
+ if (PL_lex_inwhat == OP_TRANS) {
/* But there isn't any special handling necessary unless there is a
* range, so for most cases we just drop down and handle the value
* because each code point in it has to be processed here
* individually to get its native translation */
- if (! dorange) {
+ if (! dorange) {
/* Here, we don't think we're in a range. If the new character
* is not a hyphen; or if it is a hyphen, but it's too close to
char * max_ptr;
char * min_ptr;
IV range_min;
- IV range_max; /* last character in range */
+ IV range_max; /* last character in range */
STRLEN grow;
Size_t offset_to_min = 0;
Size_t extras = 0;
* of them */
if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
Perl_croak(aTHX_
- "Invalid range \"%c-%c\" in transliteration operator",
- (char)range_min, (char)range_max);
+ "Invalid range \"%c-%c\" in transliteration operator",
+ (char)range_min, (char)range_max);
}
#ifdef EBCDIC
else if (convert_unicode) {
/* Here the range contains at least 3 code points */
- if (d_is_utf8) {
+ if (d_is_utf8) {
/* If everything in the transliteration is below 256, we
* can avoid special handling later. A translation table
* if we have to convert to/from Unicode values */
if ( has_above_latin1
#ifdef EBCDIC
- && (range_min > 255 || ! convert_unicode)
+ && (range_min > 255 || ! convert_unicode)
#endif
) {
const STRLEN off = d - SvPVX(sv);
range_max = 255;
}
#endif
- }
+ }
/* Here we need to expand out the string to contain each
* character in the range. Grow the output to handle this.
for (i = range_min; i <= range_max; i++) {
*d++ = (char)LATIN1_TO_NATIVE((U8) i);
}
- }
- }
+ }
+ }
else
#endif
/* Always gets run for ASCII, and sometimes for EBCDIC. */
* 'utf8_variant_count' on EBCDIC (it's already been
* counted when originally parsed) */
*d++ = (char) range_max;
- }
- }
+ }
+ }
#ifdef EBCDIC
/* If the original range extended above 255, add in that
#endif
range_done:
- /* mark the range as done, and continue */
- didrange = TRUE;
- dorange = FALSE;
+ /* mark the range as done, and continue */
+ didrange = TRUE;
+ dorange = FALSE;
#ifdef EBCDIC
- non_portable_endpoint = 0;
+ non_portable_endpoint = 0;
backslash_N = 0;
#endif
- continue;
- } /* End of is a range */
+ continue;
+ } /* End of is a range */
} /* End of transliteration. Joins main code after these else's */
- else if (*s == '[' && PL_lex_inpat && !in_charclass) {
- char *s1 = s-1;
- int esc = 0;
- while (s1 >= start && *s1-- == '\\')
- esc = !esc;
- if (!esc)
- in_charclass = TRUE;
- }
- else if (*s == ']' && PL_lex_inpat && in_charclass) {
- char *s1 = s-1;
- int esc = 0;
- while (s1 >= start && *s1-- == '\\')
- esc = !esc;
- if (!esc)
- in_charclass = FALSE;
- }
+ else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = TRUE;
+ }
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
/* skip for regexp comments /(?#comment)/, except for the last
* char, which will be done separately. Stop on (?{..}) and
- * friends */
- else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
- if (s[2] == '#') {
+ * friends (??{ ... }) or (*{ ... }) */
+ else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
+ if (s[1] == '?' && s[2] == '#') {
if (s_is_utf8) {
PERL_UINT_FAST8_T len = UTF8SKIP(s);
else while (s+1 < send && *s != ')') {
*d++ = *s++;
}
- }
- else if (!PL_lex_casemods
- && ( s[2] == '{' /* This should match regcomp.c */
- || (s[2] == '?' && s[3] == '{')))
- {
- break;
- }
- }
+ }
+ else
+ if (!PL_lex_casemods &&
+ /* The following should match regcomp.c */
+ ((s[1] == '?' && (s[2] == '{' /* (?{ ... }) */
+ || (s[2] == '?' && s[3] == '{'))) || /* (??{ ... }) */
+ (s[1] == '*' && (s[2] == '{' ))) /* (*{ ... }) */
+ ){
+ break;
+ }
+ }
/* likewise skip #-initiated comments in //x patterns */
- else if (*s == '#'
+ else if (*s == '#'
&& PL_lex_inpat
&& !in_charclass
&& ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
{
- while (s < send && *s != '\n')
- *d++ = *s++;
- }
+ while (s < send && *s != '\n')
+ *d++ = *s++;
+ }
/* no further processing of single-quoted regex */
- else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
- goto default_action;
+ else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
+ goto default_action;
/* check for embedded arrays
* (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
- else if (*s == '@' && s[1]) {
- if (UTF
+ else if (*s == '@' && s[1]) {
+ if (UTF
? isIDFIRST_utf8_safe(s+1, send)
: isWORDCHAR_A(s[1]))
{
- break;
+ break;
}
- if (memCHRs(":'{$", s[1]))
- break;
- if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
- break; /* in regexp, neither @+ nor @- are interpolated */
- }
+ if (memCHRs(":'{$", s[1]))
+ break;
+ if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
+ break; /* in regexp, neither @+ nor @- are interpolated */
+ }
/* check for embedded scalars. only stop if we're sure it's a
* variable. */
- else if (*s == '$') {
- if (!PL_lex_inpat) /* not a regexp, so $ must be var */
- break;
- if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
- if (s[1] == '\\') {
- Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of $\\ in regex");
- }
- break; /* in regexp, $ might be tail anchor */
+ else if (*s == '$') {
+ if (!PL_lex_inpat) /* not a regexp, so $ must be var */
+ break;
+ if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
+ if (s[1] == '\\') {
+ Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Possible unintended interpolation of $\\ in regex");
+ }
+ break; /* in regexp, $ might be tail anchor */
}
- }
+ }
- /* End of else if chain - OP_TRANS rejoin rest */
+ /* End of else if chain - OP_TRANS rejoin rest */
if (UNLIKELY(s >= send)) {
assert(s == send);
break;
}
- /* backslashes */
- if (*s == '\\' && s+1 < send) {
- char* e; /* Can be used for ending '}', etc. */
-
- s++;
+ /* backslashes */
+ if (*s == '\\' && s+1 < send) {
+ char* bslash = s; /* point to beginning \ */
+ char* rbrace; /* point to ending '}' */
+ char* e; /* 1 past the meat (non-blanks) before the
+ brace */
+ s++;
- /* warn on \1 - \9 in substitution replacements, but note that \11
- * is an octal; and \19 is \1 followed by '9' */
- if (PL_lex_inwhat == OP_SUBST
+ /* warn on \1 - \9 in substitution replacements, but note that \11
+ * is an octal; and \19 is \1 followed by '9' */
+ if (PL_lex_inwhat == OP_SUBST
&& !PL_lex_inpat
&& isDIGIT(*s)
&& *s != '0'
&& !isDIGIT(s[1]))
- {
- /* diag_listed_as: \%d better written as $%d */
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
- *--s = '$';
- break;
- }
-
- /* string-change backslash escapes */
- if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
- --s;
- break;
- }
- /* In a pattern, process \N, but skip any other backslash escapes.
- * This is because we don't want to translate an escape sequence
- * into a meta symbol and have the regex compiler use the meta
- * symbol meaning, e.g. \x{2E} would be confused with a dot. But
- * in spite of this, we do have to process \N here while the proper
- * charnames handler is in scope. See bugs #56444 and #62056.
+ {
+ /* diag_listed_as: \%d better written as $%d */
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
+ s = bslash;
+ *s = '$';
+ break;
+ }
+
+ /* string-change backslash escapes */
+ if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
+ s = bslash;
+ break;
+ }
+ /* In a pattern, process \N, but skip any other backslash escapes.
+ * This is because we don't want to translate an escape sequence
+ * into a meta symbol and have the regex compiler use the meta
+ * symbol meaning, e.g. \x{2E} would be confused with a dot. But
+ * in spite of this, we do have to process \N here while the proper
+ * charnames handler is in scope. See bugs #56444 and #62056.
*
- * There is a complication because \N in a pattern may also stand
- * for 'match a non-nl', and not mean a charname, in which case its
- * processing should be deferred to the regex compiler. To be a
- * charname it must be followed immediately by a '{', and not look
- * like \N followed by a curly quantifier, i.e., not something like
- * \N{3,}. regcurly returns a boolean indicating if it is a legal
- * quantifier */
- else if (PL_lex_inpat
- && (*s != 'N'
- || s[1] != '{'
- || regcurly(s + 1)))
- {
- *d++ = '\\';
- goto default_action;
- }
-
- switch (*s) {
- default:
- {
- if ((isALPHANUMERIC(*s)))
- Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
- "Unrecognized escape \\%c passed through",
- *s);
- /* default action is to copy the quoted character */
- goto default_action;
- }
-
- /* eg. \132 indicates the octal constant 0132 */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- {
+ * There is a complication because \N in a pattern may also stand
+ * for 'match a non-nl', and not mean a charname, in which case its
+ * processing should be deferred to the regex compiler. To be a
+ * charname it must be followed immediately by a '{', and not look
+ * like \N followed by a curly quantifier, i.e., not something like
+ * \N{3,}. regcurly returns a boolean indicating if it is a legal
+ * quantifier */
+ else if (PL_lex_inpat
+ && (*s != 'N'
+ || s[1] != '{'
+ || regcurly(s + 1, send, NULL)))
+ {
+ *d++ = '\\';
+ goto default_action;
+ }
+
+ switch (*s) {
+ default:
+ {
+ if ((isALPHANUMERIC(*s)))
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "Unrecognized escape \\%c passed through",
+ *s);
+ /* default action is to copy the quoted character */
+ goto default_action;
+ }
+
+ /* eg. \132 indicates the octal constant 0132 */
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ {
I32 flags = PERL_SCAN_SILENT_ILLDIGIT
| PERL_SCAN_NOTIFY_ILLDIGIT;
STRLEN len = 3;
Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
form_alien_digit_msg(8, len, s, send, UTF, FALSE));
}
- }
- goto NUM_ESCAPE_INSERT;
+ }
+ goto NUM_ESCAPE_INSERT;
- /* eg. \o{24} indicates the octal constant \024 */
- case 'o':
- {
- const char* error;
+ /* eg. \o{24} indicates the octal constant \024 */
+ case 'o':
+ {
+ const char* error;
- if (! grok_bslash_o(&s, send,
+ if (! grok_bslash_o(&s, send,
&uv, &error,
NULL,
FALSE, /* Not strict */
FALSE, /* No illegal cp's */
UTF))
{
- yyerror(error);
- uv = 0; /* drop through to ensure range ends are set */
- }
- goto NUM_ESCAPE_INSERT;
- }
-
- /* eg. \x24 indicates the hex constant 0x24 */
- case 'x':
- {
- const char* error;
-
- if (! grok_bslash_x(&s, send,
+ yyerror(error);
+ uv = 0; /* drop through to ensure range ends are set */
+ }
+ goto NUM_ESCAPE_INSERT;
+ }
+
+ /* eg. \x24 indicates the hex constant 0x24 */
+ case 'x':
+ {
+ const char* error;
+
+ if (! grok_bslash_x(&s, send,
&uv, &error,
NULL,
FALSE, /* Not strict */
FALSE, /* No illegal cp's */
UTF))
{
- yyerror(error);
- uv = 0; /* drop through to ensure range ends are set */
- }
- }
+ yyerror(error);
+ uv = 0; /* drop through to ensure range ends are set */
+ }
+ }
- NUM_ESCAPE_INSERT:
- /* Insert oct or hex escaped character. */
+ NUM_ESCAPE_INSERT:
+ /* Insert oct or hex escaped character. */
- /* Here uv is the ordinal of the next character being added */
- if (UVCHR_IS_INVARIANT(uv)) {
- *d++ = (char) uv;
- }
- else {
- if (!d_is_utf8 && uv > 255) {
+ /* Here uv is the ordinal of the next character being added */
+ if (UVCHR_IS_INVARIANT(uv)) {
+ *d++ = (char) uv;
+ }
+ else {
+ if (!d_is_utf8 && uv > 255) {
/* Here, 'uv' won't fit unless we convert to UTF-8.
* If we've only seen invariants so far, all we have to
}
if (! d_is_utf8) {
- *d++ = (char)uv;
+ *d++ = (char)uv;
utf8_variant_count++;
}
- else {
+ else {
/* Usually, there will already be enough room in 'sv'
* since such escapes are likely longer than any UTF-8
* sequence they can end up as. This isn't the case on
d = SvCUR(sv) + SvGROW(sv, needed);
}
- d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
+ d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
(ckWARN(WARN_PORTABLE))
? UNICODE_WARN_PERL_EXTENDED
: 0);
- }
- }
+ }
+ }
#ifdef EBCDIC
non_portable_endpoint++;
#endif
- continue;
+ continue;
- case 'N':
+ case 'N':
/* In a non-pattern \N must be like \N{U+0041}, or it can be a
* named character, like \N{LATIN SMALL LETTER A}, or a named
* sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
* right now, while preserving the fact that it was a named
* character, so that the regex compiler knows this.
*
- * The structure of this section of code (besides checking for
- * errors and upgrading to utf8) is:
+ * The structure of this section of code (besides checking for
+ * errors and upgrading to utf8) is:
* If the named character is of the form \N{U+...}, pass it
* through if a pattern; otherwise convert the code point
* to utf8
* only done if the code point requires it to be representable.
*
* Here, 's' points to the 'N'; the test below is guaranteed to
- * succeed if we are being called on a pattern, as we already
+ * succeed if we are being called on a pattern, as we already
* know from a test above that the next character is a '{'. A
* non-pattern \N must mean 'named character', which requires
* braces */
- s++;
- if (*s != '{') {
- yyerror("Missing braces on \\N{}");
+ s++;
+ if (*s != '{') {
+ yyerror("Missing braces on \\N{}");
*d++ = '\0';
- continue;
- }
- s++;
-
- /* If there is no matching '}', it is an error. */
- if (! (e = (char *) memchr(s, '}', send - s))) {
- if (! PL_lex_inpat) {
- yyerror("Missing right brace on \\N{}");
- } else {
- yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
- }
+ continue;
+ }
+ s++;
+
+ /* If there is no matching '}', it is an error. */
+ if (! (rbrace = (char *) memchr(s, '}', send - s))) {
+ if (! PL_lex_inpat) {
+ yyerror("Missing right brace on \\N{}");
+ } else {
+ yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
+ }
yyquit(); /* Have exhausted the input. */
- }
+ }
+
+ /* Here it looks like a named character */
+ while (s < rbrace && isBLANK(*s)) {
+ s++;
+ }
- /* Here it looks like a named character */
+ e = rbrace;
+ while (s < e && isBLANK(*(e - 1))) {
+ e--;
+ }
- if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
- s += 2; /* Skip to next char after the 'U+' */
- if (PL_lex_inpat) {
+ if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
+ s += 2; /* Skip to next char after the 'U+' */
+ if (PL_lex_inpat) {
/* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
/* Check the syntax. */
- const char *orig_s;
- orig_s = s - 5;
if (!isXDIGIT(*s)) {
bad_NU:
yyerror(
"Invalid hexadecimal number in \\N{U+...}"
);
- s = e + 1;
+ s = rbrace + 1;
*d++ = '\0';
continue;
}
}
/* Pass everything through unchanged.
- * +1 is for the '}' */
- Copy(orig_s, d, e - orig_s + 1, char);
- d += e - orig_s + 1;
- }
- else { /* Not a pattern: convert the hex to string */
+ * +1 is to include the '}' */
+ Copy(bslash, d, rbrace - bslash + 1, char);
+ d += rbrace - bslash + 1;
+ }
+ else { /* Not a pattern: convert the hex to string */
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_SILENT_ILLDIGIT
- | PERL_SCAN_SILENT_OVERFLOW
- | PERL_SCAN_DISALLOW_PREFIX;
+ | PERL_SCAN_SILENT_ILLDIGIT
+ | PERL_SCAN_SILENT_OVERFLOW
+ | PERL_SCAN_DISALLOW_PREFIX;
STRLEN len = e - s;
uv = grok_hex(s, &len, &flags, NULL);
* tr/// doesn't care about Unicode rules, so no need
* there to upgrade to UTF-8 for small enough code
* points */
- if (! d_is_utf8 && ( uv > 0xFF
+ if (! d_is_utf8 && ( uv > 0xFF
|| PL_lex_inwhat != OP_TRANS))
{
- /* See Note on sizing above. */
- const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
+ /* See Note on sizing above. */
+ const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
if (utf8_variant_count == 0) {
SvUTF8_on(sv);
d = SvPVX(sv) + SvCUR(sv);
}
- d_is_utf8 = TRUE;
+ d_is_utf8 = TRUE;
has_above_latin1 = TRUE;
- }
+ }
/* Add the (Unicode) code point to the output. */
- if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
- *d++ = (char) LATIN1_TO_NATIVE(uv);
- }
- else {
+ if (OFFUNI_IS_INVARIANT(uv)) {
+ *d++ = (char) LATIN1_TO_NATIVE(uv);
+ }
+ else if (! d_is_utf8) {
+ *d++ = (char) LATIN1_TO_NATIVE(uv);
+ utf8_variant_count++;
+ }
+ else {
d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
(ckWARN(WARN_PORTABLE))
? UNICODE_WARN_PERL_EXTENDED
: 0);
}
- }
- }
- else /* Here is \N{NAME} but not \N{U+...}. */
+ }
+ }
+ else /* Here is \N{NAME} but not \N{U+...}. */
if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
{ /* Failed. We should die eventually, but for now use a NUL
to keep parsing */
const char *str = SvPV_const(res, len);
if (PL_lex_inpat) {
- if (! len) { /* The name resolved to an empty string */
+ if (! len) { /* The name resolved to an empty string */
const char empty_N[] = "\\N{_}";
Copy(empty_N, d, sizeof(empty_N) - 1, char);
d += sizeof(empty_N) - 1;
- }
- else {
- /* In order to not lose information for the regex
- * compiler, pass the result in the specially made
- * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
- * the code points in hex of each character
- * returned by charnames */
+ }
+ else {
+ /* In order to not lose information for the regex
+ * compiler, pass the result in the specially made
+ * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
+ * the code points in hex of each character
+ * returned by charnames */
- const char *str_end = str + len;
- const STRLEN off = d - SvPVX_const(sv);
+ const char *str_end = str + len;
+ const STRLEN off = d - SvPVX_const(sv);
if (! SvUTF8(res)) {
/* For the non-UTF-8 case, we can determine the
/* +1 for trailing NUL */
+ initial_len + 1
- + (STRLEN)(send - e));
+ + (STRLEN)(send - rbrace));
Copy(initial_text, d, initial_len, char);
d += initial_len;
while (str < str_end) {
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
+ output_length
- + (STRLEN)(send - e)
+ + (STRLEN)(send - rbrace)
+ 2); /* '}' + NUL */
/* And output it */
Copy(hex_string, d, output_length, char);
d = off + SvGROW(sv, off
+ output_length
- + (STRLEN)(send - e)
+ + (STRLEN)(send - rbrace)
+ 2); /* '}' + NUL */
Copy(hex_string, d, output_length, char);
d += output_length;
}
- }
+ }
- *d++ = '}'; /* Done. Add the trailing brace */
- }
- }
- else { /* Here, not in a pattern. Convert the name to a
- * string. */
+ *d++ = '}'; /* Done. Add the trailing brace */
+ }
+ }
+ else { /* Here, not in a pattern. Convert the name to a
+ * string. */
if (PL_lex_inwhat == OP_TRANS) {
str = SvPV_const(res, len);
"%.*s must not be a named sequence"
" in transliteration operator",
/* +1 to include the "}" */
- (int) (e + 1 - start), start));
+ (int) (rbrace + 1 - start), start));
*d++ = '\0';
goto end_backslash_N;
}
/* Upgrade destination to be utf8 if this new
* component is */
- if (! d_is_utf8 && SvUTF8(res)) {
- /* See Note on sizing above. */
+ if (! d_is_utf8 && SvUTF8(res)) {
+ /* See Note on sizing above. */
const STRLEN extra = len + (send - s) + 1;
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
if (utf8_variant_count == 0) {
SvUTF8_on(sv);
}
else {
sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- extra);
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ extra);
d = SvPVX(sv) + SvCUR(sv);
}
- d_is_utf8 = TRUE;
- } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
+ d_is_utf8 = TRUE;
+ } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
- /* See Note on sizing above. (NOTE: SvCUR() is not
- * set correctly here). */
- const STRLEN extra = len + (send - e) + 1;
- const STRLEN off = d - SvPVX_const(sv);
- d = off + SvGROW(sv, off + extra);
- }
- Copy(str, d, len, char);
- d += len;
- }
+ /* See Note on sizing above. (NOTE: SvCUR() is not
+ * set correctly here). */
+ const STRLEN extra = len + (send - rbrace) + 1;
+ const STRLEN off = d - SvPVX_const(sv);
+ d = off + SvGROW(sv, off + extra);
+ }
+ Copy(str, d, len, char);
+ d += len;
+ }
- SvREFCNT_dec(res);
+ SvREFCNT_dec(res);
- } /* End \N{NAME} */
+ } /* End \N{NAME} */
end_backslash_N:
#ifdef EBCDIC
backslash_N++; /* \N{} is defined to be Unicode */
#endif
- s = e + 1; /* Point to just after the '}' */
- continue;
+ s = rbrace + 1; /* Point to just after the '}' */
+ continue;
- /* \c is a control character */
- case 'c':
- s++;
- if (s < send) {
+ /* \c is a control character */
+ case 'c':
+ s++;
+ if (s < send) {
const char * message;
- if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
+ if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
yyerror(message);
yyquit(); /* Have always immediately croaked on
errors in this */
}
- d++;
- }
- else {
- yyerror("Missing control char name in \\c");
- yyquit(); /* Are at end of input, no sense continuing */
- }
+ d++;
+ }
+ else {
+ yyerror("Missing control char name in \\c");
+ yyquit(); /* Are at end of input, no sense continuing */
+ }
#ifdef EBCDIC
non_portable_endpoint++;
#endif
break;
- /* printf-style backslashes, formfeeds, newlines, etc */
- case 'b':
- *d++ = '\b';
- break;
- case 'n':
- *d++ = '\n';
- break;
- case 'r':
- *d++ = '\r';
- break;
- case 'f':
- *d++ = '\f';
- break;
- case 't':
- *d++ = '\t';
- break;
- case 'e':
- *d++ = ESC_NATIVE;
- break;
- case 'a':
- *d++ = '\a';
- break;
- } /* end switch */
-
- s++;
- continue;
- } /* end if (backslash) */
+ /* printf-style backslashes, formfeeds, newlines, etc */
+ case 'b':
+ *d++ = '\b';
+ break;
+ case 'n':
+ *d++ = '\n';
+ break;
+ case 'r':
+ *d++ = '\r';
+ break;
+ case 'f':
+ *d++ = '\f';
+ break;
+ case 't':
+ *d++ = '\t';
+ break;
+ case 'e':
+ *d++ = ESC_NATIVE;
+ break;
+ case 'a':
+ *d++ = '\a';
+ break;
+ } /* end switch */
+
+ s++;
+ continue;
+ } /* end if (backslash) */
default_action:
/* Just copy the input to the output, though we may have to convert
* If the input has the same representation in UTF-8 as not, it will be
* a single byte, and we don't care about UTF8ness; just copy the byte */
if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
- *d++ = *s++;
+ *d++ = *s++;
}
else if (! s_is_utf8 && ! d_is_utf8) {
/* If neither source nor output is UTF-8, is also a single byte,
* just copy it; but this byte counts should we later have to
* convert to UTF-8 */
- *d++ = *s++;
+ *d++ = *s++;
utf8_variant_count++;
}
else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
- const STRLEN len = UTF8SKIP(s);
+ const STRLEN len = UTF8SKIP(s);
/* We expect the source to have already been checked for
* malformedness */
const STRLEN off = d - SvPVX(sv);
const STRLEN extra = 2 + (send - s - 1) + 1;
if (off + extra > SvLEN(sv)) {
- d = off + SvGROW(sv, off + extra);
- }
+ d = off + SvGROW(sv, off + extra);
+ }
*d++ = UTF8_EIGHT_BIT_HI(*s);
*d++ = UTF8_EIGHT_BIT_LO(*s);
s++;
- }
+ }
} /* while loop to process each character */
{
SvPOK_on(sv);
if (d_is_utf8) {
- SvUTF8_on(sv);
+ SvUTF8_on(sv);
}
/* shrink the sv if we allocated more than we used */
if (SvCUR(sv) + 5 < SvLEN(sv)) {
- SvPV_shrink_to_cur(sv);
+ SvPV_shrink_to_cur(sv);
}
/* return the substring (via pl_yylval) only if we parsed anything */
if (s > start) {
- char *s2 = start;
- for (; s2 < s; s2++) {
- if (*s2 == '\n')
- COPLINE_INC_WITH_HERELINES;
- }
- SvREFCNT_inc_simple_void_NN(sv);
- if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
+ char *s2 = start;
+ for (; s2 < s; s2++) {
+ if (*s2 == '\n')
+ COPLINE_INC_WITH_HERELINES;
+ }
+ SvREFCNT_inc_simple_void_NN(sv);
+ if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
&& ! PL_parser->lex_re_reparsing)
{
- const char *const key = PL_lex_inpat ? "qr" : "q";
- const STRLEN keylen = PL_lex_inpat ? 2 : 1;
- const char *type;
- STRLEN typelen;
-
- if (PL_lex_inwhat == OP_TRANS) {
- type = "tr";
- typelen = 2;
- } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
- type = "s";
- typelen = 1;
- } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
- type = "q";
- typelen = 1;
- } else {
- type = "qq";
- typelen = 2;
- }
-
- sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
- type, typelen, NULL);
- }
+ const char *const key = PL_lex_inpat ? "qr" : "q";
+ const STRLEN keylen = PL_lex_inpat ? 2 : 1;
+ const char *type;
+ STRLEN typelen;
+
+ if (PL_lex_inwhat == OP_TRANS) {
+ type = "tr";
+ typelen = 2;
+ } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
+ type = "s";
+ typelen = 1;
+ } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
+ type = "q";
+ typelen = 1;
+ } else {
+ type = "qq";
+ typelen = 2;
+ }
+
+ sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
+ type, typelen, NULL);
+ }
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
LEAVE_with_name("scan_const");
* {4,5} (any digits around the comma) returns FALSE
* if we're in a pattern and the first char is a [
* [] returns FALSE
- * [SOMETHING] has a funky algorithm to decide whether it's a
+ * [SOMETHING] has a funky heuristic to decide whether it's a
* character class or not. It has to deal with things like
* /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
* anything else returns TRUE
{
PERL_ARGS_ASSERT_INTUIT_MORE;
+ /* This function has been mostly untouched for a long time, due to its,
+ * 'scariness', and lack of comments. khw has gone through and done some
+ * cleanup, while finding various instances of problematic behavior.
+ * Rather than change this base-level function immediately, khw has added
+ * commentary to those areas. */
+
+ /* If recursed within brackets, there is more to the expression */
if (PL_lex_brackets)
- return TRUE;
- if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
- return TRUE;
- if (*s == '-' && s[1] == '>'
- && FEATURE_POSTDEREF_QQ_IS_ENABLED
- && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
- ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
- return TRUE;
- if (*s != '{' && *s != '[')
- return FALSE;
+ return TRUE;
+
+ /* If begins with '->' ... */
+ if (s[0] == '-' && s[1] == '>') {
+
+ /* '->[' and '->{' imply more to the expression */
+ if (s[2] == '[' || s[2] == '{') {
+ return TRUE;
+ }
+
+ /* Any post deref construct implies more to the expression */
+ if ( FEATURE_POSTDEREF_QQ_IS_ENABLED
+ && ( (s[2] == '$' && ( s[3] == '*'
+ || (s[3] == '#' && s[4] == '*')))
+ || (s[2] == '@' && memCHRs("*[{", s[3])) ))
+ {
+ return TRUE;
+ }
+ }
+
+ if (s[0] != '{' && s[0] != '[')
+ return FALSE;
+
+ /* quit immediately from any errors from now on */
PL_parser->sub_no_recover = TRUE;
+
+ /* Here is '{' or '['. Outside patterns, they're always subscripts */
if (!PL_lex_inpat)
- return TRUE;
+ return TRUE;
- /* In a pattern, so maybe we have {n,m}. */
- if (*s == '{') {
- if (regcurly(s)) {
- return FALSE;
- }
- return TRUE;
+ /* In a pattern, so maybe we have {n,m}, in which case, there isn't more to
+ * the expression.
+ *
+ * khw: This assumes that anything matching regcurly is a character class.
+ * The syntax of regcurly has been loosened since this function was
+ * written, and regcurly never required a comma, as in {0}. Probably it is
+ * ok as-is */
+ if (s[0] == '{') {
+ if (regcurly(s, e, NULL)) {
+ return FALSE;
+ }
+ return TRUE;
}
- /* On the other hand, maybe we have a character class */
-
+ /* Here is '[': maybe we have a character class. Examine the guts */
s++;
- if (*s == ']' || *s == '^')
- return FALSE;
- else {
- /* this is terrifying, and it works */
- int weight;
- char seen[256];
- const char * const send = (char *) memchr(s, ']', e - s);
- unsigned char un_char, last_un_char;
- char tmpbuf[sizeof PL_tokenbuf * 4];
-
- if (!send) /* has to be an expression */
- return TRUE;
- weight = 2; /* let's weigh the evidence */
-
- if (*s == '$')
- weight -= 3;
- else if (isDIGIT(*s)) {
- if (s[1] != ']') {
- if (isDIGIT(s[1]) && s[2] == ']')
- weight -= 10;
- }
- else
- weight -= 100;
- }
- Zero(seen,256,char);
- un_char = 255;
- for (; s < send; s++) {
- last_un_char = un_char;
- un_char = (unsigned char)*s;
- switch (*s) {
- case '@':
- case '&':
- case '$':
- weight -= seen[un_char] * 10;
- if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
- int len;
- scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
- len = (int)strlen(tmpbuf);
- if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
- UTF ? SVf_UTF8 : 0, SVt_PV))
- weight -= 100;
- else
- weight -= 10;
- }
- else if (*s == '$'
- && s[1]
- && memCHRs("[#!%*<>()-=",s[1]))
- {
- if (/*{*/ memCHRs("])} =",s[2]))
- weight -= 10;
- else
- weight -= 1;
- }
- break;
- case '\\':
- un_char = 254;
- if (s[1]) {
- if (memCHRs("wds]",s[1]))
- weight += 100;
- else if (seen[(U8)'\''] || seen[(U8)'"'])
- weight += 1;
- else if (memCHRs("rnftbxcav",s[1]))
- weight += 40;
- else if (isDIGIT(s[1])) {
- weight += 40;
- while (s[1] && isDIGIT(s[1]))
- s++;
- }
- }
- else
- weight += 100;
- break;
- case '-':
- if (s[1] == '\\')
- weight += 50;
- if (memCHRs("aA01! ",last_un_char))
- weight += 30;
- if (memCHRs("zZ79~",s[1]))
- weight += 30;
- if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
- weight -= 5; /* cope with negative subscript */
- break;
- default:
- if (!isWORDCHAR(last_un_char)
- && !(last_un_char == '$' || last_un_char == '@'
- || last_un_char == '&')
- && isALPHA(*s) && s[1] && isALPHA(s[1])) {
- char *d = s;
- while (isALPHA(*s))
- s++;
- if (keyword(d, s - d, 0))
- weight -= 150;
- }
- if (un_char == last_un_char + 1)
- weight += 5;
- weight -= seen[un_char];
- break;
- }
- seen[un_char]++;
- }
- if (weight >= 0) /* probably a character class */
- return FALSE;
+
+ /* '^' implies a character class; An empty '[]' isn't legal, but it does
+ * mean there isn't more to come */
+ if (s[0] == ']' || s[0] == '^')
+ return FALSE;
+
+ /* Find matching ']'. khw: This means any s[1] below is guaranteed to
+ * exist */
+ const char * const send = (char *) memchr(s, ']', e - s);
+ if (! send) /* has to be an expression */
+ return TRUE;
+
+ /* If the construct consists entirely of one or two digits, call it a
+ * subscript. */
+ if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 || (isDIGIT(s[1])))) {
+ return TRUE;
}
+ /* this is terrifying, and it mostly works. See GH #16478.
+ *
+ * khw: That ticket shows that the heuristics here get things wrong. That
+ * most of the weights are divisible by 5 indicates that not a lot of
+ * tuning was done, and that the values are fairly arbitrary. Especially
+ * problematic are when all characters in the construct are numeric. We
+ * have [89] always resolving to a subscript, though that could well be a
+ * character class that is related to finding non-octals. And [100] is a
+ * character class when it could well be a subscript. */
+
+ int weight;
+
+ if (s[0] == '$') { /* First char is dollar; lean very slightly to it
+ being a subscript */
+ weight = -1;
+ }
+ else { /* Otherwise, lean a little more towards it being a
+ character class. */
+ weight = 2;
+ }
+
+ /* Unsigned version of current character */
+ unsigned char un_char = 0;
+
+ /* Keep track of how many multiple occurrences of the same character there
+ * are */
+ char seen[256];
+ Zero(seen, 256, char);
+
+ /* Examine each character in the construct */
+ bool first_time = true;
+ for (; s < send; s++, first_time = false) {
+ unsigned char prev_un_char = un_char;
+ un_char = (unsigned char) s[0];
+ switch (s[0]) {
+ case '@':
+ case '&':
+ case '$':
+
+ /* Each additional occurrence of one of these three strongly
+ * indicates it is a subscript */
+ weight -= seen[un_char] * 10;
+
+ /* Following one of these characters, we look to see if there is an
+ * identifier already found in the program by that name. If so,
+ * strongly suspect this isn't a character class */
+ if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
+ int len;
+ char tmpbuf[sizeof PL_tokenbuf * 4];
+ scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
+ len = (int)strlen(tmpbuf);
+ if ( len > 1
+ && gv_fetchpvn_flags(tmpbuf,
+ len,
+ UTF ? SVf_UTF8 : 0,
+ SVt_PV))
+ weight -= 100;
+ else /* Not a multi-char identifier already known in the
+ program; is somewhat likely to be a subscript */
+ weight -= 10;
+ }
+ else if ( s[0] == '$'
+ && s[1]
+ && memCHRs("[#!%*<>()-=", s[1]))
+ {
+ /* Here we have what could be a punctuation variable. If the
+ * next character after it is a closing bracket, it makes it
+ * quite likely to be that, and hence a subscript. If it is
+ * something else, more mildly a subscript */
+ if (/*{*/ memCHRs("])} =", s[2]))
+ weight -= 10;
+ else
+ weight -= 1;
+ }
+ break;
+
+ case '\\':
+ if (s[1]) {
+ if (memCHRs("wds]", s[1]))
+ weight += 100; /* \w \d \s => strongly charclass */
+ /* khw: Why not \W \D \S \h \v, etc as well? */
+ else if (seen[(U8)'\''] || seen[(U8)'"'])
+ weight += 1; /* \' => mildly charclass */
+ else if (memCHRs("abcfnrtvx", s[1]))
+ weight += 40; /* \n, etc => charclass */
+ /* khw: Why not \e etc as well? */
+ else if (isDIGIT(s[1])) {
+ weight += 40; /* \123 => charclass */
+ while (s[1] && isDIGIT(s[1]))
+ s++;
+ }
+ }
+ else /* \ followed by NUL strongly indicates character class */
+ weight += 100;
+ break;
+
+ case '-':
+ /* If it is something like '-\', it is more likely to be a
+ * character class.
+ *
+ * khw: The rest of the conditionals in this 'case' really should
+ * be subject to an 'else' of this condition */
+ if (s[1] == '\\')
+ weight += 50;
+
+ /* If it is something like 'a-' or '0-', it is more likely to
+ * be a character class. '!' is the first ASCII graphic, so '!-'
+ * would be the start of a range of graphics. */
+ if (! first_time && memCHRs("aA01! ", prev_un_char))
+ weight += 30;
+
+ /* If it is something like '-Z' or '-7' (for octal) or '-9' it
+ * is more likely to be a character class. '~' is the final ASCII
+ * graphic, so '-~' would be the end of a range of graphics.
+ *
+ * khw: Having [-z] really doesn't imply what the comments above
+ * indicate, so this should only be tested when '! first_time' */
+ if (memCHRs("zZ79~", s[1]))
+ weight += 30;
+
+ /* If it is something like -1 or -$foo, it is more likely to be a
+ * subscript. */
+ if (first_time && (isDIGIT(s[1]) || s[1] == '$')) {
+ weight -= 5; /* cope with negative subscript */
+ }
+ break;
+
+ default:
+ if ( (first_time || ( ! isWORDCHAR(prev_un_char)
+ && prev_un_char != '$'
+ && prev_un_char != '@'
+ && prev_un_char != '&'))
+ && isALPHA(s[0])
+ && isALPHA(s[1]))
+ {
+ /* Here it's \W (that isn't [$@&] ) followed immediately by two
+ * alphas in a row. Accumulate all the consecutive alphas */
+ char *d = s;
+ while (isALPHA(s[0]))
+ s++;
+
+ /* If those alphas spell a keyword, it's almost certainly not a
+ * character class */
+ if (keyword(d, s - d, 0))
+ weight -= 150;
+
+ /* khw: Should those alphas be marked as seen? */
+ }
+
+ /* Consecutive chars like [...12...] and [...ab...] are presumed
+ * more likely to be character classes */
+ if ( ! first_time
+ && ( NATIVE_TO_LATIN1(un_char)
+ == NATIVE_TO_LATIN1(prev_un_char) + 1))
+ {
+ weight += 5;
+ }
+
+ /* But repeating a character inside a character class does nothing,
+ * like [aba], so less likely that someone makes such a class, more
+ * likely that it is a subscript; the more repeats, the less
+ * likely. */
+ weight -= seen[un_char];
+ break;
+ } /* End of switch */
+
+ /* khw: 'seen' is declared as a char. This ++ can cause it to wrap.
+ * This gives different results with compilers for which a plain 'char'
+ * is actually unsigned, versus those where it is signed. I believe it
+ * is undefined behavior to wrap a 'signed'. I think it should be
+ * instead declared an unsigned int to make the chances of wrapping
+ * essentially zero.
+ *
+ * And I believe that extra backslashes are different from other
+ * repeated characters. */
+ seen[un_char]++;
+ } /* End of loop through each character of the construct */
+
+ if (weight >= 0) /* probably a character class */
+ return FALSE;
+
return TRUE;
}
* Does all the checking to disambiguate
* foo bar
* between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
- * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
+ * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
*
* First argument is the stuff after the first token, e.g. "bar".
*
char tmpbuf[sizeof PL_tokenbuf];
STRLEN len;
GV* indirgv;
- /* Mustn't actually add anything to a symbol table.
- But also don't want to "initialise" any placeholder
- constants that might already be there into full
- blown PVGVs with attached PVCV. */
+ /* Mustn't actually add anything to a symbol table.
+ But also don't want to "initialise" any placeholder
+ constants that might already be there into full
+ blown PVGVs with attached PVCV. */
GV * const gv =
- ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
+ ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
PERL_ARGS_ASSERT_INTUIT_METHOD;
return 0;
if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
- return 0;
+ return 0;
if (cv && SvPOK(cv)) {
- const char *proto = CvPROTO(cv);
- if (proto) {
- while (*proto && (isSPACE(*proto) || *proto == ';'))
- proto++;
- if (*proto == '*')
- return 0;
- }
+ const char *proto = CvPROTO(cv);
+ if (proto) {
+ while (*proto && (isSPACE(*proto) || *proto == ';'))
+ proto++;
+ if (*proto == '*')
+ return 0;
+ }
}
if (*start == '$') {
SSize_t start_off = start - SvPVX(PL_linestr);
- if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
+ if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
|| isUPPER(*PL_tokenbuf))
- return 0;
+ return 0;
/* this could be $# */
if (isSPACE(*s))
s = skipspace(s);
- PL_bufptr = SvPVX(PL_linestr) + start_off;
- PL_expect = XREF;
- return *s == '(' ? FUNCMETH : METHOD;
+ PL_bufptr = SvPVX(PL_linestr) + start_off;
+ PL_expect = XREF;
+ return *s == '(' ? METHCALL : METHCALL0;
}
- s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+ s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
/* start is the beginning of the possible filehandle/object,
* and s is the end of it
* tmpbuf is a copy of it (but with single quotes as double colons)
*/
if (!keyword(tmpbuf, len, 0)) {
- if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
- len -= 2;
- tmpbuf[len] = '\0';
- goto bare_package;
- }
- indirgv = gv_fetchpvn_flags(tmpbuf, len,
- GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
- SVt_PVCV);
- if (indirgv && SvTYPE(indirgv) != SVt_NULL
- && (!isGV(indirgv) || GvCVu(indirgv)))
- return 0;
- /* filehandle or package name makes it a method */
- if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
- s = skipspace(s);
- if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
- return 0; /* no assumptions -- "=>" quotes bareword */
+ if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
+ len -= 2;
+ tmpbuf[len] = '\0';
+ goto bare_package;
+ }
+ indirgv = gv_fetchpvn_flags(tmpbuf, len,
+ GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
+ SVt_PVCV);
+ if (indirgv && SvTYPE(indirgv) != SVt_NULL
+ && (!isGV(indirgv) || GvCVu(indirgv)))
+ return 0;
+ /* filehandle or package name makes it a method */
+ if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
+ s = skipspace(s);
+ if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
+ return 0; /* no assumptions -- "=>" quotes bareword */
bare_package:
NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
- S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
- NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
- PL_expect = XTERM;
- force_next(BAREWORD);
- PL_bufptr = s;
- return *s == '(' ? FUNCMETH : METHOD;
- }
+ S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
+ NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
+ PL_expect = XTERM;
+ force_next(BAREWORD);
+ PL_bufptr = s;
+ return *s == '(' ? METHCALL : METHCALL0;
+ }
}
return 0;
}
Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
{
if (!funcp)
- return NULL;
+ return NULL;
if (!PL_parser)
- return NULL;
+ return NULL;
if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
- Perl_croak(aTHX_ "Source filters apply only to byte streams");
+ Perl_croak(aTHX_ "Source filters apply only to byte streams");
if (!PL_rsfp_filters)
- PL_rsfp_filters = newAV();
+ PL_rsfp_filters = newAV();
if (!datasv)
- datasv = newSV(0);
+ datasv = newSV(0);
SvUPGRADE(datasv, SVt_PVIO);
IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
IoFLAGS(datasv) |= IOf_FAKE_DIRP;
DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
- FPTR2DPTR(void *, IoANY(datasv)),
- SvPV_nolen(datasv)));
+ FPTR2DPTR(void *, IoANY(datasv)),
+ SvPV_nolen(datasv)));
av_unshift(PL_rsfp_filters, 1);
av_store(PL_rsfp_filters, 0, datasv) ;
if (
- !PL_parser->filtered
+ !PL_parser->filtered
&& PL_parser->lex_flags & LEX_EVALBYTES
&& PL_bufptr < PL_bufend
) {
- const char *s = PL_bufptr;
- while (s < PL_bufend) {
- if (*s == '\n') {
- SV *linestr = PL_parser->linestr;
- char *buf = SvPVX(linestr);
- STRLEN const bufptr_pos = PL_parser->bufptr - buf;
- STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
- STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
- STRLEN const linestart_pos = PL_parser->linestart - buf;
- STRLEN const last_uni_pos =
- PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
- STRLEN const last_lop_pos =
- PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
- av_push(PL_rsfp_filters, linestr);
- PL_parser->linestr =
- newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
- buf = SvPVX(PL_parser->linestr);
- PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
- PL_parser->bufptr = buf + bufptr_pos;
- PL_parser->oldbufptr = buf + oldbufptr_pos;
- PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
- PL_parser->linestart = buf + linestart_pos;
- if (PL_parser->last_uni)
- PL_parser->last_uni = buf + last_uni_pos;
- if (PL_parser->last_lop)
- PL_parser->last_lop = buf + last_lop_pos;
- SvLEN_set(linestr, SvCUR(linestr));
- SvCUR_set(linestr, s - SvPVX(linestr));
- PL_parser->filtered = 1;
- break;
- }
- s++;
- }
+ const char *s = PL_bufptr;
+ while (s < PL_bufend) {
+ if (*s == '\n') {
+ SV *linestr = PL_parser->linestr;
+ char *buf = SvPVX(linestr);
+ STRLEN const bufptr_pos = PL_parser->bufptr - buf;
+ STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
+ STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
+ STRLEN const linestart_pos = PL_parser->linestart - buf;
+ STRLEN const last_uni_pos =
+ PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+ STRLEN const last_lop_pos =
+ PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+ av_push(PL_rsfp_filters, linestr);
+ PL_parser->linestr =
+ newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
+ buf = SvPVX(PL_parser->linestr);
+ PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
+ PL_parser->bufptr = buf + bufptr_pos;
+ PL_parser->oldbufptr = buf + oldbufptr_pos;
+ PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+ PL_parser->linestart = buf + linestart_pos;
+ if (PL_parser->last_uni)
+ PL_parser->last_uni = buf + last_uni_pos;
+ if (PL_parser->last_lop)
+ PL_parser->last_lop = buf + last_lop_pos;
+ SvLEN_set(linestr, SvCUR(linestr));
+ SvCUR_set(linestr, s - SvPVX(linestr));
+ PL_parser->filtered = 1;
+ break;
+ }
+ s++;
+ }
}
return(datasv);
}
+/*
+=for apidoc_section $filters
+=for apidoc filter_del
+
+Delete most recently added instance of the filter function argument
+
+=cut
+*/
-/* Delete most recently added instance of this filter function. */
void
Perl_filter_del(pTHX_ filter_t funcp)
{
#ifdef DEBUGGING
DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
- FPTR2DPTR(void*, funcp)));
+ FPTR2DPTR(void*, funcp)));
#endif
if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
- return;
+ return;
/* if filter is on top of stack (usual case) just pop it off */
datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
- sv_free(av_pop(PL_rsfp_filters));
+ SvREFCNT_dec(av_pop(PL_rsfp_filters));
return;
}
PERL_ARGS_ASSERT_FILTER_READ;
if (!PL_parser || !PL_rsfp_filters)
- return -1;
+ return -1;
if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
- /* Provide a default input filter to make life easy. */
- /* Note that we append to the line. This is handy. */
- DEBUG_P(PerlIO_printf(Perl_debug_log,
- "filter_read %d: from rsfp\n", idx));
- if (correct_length) {
- /* Want a block */
- int len ;
- const int old_len = SvCUR(buf_sv);
-
- /* ensure buf_sv is large enough */
- SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
- if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
- correct_length)) <= 0) {
- if (PerlIO_error(PL_rsfp))
- return -1; /* error */
- else
- return 0 ; /* end of file */
- }
- SvCUR_set(buf_sv, old_len + len) ;
- SvPVX(buf_sv)[old_len + len] = '\0';
- } else {
- /* Want a line */
+ /* Provide a default input filter to make life easy. */
+ /* Note that we append to the line. This is handy. */
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "filter_read %d: from rsfp\n", idx));
+ if (correct_length) {
+ /* Want a block */
+ int len ;
+ const int old_len = SvCUR(buf_sv);
+
+ /* ensure buf_sv is large enough */
+ SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
+ if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
+ correct_length)) <= 0) {
+ if (PerlIO_error(PL_rsfp))
+ return -1; /* error */
+ else
+ return 0 ; /* end of file */
+ }
+ SvCUR_set(buf_sv, old_len + len) ;
+ SvPVX(buf_sv)[old_len + len] = '\0';
+ } else {
+ /* Want a line */
if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
- if (PerlIO_error(PL_rsfp))
- return -1; /* error */
- else
- return 0 ; /* end of file */
- }
- }
- return SvCUR(buf_sv);
+ if (PerlIO_error(PL_rsfp))
+ return -1; /* error */
+ else
+ return 0 ; /* end of file */
+ }
+ }
+ return SvCUR(buf_sv);
}
/* Skip this filter slot if filter has been deleted */
if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
- DEBUG_P(PerlIO_printf(Perl_debug_log,
- "filter_read %d: skipped (filter deleted)\n",
- idx));
- return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "filter_read %d: skipped (filter deleted)\n",
+ idx));
+ return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
}
if (SvTYPE(datasv) != SVt_PVIO) {
- if (correct_length) {
- /* Want a block */
- const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
- if (!remainder) return 0; /* eof */
- if (correct_length > remainder) correct_length = remainder;
- sv_catpvn(buf_sv, SvEND(datasv), correct_length);
- SvCUR_set(datasv, SvCUR(datasv) + correct_length);
- } else {
- /* Want a line */
- const char *s = SvEND(datasv);
- const char *send = SvPVX(datasv) + SvLEN(datasv);
- while (s < send) {
- if (*s == '\n') {
- s++;
- break;
- }
- s++;
- }
- if (s == send) return 0; /* eof */
- sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
- SvCUR_set(datasv, s-SvPVX(datasv));
- }
- return SvCUR(buf_sv);
+ if (correct_length) {
+ /* Want a block */
+ const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
+ if (!remainder) return 0; /* eof */
+ if (correct_length > remainder) correct_length = remainder;
+ sv_catpvn(buf_sv, SvEND(datasv), correct_length);
+ SvCUR_set(datasv, SvCUR(datasv) + correct_length);
+ } else {
+ /* Want a line */
+ const char *s = SvEND(datasv);
+ const char *send = SvPVX(datasv) + SvLEN(datasv);
+ while (s < send) {
+ if (*s == '\n') {
+ s++;
+ break;
+ }
+ s++;
+ }
+ if (s == send) return 0; /* eof */
+ sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
+ SvCUR_set(datasv, s-SvPVX(datasv));
+ }
+ return SvCUR(buf_sv);
}
/* Get function pointer hidden within datasv */
funcp = DPTR2FPTR(filter_t, IoANY(datasv));
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "filter_read %d: via function %p (%s)\n",
- idx, (void*)datasv, SvPV_nolen_const(datasv)));
+ "filter_read %d: via function %p (%s)\n",
+ idx, (void*)datasv, SvPV_nolen_const(datasv)));
/* Call function. The function is expected to */
/* call "FILTER_READ(idx+1, buf_sv)" first. */
/* Return: <0:error, =0:eof, >0:not eof */
ENTER;
save_scalar(PL_errgv);
+
+ /* although this calls out to a random C function, there's a good
+ * chance that that function will call back into perl (e.g. using
+ * Filter::Util::Call). So downgrade the stack to
+ * non-reference-counted for backwards compatibility - i.e. do the
+ * equivalent of xs_wrap(), but this time we know there are no
+ * args to be passed or returned on the stack, simplifying it.
+ */
+#ifdef PERL_RC_STACK
+ assert(AvREAL(PL_curstack));
+ I32 oldbase = PL_curstackinfo->si_stack_nonrc_base;
+ I32 oldsp = PL_stack_sp - PL_stack_base;
+ if (!oldbase)
+ PL_curstackinfo->si_stack_nonrc_base = oldsp + 1;
+#endif
+
ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
+
+#ifdef PERL_RC_STACK
+ assert(oldsp == PL_stack_sp - PL_stack_base);
+ assert(AvREAL(PL_curstack));
+ assert(PL_curstackinfo->si_stack_nonrc_base ==
+ oldbase ? oldbase : oldsp + 1);
+ PL_curstackinfo->si_stack_nonrc_base = oldbase;
+#endif
+
LEAVE;
return ret;
}
#ifdef PERL_CR_FILTER
if (!PL_rsfp_filters) {
- filter_add(S_cr_textfilter,NULL);
+ filter_add(S_cr_textfilter,NULL);
}
#endif
if (PL_rsfp_filters) {
- if (!append)
+ if (!append)
SvCUR_set(sv, 0); /* start with empty line */
if (FILTER_READ(0, sv, 0) > 0)
return ( SvPVX(sv) ) ;
else
- return NULL ;
+ return NULL ;
}
else
return (sv_gets(sv, PL_rsfp, append));
/* use constant CLASS => 'MyClass' */
gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
if (gv && GvCV(gv)) {
- SV * const sv = cv_const_sv(GvCV(gv));
- if (sv)
- return gv_stashsv(sv, 0);
+ SV * const sv = cv_const_sv(GvCV(gv));
+ if (sv)
+ return gv_stashsv(sv, 0);
}
return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
PERL_ARGS_ASSERT_TOKENIZE_USE;
if (PL_expect != XSTATE)
- /* diag_listed_as: "use" not allowed in expression */
- yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
- is_use ? "use" : "no"));
+ /* diag_listed_as: "use" not allowed in expression */
+ yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
+ is_use ? "use" : "no"));
PL_expect = XTERM;
s = skipspace(s);
if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
- s = force_version(s, TRUE);
- if (*s == ';' || *s == '}'
- || (s = skipspace(s), (*s == ';' || *s == '}'))) {
- NEXTVAL_NEXTTOKE.opval = NULL;
- force_next(BAREWORD);
- }
- else if (*s == 'v') {
- s = force_word(s,BAREWORD,FALSE,TRUE);
- s = force_version(s, FALSE);
- }
+ s = force_version(s, TRUE);
+ if (*s == ';' || *s == '}'
+ || (s = skipspace(s), (*s == ';' || *s == '}'))) {
+ NEXTVAL_NEXTTOKE.opval = NULL;
+ force_next(BAREWORD);
+ }
+ else if (*s == 'v') {
+ s = force_word(s,BAREWORD,FALSE,TRUE);
+ s = force_version(s, FALSE);
+ }
}
else {
- s = force_word(s,BAREWORD,FALSE,TRUE);
- s = force_version(s, FALSE);
+ s = force_word(s,BAREWORD,FALSE,TRUE);
+ s = force_version(s, FALSE);
}
pl_yylval.ival = is_use;
return s;
}
#ifdef DEBUGGING
static const char* const exp_name[] =
- { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
- "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
- "SIGVAR", "TERMORDORDOR"
- };
+ { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
+ "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
+ "SIGVAR", "TERMORDORDOR"
+ };
#endif
#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
PL_bufend,
UTF))
{
- return;
+ return;
}
while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
|| (*s && memCHRs(" \t$#+-'\"", *s)))
s += UTF ? UTF8SKIP(s) : 1;
}
if (*s == '}' || *s == ']')
- pl_yylval.ival = OPpSLICEWARNING;
+ pl_yylval.ival = OPpSLICEWARNING;
}
#define lex_token_boundary() S_lex_token_boundary(aTHX)
PL_bufptr = s;
yyerror("Version control conflict marker");
while (s < PL_bufend && *s != '\n')
- s++;
+ s++;
return s;
}
PL_oldbufptr = s;
++s;
- NEXTVAL_NEXTTOKE.ival = 0;
+ NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
+ force_next(ASSIGNOP);
+ PL_expect = XTERM;
+ }
+ else if(*s == '/' && s[1] == '/' && s[2] == '=') {
+ PL_oldbufptr = s;
+
+ s += 3;
+ NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
+ force_next(ASSIGNOP);
+ PL_expect = XTERM;
+ }
+ else if(*s == '|' && s[1] == '|' && s[2] == '=') {
+ PL_oldbufptr = s;
+
+ s += 3;
+ NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
force_next(ASSIGNOP);
PL_expect = XTERM;
}
switch (sigil) {
case ',': TOKEN (PERLY_COMMA);
+ case '$': TOKEN (PERLY_DOLLAR);
case '@': TOKEN (PERLY_SNAIL);
case '%': TOKEN (PERLY_PERCENT_SIGN);
+ case ')': TOKEN (PERLY_PAREN_CLOSE);
default: TOKEN (sigil);
}
}
s++;
POSTDEREF(DOLSHARP);
}
- POSTDEREF('$');
+ POSTDEREF(PERLY_DOLLAR);
}
if ( s[1] == '#'
if (!PL_tokenbuf[1]) {
if (s == PL_bufend)
yyerror("Final $ should be \\$ or $name");
- PREREF('$');
+ PREREF(PERLY_DOLLAR);
}
{
} while (isSPACE(*t));
if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
STRLEN len;
- t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
- &len);
+ t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE,
+ &len, TRUE);
while (isSPACE(*t))
t++;
if ( *t == ';'
char tmpbuf[sizeof PL_tokenbuf];
int t2;
STRLEN len;
- scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+ scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
if ((t2 = keyword(tmpbuf, len, 0))) {
/* binary operators exclude handle interpretations */
switch (t2) {
else if (*s == '.' && isDIGIT(s[1]))
PL_expect = XTERM; /* e.g. print $fh .3 */
else if ((*s == '?' || *s == '-' || *s == '+')
- && !isSPACE(s[1]) && s[1] != '=')
+ && !isSPACE(s[1]) && s[1] != '=')
PL_expect = XTERM; /* e.g. print $fh -1 */
else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
&& s[1] != '/')
}
}
force_ident_maybe_lex('$');
- TOKEN('$');
+ TOKEN(PERLY_DOLLAR);
}
static int
bool have_name, have_proto;
STRLEN len;
SV *format_name = NULL;
- bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
+ bool is_method = (key == KEY_method);
+
+ /* method always implies signatures */
+ bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
SSize_t off = s-SvPVX(PL_linestr);
char *d;
{
PL_expect = XATTRBLOCK;
- d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
- &len);
+ d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
+ &len, TRUE);
if (key == KEY_format)
format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
*PL_tokenbuf = '&';
NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
force_next(BAREWORD);
}
- PREBLOCK(FORMAT);
+ PREBLOCK(KW_FORMAT);
}
/* Look for a prototype */
if ( !(*s == ':' && s[1] != ':')
&& (*s != '{' && *s != '(') && key != KEY_format)
{
- assert(key == KEY_sub || key == KEY_AUTOLOAD ||
- key == KEY_DESTROY || key == KEY_BEGIN ||
- key == KEY_UNITCHECK || key == KEY_CHECK ||
+ assert(key == KEY_sub || key == KEY_method ||
+ key == KEY_AUTOLOAD || key == KEY_DESTROY ||
+ key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
key == KEY_INIT || key == KEY_END ||
key == KEY_my || key == KEY_state ||
key == KEY_our);
PL_lex_stuff = NULL;
force_next(THING);
}
+
if (!have_name) {
if (PL_curstash)
sv_setpvs(PL_subname, "__ANON__");
else
sv_setpvs(PL_subname, "__ANON__::__ANON__");
- if (is_sigsub)
- TOKEN(ANON_SIGSUB);
+ if (is_method)
+ TOKEN(KW_METHOD_anon);
+ else if (is_sigsub)
+ TOKEN(KW_SUB_anon_sig);
else
- TOKEN(ANONSUB);
+ TOKEN(KW_SUB_anon);
}
force_ident_maybe_lex('&');
- if (is_sigsub)
- TOKEN(SIGSUB);
+ if (is_method)
+ TOKEN(KW_METHOD_named);
+ else if (is_sigsub)
+ TOKEN(KW_SUB_named_sig);
else
- TOKEN(SUB);
+ TOKEN(KW_SUB_named);
}
static int
PL_lex_state = LEX_INTERPCONCAT;
}
PL_lex_allbrackets--;
- return REPORT(')');
+ return REPORT(PERLY_PAREN_CLOSE);
}
else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
/* Got an unpaired \E */
{
PL_lex_casestack[--PL_lex_casemods] = '\0';
PL_lex_allbrackets--;
- return REPORT(')');
+ return REPORT(PERLY_PAREN_CLOSE);
}
if (PL_lex_casemods > 10)
Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
PL_lex_casestack[PL_lex_casemods] = '\0';
PL_lex_state = LEX_INTERPCONCAT;
NEXTVAL_NEXTTOKE.ival = 0;
- force_next((2<<24)|'(');
+ force_next((2<<24)|PERLY_PAREN_OPEN);
if (*s == 'l')
NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
else if (*s == 'u')
{
if (GvIMPORTED_CV(gv))
ogv = gv;
- else if (! CvMETHOD(cv))
+ else if (! CvNOWARN_AMBIGUOUS(cv))
hgv = gv;
}
if (!ogv
TOKEN(ARROW);
}
if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
- s = force_word(s,METHOD,FALSE,TRUE);
+ s = force_word(s,METHCALL0,FALSE,TRUE);
TOKEN(ARROW);
}
else if (*s == '$')
yyl_star(pTHX_ char *s)
{
if (PL_expect == XPOSTDEREF)
- POSTDEREF('*');
+ POSTDEREF(PERLY_STAR);
if (PL_expect != XOPERATOR) {
s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
PL_expect = XOPERATOR;
- force_ident(PL_tokenbuf, '*');
+ force_ident(PL_tokenbuf, PERLY_STAR);
if (!*PL_tokenbuf)
- PREREF('*');
- TERM('*');
+ PREREF(PERLY_STAR);
+ TERM(PERLY_STAR);
}
s++;
s = skipspace(s);
attrs = NULL;
while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
- bool sig = PL_parser->sig_seen;
I32 tmp;
SV *sv;
STRLEN len;
- char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
+ char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
if (tmp < 0) tmp = -tmp;
switch (tmp) {
if (*d == '(') {
d = scan_str(d,TRUE,TRUE,FALSE,NULL);
if (!d) {
- if (attrs)
- op_free(attrs);
- sv_free(sv);
+ op_free(attrs);
+ ASSUME(sv && SvREFCNT(sv) == 1);
+ SvREFCNT_dec(sv);
Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
}
COPLINE_SET_FROM_MULTI_END;
PL_lex_stuff = NULL;
}
else {
- /* NOTE: any CV attrs applied here need to be part of
- the CVf_BUILTIN_ATTRS define in cv.h! */
- if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
- sv_free(sv);
- if (!sig)
- CvLVALUE_on(PL_compcv);
- }
- else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
- sv_free(sv);
- if (!sig)
- CvMETHOD_on(PL_compcv);
- }
- else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
- sv_free(sv);
- if (!sig) {
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
- ":const is experimental"
- );
- CvANONCONST_on(PL_compcv);
- if (!CvANON(PL_compcv))
- yyerror(":const is not permitted on named "
- "subroutines");
- }
- }
- /* After we've set the flags, it could be argued that
- we don't need to do the attributes.pm-based setting
- process, and shouldn't bother appending recognized
- flags. To experiment with that, uncomment the
- following "else". (Note that's already been
- uncommented. That keeps the above-applied built-in
- attributes from being intercepted (and possibly
- rejected) by a package's attribute routines, but is
- justified by the performance win for the common case
- of applying only built-in attributes.) */
- else
- attrs = op_append_elem(OP_LIST, attrs,
- newSVOP(OP_CONST, 0,
- sv));
+ attrs = op_append_elem(OP_LIST, attrs,
+ newSVOP(OP_CONST, 0, sv));
}
s = skipspace(d);
if (*s == ':' && s[1] != ':')
if (*s != ';'
&& *s != '}'
&& !(PL_expect == XOPERATOR
- ? (*s == '=' || *s == ')')
- : (*s == '{' || *s == '(')))
+ /* if an operator is expected, permit =, //= and ||= or ) to end */
+ ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
+ : (*s == '{' || *s == '(')))
{
const char q = ((*s == '\'') ? '"' : '\'');
/* If here for an expression, and parsed no attrs, back off. */
? Perl_form(aTHX_ "Invalid separator character "
"%c%c%c in attribute list", q, *s, q)
: "Unterminated attribute list" ) );
- if (attrs)
- op_free(attrs);
+ op_free(attrs);
OPERATOR(PERLY_COLON);
}
if (PL_parser->sig_seen) {
/* see comment about about sig_seen and parser error
* handling */
- if (attrs)
- op_free(attrs);
+ op_free(attrs);
Perl_croak(aTHX_ "Subroutine attributes must come "
"before the signature");
}
}
if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
STRLEN len;
- d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
- FALSE, &len);
+ d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
+ FALSE, &len, FALSE);
while (d < PL_bufend && SPACE_OR_TAB(*d))
d++;
if (*d == '}') {
/* This hack is to get the ${} in the message. */
PL_bufptr = s+1;
yyerror("syntax error");
+ yyquit();
break;
}
OPERATOR(HASHBRACK);
TOKEN(0);
s += 2;
Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
- "Smartmatch is experimental");
+ packWARN(WARN_DEPRECATED__SMARTMATCH),
+ "Smartmatch is deprecated");
NCEop(OP_SMARTMATCH);
}
s++;
PL_expect = XTERM;
s = skipspace(s);
PL_lex_allbrackets++;
- TOKEN('(');
+ TOKEN(PERLY_PAREN_OPEN);
}
static int
PL_lex_allbrackets--;
s = skipspace(s);
if (*s == '{')
- PREBLOCK(')');
- TERM(')');
+ PREBLOCK(PERLY_PAREN_CLOSE);
+ TERM(PERLY_PAREN_CLOSE);
}
static int
loc = PerlIO_tell(PL_rsfp);
(void)PerlIO_seek(PL_rsfp, 0L, 0);
}
- if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
+ if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
if (loc > 0)
PerlIO_seek(PL_rsfp, loc, 0);
}
PL_last_uni = PL_oldbufptr;
PL_last_lop_op = OP_REQUIRE;
s = skipspace(s);
- return REPORT( (int)REQUIRE );
+ return REPORT( (int)KW_REQUIRE );
}
static int
if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char *p = s;
SSize_t s_off = s - SvPVX(PL_linestr);
- STRLEN len;
-
- if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
- p += 2;
+ bool paren_is_valid = FALSE;
+ bool maybe_package = FALSE;
+ bool saw_core = FALSE;
+ bool core_valid = FALSE;
+
+ if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
+ saw_core = TRUE;
+ p += 6;
+ }
+ if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
+ core_valid = TRUE;
+ paren_is_valid = TRUE;
+ if (isSPACE(p[2])) {
+ p = skipspace(p + 3);
+ maybe_package = TRUE;
+ }
+ else {
+ p += 2;
+ }
}
- else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
- p += 3;
+ else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
+ core_valid = TRUE;
+ if (isSPACE(p[3])) {
+ p = skipspace(p + 4);
+ maybe_package = TRUE;
+ }
+ else {
+ p += 3;
+ }
}
-
- p = skipspace(p);
- /* skip optional package name, as in "for my abc $x (..)" */
- if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
- p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
- p = skipspace(p);
+ else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
+ core_valid = TRUE;
+ if (isSPACE(p[5])) {
+ p = skipspace(p + 6);
+ }
+ else {
+ p += 5;
+ }
}
- if (*p != '$' && *p != '\\')
+ if (saw_core && !core_valid) {
Perl_croak(aTHX_ "Missing $ on loop variable");
+ }
+ if (maybe_package && !saw_core) {
+ /* skip optional package name, as in "for my abc $x (..)" */
+ if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
+ STRLEN len;
+ p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
+ p = skipspace(p);
+ paren_is_valid = FALSE;
+ }
+ }
+
+ if (UNLIKELY(paren_is_valid && *p == '(')) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__FOR_LIST),
+ "for my (...) is experimental");
+ }
+ else if (UNLIKELY(*p != '$' && *p != '\\')) {
+ /* "for myfoo (" will end up here, but with p pointing at the 'f' */
+ Perl_croak(aTHX_ "Missing $ on loop variable");
+ }
/* The buffer may have been reallocated, update s */
s = SvPVX(PL_linestr) + s_off;
}
- OPERATOR(FOR);
+ OPERATOR(KW_FOR);
}
static int
{
s = skipspace(s);
if (*s == '{')
- PRETERMBLOCK(DO);
+ PRETERMBLOCK(KW_DO);
if (*s != '\'') {
char *d;
STRLEN len;
*PL_tokenbuf = '&';
- d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
- 1, &len);
+ d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
+ 1, &len, TRUE);
if (len && memNEs(PL_tokenbuf+1, len, "CORE")
&& !keyword(PL_tokenbuf + 1, len, 0)) {
SSize_t off = s-SvPVX(PL_linestr);
pl_yylval.ival = 1;
else
pl_yylval.ival = 0;
- OPERATOR(DO);
+ OPERATOR(KW_DO);
}
static int
s = skipspace(s);
if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
STRLEN len;
- s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
+ s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
if (memEQs(PL_tokenbuf, len, "sub"))
return yyl_sub(aTHX_ s, my);
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
"Declaring references is experimental");
}
- OPERATOR(MY);
+ OPERATOR(KW_MY);
}
static int yyl_try(pTHX_ char*);
if (*s == '\'' || (*s == ':' && s[1] == ':')) {
STRLEN morelen;
- s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
- TRUE, &morelen);
+ s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
+ TRUE, &morelen, TRUE);
if (no_op_error) {
no_op("Bareword",s);
no_op_error = FALSE;
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
PL_expect = XBLOCKTERM;
PL_bufptr = s;
- return REPORT(METHOD);
+ return REPORT(METHCALL0);
}
/* If followed by a bareword, see if it looks like indir obj. */
else SvUTF8_off(c.sv);
}
op_free(c.rv2cv_op);
- if (key == METHOD && !PL_lex_allbrackets
+ if (key == METHCALL0 && !PL_lex_allbrackets
&& PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
{
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
case KEY___LINE__:
FUN0OP(
newSVOP(OP_CONST, 0,
- Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
+ Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))
);
case KEY___PACKAGE__:
return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
case KEY___SUB__:
+ /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
+ * OP_CONST. We need to make it big enough to allow room for that if
+ * so */
FUN0OP(CvCLONE(PL_compcv)
? newOP(OP_RUNCV, 0)
- : newPVOP(OP_RUNCV,0,NULL));
+ : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
+
+ case KEY___CLASS__:
+ FUN0(OP_CLASSNAME);
case KEY_AUTOLOAD:
case KEY_DESTROY:
return yyl_sub(aTHX_ PL_bufptr, key);
return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
+ case KEY_ADJUST:
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
+
+ /* The way that KEY_CHECK et.al. are handled currently are nothing
+ * short of crazy. We won't copy that model for new phasers, but use
+ * this as an experiment to test if this will work
+ */
+ PHASERBLOCK(KEY_ADJUST);
+
case KEY_abs:
UNI(OP_ABS);
case KEY_break:
FUN0(OP_BREAK);
+ case KEY_catch:
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
+ PREBLOCK(KW_CATCH);
+
case KEY_chop:
UNI(OP_CHOP);
+ case KEY_class:
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
+
+ s = force_word(s,BAREWORD,FALSE,TRUE);
+ s = skipspace(s);
+ s = force_strict_version(s);
+ PL_expect = XATTRBLOCK;
+ TOKEN(KW_CLASS);
+
case KEY_continue:
/* We have to disambiguate the two senses of
"continue". If the next token is a '{' then
*/
s = skipspace(s);
if (*s == '{')
- PREBLOCK(CONTINUE);
+ PREBLOCK(KW_CONTINUE);
else
FUN0(OP_CONTINUE);
UNI(OP_CHROOT);
case KEY_default:
- PREBLOCK(DEFAULT);
+ PREBLOCK(KW_DEFAULT);
+
+ case KEY_defer:
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
+ PREBLOCK(KW_DEFER);
case KEY_do:
return yyl_do(aTHX_ s, orig_keyword);
LOOPX(OP_DUMP);
case KEY_else:
- PREBLOCK(ELSE);
+ PREBLOCK(KW_ELSE);
case KEY_elsif:
pl_yylval.ival = CopLINE(PL_curcop);
- OPERATOR(ELSIF);
+ OPERATOR(KW_ELSIF);
case KEY_eq:
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
case KEY_endgrent:
FUN0(OP_EGRENT);
+ case KEY_field:
+ /* TODO: maybe this should use the same parser/grammar structures as
+ * `my`, but it's also rather messy because of the `our` conflation
+ */
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
+
+ croak_kw_unless_class("field");
+
+ PL_parser->in_my = KEY_field;
+ OPERATOR(KW_FIELD);
+
+ case KEY_finally:
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
+ PREBLOCK(KW_FINALLY);
+
case KEY_for:
case KEY_foreach:
return yyl_foreach(aTHX_ s);
case KEY_given:
pl_yylval.ival = CopLINE(PL_curcop);
- Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
- "given is experimental");
- OPERATOR(GIVEN);
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
+ "given is deprecated");
+ OPERATOR(KW_GIVEN);
case KEY_glob:
LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
- OPERATOR(IF);
+ OPERATOR(KW_IF);
case KEY_index:
LOP(OP_INDEX,XTERM);
LOP(OP_IOCTL,XTERM);
case KEY_isa:
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
NCRop(OP_ISA);
case KEY_join:
UNI(OP_LCFIRST);
case KEY_local:
- OPERATOR(LOCAL);
+ OPERATOR(KW_LOCAL);
case KEY_length:
UNI(OP_LENGTH);
case KEY_no:
s = tokenize_use(0, s);
- TOKEN(USE);
+ TOKEN(KW_USE_or_NO);
case KEY_not:
if (*s == '(' || (s = skipspace(s), *s == '('))
s = skipspace(s);
if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
const char *t;
- char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
+ char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
for (t=d; isSPACE(*t);)
t++;
if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
s = force_word(s,BAREWORD,FALSE,TRUE);
s = skipspace(s);
s = force_strict_version(s);
- PREBLOCK(PACKAGE);
+ PREBLOCK(KW_PACKAGE);
case KEY_pipe:
LOP(OP_PIPE_OP,XTERM);
case KEY_substr:
LOP(OP_SUBSTR,XTERM);
+ case KEY_method:
+ /* For now we just treat 'method' identical to 'sub' plus a warning */
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
+ return yyl_sub(aTHX_ s, KEY_method);
+
case KEY_format:
case KEY_sub:
return yyl_sub(aTHX_ s, key);
case KEY_truncate:
LOP(OP_TRUNCATE,XTERM);
+ case KEY_try:
+ pl_yylval.ival = CopLINE(PL_curcop);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
+ PREBLOCK(KW_TRY);
+
case KEY_uc:
UNI(OP_UC);
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
- OPERATOR(UNTIL);
+ OPERATOR(KW_UNTIL);
case KEY_unless:
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
- OPERATOR(UNLESS);
+ OPERATOR(KW_UNLESS);
case KEY_unlink:
LOP(OP_UNLINK,XTERM);
case KEY_use:
s = tokenize_use(1, s);
- TOKEN(USE);
+ TOKEN(KW_USE_or_NO);
case KEY_values:
UNI(OP_VALUES);
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
- "when is experimental");
- OPERATOR(WHEN);
+ packWARN(WARN_DEPRECATED__SMARTMATCH),
+ "when is deprecated");
+ OPERATOR(KW_WHEN);
case KEY_while:
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
- OPERATOR(WHILE);
+ OPERATOR(KW_WHILE);
case KEY_warn:
PL_hints |= HINT_BLOCK_SCOPE;
STRLEN olen = len;
char *d = s;
s += 2;
- s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
+ s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
if ((*s == ':' && s[1] == ':')
|| (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
{
return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
}
+struct Perl_custom_infix_result {
+ struct Perl_custom_infix *def;
+ SV *parsedata;
+};
+
+static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
+{
+ enum Perl_custom_infix_precedence prec = def->prec;
+ if(prec <= INFIX_PREC_LOW)
+ return PLUGIN_LOW_OP;
+ if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
+ return PLUGIN_LOGICAL_OR_LOW_OP;
+ if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
+ return PLUGIN_LOGICAL_AND_LOW_OP;
+ if(prec <= INFIX_PREC_ASSIGN)
+ return PLUGIN_ASSIGN_OP;
+ if(prec <= INFIX_PREC_LOGICAL_OR)
+ return PLUGIN_LOGICAL_OR_OP;
+ if(prec <= INFIX_PREC_LOGICAL_AND)
+ return PLUGIN_LOGICAL_AND_OP;
+ if(prec <= INFIX_PREC_REL)
+ return PLUGIN_REL_OP;
+ if(prec <= INFIX_PREC_ADD)
+ return PLUGIN_ADD_OP;
+ if(prec <= INFIX_PREC_MUL)
+ return PLUGIN_MUL_OP;
+ if(prec <= INFIX_PREC_POW)
+ return PLUGIN_POW_OP;
+ return PLUGIN_HIGH_OP;
+}
+
+OP *
+Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
+{
+ PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
+
+ struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
+ SAVEFREEPV(result);
+ if(result->parsedata)
+ SAVEFREESV(result->parsedata);
+
+ return (*result->def->build_op)(aTHX_
+ &result->parsedata, lhs, rhs, result->def);
+}
+
static int
yyl_keylookup(pTHX_ char *s, GV *gv)
{
c.gv = gv;
PL_bufptr = s;
- s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
+ s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
/* Some keywords can be followed by any delimiter, including ':' */
anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
}
}
+ /* Check for plugged-in named operator */
+ if(PLUGINFIX_IS_ENABLED) {
+ struct Perl_custom_infix *def;
+ STRLEN result;
+ result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
+ if(result) {
+ if(result != len)
+ Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
+ result, PL_tokenbuf);
+ PL_bufptr = s = d;
+ struct Perl_custom_infix_result *result;
+ Newx(result, 1, struct Perl_custom_infix_result);
+ result->def = def;
+ result->parsedata = NULL;
+ if(def->parse) {
+ (*def->parse)(aTHX_ &result->parsedata, def);
+ s = PL_bufptr; /* restore local s variable */
+ }
+ pl_yylval.pval = result;
+ CLINE;
+ OPERATOR(tokentype_for_plugop(def));
+ }
+ }
+
/* Is this a label? */
if (!anydelim && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
SVt_PVCV);
c.off = 0;
if (!c.gv) {
- sv_free(c.sv);
+ ASSUME(c.sv && SvREFCNT(c.sv) == 1);
+ SvREFCNT_dec(c.sv);
c.sv = NULL;
return yyl_just_a_word(aTHX_ s, len, 0, c);
}
int tok;
retry:
+ /* Check for plugged-in symbolic operator */
+ if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
+ struct Perl_custom_infix *def;
+ char *s_end = s, *d = PL_tokenbuf;
+ STRLEN len;
+
+ /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
+ while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
+ *d++ = *s_end++;
+ *d = '\0';
+
+ if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
+ s += len;
+ struct Perl_custom_infix_result *result;
+ Newx(result, 1, struct Perl_custom_infix_result);
+ result->def = def;
+ result->parsedata = NULL;
+ if(def->parse) {
+ PL_bufptr = s;
+ (*def->parse)(aTHX_ &result->parsedata, def);
+ s = PL_bufptr; /* restore local s variable */
+ }
+ pl_yylval.pval = result;
+ CLINE;
+ OPERATOR(tokentype_for_plugop(def));
+ }
+ }
+
switch (*s) {
default:
if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
goto retry;
case 0:
- if ((!PL_rsfp || PL_lex_inwhat)
- && (!PL_parser->filtered || s+1 < PL_bufend)) {
- PL_last_uni = 0;
- PL_last_lop = 0;
- if (PL_lex_brackets
+ if ((!PL_rsfp || PL_lex_inwhat)
+ && (!PL_parser->filtered || s+1 < PL_bufend)) {
+ PL_last_uni = 0;
+ PL_last_lop = 0;
+ if (PL_lex_brackets
&& PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
{
- yyerror((const char *)
- (PL_lex_formbrack
- ? "Format not terminated"
- : "Missing right curly or square bracket"));
- }
+ yyerror((const char *)
+ (PL_lex_formbrack
+ ? "Format not terminated"
+ : "Missing right curly or square bracket"));
+ }
DEBUG_T({
PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
});
- TOKEN(0);
- }
- if (s++ < PL_bufend)
- goto retry; /* ignore stray nulls */
- PL_last_uni = 0;
- PL_last_lop = 0;
- if (!PL_in_eval && !PL_preambled) {
- PL_preambled = TRUE;
- if (PL_perldb) {
- /* Generate a string of Perl code to load the debugger.
- * If PERL5DB is set, it will return the contents of that,
- * otherwise a compile-time require of perl5db.pl. */
-
- const char * const pdb = PerlEnv_getenv("PERL5DB");
-
- if (pdb) {
- sv_setpv(PL_linestr, pdb);
- sv_catpvs(PL_linestr,";");
- } else {
- SETERRNO(0,SS_NORMAL);
- sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
- }
- PL_parser->preambling = CopLINE(PL_curcop);
- } else
+ TOKEN(0);
+ }
+ if (s++ < PL_bufend)
+ goto retry; /* ignore stray nulls */
+ PL_last_uni = 0;
+ PL_last_lop = 0;
+ if (!PL_in_eval && !PL_preambled) {
+ PL_preambled = TRUE;
+ if (PL_perldb) {
+ /* Generate a string of Perl code to load the debugger.
+ * If PERL5DB is set, it will return the contents of that,
+ * otherwise a compile-time require of perl5db.pl. */
+
+ const char * const pdb = PerlEnv_getenv("PERL5DB");
+
+ if (pdb) {
+ sv_setpv(PL_linestr, pdb);
+ sv_catpvs(PL_linestr,";");
+ } else {
+ SETERRNO(0,SS_NORMAL);
+ sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
+ }
+ PL_parser->preambling = CopLINE(PL_curcop);
+ } else
SvPVCLEAR(PL_linestr);
- if (PL_preambleav) {
- SV **svp = AvARRAY(PL_preambleav);
- SV **const end = svp + AvFILLp(PL_preambleav);
- while(svp <= end) {
- sv_catsv(PL_linestr, *svp);
- ++svp;
- sv_catpvs(PL_linestr, ";");
- }
- sv_free(MUTABLE_SV(PL_preambleav));
- PL_preambleav = NULL;
- }
- if (PL_minus_E)
- sv_catpvs(PL_linestr,
- "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
- if (PL_minus_n || PL_minus_p) {
- sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
- if (PL_minus_l)
- sv_catpvs(PL_linestr,"chomp;");
- if (PL_minus_a) {
- if (PL_minus_F) {
+ if (PL_preambleav) {
+ SV **svp = AvARRAY(PL_preambleav);
+ SV **const end = svp + AvFILLp(PL_preambleav);
+ while(svp <= end) {
+ sv_catsv(PL_linestr, *svp);
+ ++svp;
+ sv_catpvs(PL_linestr, ";");
+ }
+ SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
+ PL_preambleav = NULL;
+ }
+ if (PL_minus_E)
+ sv_catpvs(PL_linestr,
+ "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
+ if (PL_minus_n || PL_minus_p) {
+ sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
+ if (PL_minus_l)
+ sv_catpvs(PL_linestr,"chomp;");
+ if (PL_minus_a) {
+ if (PL_minus_F) {
if ( ( *PL_splitstr == '/'
|| *PL_splitstr == '\''
|| *PL_splitstr == '"')
&& strchr(PL_splitstr + 1, *PL_splitstr))
{
/* strchr is ok, because -F pattern can't contain
- * embeddded NULs */
- Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
+ * embedded NULs */
+ Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
+ }
+ else {
+ /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
+ bytes can be used as quoting characters. :-) */
+ const char *splits = PL_splitstr;
+ sv_catpvs(PL_linestr, "our @F=split(q\0");
+ do {
+ /* Need to \ \s */
+ if (*splits == '\\')
+ sv_catpvn(PL_linestr, splits, 1);
+ sv_catpvn(PL_linestr, splits, 1);
+ } while (*splits++);
+ /* This loop will embed the trailing NUL of
+ PL_linestr as the last thing it does before
+ terminating. */
+ sv_catpvs(PL_linestr, ");");
}
- else {
- /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
- bytes can be used as quoting characters. :-) */
- const char *splits = PL_splitstr;
- sv_catpvs(PL_linestr, "our @F=split(q\0");
- do {
- /* Need to \ \s */
- if (*splits == '\\')
- sv_catpvn(PL_linestr, splits, 1);
- sv_catpvn(PL_linestr, splits, 1);
- } while (*splits++);
- /* This loop will embed the trailing NUL of
- PL_linestr as the last thing it does before
- terminating. */
- sv_catpvs(PL_linestr, ");");
- }
- }
- else
- sv_catpvs(PL_linestr,"our @F=split(' ');");
- }
- }
- sv_catpvs(PL_linestr, "\n");
- PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
- if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
- update_debugger_info(PL_linestr, NULL, 0);
- goto retry;
- }
+ }
+ else
+ sv_catpvs(PL_linestr,"our @F=split(' ');");
+ }
+ }
+ sv_catpvs(PL_linestr, "\n");
+ PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = NULL;
+ if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
+ update_debugger_info(PL_linestr, NULL, 0);
+ goto retry;
+ }
if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
return tok;
goto retry_bufptr;
case '\r':
#ifdef PERL_STRICT_CR
- Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
- Perl_croak(aTHX_
+ Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
+ Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
case ' ': case '\t': case '\f': case '\v':
- s++;
- goto retry;
+ s++;
+ goto retry;
case '#':
case '\n': {
return yyl_tilde(aTHX_ s);
case ',':
- if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
- TOKEN(0);
- s++;
- OPERATOR(PERLY_COMMA);
+ if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
+ TOKEN(0);
+ s++;
+ OPERATOR(PERLY_COMMA);
case ':':
- if (s[1] == ':')
+ if (s[1] == ':')
return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
return yyl_colon(aTHX_ s + 1);
return yyl_leftparen(aTHX_ s + 1);
case ';':
- if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
- TOKEN(0);
- CLINE;
- s++;
- PL_expect = XSTATE;
- TOKEN(PERLY_SEMICOLON);
-
- case ')':
- return yyl_rightparen(aTHX_ s);
-
- case ']':
+ if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
+ TOKEN(0);
+ CLINE;
+ s++;
+ PL_expect = XSTATE;
+ TOKEN(PERLY_SEMICOLON);
+
+ case ')':
+ return yyl_rightparen(aTHX_ s);
+
+ case ']':
return yyl_rightsquare(aTHX_ s);
case '{':
return yyl_leftcurly(aTHX_ s + 1, 0);
case '}':
- if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
- TOKEN(0);
+ if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
+ TOKEN(0);
return yyl_rightcurly(aTHX_ s, 0);
case '&':
goto retry;
}
- s++;
- {
- const char tmp = *s++;
- if (tmp == '=') {
- if (!PL_lex_allbrackets
+ s++;
+ {
+ const char tmp = *s++;
+ if (tmp == '=') {
+ if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
{
- s -= 2;
- TOKEN(0);
- }
- ChEop(OP_EQ);
- }
- if (tmp == '>') {
- if (!PL_lex_allbrackets
+ s -= 2;
+ TOKEN(0);
+ }
+ ChEop(OP_EQ);
+ }
+ if (tmp == '>') {
+ if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
{
- s -= 2;
- TOKEN(0);
- }
- OPERATOR(PERLY_COMMA);
- }
- if (tmp == '~')
- PMop(OP_MATCH);
- if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
- && memCHRs("+-*/%.^&|<",tmp))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Reversed %c= operator",(int)tmp);
- s--;
- if (PL_expect == XSTATE
+ s -= 2;
+ TOKEN(0);
+ }
+ OPERATOR(PERLY_COMMA);
+ }
+ if (tmp == '~')
+ PMop(OP_MATCH);
+ if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
+ && memCHRs("+-*/%.^&|<",tmp))
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Reversed %c= operator",(int)tmp);
+ s--;
+ if (PL_expect == XSTATE
&& isALPHA(tmp)
&& (s == PL_linestart+1 || s[-2] == '\n') )
{
PL_parser->in_pod = 1;
goto retry;
}
- }
- if (PL_expect == XBLOCK) {
- const char *t = s;
+ }
+ if (PL_expect == XBLOCK) {
+ const char *t = s;
#ifdef PERL_STRICT_CR
- while (SPACE_OR_TAB(*t))
+ while (SPACE_OR_TAB(*t))
#else
- while (SPACE_OR_TAB(*t) || *t == '\r')
+ while (SPACE_OR_TAB(*t) || *t == '\r')
#endif
- t++;
- if (*t == '\n' || *t == '#') {
- ENTER_with_name("lex_format");
- SAVEI8(PL_parser->form_lex_state);
- SAVEI32(PL_lex_formbrack);
- PL_parser->form_lex_state = PL_lex_state;
- PL_lex_formbrack = PL_lex_brackets + 1;
+ t++;
+ if (*t == '\n' || *t == '#') {
+ ENTER_with_name("lex_format");
+ SAVEI8(PL_parser->form_lex_state);
+ SAVEI32(PL_lex_formbrack);
+ PL_parser->form_lex_state = PL_lex_state;
+ PL_lex_formbrack = PL_lex_brackets + 1;
PL_parser->sub_error_count = PL_error_count;
return yyl_leftcurly(aTHX_ s, 1);
- }
- }
- if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
- s--;
- TOKEN(0);
- }
- pl_yylval.ival = 0;
- OPERATOR(ASSIGNOP);
+ }
+ }
+ if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ s--;
+ TOKEN(0);
+ }
+ pl_yylval.ival = 0;
+ OPERATOR(ASSIGNOP);
case '!':
return yyl_bang(aTHX_ s + 1);
return yyl_slash(aTHX_ s);
case '?': /* conditional */
- s++;
- if (!PL_lex_allbrackets
+ s++;
+ if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
{
- s--;
- TOKEN(0);
- }
- PL_lex_allbrackets++;
- OPERATOR(PERLY_QUESTION_MARK);
+ s--;
+ TOKEN(0);
+ }
+ PL_lex_allbrackets++;
+ OPERATOR(PERLY_QUESTION_MARK);
case '.':
- if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
+ if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
#ifdef PERL_STRICT_CR
- && s[1] == '\n'
+ && s[1] == '\n'
#else
- && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
+ && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
#endif
- && (s == PL_linestart || s[-1] == '\n') )
- {
- PL_expect = XSTATE;
+ && (s == PL_linestart || s[-1] == '\n') )
+ {
+ PL_expect = XSTATE;
/* formbrack==2 means dot seen where arguments expected */
return yyl_rightcurly(aTHX_ s, 2);
- }
- if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
- s += 3;
- OPERATOR(YADAYADA);
- }
- if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
- char tmp = *s++;
- if (*s == tmp) {
- if (!PL_lex_allbrackets
+ }
+ if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
+ s += 3;
+ OPERATOR(YADAYADA);
+ }
+ if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
+ char tmp = *s++;
+ if (*s == tmp) {
+ if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
{
- s--;
- TOKEN(0);
- }
- s++;
- if (*s == tmp) {
- s++;
- pl_yylval.ival = OPf_SPECIAL;
- }
- else
- pl_yylval.ival = 0;
- OPERATOR(DOTDOT);
- }
- if (*s == '=' && !PL_lex_allbrackets
+ s--;
+ TOKEN(0);
+ }
+ s++;
+ if (*s == tmp) {
+ s++;
+ pl_yylval.ival = OPf_SPECIAL;
+ }
+ else
+ pl_yylval.ival = 0;
+ OPERATOR(DOTDOT);
+ }
+ if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
- s--;
- TOKEN(0);
- }
- Aop(OP_CONCAT);
- }
- /* FALLTHROUGH */
+ s--;
+ TOKEN(0);
+ }
+ Aop(OP_CONCAT);
+ }
+ /* FALLTHROUGH */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- s = scan_num(s, &pl_yylval);
- DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
- if (PL_expect == XOPERATOR)
- no_op("Number",s);
- TERM(THING);
+ s = scan_num(s, &pl_yylval);
+ DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
+ if (PL_expect == XOPERATOR)
+ no_op("Number",s);
+ TERM(THING);
case '\'':
return yyl_sglquote(aTHX_ s);
return yyl_backslash(aTHX_ s + 1);
case 'v':
- if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
- char *start = s + 2;
- while (isDIGIT(*start) || *start == '_')
- start++;
- if (*start == '.' && isDIGIT(start[1])) {
- s = scan_num(s, &pl_yylval);
- TERM(THING);
- }
- else if ((*start == ':' && start[1] == ':')
+ if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
+ char *start = s + 2;
+ while (isDIGIT(*start) || *start == '_')
+ start++;
+ if (*start == '.' && isDIGIT(start[1])) {
+ s = scan_num(s, &pl_yylval);
+ TERM(THING);
+ }
+ else if ((*start == ':' && start[1] == ':')
|| (PL_expect == XSTATE && *start == ':')) {
if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
return tok;
goto retry_bufptr;
}
- else if (PL_expect == XSTATE) {
- d = start;
- while (d < PL_bufend && isSPACE(*d)) d++;
- if (*d == ':') {
+ else if (PL_expect == XSTATE) {
+ d = start;
+ while (d < PL_bufend && isSPACE(*d)) d++;
+ if (*d == ':') {
if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
return tok;
goto retry_bufptr;
}
- }
- /* avoid v123abc() or $h{v1}, allow C<print v10;> */
- if (!isALPHA(*start) && (PL_expect == XTERM
- || PL_expect == XREF || PL_expect == XSTATE
- || PL_expect == XTERMORDORDOR)) {
- GV *const gv = gv_fetchpvn_flags(s, start - s,
+ }
+ /* avoid v123abc() or $h{v1}, allow C<print v10;> */
+ if (!isALPHA(*start) && (PL_expect == XTERM
+ || PL_expect == XREF || PL_expect == XSTATE
+ || PL_expect == XTERMORDORDOR)) {
+ GV *const gv = gv_fetchpvn_flags(s, start - s,
UTF ? SVf_UTF8 : 0, SVt_PVCV);
- if (!gv) {
- s = scan_num(s, &pl_yylval);
- TERM(THING);
- }
- }
- }
+ if (!gv) {
+ s = scan_num(s, &pl_yylval);
+ TERM(THING);
+ }
+ }
+ }
if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
return tok;
goto retry_bufptr;
case 'x':
- if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
- s++;
- Mop(OP_REPEAT);
- }
+ if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
+ s++;
+ Mop(OP_REPEAT);
+ }
if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
return tok;
goto retry_bufptr;
case 's': case 'S':
case 't': case 'T':
case 'u': case 'U':
- case 'V':
+ case 'V':
case 'w': case 'W':
- case 'X':
+ case 'X':
case 'y': case 'Y':
case 'z': case 'Z':
if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
Structure:
Check if we have already built the token; if so, use it.
Switch based on the current state:
- - if we have a case modifier in a string, deal with that
- - handle other cases of interpolation inside a string
- - scan the next line if we are inside a format
+ - if we have a case modifier in a string, deal with that
+ - handle other cases of interpolation inside a string
+ - scan the next line if we are inside a format
In the normal state, switch on the next character:
- - default:
- if alphabetic, go to key lookup
- unrecognized character - croak
- - 0/4/26: handle end-of-line or EOF
- - cases for whitespace
- - \n and #: handle comments and line numbers
- - various operators, brackets and sigils
- - numbers
- - quotes
- - 'v': vstrings (or go to key lookup)
- - 'x' repetition operator (or go to key lookup)
- - other ASCII alphanumerics (key lookup begins here):
- word before => ?
- keyword plugin
- scan built-in keyword (but do nothing with it yet)
- check for statement label
- check for lexical subs
- return yyl_just_a_word if there is one
- see whether built-in keyword is overridden
- switch on keyword number:
- - default: return yyl_just_a_word:
- not a built-in keyword; handle bareword lookup
- disambiguate between method and sub call
- fall back to bareword
- - cases for built-in keywords
+ - default:
+ if alphabetic, go to key lookup
+ unrecognized character - croak
+ - 0/4/26: handle end-of-line or EOF
+ - cases for whitespace
+ - \n and #: handle comments and line numbers
+ - various operators, brackets and sigils
+ - numbers
+ - quotes
+ - 'v': vstrings (or go to key lookup)
+ - 'x' repetition operator (or go to key lookup)
+ - other ASCII alphanumerics (key lookup begins here):
+ word before => ?
+ keyword plugin
+ scan built-in keyword (but do nothing with it yet)
+ check for statement label
+ check for lexical subs
+ return yyl_just_a_word if there is one
+ see whether built-in keyword is overridden
+ switch on keyword number:
+ - default: return yyl_just_a_word:
+ not a built-in keyword; handle bareword lookup
+ disambiguate between method and sub call
+ fall back to bareword
+ - cases for built-in keywords
*/
-#ifdef NETWARE
-#define RSFP_FILENO (PL_rsfp)
-#else
-#define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
-#endif
-
-
int
Perl_yylex(pTHX)
{
PL_parser->recheck_utf8_validity = FALSE;
}
DEBUG_T( {
- SV* tmp = newSVpvs("");
- PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
- (IV)CopLINE(PL_curcop),
- lex_state_names[PL_lex_state],
- exp_name[PL_expect],
- pv_display(tmp, s, strlen(s), 0, 60));
- SvREFCNT_dec(tmp);
+ SV* tmp = newSVpvs("");
+ PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
+ CopLINE(PL_curcop),
+ lex_state_names[PL_lex_state],
+ exp_name[PL_expect],
+ pv_display(tmp, s, strlen(s), 0, 60));
+ SvREFCNT_dec(tmp);
} );
/* when we've already built the next token, just pull it out of the queue */
if (PL_nexttoke) {
- PL_nexttoke--;
- pl_yylval = PL_nextval[PL_nexttoke];
- {
- I32 next_type;
- next_type = PL_nexttype[PL_nexttoke];
- if (next_type & (7<<24)) {
- if (next_type & (1<<24)) {
- if (PL_lex_brackets > 100)
- Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
- PL_lex_brackstack[PL_lex_brackets++] =
- (char) ((next_type >> 16) & 0xff);
- }
- if (next_type & (2<<24))
- PL_lex_allbrackets++;
- if (next_type & (4<<24))
- PL_lex_allbrackets--;
- next_type &= 0xffff;
- }
- return REPORT(next_type == 'p' ? pending_ident() : next_type);
- }
+ PL_nexttoke--;
+ pl_yylval = PL_nextval[PL_nexttoke];
+ {
+ I32 next_type;
+ next_type = PL_nexttype[PL_nexttoke];
+ if (next_type & (7<<24)) {
+ if (next_type & (1<<24)) {
+ if (PL_lex_brackets > 100)
+ Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
+ PL_lex_brackstack[PL_lex_brackets++] =
+ (char) ((U8) (next_type >> 16));
+ }
+ if (next_type & (2<<24))
+ PL_lex_allbrackets++;
+ if (next_type & (4<<24))
+ PL_lex_allbrackets--;
+ next_type &= 0xffff;
+ }
+ return REPORT(next_type == 'p' ? pending_ident() : next_type);
+ }
}
switch (PL_lex_state) {
case LEX_NORMAL:
case LEX_INTERPNORMAL:
- break;
+ break;
/* interpolated case modifiers like \L \U, including \Q and \E.
when we get here, PL_bufptr is at the \
*/
case LEX_INTERPCASEMOD:
- /* handle \E or end of string */
+ /* handle \E or end of string */
return yyl_interpcasemod(aTHX_ s);
case LEX_INTERPPUSH:
return REPORT(sublex_push());
case LEX_INTERPSTART:
- if (PL_bufptr == PL_bufend)
- return REPORT(sublex_done());
- DEBUG_T({
+ if (PL_bufptr == PL_bufend)
+ return REPORT(sublex_done());
+ DEBUG_T({
if(*PL_bufptr != '(')
PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
});
- PL_expect = XTERM;
+ PL_expect = XTERM;
/* for /@a/, we leave the joining for the regex engine to do
* (unless we're within \Q etc) */
- PL_lex_dojoin = (*PL_bufptr == '@'
+ PL_lex_dojoin = (*PL_bufptr == '@'
&& (!PL_lex_inpat || PL_lex_casemods));
- PL_lex_state = LEX_INTERPNORMAL;
- if (PL_lex_dojoin) {
- NEXTVAL_NEXTTOKE.ival = 0;
- force_next(PERLY_COMMA);
- force_ident("\"", '$');
- NEXTVAL_NEXTTOKE.ival = 0;
- force_next('$');
- NEXTVAL_NEXTTOKE.ival = 0;
- force_next((2<<24)|'(');
- NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
- force_next(FUNC);
- }
- /* Convert (?{...}) and friends to 'do {...}' */
- if (PL_lex_inpat && *PL_bufptr == '(') {
- PL_parser->lex_shared->re_eval_start = PL_bufptr;
- PL_bufptr += 2;
- if (*PL_bufptr != '{')
- PL_bufptr++;
- PL_expect = XTERMBLOCK;
- force_next(DO);
- }
-
- if (PL_lex_starts++) {
- s = PL_bufptr;
- /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
- if (!PL_lex_casemods && PL_lex_inpat)
- TOKEN(PERLY_COMMA);
- else
- AopNOASSIGN(OP_CONCAT);
- }
- return yylex();
+ PL_lex_state = LEX_INTERPNORMAL;
+ if (PL_lex_dojoin) {
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next(PERLY_COMMA);
+ force_ident("\"", PERLY_DOLLAR);
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next(PERLY_DOLLAR);
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next((2<<24)|PERLY_PAREN_OPEN);
+ NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
+ force_next(FUNC);
+ }
+ /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
+ if (PL_lex_inpat && *PL_bufptr == '(') {
+ PL_parser->lex_shared->re_eval_start = PL_bufptr;
+ PL_bufptr += 2;
+ if (*PL_bufptr != '{')
+ PL_bufptr++;
+ PL_expect = XTERMBLOCK;
+ force_next(KW_DO);
+ }
+
+ if (PL_lex_starts++) {
+ s = PL_bufptr;
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (!PL_lex_casemods && PL_lex_inpat)
+ TOKEN(PERLY_COMMA);
+ else
+ AopNOASSIGN(OP_CONCAT);
+ }
+ return yylex();
case LEX_INTERPENDMAYBE:
- if (intuit_more(PL_bufptr, PL_bufend)) {
- PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
- break;
- }
- /* FALLTHROUGH */
+ if (intuit_more(PL_bufptr, PL_bufend)) {
+ PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
+ break;
+ }
+ /* FALLTHROUGH */
case LEX_INTERPEND:
- if (PL_lex_dojoin) {
- const U8 dojoin_was = PL_lex_dojoin;
- PL_lex_dojoin = FALSE;
- PL_lex_state = LEX_INTERPCONCAT;
- PL_lex_allbrackets--;
- return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
- }
- if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
- && SvEVALED(PL_lex_repl))
- {
- if (PL_bufptr != PL_bufend)
- Perl_croak(aTHX_ "Bad evalled substitution pattern");
- PL_lex_repl = NULL;
- }
- /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
- re_eval_str. If the here-doc body’s length equals the previous
- value of re_eval_start, re_eval_start will now be null. So
- check re_eval_str as well. */
- if (PL_parser->lex_shared->re_eval_start
- || PL_parser->lex_shared->re_eval_str) {
- SV *sv;
- if (*PL_bufptr != ')')
- Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
- PL_bufptr++;
- /* having compiled a (?{..}) expression, return the original
- * text too, as a const */
- if (PL_parser->lex_shared->re_eval_str) {
- sv = PL_parser->lex_shared->re_eval_str;
- PL_parser->lex_shared->re_eval_str = NULL;
- SvCUR_set(sv,
- PL_bufptr - PL_parser->lex_shared->re_eval_start);
- SvPV_shrink_to_cur(sv);
- }
- else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
- PL_bufptr - PL_parser->lex_shared->re_eval_start);
- NEXTVAL_NEXTTOKE.opval =
+ if (PL_lex_dojoin) {
+ const U8 dojoin_was = PL_lex_dojoin;
+ PL_lex_dojoin = FALSE;
+ PL_lex_state = LEX_INTERPCONCAT;
+ PL_lex_allbrackets--;
+ return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
+ }
+ if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
+ && SvEVALED(PL_lex_repl))
+ {
+ if (PL_bufptr != PL_bufend)
+ Perl_croak(aTHX_ "Bad evalled substitution pattern");
+ PL_lex_repl = NULL;
+ }
+ /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
+ re_eval_str. If the here-doc body's length equals the previous
+ value of re_eval_start, re_eval_start will now be null. So
+ check re_eval_str as well. */
+ if (PL_parser->lex_shared->re_eval_start
+ || PL_parser->lex_shared->re_eval_str) {
+ SV *sv;
+ if (*PL_bufptr != ')')
+ Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
+ PL_bufptr++;
+ /* having compiled a (?{..}) expression, return the original
+ * text too, as a const */
+ if (PL_parser->lex_shared->re_eval_str) {
+ sv = PL_parser->lex_shared->re_eval_str;
+ PL_parser->lex_shared->re_eval_str = NULL;
+ SvCUR_set(sv,
+ PL_bufptr - PL_parser->lex_shared->re_eval_start);
+ SvPV_shrink_to_cur(sv);
+ }
+ else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
+ PL_bufptr - PL_parser->lex_shared->re_eval_start);
+ NEXTVAL_NEXTTOKE.opval =
newSVOP(OP_CONST, 0,
- sv);
- force_next(THING);
- PL_parser->lex_shared->re_eval_start = NULL;
- PL_expect = XTERM;
- return REPORT(PERLY_COMMA);
- }
-
- /* FALLTHROUGH */
+ sv);
+ force_next(THING);
+ PL_parser->lex_shared->re_eval_start = NULL;
+ PL_expect = XTERM;
+ return REPORT(PERLY_COMMA);
+ }
+
+ /* FALLTHROUGH */
case LEX_INTERPCONCAT:
#ifdef DEBUGGING
- if (PL_lex_brackets)
- Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
- (long) PL_lex_brackets);
+ if (PL_lex_brackets)
+ Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
+ (long) PL_lex_brackets);
#endif
- if (PL_bufptr == PL_bufend)
- return REPORT(sublex_done());
+ if (PL_bufptr == PL_bufend)
+ return REPORT(sublex_done());
- /* m'foo' still needs to be parsed for possible (?{...}) */
- if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
- SV *sv = newSVsv(PL_linestr);
- sv = tokeq(sv);
+ /* m'foo' still needs to be parsed for possible (?{...}) */
+ if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
+ SV *sv = newSVsv(PL_linestr);
+ sv = tokeq(sv);
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
- s = PL_bufend;
- }
- else {
+ s = PL_bufend;
+ }
+ else {
int save_error_count = PL_error_count;
- s = scan_const(PL_bufptr);
+ s = scan_const(PL_bufptr);
/* Set flag if this was a pattern and there were errors. op.c will
* refuse to compile a pattern with this flag set. Otherwise, we
if (PL_lex_inpat && PL_error_count > save_error_count) {
((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
}
- if (*s == '\\')
- PL_lex_state = LEX_INTERPCASEMOD;
- else
- PL_lex_state = LEX_INTERPSTART;
- }
-
- if (s != PL_bufptr) {
- NEXTVAL_NEXTTOKE = pl_yylval;
- PL_expect = XTERM;
- force_next(THING);
- if (PL_lex_starts++) {
- /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
- if (!PL_lex_casemods && PL_lex_inpat)
- TOKEN(PERLY_COMMA);
- else
- AopNOASSIGN(OP_CONCAT);
- }
- else {
- PL_bufptr = s;
- return yylex();
- }
- }
-
- return yylex();
+ if (*s == '\\')
+ PL_lex_state = LEX_INTERPCASEMOD;
+ else
+ PL_lex_state = LEX_INTERPSTART;
+ }
+
+ if (s != PL_bufptr) {
+ NEXTVAL_NEXTTOKE = pl_yylval;
+ PL_expect = XTERM;
+ force_next(THING);
+ if (PL_lex_starts++) {
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (!PL_lex_casemods && PL_lex_inpat)
+ TOKEN(PERLY_COMMA);
+ else
+ AopNOASSIGN(OP_CONCAT);
+ }
+ else {
+ PL_bufptr = s;
+ return yylex();
+ }
+ }
+
+ return yylex();
case LEX_FORMLINE:
if (PL_parser->sub_error_count != PL_error_count) {
/* There was an error parsing a formline, which tends to
*/
yyquit();
}
- assert(PL_lex_formbrack);
- s = scan_formline(PL_bufptr);
- if (!PL_lex_formbrack)
+ assert(PL_lex_formbrack);
+ s = scan_formline(PL_bufptr);
+ if (!PL_lex_formbrack)
return yyl_rightcurly(aTHX_ s, 1);
- PL_bufptr = s;
- return yylex();
+ PL_bufptr = s;
+ return yylex();
}
/* We really do *not* want PL_linestr ever becoming a COW. */
Structure:
if we're in a my declaration
- croak if they tried to say my($foo::bar)
- build the ops for a my() declaration
+ croak if they tried to say my($foo::bar)
+ build the ops for a my() declaration
if it's an access to a my() variable
- build ops for access to a my() variable
+ build ops for access to a my() variable
if in a dq string, and they've said @foo and we can't find @foo
- warn
+ warn
build ops for a bareword
*/
/* PL_no_myglob is constant */
GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
- PL_in_my == KEY_my ? "my" : "state",
+ PL_in_my == KEY_my ? "my" :
+ PL_in_my == KEY_field ? "field" : "state",
*PL_tokenbuf == '&' ? "subroutine" : "variable",
PL_tokenbuf),
UTF ? SVf_UTF8 : 0);
PL_in_my = 0;
pl_yylval.opval = o;
- return PRIVATEREF;
+ return PRIVATEREF;
}
}
*/
if (!has_colon) {
- if (!PL_in_my)
- tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
+ if (!PL_in_my)
+ tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
/* build ops for a bareword */
- HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
- HEK * const stashname = HvNAME_HEK(stash);
- SV * const sym = newSVhek(stashname);
+ HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
+ HEK * const stashname = HvNAME_HEK(stash);
+ SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
- )
+ )
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %" UTF8f
- " in string",
- UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
+ "Possible unintended interpolation of %" UTF8f
+ " in string",
+ UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
}
}
/* build ops for a bareword */
pl_yylval.opval = newSVOP(OP_CONST, 0,
- newSVpvn_flags(PL_tokenbuf + 1,
+ newSVpvn_flags(PL_tokenbuf + 1,
tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
- (PL_in_eval ? GV_ADDMULTI : GV_ADD)
+ (PL_in_eval ? GV_ADDMULTI : GV_ADD)
| ( UTF ? SVf_UTF8 : 0 ),
- ((PL_tokenbuf[0] == '$') ? SVt_PV
- : (PL_tokenbuf[0] == '@') ? SVt_PVAV
- : SVt_PVHV));
+ ((PL_tokenbuf[0] == '$') ? SVt_PV
+ : (PL_tokenbuf[0] == '@') ? SVt_PVAV
+ : SVt_PVHV));
return BAREWORD;
}
PERL_ARGS_ASSERT_CHECKCOMMA;
if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
- if (ckWARN(WARN_SYNTAX)) {
- int level = 1;
- const char *w;
- for (w = s+2; *w && level; w++) {
- if (*w == '(')
- ++level;
- else if (*w == ')')
- --level;
- }
- while (isSPACE(*w))
- ++w;
- /* the list of chars below is for end of statements or
- * block / parens, boolean operators (&&, ||, //) and branch
- * constructs (or, and, if, until, unless, while, err, for).
- * Not a very solid hack... */
- if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "%s (...) interpreted as function",name);
- }
+ if (ckWARN(WARN_SYNTAX)) {
+ int level = 1;
+ const char *w;
+ for (w = s+2; *w && level; w++) {
+ if (*w == '(')
+ ++level;
+ else if (*w == ')')
+ --level;
+ }
+ while (isSPACE(*w))
+ ++w;
+ /* the list of chars below is for end of statements or
+ * block / parens, boolean operators (&&, ||, //) and branch
+ * constructs (or, and, if, until, unless, while, err, for).
+ * Not a very solid hack... */
+ if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "%s (...) interpreted as function",name);
+ }
}
while (s < PL_bufend && isSPACE(*s))
- s++;
+ s++;
if (*s == '(')
- s++;
+ s++;
while (s < PL_bufend && isSPACE(*s))
- s++;
+ s++;
if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
- const char * const w = s;
+ const char * const w = s;
s += UTF ? UTF8SKIP(s) : 1;
- while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
- s += UTF ? UTF8SKIP(s) : 1;
- while (s < PL_bufend && isSPACE(*s))
- s++;
- if (*s == ',') {
- GV* gv;
- if (keyword(w, s - w, 0))
- return;
-
- gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
- if (gv && GvCVu(gv))
- return;
- if (s - w <= 254) {
+ while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
+ s += UTF ? UTF8SKIP(s) : 1;
+ while (s < PL_bufend && isSPACE(*s))
+ s++;
+ if (*s == ',') {
+ GV* gv;
+ if (keyword(w, s - w, 0))
+ return;
+
+ gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
+ if (gv && GvCVu(gv))
+ return;
+ if (s - w <= 254) {
PADOFFSET off;
- char tmpbuf[256];
- Copy(w, tmpbuf+1, s - w, char);
- *tmpbuf = '&';
- off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
- if (off != NOT_IN_PAD) return;
- }
- Perl_croak(aTHX_ "No comma allowed after %s", what);
- }
+ char tmpbuf[256];
+ Copy(w, tmpbuf+1, s - w, char);
+ *tmpbuf = '&';
+ off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
+ if (off != NOT_IN_PAD) return;
+ }
+ Perl_croak(aTHX_ "No comma allowed after %s", what);
+ }
}
}
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
- SV *sv, SV *pv, const char *type, STRLEN typelen,
+ SV *sv, SV *pv, const char *type, STRLEN typelen,
const char ** error_msg)
{
dSP;
sv_2mortal(sv); /* Parent created it permanently */
if ( ! table
- || ! (PL_hints & HINT_LOCALIZE_HH))
+ || ! (PL_hints & HINT_LOCALIZE_HH))
{
why1 = "unknown";
optional_colon = "";
cv = *cvp;
if (!pv && s)
- pv = newSVpvn_flags(s, len, SVs_TEMP);
+ pv = newSVpvn_flags(s, len, SVs_TEMP);
if (type && pv)
- typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
+ typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
else
- typesv = &PL_sv_undef;
+ typesv = &PL_sv_undef;
PUSHSTACKi(PERLSI_OVERLOAD);
ENTER ;
PUSHMARK(SP) ;
EXTEND(sp, 3);
if (pv)
- PUSHs(pv);
+ PUSHs(pv);
PUSHs(sv);
if (pv)
- PUSHs(typesv);
+ PUSHs(typesv);
PUTBACK;
call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
/* Check the eval first */
if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
- STRLEN errlen;
- const char * errstr;
- sv_catpvs(errsv, "Propagated");
- errstr = SvPV_const(errsv, errlen);
- yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
- (void)POPs;
- res = SvREFCNT_inc_simple_NN(sv);
+ STRLEN errlen;
+ const char * errstr;
+ sv_catpvs(errsv, "Propagated");
+ errstr = SvPV_const(errsv, errlen);
+ yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
+ (void)POPs;
+ res = SvREFCNT_inc_simple_NN(sv);
}
else {
- res = POPs;
- SvREFCNT_inc_simple_void_NN(res);
+ res = POPs;
+ SvREFCNT_inc_simple_void_NN(res);
}
PUTBACK ;
else
break;
}
- if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
- && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
- char *this_d;
- char *d2;
- Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
- d2 = this_d;
- SAVEFREEPV(this_d);
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Old package separator used in string");
- if (olds[-1] == '#')
- *d2++ = olds[-2];
- *d2++ = olds[-1];
- while (olds < *s) {
- if (*olds == '\'') {
- *d2++ = '\\';
- *d2++ = *olds++;
- }
- else
- *d2++ = *olds++;
- }
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Did you mean \"%" UTF8f "\" instead?)\n",
- UTF8fARG(is_utf8, d2-this_d, this_d));
+ if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
+ if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
+ char *this_d;
+ char *d2;
+ Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+ d2 = this_d;
+ SAVEFREEPV(this_d);
+
+ Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
+ "Old package separator used in string");
+ if (olds[-1] == '#')
+ *d2++ = olds[-2];
+ *d2++ = olds[-1];
+ while (olds < *s) {
+ if (*olds == '\'') {
+ *d2++ = '\\';
+ *d2++ = *olds++;
+ }
+ else
+ *d2++ = *olds++;
+ }
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Did you mean \"%" UTF8f "\" instead?)\n",
+ UTF8fARG(is_utf8, d2-this_d, this_d));
+ }
+ else {
+ Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
+ "Old package separator \"'\" deprecated");
+ }
}
return;
}
/* Returns a NUL terminated string, with the length of the string written to
*slp
+
+ scan_word6() may be removed once ' in names is removed.
*/
char *
-Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
{
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
bool is_utf8 = cBOOL(UTF);
- PERL_ARGS_ASSERT_SCAN_WORD;
+ PERL_ARGS_ASSERT_SCAN_WORD6;
- parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
+ parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
*d = '\0';
*slp = d - dest;
return s;
}
-/* Is the byte 'd' a legal single character identifier name? 'u' is true
- * iff Unicode semantics are to be used. The legal ones are any of:
- * a) all ASCII characters except:
- * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
- * 2) '{'
- * The final case currently doesn't get this far in the program, so we
- * don't test for it. If that were to change, it would be ok to allow it.
- * b) When not under Unicode rules, any upper Latin1 character
- * c) Otherwise, when unicode rules are used, all XIDS characters.
- *
- * Because all ASCII characters have the same representation whether
- * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not. */
-#define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8_safe(s, e) \
- : (isGRAPH_L1(*s) \
- && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+char *
+Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+{
+ PERL_ARGS_ASSERT_SCAN_WORD;
+ return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
+}
+/* scan s and extract an identifier ($var) from it if possible
+ * into dest.
+ * XXX: This function has subtle implications on parsing, and
+ * changing how it behaves can cause a variable to change from
+ * being a run time rv2sv call or a compile time binding to a
+ * specific variable name.
+ */
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
{
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
bool is_utf8 = cBOOL(UTF);
- I32 orig_copline = 0, tmp_copline = 0;
+ line_t orig_copline = 0, tmp_copline = 0;
PERL_ARGS_ASSERT_SCAN_IDENT;
if (isSPACE(*s) || !*s)
- s = skipspace(s);
+ s = skipspace(s);
if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
bool is_zero= *s == '0' ? TRUE : FALSE;
char *digit_start= d;
if (d >= e)
Perl_croak(aTHX_ "%s", ident_too_long);
*d++ = *s++;
- }
+ }
if (is_zero && d - digit_start > 1)
Perl_croak(aTHX_ ident_var_zero_multi_digit);
}
if (*d) {
/* Either a digit variable, or parse_ident() found an identifier
(anything valid as a bareword), so job done and return. */
- if (PL_lex_state != LEX_NORMAL)
- PL_lex_state = LEX_INTERPENDMAYBE;
- return s;
+ if (PL_lex_state != LEX_NORMAL)
+ PL_lex_state = LEX_INTERPENDMAYBE;
+ return s;
}
/* Here, it is not a run-of-the-mill identifier name */
/* Dereferencing a value in a scalar variable.
The alternatives are different syntaxes for a scalar variable.
Using ' as a leading package separator isn't allowed. :: is. */
- return s;
+ return s;
}
/* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
if (*s == '{') {
- bracket = s - SvPVX(PL_linestr);
- s++;
- orig_copline = CopLINE(PL_curcop);
+ bracket = s - SvPVX(PL_linestr);
+ s++;
+ orig_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
s = skipspace(s);
}
}
+
+
+ /* Extract the first character of the variable name from 's' and
+ * copy it, null terminated into 'd'. Note that this does not
+ * involve checking for just IDFIRST characters, as it allows the
+ * '^' for ${^FOO} type variable names, and it allows all the
+ * characters that are legal in a single character variable name.
+ *
+ * The legal ones are any of:
+ * a) all ASCII characters except:
+ * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
+ * 2) '{'
+ * The final case currently doesn't get this far in the program, so we
+ * don't test for it. If that were to change, it would be ok to allow it.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ * Because all ASCII characters have the same representation whether
+ * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ * '{' without knowing if is UTF-8 or not. */
+
if ((s <= PL_bufend - ((is_utf8)
? UTF8SKIP(s)
: 1))
- && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
- {
+ && (
+ isGRAPH_A(*s)
+ ||
+ ( is_utf8
+ ? isIDFIRST_utf8_safe(s, PL_bufend)
+ : (isGRAPH_L1(*s)
+ && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
+ )
+ )
+ )
+ ){
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
}
else {
*d = *s++;
- /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
- if (isDIGIT(*d)) {
- bool is_zero= *d == '0' ? TRUE : FALSE;
- char *digit_start= d;
- while (s < PL_bufend && isDIGIT(*s)) {
- d++;
- if (d >= e)
- Perl_croak(aTHX_ "%s", ident_too_long);
- *d= *s++;
- }
- if (is_zero && d - digit_start > 1)
- Perl_croak(aTHX_ ident_var_zero_multi_digit);
- }
d[1] = '\0';
}
}
+
+ /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
+ if (isDIGIT(*d)) {
+ bool is_zero= *d == '0' ? TRUE : FALSE;
+ char *digit_start= d;
+ while (s < PL_bufend && isDIGIT(*s)) {
+ d++;
+ if (d >= e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ *d= *s++;
+ }
+ if (is_zero && d - digit_start >= 1) /* d points at the last digit */
+ Perl_croak(aTHX_ ident_var_zero_multi_digit);
+ d[1] = '\0';
+ }
+
/* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
- if (*d == '^' && *s && isCONTROLVAR(*s)) {
- *d = toCTRL(*s);
- s++;
+ else if (*d == '^' && *s && isCONTROLVAR(*s)) {
+ *d = toCTRL(*s);
+ s++;
}
/* Warn about ambiguous code after unary operators if {...} notation isn't
used. There's no difference in ambiguity; it's merely a heuristic
about when not to warn. */
else if (ck_uni && bracket == -1)
- check_uni();
+ check_uni();
+
if (bracket != -1) {
bool skip;
char *s2;
if (s < PL_bufend && isSPACE(*s)) {
s = skipspace(s);
}
- if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+ if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
/* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
- if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
- const char * const brack =
- (const char *)
- ((*s == '[') ? "[...]" : "{...}");
+ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
+ const char * const brack =
+ (const char *)
+ ((*s == '[') ? "[...]" : "{...}");
orig_copline = CopLINE(PL_curcop);
CopLINE_set(PL_curcop, tmp_copline);
/* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of %c{%s%s} resolved to %c%s%s",
- funny, dest, brack, funny, dest, brack);
+ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of %c{%s%s} resolved to %c%s%s",
+ funny, dest, brack, funny, dest, brack);
CopLINE_set(PL_curcop, orig_copline);
- }
- bracket++;
- PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
- PL_lex_allbrackets++;
- return s;
- }
- }
+ }
+ bracket++;
+ PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
+ PL_lex_allbrackets++;
+ return s;
+ }
+ }
if ( !tmp_copline )
tmp_copline = CopLINE(PL_curcop);
/* Now increment line numbers if applicable. */
if (skip)
s = skipspace(s);
- s++;
- if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
- PL_lex_state = LEX_INTERPEND;
- PL_expect = XREF;
- }
- if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
- if (ckWARN(WARN_AMBIGUOUS)
+ s++;
+ if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
+ PL_lex_state = LEX_INTERPEND;
+ PL_expect = XREF;
+ }
+ if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
+ if (ckWARN(WARN_AMBIGUOUS)
&& (keyword(dest, d - dest, 0)
- || get_cvn_flags(dest, d - dest, is_utf8
+ || get_cvn_flags(dest, d - dest, is_utf8
? SVf_UTF8
: 0)))
- {
+ {
SV *tmp = newSVpvn_flags( dest, d - dest,
SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
- if (funny == '#')
- funny = '@';
+ if (funny == '#')
+ funny = '@';
orig_copline = CopLINE(PL_curcop);
CopLINE_set(PL_curcop, tmp_copline);
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
- funny, SVfARG(tmp), funny, SVfARG(tmp));
+ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
+ funny, SVfARG(tmp), funny, SVfARG(tmp));
CopLINE_set(PL_curcop, orig_copline);
- }
- }
- }
- else {
+ }
+ }
+ }
+ else {
/* Didn't find the closing } at the point we expected, so restore
state such that the next thing to process is the opening { and */
- s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
+ s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
CopLINE_set(PL_curcop, orig_copline);
PL_parser->herelines = herelines;
- *dest = '\0';
+ *dest = '\0';
PL_parser->sub_no_recover = TRUE;
- }
+ }
}
else if ( PL_lex_state == LEX_INTERPNORMAL
&& !PL_lex_brackets
&& !intuit_more(s, PL_bufend))
- PL_lex_state = LEX_INTERPEND;
+ PL_lex_state = LEX_INTERPEND;
return s;
}
case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
- case LOCALE_PAT_MOD:
- if (*charset) {
- goto multiple_charsets;
- }
- set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
- *charset = c;
- break;
- case UNICODE_PAT_MOD:
- if (*charset) {
- goto multiple_charsets;
- }
- set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
- *charset = c;
- break;
- case ASCII_RESTRICT_PAT_MOD:
- if (! *charset) {
- set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
- }
- else {
-
- /* Error if previous modifier wasn't an 'a', but if it was, see
- * if, and accept, a second occurrence (only) */
- if (*charset != 'a'
- || get_regex_charset(*pmfl)
- != REGEX_ASCII_RESTRICTED_CHARSET)
- {
- goto multiple_charsets;
- }
- set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
- }
- *charset = c;
- break;
- case DEPENDS_PAT_MOD:
- if (*charset) {
- goto multiple_charsets;
- }
- set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
- *charset = c;
- break;
+ case LOCALE_PAT_MOD:
+ if (*charset) {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
+ *charset = c;
+ break;
+ case UNICODE_PAT_MOD:
+ if (*charset) {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
+ *charset = c;
+ break;
+ case ASCII_RESTRICT_PAT_MOD:
+ if (! *charset) {
+ set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
+ }
+ else {
+
+ /* Error if previous modifier wasn't an 'a', but if it was, see
+ * if, and accept, a second occurrence (only) */
+ if (*charset != 'a'
+ || get_regex_charset(*pmfl)
+ != REGEX_ASCII_RESTRICTED_CHARSET)
+ {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
+ }
+ *charset = c;
+ break;
+ case DEPENDS_PAT_MOD:
+ if (*charset) {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
+ *charset = c;
+ break;
}
(*s)++;
return TRUE;
multiple_charsets:
- if (*charset != c) {
- yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
- }
- else if (c == 'a') {
+ if (*charset != c) {
+ yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
+ }
+ else if (c == 'a') {
/* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
- yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
- }
- else {
- yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
- }
+ yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
+ }
+ else {
+ yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
+ }
- /* Pretend that it worked, so will continue processing before dieing */
- (*s)++;
- return TRUE;
+ /* Pretend that it worked, so will continue processing before dieing */
+ (*s)++;
+ return TRUE;
}
STATIC char *
PMOP *pm;
char *s;
const char * const valid_flags =
- (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
+ (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
unsigned int x_mod_count = 0;
s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
if (!s)
- Perl_croak(aTHX_ "Search pattern not terminated");
+ Perl_croak(aTHX_ "Search pattern not terminated");
pm = (PMOP*)newPMOP(type, 0);
if (PL_multi_open == '?') {
- /* This is the only point in the code that sets PMf_ONCE: */
- pm->op_pmflags |= PMf_ONCE;
-
- /* Hence it's safe to do this bit of PMOP book-keeping here, which
- allows us to restrict the list needed by reset to just the ??
- matches. */
- assert(type != OP_TRANS);
- if (PL_curstash) {
- MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
- U32 elements;
- if (!mg) {
- mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
- 0);
- }
- elements = mg->mg_len / sizeof(PMOP**);
- Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
- ((PMOP**)mg->mg_ptr) [elements++] = pm;
- mg->mg_len = elements * sizeof(PMOP**);
- PmopSTASH_set(pm,PL_curstash);
- }
+ /* This is the only point in the code that sets PMf_ONCE: */
+ pm->op_pmflags |= PMf_ONCE;
+
+ /* Hence it's safe to do this bit of PMOP book-keeping here, which
+ allows us to restrict the list needed by reset to just the ??
+ matches. */
+ assert(type != OP_TRANS);
+ if (PL_curstash) {
+ MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
+ U32 elements;
+ if (!mg) {
+ mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
+ 0);
+ }
+ elements = mg->mg_len / sizeof(PMOP**);
+ Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
+ ((PMOP**)mg->mg_ptr) [elements++] = pm;
+ mg->mg_len = elements * sizeof(PMOP**);
+ PmopSTASH_set(pm,PL_curstash);
+ }
}
/* if qr/...(?{..}).../, then need to parse the pattern within a new
* anon CV. False positives like qr/[(?{]/ are harmless */
if (type == OP_QR) {
- STRLEN len;
- char *e, *p = SvPV(PL_lex_stuff, len);
- e = p + len;
- for (; p < e; p++) {
- if (p[0] == '(' && p[1] == '?'
- && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
- {
- pm->op_pmflags |= PMf_HAS_CV;
- break;
- }
- }
- pm->op_pmflags |= PMf_IS_QR;
+ STRLEN len;
+ char *e, *p = SvPV(PL_lex_stuff, len);
+ e = p + len;
+ for (; p < e; p++) {
+ if (p[0] == '(' && (
+ (p[1] == '?' && (p[2] == '{' ||
+ (p[2] == '?' && p[3] == '{'))) ||
+ (p[1] == '*' && (p[2] == '{' ||
+ (p[2] == '*' && p[3] == '{')))
+ )){
+ pm->op_pmflags |= PMf_HAS_CV;
+ break;
+ }
+ }
+ pm->op_pmflags |= PMf_IS_QR;
}
while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
- "Use of /c modifier is meaningless without /g" );
+ "Use of /c modifier is meaningless without /g" );
}
PL_lex_op = (OP*)pm;
s = scan_str(start, TRUE, FALSE, FALSE, &t);
if (!s)
- Perl_croak(aTHX_ "Substitution pattern not terminated");
+ Perl_croak(aTHX_ "Substitution pattern not terminated");
s = t;
first_line = CopLINE(PL_curcop);
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s) {
- SvREFCNT_dec_NN(PL_lex_stuff);
- PL_lex_stuff = NULL;
- Perl_croak(aTHX_ "Substitution replacement not terminated");
+ SvREFCNT_dec_NN(PL_lex_stuff);
+ PL_lex_stuff = NULL;
+ Perl_croak(aTHX_ "Substitution replacement not terminated");
}
PL_multi_start = first_start; /* so whole substitution is taken together */
while (*s) {
- if (*s == EXEC_PAT_MOD) {
- s++;
- es++;
- }
- else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
+ if (*s == EXEC_PAT_MOD) {
+ s++;
+ es++;
+ }
+ else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
&s, &charset, &x_mod_count))
- {
- break;
- }
+ {
+ break;
+ }
}
if ((pm->op_pmflags & PMf_CONTINUE)) {
}
if (es) {
- SV * const repl = newSVpvs("");
+ SV * const repl = newSVpvs("");
- PL_multi_end = 0;
- pm->op_pmflags |= PMf_EVAL;
+ PL_multi_end = 0;
+ pm->op_pmflags |= PMf_EVAL;
for (; es > 1; es--) {
sv_catpvs(repl, "eval ");
}
sv_catpvs(repl, "do {");
- sv_catsv(repl, PL_parser->lex_sub_repl);
- sv_catpvs(repl, "}");
- SvREFCNT_dec(PL_parser->lex_sub_repl);
- PL_parser->lex_sub_repl = repl;
+ sv_catsv(repl, PL_parser->lex_sub_repl);
+ sv_catpvs(repl, "}");
+ SvREFCNT_dec(PL_parser->lex_sub_repl);
+ PL_parser->lex_sub_repl = repl;
}
linediff = CopLINE(PL_curcop) - first_line;
if (linediff)
- CopLINE_set(PL_curcop, first_line);
+ CopLINE_set(PL_curcop, first_line);
if (linediff || es) {
/* the IVX field indicates that the replacement string is a s///e;
s = scan_str(start,FALSE,FALSE,FALSE,&t);
if (!s)
- Perl_croak(aTHX_ "Transliteration pattern not terminated");
+ Perl_croak(aTHX_ "Transliteration pattern not terminated");
s = t;
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s) {
- SvREFCNT_dec_NN(PL_lex_stuff);
- PL_lex_stuff = NULL;
- Perl_croak(aTHX_ "Transliteration replacement not terminated");
+ SvREFCNT_dec_NN(PL_lex_stuff);
+ PL_lex_stuff = NULL;
+ Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
complement = del = squash = 0;
while (1) {
- switch (*s) {
- case 'c':
- complement = OPpTRANS_COMPLEMENT;
- break;
- case 'd':
- del = OPpTRANS_DELETE;
- break;
- case 's':
- squash = OPpTRANS_SQUASH;
- break;
- case 'r':
- nondestruct = 1;
- break;
- default:
- goto no_more;
- }
- s++;
+ switch (*s) {
+ case 'c':
+ complement = OPpTRANS_COMPLEMENT;
+ break;
+ case 'd':
+ del = OPpTRANS_DELETE;
+ break;
+ case 's':
+ squash = OPpTRANS_SQUASH;
+ break;
+ case 'r':
+ nondestruct = 1;
+ break;
+ default:
+ goto no_more;
+ }
+ s++;
}
no_more:
peek = s;
if (*peek == '~') {
- indented = TRUE;
- peek++; s++;
+ indented = TRUE;
+ peek++; s++;
}
while (SPACE_OR_TAB(*peek))
- peek++;
+ peek++;
if (*peek == '`' || *peek == '\'' || *peek =='"') {
- s = peek;
- term = *s++;
- s = delimcpy(d, e, s, PL_bufend, term, &len);
- if (s == PL_bufend)
- Perl_croak(aTHX_ "Unterminated delimiter for here document");
- d += len;
- s++;
+ s = peek;
+ term = *s++;
+ s = delimcpy(d, e, s, PL_bufend, term, &len);
+ if (s == PL_bufend)
+ Perl_croak(aTHX_ "Unterminated delimiter for here document");
+ d += len;
+ s++;
}
else {
- if (*s == '\\')
+ if (*s == '\\')
/* <<\FOO is equivalent to <<'FOO' */
- s++, term = '\'';
- else
- term = '"';
+ s++, term = '\'';
+ else
+ term = '"';
- if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
- Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
+ if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
+ Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
- peek = s;
+ peek = s;
while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
- peek += UTF ? UTF8SKIP(peek) : 1;
- }
+ peek += UTF ? UTF8SKIP(peek) : 1;
+ }
- len = (peek - s >= e - d) ? (e - d) : (peek - s);
- Copy(s, d, len, char);
- s += len;
- d += len;
+ len = (peek - s >= e - d) ? (e - d) : (peek - s);
+ Copy(s, d, len, char);
+ s += len;
+ d += len;
}
if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
- Perl_croak(aTHX_ "Delimiter for here document is too long");
+ Perl_croak(aTHX_ "Delimiter for here document is too long");
*d++ = '\n';
*d = '\0';
#ifndef PERL_STRICT_CR
d = (char *) memchr(s, '\r', PL_bufend - s);
if (d) {
- char * const olds = s;
- s = d;
- while (s < PL_bufend) {
- if (*s == '\r') {
- *d++ = '\n';
- if (*++s == '\n')
- s++;
- }
- else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
- *d++ = *s++;
- s++;
- }
- else
- *d++ = *s++;
- }
- *d = '\0';
- PL_bufend = d;
- SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
- s = olds;
+ char * const olds = s;
+ s = d;
+ while (s < PL_bufend) {
+ if (*s == '\r') {
+ *d++ = '\n';
+ if (*++s == '\n')
+ s++;
+ }
+ else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
+ *d++ = *s++;
+ s++;
+ }
+ else
+ *d++ = *s++;
+ }
+ *d = '\0';
+ PL_bufend = d;
+ SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
+ s = olds;
}
#endif
tmpstr = newSV_type(SVt_PVIV);
- SvGROW(tmpstr, 80);
if (term == '\'') {
- op_type = OP_CONST;
- SvIV_set(tmpstr, -1);
+ op_type = OP_CONST;
+ SvIV_set(tmpstr, -1);
}
else if (term == '`') {
- op_type = OP_BACKTICK;
- SvIV_set(tmpstr, '\\');
+ op_type = OP_BACKTICK;
+ SvIV_set(tmpstr, '\\');
}
PL_multi_start = origline + 1 + PL_parser->herelines;
/* inside a string eval or quote-like operator */
if (!infile || PL_lex_inwhat) {
- SV *linestr;
- char *bufend;
- char * const olds = s;
- PERL_CONTEXT * const cx = CX_CUR();
- /* These two fields are not set until an inner lexing scope is
- entered. But we need them set here. */
- shared->ls_bufptr = s;
- shared->ls_linestr = PL_linestr;
+ SV *linestr;
+ char *bufend;
+ char * const olds = s;
+ PERL_CONTEXT * const cx = CX_CUR();
+ /* These two fields are not set until an inner lexing scope is
+ entered. But we need them set here. */
+ shared->ls_bufptr = s;
+ shared->ls_linestr = PL_linestr;
if (PL_lex_inwhat) {
/* Look for a newline. If the current buffer does not have one,
up as many levels as necessary to find one with a newline
after bufptr.
*/
- while (!(s = (char *)memchr(
+ while (!(s = (char *)memchr(
(void *)shared->ls_bufptr, '\n',
SvEND(shared->ls_linestr)-shared->ls_bufptr
- )))
+ )))
{
shared = shared->ls_prev;
/* shared is only null if we have gone beyond the outermost
}
}
}
- else { /* eval or we've already hit EOF */
- s = (char*)memchr((void*)s, '\n', PL_bufend - s);
- if (!s)
+ else { /* eval or we've already hit EOF */
+ s = (char*)memchr((void*)s, '\n', PL_bufend - s);
+ if (!s)
goto interminable;
- }
-
- linestr = shared->ls_linestr;
- bufend = SvEND(linestr);
- d = s;
- if (indented) {
- char *myolds = s;
-
- while (s < bufend - len + 1) {
- if (*s++ == '\n')
- ++PL_parser->herelines;
-
- if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
- char *backup = s;
- indent_len = 0;
-
- /* Only valid if it's preceded by whitespace only */
- while (backup != myolds && --backup >= myolds) {
- if (! SPACE_OR_TAB(*backup)) {
- break;
- }
- indent_len++;
- }
-
- /* No whitespace or all! */
- if (backup == s || *backup == '\n') {
- Newx(indent, indent_len + 1, char);
- memcpy(indent, backup + 1, indent_len);
- indent[indent_len] = 0;
- s--; /* before our delimiter */
- PL_parser->herelines--; /* this line doesn't count */
- break;
- }
- }
- }
- }
+ }
+
+ linestr = shared->ls_linestr;
+ bufend = SvEND(linestr);
+ d = s;
+ if (indented) {
+ char *myolds = s;
+
+ while (s < bufend - len + 1) {
+ if (*s++ == '\n')
+ ++PL_parser->herelines;
+
+ if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
+ char *backup = s;
+ indent_len = 0;
+
+ /* Only valid if it's preceded by whitespace only */
+ while (backup != myolds && --backup >= myolds) {
+ if (! SPACE_OR_TAB(*backup)) {
+ break;
+ }
+ indent_len++;
+ }
+
+ /* No whitespace or all! */
+ if (backup == s || *backup == '\n') {
+ Newx(indent, indent_len + 1, char);
+ memcpy(indent, backup + 1, indent_len);
+ indent[indent_len] = 0;
+ s--; /* before our delimiter */
+ PL_parser->herelines--; /* this line doesn't count */
+ break;
+ }
+ }
+ }
+ }
else {
- while (s < bufend - len + 1
- && memNE(s,PL_tokenbuf,len) )
- {
- if (*s++ == '\n')
- ++PL_parser->herelines;
- }
- }
-
- if (s >= bufend - len + 1) {
- goto interminable;
- }
-
- sv_setpvn(tmpstr,d+1,s-d);
- s += len - 1;
- /* the preceding stmt passes a newline */
- PL_parser->herelines++;
-
- /* s now points to the newline after the heredoc terminator.
- d points to the newline before the body of the heredoc.
- */
-
- /* We are going to modify linestr in place here, so set
- aside copies of the string if necessary for re-evals or
- (caller $n)[6]. */
- /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
- check shared->re_eval_str. */
- if (shared->re_eval_start || shared->re_eval_str) {
- /* Set aside the rest of the regexp */
- if (!shared->re_eval_str)
- shared->re_eval_str =
- newSVpvn(shared->re_eval_start,
- bufend - shared->re_eval_start);
- shared->re_eval_start -= s-d;
- }
-
- if (cxstack_ix >= 0
+ while (s < bufend - len + 1
+ && memNE(s,PL_tokenbuf,len) )
+ {
+ if (*s++ == '\n')
+ ++PL_parser->herelines;
+ }
+ }
+
+ if (s >= bufend - len + 1) {
+ goto interminable;
+ }
+
+ sv_setpvn_fresh(tmpstr,d+1,s-d);
+ s += len - 1;
+ /* the preceding stmt passes a newline */
+ PL_parser->herelines++;
+
+ /* s now points to the newline after the heredoc terminator.
+ d points to the newline before the body of the heredoc.
+ */
+
+ /* We are going to modify linestr in place here, so set
+ aside copies of the string if necessary for re-evals or
+ (caller $n)[6]. */
+ /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
+ check shared->re_eval_str. */
+ if (shared->re_eval_start || shared->re_eval_str) {
+ /* Set aside the rest of the regexp */
+ if (!shared->re_eval_str)
+ shared->re_eval_str =
+ newSVpvn(shared->re_eval_start,
+ bufend - shared->re_eval_start);
+ shared->re_eval_start -= s-d;
+ }
+
+ if (cxstack_ix >= 0
&& CxTYPE(cx) == CXt_EVAL
&& CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
&& cx->blk_eval.cur_text == linestr)
{
- cx->blk_eval.cur_text = newSVsv(linestr);
- cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
- }
-
- /* Copy everything from s onwards back to d. */
- Move(s,d,bufend-s + 1,char);
- SvCUR_set(linestr, SvCUR(linestr) - (s-d));
- /* Setting PL_bufend only applies when we have not dug deeper
- into other scopes, because sublex_done sets PL_bufend to
- SvEND(PL_linestr). */
- if (shared == PL_parser->lex_shared)
+ cx->blk_eval.cur_text = newSVsv(linestr);
+ cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
+ }
+
+ /* Copy everything from s onwards back to d. */
+ Move(s,d,bufend-s + 1,char);
+ SvCUR_set(linestr, SvCUR(linestr) - (s-d));
+ /* Setting PL_bufend only applies when we have not dug deeper
+ into other scopes, because sublex_done sets PL_bufend to
+ SvEND(PL_linestr). */
+ if (shared == PL_parser->lex_shared)
PL_bufend = SvEND(linestr);
- s = olds;
+ s = olds;
}
else {
SV *linestr_save;
char *oldbufptr_save;
char *oldoldbufptr_save;
streaming:
- SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
+ sv_grow_fresh(tmpstr, 80);
+ SvPVCLEAR_FRESH(tmpstr); /* avoid "uninitialized" warning */
term = PL_tokenbuf[1];
len--;
linestr_save = PL_linestr; /* must restore this afterwards */
does not matter what PL_linestr points to, since we are
about to croak; but in a quote-like op, linestr_save
will have been prospectively freed already, via
- SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
restore PL_linestr. */
SvREFCNT_dec_NN(PL_linestr);
PL_linestr = linestr_save;
PL_multi_end = origline + PL_parser->herelines;
if (indented && indent) {
- STRLEN linecount = 1;
- STRLEN herelen = SvCUR(tmpstr);
- char *ss = SvPVX(tmpstr);
- char *se = ss + herelen;
+ STRLEN linecount = 1;
+ STRLEN herelen = SvCUR(tmpstr);
+ char *ss = SvPVX(tmpstr);
+ char *se = ss + herelen;
SV *newstr = newSV(herelen+1);
SvPOK_on(newstr);
- /* Trim leading whitespace */
- while (ss < se) {
- /* newline only? Copy and move on */
- if (*ss == '\n') {
- sv_catpvs(newstr,"\n");
- ss++;
- linecount++;
+ /* Trim leading whitespace */
+ while (ss < se) {
+ /* newline only? Copy and move on */
+ if (*ss == '\n') {
+ sv_catpvs(newstr,"\n");
+ ss++;
+ linecount++;
- /* Found our indentation? Strip it */
- }
+ /* Found our indentation? Strip it */
+ }
else if (se - ss >= indent_len
- && memEQ(ss, indent, indent_len))
- {
- STRLEN le = 0;
- ss += indent_len;
+ && memEQ(ss, indent, indent_len))
+ {
+ STRLEN le = 0;
+ ss += indent_len;
- while ((ss + le) < se && *(ss + le) != '\n')
- le++;
+ while ((ss + le) < se && *(ss + le) != '\n')
+ le++;
- sv_catpvn(newstr, ss, le);
- ss += le;
+ sv_catpvn(newstr, ss, le);
+ ss += le;
- /* Line doesn't begin with our indentation? Croak */
- }
+ /* Line doesn't begin with our indentation? Croak */
+ }
else {
Safefree(indent);
- Perl_croak(aTHX_
- "Indentation on line %d of here-doc doesn't match delimiter",
- (int)linecount
- );
- }
- } /* while */
-
- /* avoid sv_setsv() as we dont wan't to COW here */
+ Perl_croak(aTHX_
+ "Indentation on line %d of here-doc doesn't match delimiter",
+ (int)linecount
+ );
+ }
+ } /* while */
+
+ /* avoid sv_setsv() as we don't want to COW here */
sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
- Safefree(indent);
- SvREFCNT_dec_NN(newstr);
+ Safefree(indent);
+ SvREFCNT_dec_NN(newstr);
}
if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
- SvPV_shrink_to_cur(tmpstr);
+ SvPV_shrink_to_cur(tmpstr);
}
if (!IN_BYTES) {
- if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
- SvUTF8_on(tmpstr);
+ if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
+ SvUTF8_on(tmpstr);
}
PL_lex_stuff = tmpstr;
interminable:
if (indent)
- Safefree(indent);
+ Safefree(indent);
SvREFCNT_dec(tmpstr);
CopLINE_set(PL_curcop, origline);
missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
/* scan_inputsymbol
takes: position of first '<' in input buffer
returns: position of first char following the matching '>' in
- input buffer
+ input buffer
side-effects: pl_yylval and lex_op are set.
This code handles:
end = (char *) memchr(s, '\n', PL_bufend - s);
if (!end)
- end = PL_bufend;
+ end = PL_bufend;
if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
nomagicopen = TRUE;
*d = '\0';
*/
if (len >= (I32)sizeof PL_tokenbuf)
- Perl_croak(aTHX_ "Excessively long <> operator");
+ Perl_croak(aTHX_ "Excessively long <> operator");
if (s >= end)
- Perl_croak(aTHX_ "Unterminated <> operator");
+ Perl_croak(aTHX_ "Unterminated <> operator");
s++;
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
- d += UTF ? UTF8SKIP(d) : 1;
+ d += UTF ? UTF8SKIP(d) : 1;
}
/* If we've tried to read what we allow filehandles to look like, and
*/
if (d - PL_tokenbuf != len) {
- pl_yylval.ival = OP_GLOB;
- s = scan_str(start,FALSE,FALSE,FALSE,NULL);
- if (!s)
- Perl_croak(aTHX_ "Glob not terminated");
- return s;
+ pl_yylval.ival = OP_GLOB;
+ s = scan_str(start,FALSE,FALSE,FALSE,NULL);
+ if (!s)
+ Perl_croak(aTHX_ "Glob not terminated");
+ return s;
}
else {
- bool readline_overriden = FALSE;
- GV *gv_readline;
- /* we're in a filehandle read situation */
- d = PL_tokenbuf;
-
- /* turn <> into <ARGV> */
- if (!len)
- Copy("ARGV",d,5,char);
-
- /* Check whether readline() is overriden */
- if ((gv_readline = gv_override("readline",8)))
- readline_overriden = TRUE;
-
- /* if <$fh>, create the ops to turn the variable into a
- filehandle
- */
- if (*d == '$') {
- /* try to find it in the pad for this block, otherwise find
- add symbol table ops
- */
- const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
- if (tmp != NOT_IN_PAD) {
- if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
- HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
- HEK * const stashname = HvNAME_HEK(stash);
- SV * const sym = sv_2mortal(newSVhek(stashname));
- sv_catpvs(sym, "::");
- sv_catpv(sym, d+1);
- d = SvPVX(sym);
- goto intro_sym;
- }
- else {
- OP * const o = newOP(OP_PADSV, 0);
- o->op_targ = tmp;
- PL_lex_op = readline_overriden
+ bool readline_overridden = FALSE;
+ GV *gv_readline;
+ /* we're in a filehandle read situation */
+ d = PL_tokenbuf;
+
+ /* turn <> into <ARGV> */
+ if (!len)
+ Copy("ARGV",d,5,char);
+
+ /* Check whether readline() is overridden */
+ if ((gv_readline = gv_override("readline",8)))
+ readline_overridden = TRUE;
+
+ /* if <$fh>, create the ops to turn the variable into a
+ filehandle
+ */
+ if (*d == '$') {
+ /* try to find it in the pad for this block, otherwise find
+ add symbol table ops
+ */
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
+ if (tmp != NOT_IN_PAD) {
+ if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
+ HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
+ HEK * const stashname = HvNAME_HEK(stash);
+ SV * const sym = newSVhek_mortal(stashname);
+ sv_catpvs(sym, "::");
+ sv_catpv(sym, d+1);
+ d = SvPVX(sym);
+ goto intro_sym;
+ }
+ else {
+ OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
+ PL_lex_op = readline_overridden
? newUNOP(OP_ENTERSUB, OPf_STACKED,
- op_append_elem(OP_LIST, o,
- newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
+ op_append_elem(OP_LIST, o,
+ newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
: newUNOP(OP_READLINE, 0, o);
- }
- }
- else {
- GV *gv;
- ++d;
+ }
+ }
+ else {
+ GV *gv;
+ ++d;
intro_sym:
- gv = gv_fetchpv(d,
- GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
- SVt_PV);
- PL_lex_op = readline_overriden
+ gv = gv_fetchpv(d,
+ GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
+ SVt_PV);
+ PL_lex_op = readline_overridden
? newUNOP(OP_ENTERSUB, OPf_STACKED,
- op_append_elem(OP_LIST,
- newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
- newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
+ op_append_elem(OP_LIST,
+ newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
+ newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
: newUNOP(OP_READLINE, 0,
- newUNOP(OP_RV2SV, 0,
- newGVOP(OP_GV, 0, gv)));
- }
- /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
- pl_yylval.ival = OP_NULL;
- }
-
- /* If it's none of the above, it must be a literal filehandle
- (<Foo::BAR> or <FOO>) so build a simple readline OP */
- else {
- GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
- PL_lex_op = readline_overriden
+ newUNOP(OP_RV2SV, 0,
+ newGVOP(OP_GV, 0, gv)));
+ }
+ /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
+ pl_yylval.ival = OP_NULL;
+ }
+
+ /* If it's none of the above, it must be a literal filehandle
+ (<Foo::BAR> or <FOO>) so build a simple readline OP */
+ else {
+ GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
+ PL_lex_op = readline_overridden
? newUNOP(OP_ENTERSUB, OPf_STACKED,
- op_append_elem(OP_LIST,
- newGVOP(OP_GV, 0, gv),
- newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
+ op_append_elem(OP_LIST,
+ newGVOP(OP_GV, 0, gv),
+ newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
: newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
- pl_yylval.ival = OP_NULL;
- }
+ pl_yylval.ival = OP_NULL;
+
+ /* leave the token generation above to avoid confusing the parser */
+ if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
+ no_bareword_filehandle(d);
+ }
+ }
}
return s;
/* scan_str
takes:
- start position in buffer
+ start position in buffer
keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
only if they are of the open/close form
- keep_delims preserve the delimiters around the string
- re_reparse compiling a run-time /(?{})/:
- collapse // to /, and skip encoding src
- delimp if non-null, this is set to the position of
- the closing delimiter, or just after it if
- the closing and opening delimiters differ
- (i.e., the opening delimiter of a substitu-
- tion replacement)
+ keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
+ delimp if non-null, this is set to the position of
+ the closing delimiter, or just after it if
+ the closing and opening delimiters differ
+ (i.e., the opening delimiter of a substitu-
+ tion replacement)
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
- updates the read buffer.
+ updates the read buffer.
This subroutine pulls a string out of the input. It is called for:
- q single quotes q(literal text)
- ' single quotes 'literal text'
- qq double quotes qq(interpolate $here please)
- " double quotes "interpolate $here please"
- qx backticks qx(/bin/ls -l)
- ` backticks `/bin/ls -l`
- qw quote words @EXPORT_OK = qw( func() $spam )
- m// regexp match m/this/
- s/// regexp substitute s/this/that/
- tr/// string transliterate tr/this/that/
- y/// string transliterate y/this/that/
- ($*@) sub prototypes sub foo ($)
- (stuff) sub attr parameters sub foo : attr(stuff)
- <> readline or globs <FOO>, <>, <$fh>, or <*.c>
+ q single quotes q(literal text)
+ ' single quotes 'literal text'
+ qq double quotes qq(interpolate $here please)
+ " double quotes "interpolate $here please"
+ qx backticks qx(/bin/ls -l)
+ ` backticks `/bin/ls -l`
+ qw quote words @EXPORT_OK = qw( func() $spam )
+ m// regexp match m/this/
+ s/// regexp substitute s/this/that/
+ tr/// string transliterate tr/this/that/
+ y/// string transliterate y/this/that/
+ ($*@) sub prototypes sub foo ($)
+ (stuff) sub attr parameters sub foo : attr(stuff)
+ <> readline or globs <FOO>, <>, <$fh>, or <*.c>
In most of these cases (all but <>, patterns and transliterate)
yylex() calls scan_str(). m// makes yylex() call scan_pat() which
char *
Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
- char **delimp
+ char **delimp
)
{
SV *sv; /* scalar value: string */
- const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
- char term; /* terminating character */
char *to; /* current position in the sv's data */
- I32 brackets = 1; /* bracket nesting level */
+ int brackets = 1; /* bracket nesting level */
bool d_is_utf8 = FALSE; /* is there any utf8 content? */
- IV termcode; /* terminating char. code */
- U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
- STRLEN termlen; /* length of terminating string */
+ UV open_delim_code; /* code point */
+ char open_delim_str[UTF8_MAXBYTES+1];
+ STRLEN delim_byte_len; /* each delimiter currently is the same number
+ of bytes */
line_t herelines;
- /* The delimiters that have a mirror-image closing one */
- const char * opening_delims = "([{<";
- const char * closing_delims = ")]}>";
-
/* The only non-UTF character that isn't a stand alone grapheme is
* white-space, hence can't be a delimiter. */
const char * non_grapheme_msg = "Use of unassigned code point or"
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
- if (isSPACE(*s)) {
- s = skipspace(s);
+ if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so
+ 'start' also has to change */
+ s = start = skipspace(s);
}
/* mark where we are, in case we need to report errors */
CLINE;
- /* after skipping whitespace, the next character is the terminator */
- term = *s;
- if (!UTF || UTF8_IS_INVARIANT(term)) {
- termcode = termstr[0] = term;
- termlen = 1;
+ /* after skipping whitespace, the next character is the delimiter */
+ if (! UTF || UTF8_IS_INVARIANT(*s)) {
+ open_delim_code = (U8) *s;
+ open_delim_str[0] = *s;
+ delim_byte_len = 1;
}
else {
- termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
- if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- termcode)))
+ open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
+ &delim_byte_len);
+ if (UNLIKELY(! is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ open_delim_code)))
{
yyerror(non_grapheme_msg);
}
- Copy(s, termstr, termlen, U8);
+ Copy(s, open_delim_str, delim_byte_len, char);
}
+ open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
+
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
- PL_multi_open = termcode;
+ PL_multi_open = open_delim_code;
herelines = PL_parser->herelines;
+ const char * legal_paired_opening_delims;
+ const char * legal_paired_closing_delims;
+ const char * deprecated_opening_delims;
+ if (FEATURE_MORE_DELIMS_IS_ENABLED) {
+ if (UTF) {
+ legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
+ legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
+
+ /* We are deprecating using a closing delimiter as the opening, in
+ * case we want in the future to accept them reversed. The string
+ * may include ones that are legal, but the code below won't look
+ * at this string unless it didn't find a legal opening one */
+ deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
+ }
+ else {
+ legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
+ legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
+ deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
+ }
+ }
+ else {
+ legal_paired_opening_delims = "([{<";
+ legal_paired_closing_delims = ")]}>";
+ deprecated_opening_delims = (UTF)
+ ? DEPRECATED_OPENING_UTF8_BRACKETS
+ : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
+ }
+
+ const char * legal_paired_opening_delims_end = legal_paired_opening_delims
+ + strlen(legal_paired_opening_delims);
+ const char * deprecated_delims_end = deprecated_opening_delims
+ + strlen(deprecated_opening_delims);
+
+ const char * close_delim_str = open_delim_str;
+ UV close_delim_code = open_delim_code;
+
/* If the delimiter has a mirror-image closing one, get it */
- if (term && (tmps = strchr(opening_delims, term))) {
- termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
+ const char *tmps = ninstr(legal_paired_opening_delims,
+ legal_paired_opening_delims_end,
+ open_delim_str, open_delim_str + delim_byte_len);
+ if (tmps) {
+ /* Here, there is a paired delimiter, and tmps points to its position
+ in the string of the accepted opening paired delimiters. The
+ corresponding position in the string of closing ones is the
+ beginning of the paired mate. Both contain the same number of
+ bytes. */
+ close_delim_str = legal_paired_closing_delims
+ + (tmps - legal_paired_opening_delims);
+
+ /* The list of paired delimiters contains all the ASCII ones that have
+ * always been legal, and no other ASCIIs. Don't raise a message if
+ * using one of these */
+ if (! isASCII(open_delim_code)) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
+ "Use of '%" UTF8f "' is experimental as a string delimiter",
+ UTF8fARG(UTF, delim_byte_len, open_delim_str));
+ }
+
+ close_delim_code = (UTF)
+ ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
+ : * (U8 *) close_delim_str;
+ }
+ else { /* Here, the delimiter isn't paired, hence the close is the same as
+ the open; and has already been set up. But make sure it isn't
+ deprecated to use this particular delimiter, as we plan
+ eventually to make it paired. */
+ if (ninstr(deprecated_opening_delims, deprecated_delims_end,
+ open_delim_str, open_delim_str + delim_byte_len))
+ {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
+ "Use of '%" UTF8f "' is deprecated as a string delimiter",
+ UTF8fARG(UTF, delim_byte_len, open_delim_str));
+ }
+
+ /* Note that a NUL may be used as a delimiter, and this happens when
+ * delimiting an empty string, and no special handling for it is
+ * needed, as ninstr() calls are used */
}
- PL_multi_close = termcode;
+ PL_multi_close = close_delim_code;
if (PL_multi_open == PL_multi_close) {
keep_bracketed_quoted = FALSE;
/* create a new SV to hold the contents. 79 is the SV's initial length.
What a random number. */
sv = newSV_type(SVt_PVIV);
- SvGROW(sv, 80);
- SvIV_set(sv, termcode);
+ sv_grow_fresh(sv, 79);
+ SvIV_set(sv, close_delim_code);
(void)SvPOK_only(sv); /* validate pointer */
/* move past delimiter and try to read a complete string */
if (keep_delims)
- sv_catpvn(sv, s, termlen);
- s += termlen;
+ sv_catpvn(sv, s, delim_byte_len);
+ s += delim_byte_len;
for (;;) {
- /* extend sv if need be */
- SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
- /* set 'to' to the next character in the sv's string */
- to = SvPVX(sv)+SvCUR(sv);
-
- /* if open delimiter is the close delimiter read unbridle */
- if (PL_multi_open == PL_multi_close) {
- for (; s < PL_bufend; s++,to++) {
- /* embedded newlines increment the current line number */
- if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- COPLINE_INC_WITH_HERELINES;
- /* handle quoted delimiters */
- if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
- if (!keep_bracketed_quoted
- && (s[1] == term
- || (re_reparse && s[1] == '\\'))
- )
- s++;
- else /* any other quotes are simply copied straight through */
- *to++ = *s++;
- }
- /* terminate when run out of buffer (the for() condition), or
- have found the terminator */
- else if (*s == term) { /* First byte of terminator matches */
- if (termlen == 1) /* If is the only byte, are done */
- break;
-
- /* If the remainder of the terminator matches, also are
- * done, after checking that is a separate grapheme */
- if ( s + termlen <= PL_bufend
- && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
- {
- if ( UTF
- && UNLIKELY(! is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- termcode)))
- {
- yyerror(non_grapheme_msg);
- }
- break;
- }
- }
- else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
- d_is_utf8 = TRUE;
+ /* extend sv if need be */
+ SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
+ /* set 'to' to the next character in the sv's string */
+ to = SvPVX(sv)+SvCUR(sv);
+
+ /* read until we run out of string, or we find the closing delimiter */
+ while (s < PL_bufend) {
+ /* embedded newlines increment the line count */
+ if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
+ COPLINE_INC_WITH_HERELINES;
+
+ /* backslashes can escape the closing delimiter */
+ if ( *s == '\\' && s < PL_bufend - delim_byte_len
+
+ /* ... but not if the delimiter itself is a backslash */
+ && close_delim_code != '\\')
+ {
+ /* Here, we have an escaping backslash. If we're supposed to
+ * discard those that escape the closing delimiter, just
+ * discard this one */
+ if ( ! keep_bracketed_quoted
+ && ( memEQ(s + 1, open_delim_str, delim_byte_len)
+ || ( PL_multi_open == PL_multi_close
+ && re_reparse && s[1] == '\\')
+ || memEQ(s + 1, close_delim_str, delim_byte_len)))
+ {
+ s++;
+ }
+ else /* any other escapes are simply copied straight through */
+ *to++ = *s++;
+ }
+ else if ( s < PL_bufend - (delim_byte_len - 1)
+ && memEQ(s, close_delim_str, delim_byte_len)
+ && --brackets <= 0)
+ {
+ /* Found unescaped closing delimiter, unnested if we care about
+ * that; so are done.
+ *
+ * In the case of the opening and closing delimiters being
+ * different, we have to deal with nesting; the conditional
+ * above makes sure we don't get here until the nesting level,
+ * 'brackets', is back down to zero. In the other case,
+ * nesting isn't an issue, and 'brackets' never can get
+ * incremented above 0, so will come here at the first closing
+ * delimiter.
+ *
+ * Only grapheme delimiters are legal. */
+ if ( UTF /* All Non-UTF-8's are graphemes */
+ && UNLIKELY(! is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ close_delim_code)))
+ {
+ yyerror(non_grapheme_msg);
}
- *to = *s;
- }
- }
-
- /* if the terminator isn't the same as the start character (e.g.,
- matched brackets), we have to allow more in the quoting, and
- be prepared for nested brackets.
- */
- else {
- /* read until we run out of string, or we find the terminator */
- for (; s < PL_bufend; s++,to++) {
- /* embedded newlines increment the line count */
- if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- COPLINE_INC_WITH_HERELINES;
- /* backslashes can escape the open or closing characters */
- if (*s == '\\' && s+1 < PL_bufend) {
- if (!keep_bracketed_quoted
- && ( ((UV)s[1] == PL_multi_open)
- || ((UV)s[1] == PL_multi_close) ))
- {
- s++;
- }
- else
- *to++ = *s++;
- }
- /* allow nested opens and closes */
- else if ((UV)*s == PL_multi_close && --brackets <= 0)
- break;
- else if ((UV)*s == PL_multi_open)
- brackets++;
- else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
- d_is_utf8 = TRUE;
- *to = *s;
- }
- }
- /* terminate the copied string and update the sv's end-of-string */
- *to = '\0';
- SvCUR_set(sv, to - SvPVX_const(sv));
-
- /*
- * this next chunk reads more into the buffer if we're not done yet
- */
-
- if (s < PL_bufend)
- break; /* handle case where we are done yet :-) */
+ break;
+ }
+ /* No nesting if open eq close */
+ else if ( PL_multi_open != PL_multi_close
+ && s < PL_bufend - (delim_byte_len - 1)
+ && memEQ(s, open_delim_str, delim_byte_len))
+ {
+ brackets++;
+ }
+
+ /* Here, still in the middle of the string; copy this character */
+ if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
+ *to++ = *s++;
+ }
+ else {
+ size_t this_char_len = UTF8SKIP(s);
+ Copy(s, to, this_char_len, char);
+ s += this_char_len;
+ to += this_char_len;
+
+ d_is_utf8 = TRUE;
+ }
+ } /* End of loop through buffer */
+
+ /* Here, found end of the string, OR ran out of buffer: terminate the
+ * copied string and update the sv's end-of-string */
+ *to = '\0';
+ SvCUR_set(sv, to - SvPVX_const(sv));
+
+ /*
+ * this next chunk reads more into the buffer if we're not done yet
+ */
+
+ if (s < PL_bufend)
+ break; /* handle case where we are done yet :-) */
#ifndef PERL_STRICT_CR
- if (to - SvPVX_const(sv) >= 2) {
- if ( (to[-2] == '\r' && to[-1] == '\n')
+ if (to - SvPVX_const(sv) >= 2) {
+ if ( (to[-2] == '\r' && to[-1] == '\n')
|| (to[-2] == '\n' && to[-1] == '\r'))
- {
- to[-2] = '\n';
- to--;
- SvCUR_set(sv, to - SvPVX_const(sv));
- }
- else if (to[-1] == '\r')
- to[-1] = '\n';
- }
- else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
- to[-1] = '\n';
+ {
+ to[-2] = '\n';
+ to--;
+ SvCUR_set(sv, to - SvPVX_const(sv));
+ }
+ else if (to[-1] == '\r')
+ to[-1] = '\n';
+ }
+ else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
+ to[-1] = '\n';
#endif
- /* if we're out of file, or a read fails, bail and reset the current
- line marker so we can report where the unterminated string began
- */
- COPLINE_INC_WITH_HERELINES;
- PL_bufptr = PL_bufend;
- if (!lex_next_chunk(0)) {
- sv_free(sv);
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- return NULL;
- }
- s = start = PL_bufptr;
- }
+ /* if we're out of file, or a read fails, bail and reset the current
+ line marker so we can report where the unterminated string began
+ */
+ COPLINE_INC_WITH_HERELINES;
+ PL_bufptr = PL_bufend;
+ if (!lex_next_chunk(0)) {
+ ASSUME(sv);
+ SvREFCNT_dec(sv);
+ CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+ return NULL;
+ }
+ s = start = PL_bufptr;
+ } /* End of infinite loop */
/* at this point, we have successfully read the delimited string */
if (keep_delims)
- sv_catpvn(sv, s, termlen);
- s += termlen;
+ sv_catpvn(sv, s, delim_byte_len);
+ s += delim_byte_len;
if (d_is_utf8)
- SvUTF8_on(sv);
+ SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
CopLINE_set(PL_curcop, PL_multi_start);
/* if we allocated too much space, give some back */
if (SvCUR(sv) + 5 < SvLEN(sv)) {
- SvLEN_set(sv, SvCUR(sv) + 1);
- SvPV_shrink_to_cur(sv);
+ SvLEN_set(sv, SvCUR(sv) + 1);
+ SvPV_shrink_to_cur(sv);
}
/* decide whether this is the first or second quoted string we've read
*/
if (PL_lex_stuff)
- PL_parser->lex_sub_repl = sv;
+ PL_parser->lex_sub_repl = sv;
else
- PL_lex_stuff = sv;
- if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
+ PL_lex_stuff = sv;
+ if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
return s;
}
bool warned_about_underscore = 0;
I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
#define WARN_ABOUT_UNDERSCORE() \
- do { \
- if (!warned_about_underscore) { \
- warned_about_underscore = 1; \
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
- "Misplaced _ in number"); \
- } \
- } while(0)
+ do { \
+ if (!warned_about_underscore) { \
+ warned_about_underscore = 1; \
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
+ "Misplaced _ in number"); \
+ } \
+ } while(0)
/* Hexadecimal floating point.
*
* In many places (where we have quads and NV is IEEE 754 double)
switch (*s) {
default:
- Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
+ Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
/* if it starts with a 0, it could be an octal number, a decimal in
0.13 disguise, or a hexadecimal number, or a binary number. */
case '0':
- {
- /* variables:
- u holds the "number so far"
- overflowed was the number more than we can hold?
-
- Shift is used when we add a digit. It also serves as an "are
- we in octal/hex/binary?" indicator to disallow hex characters
- when in octal mode.
- */
- NV n = 0.0;
- UV u = 0;
- bool overflowed = FALSE;
- bool just_zero = TRUE; /* just plain 0 or binary number? */
+ {
+ /* variables:
+ u holds the "number so far"
+ overflowed was the number more than we can hold?
+
+ Shift is used when we add a digit. It also serves as an "are
+ we in octal/hex/binary?" indicator to disallow hex characters
+ when in octal mode.
+ */
+ NV n = 0.0;
+ UV u = 0;
+ bool overflowed = FALSE;
+ bool just_zero = TRUE; /* just plain 0 or binary number? */
bool has_digs = FALSE;
- static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
- static const char* const bases[5] =
- { "", "binary", "", "octal", "hexadecimal" };
- static const char* const Bases[5] =
- { "", "Binary", "", "Octal", "Hexadecimal" };
- static const char* const maxima[5] =
- { "",
- "0b11111111111111111111111111111111",
- "",
- "037777777777",
- "0xffffffff" };
-
- /* check for hex */
- if (isALPHA_FOLD_EQ(s[1], 'x')) {
- shift = 4;
- s += 2;
- just_zero = FALSE;
- } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
- shift = 1;
- s += 2;
- just_zero = FALSE;
- }
- /* check for a decimal in disguise */
- else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
- goto decimal;
- /* so it must be octal */
- else {
- shift = 3;
- s++;
+ static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
+ static const char* const bases[5] =
+ { "", "binary", "", "octal", "hexadecimal" };
+ static const char* const Bases[5] =
+ { "", "Binary", "", "Octal", "Hexadecimal" };
+ static const char* const maxima[5] =
+ { "",
+ "0b11111111111111111111111111111111",
+ "",
+ "037777777777",
+ "0xffffffff" };
+
+ /* check for hex */
+ if (isALPHA_FOLD_EQ(s[1], 'x')) {
+ shift = 4;
+ s += 2;
+ just_zero = FALSE;
+ } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
+ shift = 1;
+ s += 2;
+ just_zero = FALSE;
+ }
+ /* check for a decimal in disguise */
+ else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
+ goto decimal;
+ /* so it must be octal */
+ else {
+ shift = 3;
+ s++;
if (isALPHA_FOLD_EQ(*s, 'o')) {
s++;
just_zero = FALSE;
new_octal = TRUE;
}
- }
-
- if (*s == '_') {
- WARN_ABOUT_UNDERSCORE();
- lastub = s++;
- }
-
- /* read the rest of the number */
- for (;;) {
- /* x is used in the overflow test,
- b is the digit we're adding on. */
- UV x, b;
-
- switch (*s) {
-
- /* if we don't mention it, we're done */
- default:
- goto out;
-
- /* _ are ignored -- but warned about if consecutive */
- case '_':
- if (lastub && s == lastub + 1)
- WARN_ABOUT_UNDERSCORE();
- lastub = s++;
- break;
-
- /* 8 and 9 are not octal */
- case '8': case '9':
- if (shift == 3)
- yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
- /* FALLTHROUGH */
-
- /* octal digits */
- case '2': case '3': case '4':
- case '5': case '6': case '7':
- if (shift == 1)
- yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
- /* FALLTHROUGH */
-
- case '0': case '1':
- b = *s++ & 15; /* ASCII digit -> value of digit */
- goto digit;
-
- /* hex digits */
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- /* make sure they said 0x */
- if (shift != 4)
- goto out;
- b = (*s++ & 7) + 9;
-
- /* Prepare to put the digit we have onto the end
- of the number so far. We check for overflows.
- */
-
- digit:
- just_zero = FALSE;
+ }
+
+ if (*s == '_') {
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s++;
+ }
+
+ /* read the rest of the number */
+ for (;;) {
+ /* x is used in the overflow test,
+ b is the digit we're adding on. */
+ UV x, b;
+
+ switch (*s) {
+
+ /* if we don't mention it, we're done */
+ default:
+ goto out;
+
+ /* _ are ignored -- but warned about if consecutive */
+ case '_':
+ if (lastub && s == lastub + 1)
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s++;
+ break;
+
+ /* 8 and 9 are not octal */
+ case '8': case '9':
+ if (shift == 3)
+ yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
+ /* FALLTHROUGH */
+
+ /* octal digits */
+ case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ if (shift == 1)
+ yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
+ /* FALLTHROUGH */
+
+ case '0': case '1':
+ b = *s++ & 15; /* ASCII digit -> value of digit */
+ goto digit;
+
+ /* hex digits */
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ /* make sure they said 0x */
+ if (shift != 4)
+ goto out;
+ b = (*s++ & 7) + 9;
+
+ /* Prepare to put the digit we have onto the end
+ of the number so far. We check for overflows.
+ */
+
+ digit:
+ just_zero = FALSE;
has_digs = TRUE;
- if (!overflowed) {
- assert(shift >= 0);
- x = u << shift; /* make room for the digit */
+ if (!overflowed) {
+ assert(shift >= 0);
+ x = u << shift; /* make room for the digit */
total_bits += shift;
- if ((x >> shift) != u
- && !(PL_hints & HINT_NEW_BINARY)) {
- overflowed = TRUE;
- n = (NV) u;
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in %s number",
+ if ((x >> shift) != u
+ && !(PL_hints & HINT_NEW_BINARY)) {
+ overflowed = TRUE;
+ n = (NV) u;
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in %s number",
bases[shift]);
- } else
- u = x | b; /* add the digit to the end */
- }
- if (overflowed) {
- n *= nvshift[shift];
- /* If an NV has not enough bits in its
- * mantissa to represent an UV this summing of
- * small low-order numbers is a waste of time
- * (because the NV cannot preserve the
- * low-order bits anyway): we could just
- * remember when did we overflow and in the
- * end just multiply n by the right
- * amount. */
- n += (NV) b;
- }
+ } else
+ u = x | b; /* add the digit to the end */
+ }
+ if (overflowed) {
+ n *= nvshift[shift];
+ /* If an NV has not enough bits in its
+ * mantissa to represent an UV this summing of
+ * small low-order numbers is a waste of time
+ * (because the NV cannot preserve the
+ * low-order bits anyway): we could just
+ * remember when did we overflow and in the
+ * end just multiply n by the right
+ * amount. */
+ n += (NV) b;
+ }
if (high_non_zero == 0 && b > 0)
high_non_zero = b;
goto out;
}
- break;
- }
- }
+ break;
+ }
+ }
- /* if we get here, we had success: make a scalar value from
- the number.
- */
- out:
+ /* if we get here, we had success: make a scalar value from
+ the number.
+ */
+ out:
- /* final misplaced underbar check */
- if (s[-1] == '_')
- WARN_ABOUT_UNDERSCORE();
+ /* final misplaced underbar check */
+ if (s[-1] == '_')
+ WARN_ABOUT_UNDERSCORE();
if (UNLIKELY(HEXFP_PEEK(s))) {
/* Do sloppy (on the underbars) but quick detection
NV nv_mult = 1.0;
#endif
bool accumulate = TRUE;
- U8 b;
+ U8 b = 0; /* silence compiler warning */
int lim = 1 << shift;
for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
*h == '_'); h++) {
if (significant_bits < NV_MANT_DIG) {
/* We are in the long "run" of xdigits,
* accumulate the full four bits. */
- assert(shift >= 0);
+ assert(shift >= 0);
hexfp_uquad <<= shift;
hexfp_uquad |= b;
hexfp_frac_bits += shift;
significant_bits - NV_MANT_DIG;
if (tail <= 0)
tail += shift;
- assert(tail >= 0);
+ assert(tail >= 0);
hexfp_uquad <<= tail;
- assert((shift - tail) >= 0);
+ assert((shift - tail) >= 0);
hexfp_uquad |= b >> (shift - tail);
hexfp_frac_bits += tail;
PL_bufptr = oldbp;
}
- if (overflowed) {
- if (n > 4294967295.0)
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "%s number > %s non-portable",
+ if (overflowed) {
+ if (n > 4294967295.0)
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
+ "%s number > %s non-portable",
Bases[shift],
new_octal ? "0o37777777777" : maxima[shift]);
- sv = newSVnv(n);
- }
- else {
+ sv = newSVnv(n);
+ }
+ else {
#if UVSIZE > 4
- if (u > 0xffffffff)
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "%s number > %s non-portable",
+ if (u > 0xffffffff)
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
+ "%s number > %s non-portable",
Bases[shift],
new_octal ? "0o37777777777" : maxima[shift]);
#endif
- sv = newSVuv(u);
- }
- if (just_zero && (PL_hints & HINT_NEW_INTEGER))
- sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL, 0, NULL);
- else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary",
+ sv = newSVuv(u);
+ }
+ if (just_zero && (PL_hints & HINT_NEW_INTEGER))
+ sv = new_constant(start, s - start, "integer",
sv, NULL, NULL, 0, NULL);
- }
- break;
+ else if (PL_hints & HINT_NEW_BINARY)
+ sv = new_constant(start, s - start, "binary",
+ sv, NULL, NULL, 0, NULL);
+ }
+ break;
/*
handle decimal numbers.
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': case '.':
decimal:
- d = PL_tokenbuf;
- e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
+ d = PL_tokenbuf;
+ e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
floatit = FALSE;
if (hexfp) {
floatit = TRUE;
}
}
- /* read next group of digits and _ and copy into d */
- while (isDIGIT(*s)
+ /* read next group of digits and _ and copy into d */
+ while (isDIGIT(*s)
|| *s == '_'
|| UNLIKELY(hexfp && isXDIGIT(*s)))
{
- /* skip underscores, checking for misplaced ones
- if -w is on
- */
- if (*s == '_') {
- if (lastub && s == lastub + 1)
- WARN_ABOUT_UNDERSCORE();
- lastub = s++;
- }
- else {
- /* check for end of fixed-length buffer */
- if (d >= e)
- Perl_croak(aTHX_ "%s", number_too_long);
- /* if we're ok, copy the character */
- *d++ = *s++;
- }
- }
-
- /* final misplaced underbar check */
- if (lastub && s == lastub + 1)
- WARN_ABOUT_UNDERSCORE();
-
- /* read a decimal portion if there is one. avoid
- 3..5 being interpreted as the number 3. followed
- by .5
- */
- if (*s == '.' && s[1] != '.') {
- floatit = TRUE;
- *d++ = *s++;
-
- if (*s == '_') {
- WARN_ABOUT_UNDERSCORE();
- lastub = s;
- }
-
- /* copy, ignoring underbars, until we run out of digits.
- */
- for (; isDIGIT(*s)
+ /* skip underscores, checking for misplaced ones
+ if -w is on
+ */
+ if (*s == '_') {
+ if (lastub && s == lastub + 1)
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s++;
+ }
+ else {
+ /* check for end of fixed-length buffer */
+ if (d >= e)
+ Perl_croak(aTHX_ "%s", number_too_long);
+ /* if we're ok, copy the character */
+ *d++ = *s++;
+ }
+ }
+
+ /* final misplaced underbar check */
+ if (lastub && s == lastub + 1)
+ WARN_ABOUT_UNDERSCORE();
+
+ /* read a decimal portion if there is one. avoid
+ 3..5 being interpreted as the number 3. followed
+ by .5
+ */
+ if (*s == '.' && s[1] != '.') {
+ floatit = TRUE;
+ *d++ = *s++;
+
+ if (*s == '_') {
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s;
+ }
+
+ /* copy, ignoring underbars, until we run out of digits.
+ */
+ for (; isDIGIT(*s)
|| *s == '_'
|| UNLIKELY(hexfp && isXDIGIT(*s));
s++)
{
- /* fixed length buffer check */
- if (d >= e)
- Perl_croak(aTHX_ "%s", number_too_long);
- if (*s == '_') {
- if (lastub && s == lastub + 1)
- WARN_ABOUT_UNDERSCORE();
- lastub = s;
- }
- else
- *d++ = *s;
- }
- /* fractional part ending in underbar? */
- if (s[-1] == '_')
- WARN_ABOUT_UNDERSCORE();
- if (*s == '.' && isDIGIT(s[1])) {
- /* oops, it's really a v-string, but without the "v" */
- s = start;
- goto vstring;
- }
- }
-
- /* read exponent part, if present */
- if ((isALPHA_FOLD_EQ(*s, 'e')
+ /* fixed length buffer check */
+ if (d >= e)
+ Perl_croak(aTHX_ "%s", number_too_long);
+ if (*s == '_') {
+ if (lastub && s == lastub + 1)
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s;
+ }
+ else
+ *d++ = *s;
+ }
+ /* fractional part ending in underbar? */
+ if (s[-1] == '_')
+ WARN_ABOUT_UNDERSCORE();
+ if (*s == '.' && isDIGIT(s[1])) {
+ /* oops, it's really a v-string, but without the "v" */
+ s = start;
+ goto vstring;
+ }
+ }
+
+ /* read exponent part, if present */
+ if ((isALPHA_FOLD_EQ(*s, 'e')
|| UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
&& memCHRs("+-0123456789_", s[1]))
{
/* regardless of whether user said 3E5 or 3e5, use lower 'e',
ditto for p (hexfloats) */
if ((isALPHA_FOLD_EQ(*s, 'e'))) {
- /* At least some Mach atof()s don't grok 'E' */
+ /* At least some Mach atof()s don't grok 'E' */
*d++ = 'e';
}
else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
*d++ = 'p';
}
- s++;
+ s++;
- /* stray preinitial _ */
- if (*s == '_') {
- WARN_ABOUT_UNDERSCORE();
- lastub = s++;
- }
+ /* stray preinitial _ */
+ if (*s == '_') {
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s++;
+ }
- /* allow positive or negative exponent */
- if (*s == '+' || *s == '-')
- *d++ = *s++;
+ /* allow positive or negative exponent */
+ if (*s == '+' || *s == '-')
+ *d++ = *s++;
- /* stray initial _ */
- if (*s == '_') {
- WARN_ABOUT_UNDERSCORE();
- lastub = s++;
- }
+ /* stray initial _ */
+ if (*s == '_') {
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s++;
+ }
- /* read digits of exponent */
- while (isDIGIT(*s) || *s == '_') {
- if (isDIGIT(*s)) {
+ /* read digits of exponent */
+ while (isDIGIT(*s) || *s == '_') {
+ if (isDIGIT(*s)) {
++exp_digits;
- if (d >= e)
- Perl_croak(aTHX_ "%s", number_too_long);
- *d++ = *s++;
- }
- else {
- if (((lastub && s == lastub + 1)
+ if (d >= e)
+ Perl_croak(aTHX_ "%s", number_too_long);
+ *d++ = *s++;
+ }
+ else {
+ if (((lastub && s == lastub + 1)
|| (!isDIGIT(s[1]) && s[1] != '_')))
- WARN_ABOUT_UNDERSCORE();
- lastub = s++;
- }
- }
+ WARN_ABOUT_UNDERSCORE();
+ lastub = s++;
+ }
+ }
if (!exp_digits) {
/* no exponent digits, the [eEpP] could be for something else,
else {
floatit = TRUE;
}
- }
+ }
- /*
+ /*
We try to do an integer conversion first if no characters
indicating "float" have been found.
- */
+ */
- if (!floatit) {
- UV uv;
- const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
+ if (!floatit) {
+ UV uv;
+ const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
if (flags == IS_NUMBER_IN_UV) {
if (uv <= IV_MAX)
- sv = newSViv(uv); /* Prefer IVs over UVs. */
+ sv = newSViv(uv); /* Prefer IVs over UVs. */
else
- sv = newSVuv(uv);
+ sv = newSVuv(uv);
} else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
if (uv <= (UV) IV_MIN)
sv = newSViv(-(IV)uv);
else
- floatit = TRUE;
+ floatit = TRUE;
} else
floatit = TRUE;
}
- if (floatit) {
- /* terminate the string */
- *d = '\0';
+ if (floatit) {
+ /* terminate the string */
+ *d = '\0';
if (UNLIKELY(hexfp)) {
# ifdef NV_MANT_DIG
if (significant_bits > NV_MANT_DIG)
nv = Atof(PL_tokenbuf);
}
sv = newSVnv(nv);
- }
+ }
- if ( floatit
- ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
- const char *const key = floatit ? "float" : "integer";
- const STRLEN keylen = floatit ? 5 : 7;
- sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
- key, keylen, sv, NULL, NULL, 0, NULL);
- }
- break;
+ if ( floatit
+ ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
+ const char *const key = floatit ? "float" : "integer";
+ const STRLEN keylen = floatit ? 5 : 7;
+ sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
+ key, keylen, sv, NULL, NULL, 0, NULL);
+ }
+ break;
/* if it starts with a v, it could be a v-string */
case 'v':
vstring:
- sv = newSV(5); /* preallocate storage space */
- ENTER_with_name("scan_vstring");
- SAVEFREESV(sv);
- s = scan_vstring(s, PL_bufend, sv);
- SvREFCNT_inc_simple_void_NN(sv);
- LEAVE_with_name("scan_vstring");
- break;
+ sv = newSV(5); /* preallocate storage space */
+ ENTER_with_name("scan_vstring");
+ SAVEFREESV(sv);
+ s = scan_vstring(s, PL_bufend, sv);
+ SvREFCNT_inc_simple_void_NN(sv);
+ LEAVE_with_name("scan_vstring");
+ break;
}
/* make the op for the constant and return */
if (sv)
- lvalp->opval = newSVOP(OP_CONST, 0, sv);
+ lvalp->opval = newSVOP(OP_CONST, 0, sv);
else
- lvalp->opval = NULL;
+ lvalp->opval = NULL;
return (char *)s;
}
while (!needargs) {
char *eol;
- if (*s == '.') {
+ if (*s == '.') {
char *t = s+1;
#ifdef PERL_STRICT_CR
- while (SPACE_OR_TAB(*t))
- t++;
+ while (SPACE_OR_TAB(*t))
+ t++;
#else
- while (SPACE_OR_TAB(*t) || *t == '\r')
- t++;
+ while (SPACE_OR_TAB(*t) || *t == '\r')
+ t++;
#endif
- if (*t == '\n' || t == PL_bufend) {
- eofmt = TRUE;
- break;
- }
- }
- eol = (char *) memchr(s,'\n',PL_bufend-s);
- if (!eol++)
- eol = PL_bufend;
- if (*s != '#') {
+ if (*t == '\n' || t == PL_bufend) {
+ eofmt = TRUE;
+ break;
+ }
+ }
+ eol = (char *) memchr(s,'\n',PL_bufend-s);
+ if (! eol) {
+ eol = PL_bufend;
+ }
+ else {
+ eol++;
+ }
+ if (*s != '#') {
char *t;
- for (t = s; t < eol; t++) {
- if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
- needargs = FALSE;
- goto enough; /* ~~ must be first line in formline */
- }
- if (*t == '@' || *t == '^')
- needargs = TRUE;
- }
- if (eol > s) {
- sv_catpvn(stuff, s, eol-s);
+ for (t = s; t < eol; t++) {
+ if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
+ needargs = FALSE;
+ goto enough; /* ~~ must be first line in formline */
+ }
+ if (*t == '@' || *t == '^')
+ needargs = TRUE;
+ }
+ if (eol > s) {
+ sv_catpvn(stuff, s, eol-s);
#ifndef PERL_STRICT_CR
- if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
- char *end = SvPVX(stuff) + SvCUR(stuff);
- end[-2] = '\n';
- end[-1] = '\0';
- SvCUR_set(stuff, SvCUR(stuff) - 1);
- }
+ if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
+ char *end = SvPVX(stuff) + SvCUR(stuff);
+ end[-2] = '\n';
+ end[-1] = '\0';
+ SvCUR_set(stuff, SvCUR(stuff) - 1);
+ }
#endif
- }
- else
- break;
- }
- s = (char*)eol;
- if ((PL_rsfp || PL_parser->filtered)
- && PL_parser->form_lex_state == LEX_NORMAL) {
- bool got_some;
- PL_bufptr = PL_bufend;
- COPLINE_INC_WITH_HERELINES;
- got_some = lex_next_chunk(0);
- CopLINE_dec(PL_curcop);
- s = PL_bufptr;
- if (!got_some)
- break;
- }
- incline(s, PL_bufend);
+ }
+ else
+ break;
+ }
+ s = (char*)eol;
+ if ((PL_rsfp || PL_parser->filtered)
+ && PL_parser->form_lex_state == LEX_NORMAL) {
+ bool got_some;
+ PL_bufptr = PL_bufend;
+ COPLINE_INC_WITH_HERELINES;
+ got_some = lex_next_chunk(0);
+ CopLINE_dec(PL_curcop);
+ s = PL_bufptr;
+ if (!got_some)
+ break;
+ }
+ incline(s, PL_bufend);
}
enough:
if (!SvCUR(stuff) || needargs)
- PL_lex_state = PL_parser->form_lex_state;
+ PL_lex_state = PL_parser->form_lex_state;
if (SvCUR(stuff)) {
- PL_expect = XSTATE;
- if (needargs) {
- const char *s2 = s;
- while (isSPACE(*s2) && *s2 != '\n')
- s2++;
- if (*s2 == '{') {
- PL_expect = XTERMBLOCK;
- NEXTVAL_NEXTTOKE.ival = 0;
- force_next(DO);
- }
- NEXTVAL_NEXTTOKE.ival = 0;
- force_next(FORMLBRACK);
- }
- if (!IN_BYTES) {
- if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
- SvUTF8_on(stuff);
- }
+ PL_expect = XSTATE;
+ if (needargs) {
+ const char *s2 = s;
+ while (isSPACE(*s2) && *s2 != '\n')
+ s2++;
+ if (*s2 == '{') {
+ PL_expect = XTERMBLOCK;
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next(KW_DO);
+ }
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next(FORMLBRACK);
+ }
+ if (!IN_BYTES) {
+ if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
+ SvUTF8_on(stuff);
+ }
NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
- force_next(THING);
+ force_next(THING);
}
else {
- SvREFCNT_dec(stuff);
- if (eofmt)
- PL_lex_formbrack = 0;
+ SvREFCNT_dec(stuff);
+ if (eofmt)
+ PL_lex_formbrack = 0;
}
return s;
}
+/*
+=for apidoc start_subparse
+
+Set things up for parsing a subroutine.
+
+If C<is_format> is non-zero, the input is to be considered a format sub
+(a specialised sub used to implement perl's C<format> feature); else a
+normal C<sub>.
+
+C<flags> are added to the flags for C<PL_compcv>. C<flags> may include the
+C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
+
+This returns the value of C<PL_savestack_ix> that was in effect upon entry to
+the function;
+
+=cut
+*/
+
I32
Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
{
const I32 oldsavestack_ix = PL_savestack_ix;
CV* const outsidecv = PL_compcv;
+ bool is_method = flags & CVf_IsMETHOD;
+
+ if (is_method)
+ croak_kw_unless_class("method");
SAVEI32(PL_subline);
save_item(PL_subname);
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
+ if (is_method)
+ class_prepare_method_parse(PL_compcv);
return oldsavestack_ix;
}
+/* If o represents a builtin attribute, apply it to cv and returns true.
+ * Otherwise does nothing and returns false
+ */
+
+STATIC bool
+S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
+{
+ assert(o->op_type == OP_CONST);
+ SV *sv = cSVOPo_sv;
+ STRLEN len = SvCUR(sv);
+
+ /* NOTE: any CV attrs applied here need to be part of
+ the CVf_BUILTIN_ATTRS define in cv.h! */
+
+ if(memEQs(SvPVX(sv), len, "lvalue"))
+ CvLVALUE_on(cv);
+ else if(memEQs(SvPVX(sv), len, "method"))
+ CvNOWARN_AMBIGUOUS_on(cv);
+ else if(memEQs(SvPVX(sv), len, "const")) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+ ":const is experimental"
+ );
+ CvANONCONST_on(cv);
+ if (!CvANON(cv))
+ yyerror(":const is not permitted on named subroutines");
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/*
+=for apidoc apply_builtin_cv_attributes
+
+Given an OP_LIST containing attribute definitions, filter it for known builtin
+attributes to apply to the cv, returning a possibly-smaller list containing
+just the remaining ones.
+
+=cut
+*/
+
+OP *
+Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
+{
+ PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
+
+ if(!attrlist)
+ return attrlist;
+
+ if(attrlist->op_type != OP_LIST) {
+ /* Not in fact a list but just a single attribute */
+ if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
+ op_free(attrlist);
+ return NULL;
+ }
+
+ return attrlist;
+ }
+
+ OP *prev = cLISTOPx(attrlist)->op_first;
+ assert(prev->op_type == OP_PUSHMARK);
+ OP *o = OpSIBLING(prev);
+
+ OP *next;
+ for(; o; o = next) {
+ next = OpSIBLING(o);
+
+ if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
+ op_sibling_splice(attrlist, prev, 1, NULL);
+ op_free(o);
+ }
+ else {
+ prev = o;
+ }
+ }
+
+ if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
+ return attrlist;
+
+ /* The list is now entirely empty, we might as well discard it */
+ op_free(attrlist);
+ return NULL;
+}
+
/* Do extra initialisation of a CV (typically one just created by
* start_subparse()) if that CV is for a named sub
}
void
-Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
+Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
{
PERL_ARGS_ASSERT_ABORT_EXECUTION;
- if (PL_minus_c)
- Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
- else {
- Perl_croak(aTHX_
- "%sExecution of %s aborted due to compilation errors.\n", msg, name);
+ if (msg_sv) {
+ if (PL_minus_c)
+ Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
+ else {
+ Perl_croak(aTHX_
+ "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
+ }
+ } else {
+ if (PL_minus_c)
+ Perl_croak(aTHX_ "%s had compilation errors.\n", name);
+ else {
+ Perl_croak(aTHX_
+ "Execution of %s aborted due to compilation errors.\n", name);
+ }
}
+
NOT_REACHED; /* NOTREACHED */
}
Perl_yyerror(pTHX_ const char *const s)
{
PERL_ARGS_ASSERT_YYERROR;
- return yyerror_pvn(s, strlen(s), 0);
+ int r = yyerror_pvn(s, strlen(s), 0);
+ return r;
}
int
Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
{
PERL_ARGS_ASSERT_YYERROR_PV;
- return yyerror_pvn(s, strlen(s), flags);
+ int r = yyerror_pvn(s, strlen(s), flags);
+ return r;
}
int
&& PL_oldoldbufptr != PL_oldbufptr
&& PL_oldbufptr != PL_bufptr)
{
- /*
- Only for NetWare:
- The code below is removed for NetWare because it
- abends/crashes on NetWare when the script has error such as
- not having the closing quotes like:
- if ($var eq "value)
- Checking of white spaces is anyway done in NetWare code.
- */
-#ifndef NETWARE
while (isSPACE(*PL_oldoldbufptr))
PL_oldoldbufptr++;
-#endif
context = PL_oldoldbufptr;
contlen = PL_bufptr - PL_oldoldbufptr;
}
else if ( PL_oldbufptr
&& PL_bufptr > PL_oldbufptr
&& PL_bufptr - PL_oldbufptr < 200
- && PL_oldbufptr != PL_bufptr) {
- /*
- Only for NetWare:
- The code below is removed for NetWare because it
- abends/crashes on NetWare when the script has error such as
- not having the closing quotes like:
- if ($var eq "value)
- Checking of white spaces is anyway done in NetWare code.
- */
-#ifndef NETWARE
+ && PL_oldbufptr != PL_bufptr)
+ {
while (isSPACE(*PL_oldbufptr))
PL_oldbufptr++;
-#endif
context = PL_oldbufptr;
contlen = PL_bufptr - PL_oldbufptr;
}
Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
}
msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
- Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
+ Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
OutCopFILE(PL_curcop),
- (IV)(PL_parser->preambling == NOLINE
+ (PL_parser->preambling == NOLINE
? CopLINE(PL_curcop)
: PL_parser->preambling));
if (context)
{
Perl_sv_catpvf(aTHX_ msg,
" (Might be a runaway multi-line %c%c string starting on"
- " line %" IVdf ")\n",
- (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
+ " line %" LINE_Tf ")\n",
+ (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
qerror(msg);
}
}
- if (s == NULL || PL_error_count >= 10) {
- const char * msg = "";
- const char * const name = OutCopFILE(PL_curcop);
-
- if (PL_in_eval) {
- SV * errsv = ERRSV;
- if (SvCUR(errsv)) {
- msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
- }
- }
+ /* if there was no message then this is a yyquit(), which is actualy handled
+ * by qerror() with a NULL argument */
+ if (s == NULL)
+ qerror(NULL);
- if (s == NULL) {
- abort_execution(msg, name);
- }
- else {
- Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
- }
- }
PL_in_my = 0;
PL_in_my_stash = NULL;
return 0;
switch (s[0]) {
case 0xFF:
- if (s[1] == 0xFE) {
- /* UTF-16 little-endian? (or UTF-32LE?) */
- if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
- /* diag_listed_as: Unsupported script encoding %s */
- Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
+ if (s[1] == 0xFE) {
+ /* UTF-16 little-endian? (or UTF-32LE?) */
+ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
+ /* diag_listed_as: Unsupported script encoding %s */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
#ifndef PERL_NO_UTF16_FILTER
#ifdef DEBUGGING
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
#endif
- s += 2;
- if (PL_bufend > (char*)s) {
- s = add_utf16_textfilter(s, TRUE);
- }
+ s += 2;
+ if (PL_bufend > (char*)s) {
+ s = add_utf16_textfilter(s, TRUE);
+ }
#else
- /* diag_listed_as: Unsupported script encoding %s */
- Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
+ /* diag_listed_as: Unsupported script encoding %s */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
#endif
- }
- break;
+ }
+ break;
case 0xFE:
- if (s[1] == 0xFF) { /* UTF-16 big-endian? */
+ if (s[1] == 0xFF) { /* UTF-16 big-endian? */
#ifndef PERL_NO_UTF16_FILTER
#ifdef DEBUGGING
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
#endif
- s += 2;
- if (PL_bufend > (char *)s) {
- s = add_utf16_textfilter(s, FALSE);
- }
+ s += 2;
+ if (PL_bufend > (char *)s) {
+ s = add_utf16_textfilter(s, FALSE);
+ }
#else
- /* diag_listed_as: Unsupported script encoding %s */
- Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
+ /* diag_listed_as: Unsupported script encoding %s */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
#endif
- }
- break;
+ }
+ break;
case BOM_UTF8_FIRST_BYTE: {
if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
#ifdef DEBUGGING
break;
}
case 0:
- if (slen > 3) {
- if (s[1] == 0) {
- if (s[2] == 0xFE && s[3] == 0xFF) {
- /* UTF-32 big-endian */
- /* diag_listed_as: Unsupported script encoding %s */
- Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
- }
- }
- else if (s[2] == 0 && s[3] != 0) {
- /* Leading bytes
- * 00 xx 00 xx
- * are a good indicator of UTF-16BE. */
+ if (slen > 3) {
+ if (s[1] == 0) {
+ if (s[2] == 0xFE && s[3] == 0xFF) {
+ /* UTF-32 big-endian */
+ /* diag_listed_as: Unsupported script encoding %s */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
+ }
+ }
+ else if (s[2] == 0 && s[3] != 0) {
+ /* Leading bytes
+ * 00 xx 00 xx
+ * are a good indicator of UTF-16BE. */
#ifndef PERL_NO_UTF16_FILTER
#ifdef DEBUGGING
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
#endif
- s = add_utf16_textfilter(s, FALSE);
+ s = add_utf16_textfilter(s, FALSE);
#else
- /* diag_listed_as: Unsupported script encoding %s */
- Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
+ /* diag_listed_as: Unsupported script encoding %s */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
#endif
- }
- }
+ }
+ }
break;
default:
- if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
- /* Leading bytes
- * xx 00 xx 00
- * are a good indicator of UTF-16LE. */
+ if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+ /* Leading bytes
+ * xx 00 xx 00
+ * are a good indicator of UTF-16LE. */
#ifndef PERL_NO_UTF16_FILTER
#ifdef DEBUGGING
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
#endif
- s = add_utf16_textfilter(s, TRUE);
+ s = add_utf16_textfilter(s, TRUE);
#else
- /* diag_listed_as: Unsupported script encoding %s */
- Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
+ /* diag_listed_as: Unsupported script encoding %s */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
#endif
- }
+ }
}
return (char*)s;
}
from this file, we can be sure that we're not called in block mode. Hence
don't bother writing code to deal with block mode. */
if (maxlen) {
- Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
+ Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
}
if (status < 0) {
- Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
+ Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
}
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
- FPTR2DPTR(void *, S_utf16_textfilter),
- reverse ? 'l' : 'b', idx, maxlen, status,
- (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+ "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
+ FPTR2DPTR(void *, S_utf16_textfilter),
+ reverse ? 'l' : 'b', idx, maxlen, status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
while (1) {
- STRLEN chars;
- STRLEN have;
- Size_t newlen;
- U8 *end;
- /* First, look in our buffer of existing UTF-8 data: */
- char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
-
- if (nl) {
- ++nl;
- } else if (status == 0) {
- /* EOF */
- IoPAGE(filter) = 0;
- nl = SvEND(utf8_buffer);
- }
- if (nl) {
- STRLEN got = nl - SvPVX(utf8_buffer);
- /* Did we have anything to append? */
- retval = got != 0;
- sv_catpvn(sv, SvPVX(utf8_buffer), got);
- /* Everything else in this code works just fine if SVp_POK isn't
- set. This, however, needs it, and we need it to work, else
- we loop infinitely because the buffer is never consumed. */
- sv_chop(utf8_buffer, nl);
- break;
- }
-
- /* OK, not a complete line there, so need to read some more UTF-16.
- Read an extra octect if the buffer currently has an odd number. */
- while (1) {
- if (status <= 0)
- break;
- if (SvCUR(utf16_buffer) >= 2) {
- /* Location of the high octet of the last complete code point.
- Gosh, UTF-16 is a pain. All the benefits of variable length,
- *coupled* with all the benefits of partial reads and
- endianness. */
- const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
- + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
-
- if (*last_hi < 0xd8 || *last_hi > 0xdb) {
- break;
- }
-
- /* We have the first half of a surrogate. Read more. */
- DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
- }
-
- status = FILTER_READ(idx + 1, utf16_buffer,
- 160 + (SvCUR(utf16_buffer) & 1));
- DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
- DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
- if (status < 0) {
- /* Error */
- IoPAGE(filter) = status;
- return status;
- }
- }
+ STRLEN chars;
+ STRLEN have;
+ Size_t newlen;
+ U8 *end;
+ /* First, look in our buffer of existing UTF-8 data: */
+ char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
+
+ if (nl) {
+ ++nl;
+ } else if (status == 0) {
+ /* EOF */
+ IoPAGE(filter) = 0;
+ nl = SvEND(utf8_buffer);
+ }
+ if (nl) {
+ STRLEN got = nl - SvPVX(utf8_buffer);
+ /* Did we have anything to append? */
+ retval = got != 0;
+ sv_catpvn(sv, SvPVX(utf8_buffer), got);
+ /* Everything else in this code works just fine if SVp_POK isn't
+ set. This, however, needs it, and we need it to work, else
+ we loop infinitely because the buffer is never consumed. */
+ sv_chop(utf8_buffer, nl);
+ break;
+ }
+
+ /* OK, not a complete line there, so need to read some more UTF-16.
+ Read an extra octect if the buffer currently has an odd number. */
+ while (1) {
+ if (status <= 0)
+ break;
+ if (SvCUR(utf16_buffer) >= 2) {
+ /* Location of the high octet of the last complete code point.
+ Gosh, UTF-16 is a pain. All the benefits of variable length,
+ *coupled* with all the benefits of partial reads and
+ endianness. */
+ const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
+ + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
+
+ if (*last_hi < 0xd8 || *last_hi > 0xdb) {
+ break;
+ }
+
+ /* We have the first half of a surrogate. Read more. */
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
+ }
+
+ status = FILTER_READ(idx + 1, utf16_buffer,
+ 160 + (SvCUR(utf16_buffer) & 1));
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
+ DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
+ if (status < 0) {
+ /* Error */
+ IoPAGE(filter) = status;
+ return status;
+ }
+ }
/* 'chars' isn't quite the right name, as code points above 0xFFFF
* require 4 bytes per char */
- chars = SvCUR(utf16_buffer) >> 1;
- have = SvCUR(utf8_buffer);
+ chars = SvCUR(utf16_buffer) >> 1;
+ have = SvCUR(utf8_buffer);
/* Assume the worst case size as noted by the functions: twice the
* number of input bytes */
- SvGROW(utf8_buffer, have + chars * 4 + 1);
-
- if (reverse) {
- end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
- (U8*)SvPVX_const(utf8_buffer) + have,
- chars * 2, &newlen);
- } else {
- end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
- (U8*)SvPVX_const(utf8_buffer) + have,
- chars * 2, &newlen);
- }
- SvCUR_set(utf8_buffer, have + newlen);
- *end = '\0';
-
- /* No need to keep this SV "well-formed" with a '\0' after the end, as
- it's private to us, and utf16_to_utf8{,reversed} take a
- (pointer,length) pair, rather than a NUL-terminated string. */
- if(SvCUR(utf16_buffer) & 1) {
- *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
- SvCUR_set(utf16_buffer, 1);
- } else {
- SvCUR_set(utf16_buffer, 0);
- }
+ SvGROW(utf8_buffer, have + chars * 4 + 1);
+
+ if (reverse) {
+ end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ } else {
+ end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ }
+ SvCUR_set(utf8_buffer, have + newlen);
+ *end = '\0';
+
+ /* No need to keep this SV "well-formed" with a '\0' after the end, as
+ it's private to us, and utf16_to_utf8{,reversed} take a
+ (pointer,length) pair, rather than a NUL-terminated string. */
+ if(SvCUR(utf16_buffer) & 1) {
+ *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
+ SvCUR_set(utf16_buffer, 1);
+ } else {
+ SvCUR_set(utf16_buffer, 0);
+ }
}
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
- status,
- (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+ "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
+ status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
return retval;
}
ignore any error return from this. */
SvCUR_set(PL_linestr, 0);
if (FILTER_READ(0, PL_linestr, 0)) {
- SvUTF8_on(PL_linestr);
+ SvUTF8_on(PL_linestr);
} else {
- SvUTF8_on(PL_linestr);
+ SvUTF8_on(PL_linestr);
}
PL_bufend = SvEND(PL_linestr);
return (U8*)SvPVX(PL_linestr);
#endif
/*
+=for apidoc scan_vstring
+
Returns a pointer to the next character after the parsed
vstring, as well as updating the passed in sv.
Function must be called like
- sv = sv_2mortal(newSV(5));
- s = scan_vstring(s,e,sv);
+ sv = sv_2mortal(newSV(5));
+ s = scan_vstring(s,e,sv);
where s and e are the start and end of the string.
The sv should already be large enough to store the vstring
a leak). Make sure to do SvREFCNT_inc afterwards if you use
sv_2mortal.
+=cut
*/
char *
if (*pos == 'v') pos++; /* get past 'v' */
while (pos < e && (isDIGIT(*pos) || *pos == '_'))
- pos++;
+ pos++;
if ( *pos != '.') {
- /* this may not be a v-string if followed by => */
- const char *next = pos;
- while (next < e && isSPACE(*next))
- ++next;
- if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
- /* return string not v-string */
- sv_setpvn(sv,(char *)s,pos-s);
- return (char *)pos;
- }
+ /* this may not be a v-string if followed by => */
+ const char *next = pos;
+ while (next < e && isSPACE(*next))
+ ++next;
+ if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
+ /* return string not v-string */
+ sv_setpvn(sv,(char *)s,pos-s);
+ return (char *)pos;
+ }
}
if (!isALPHA(*pos)) {
- U8 tmpbuf[UTF8_MAXBYTES+1];
+ U8 tmpbuf[UTF8_MAXBYTES+1];
- if (*s == 'v')
- s++; /* get past 'v' */
+ if (*s == 'v')
+ s++; /* get past 'v' */
SvPVCLEAR(sv);
- for (;;) {
- /* this is atoi() that tolerates underscores */
- U8 *tmpend;
- UV rev = 0;
- const char *end = pos;
- UV mult = 1;
- while (--end >= s) {
- if (*end != '_') {
- const UV orev = rev;
- rev += (*end - '0') * mult;
- mult *= 10;
- if (orev > rev)
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in decimal number");
- }
- }
-
- /* Append native character for the rev point */
- tmpend = uvchr_to_utf8(tmpbuf, rev);
- sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
- if (!UVCHR_IS_INVARIANT(rev))
- SvUTF8_on(sv);
- if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
- s = ++pos;
- else {
- s = pos;
- break;
- }
- while (pos < e && (isDIGIT(*pos) || *pos == '_'))
- pos++;
- }
- SvPOK_on(sv);
- sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
- SvRMAGICAL_on(sv);
+ for (;;) {
+ /* this is atoi() that tolerates underscores */
+ U8 *tmpend;
+ UV rev = 0;
+ const char *end = pos;
+ UV mult = 1;
+ while (--end >= s) {
+ if (*end != '_') {
+ const UV orev = rev;
+ rev += (*end - '0') * mult;
+ mult *= 10;
+ if (orev > rev)
+ /* diag_listed_as: Integer overflow in %s number */
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in decimal number");
+ }
+ }
+
+ /* Append native character for the rev point */
+ tmpend = uvchr_to_utf8(tmpbuf, rev);
+ sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
+ if (!UVCHR_IS_INVARIANT(rev))
+ SvUTF8_on(sv);
+ if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
+ s = ++pos;
+ else {
+ s = pos;
+ break;
+ }
+ while (pos < e && (isDIGIT(*pos) || *pos == '_'))
+ pos++;
+ }
+ SvPOK_on(sv);
+ sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
+ SvRMAGICAL_on(sv);
}
return (char *)s;
}
int
Perl_keyword_plugin_standard(pTHX_
- char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
+ char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
{
PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
PERL_UNUSED_CONTEXT;
return KEYWORD_PLUGIN_DECLINE;
}
+STRLEN
+Perl_infix_plugin_standard(pTHX_
+ char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
+{
+ PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
+ PERL_UNUSED_CONTEXT;
+ PERL_UNUSED_ARG(operator_ptr);
+ PERL_UNUSED_ARG(operator_len);
+ PERL_UNUSED_ARG(def);
+ return 0;
+}
+
/*
+=for apidoc_section $lexer
=for apidoc wrap_keyword_plugin
Puts a C function into the chain of keyword plugins. This is the
KEYWORD_PLUGIN_MUTEX_UNLOCK;
}
+/*
+=for apidoc wrap_infix_plugin
+
+B<NOTE:> This API exists entirely for the purpose of making the CPAN module
+C<XS::Parse::Infix> work. It is not expected that additional modules will make
+use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
+of new infix operators.
+
+Puts a C function into the chain of infix plugins. This is the preferred
+way to manipulate the L</PL_infix_plugin> variable. C<new_plugin> is a
+pointer to the C function that is to be added to the infix plugin chain, and
+C<old_plugin_p> points to a storage location where a pointer to the next
+function in the chain will be stored. The value of C<new_plugin> is written
+into the L</PL_infix_plugin> variable, while the value previously stored there
+is written to C<*old_plugin_p>.
+
+Direct access to L</PL_infix_plugin> should be avoided.
+
+=cut
+*/
+
+void
+Perl_wrap_infix_plugin(pTHX_
+ Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
+{
+
+ PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
+ if (*old_plugin_p) return;
+ /* We use the same mutex as for PL_keyword_plugin as it's so rare either
+ * of them is actually updated; no need for a dedicated one each */
+ KEYWORD_PLUGIN_MUTEX_LOCK;
+ if (!*old_plugin_p) {
+ *old_plugin_p = PL_infix_plugin;
+ PL_infix_plugin = new_plugin;
+ }
+ KEYWORD_PLUGIN_MUTEX_UNLOCK;
+}
+
#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
static void
S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
{
SAVEI32(PL_lex_brackets);
if (PL_lex_brackets > 100)
- Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
+ Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
SAVEI32(PL_lex_allbrackets);
PL_lex_allbrackets = 0;
SAVEI8(PL_lex_fakeeof);
PL_lex_fakeeof = (U8)fakeeof;
if(yyparse(gramtype) && !PL_parser->error_count)
- qerror(Perl_mess(aTHX_ "Parse error"));
+ qerror(Perl_mess(aTHX_ "Parse error"));
}
#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
{
OP *exprop;
if (flags & ~PARSE_OPTIONAL)
- Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
if (!exprop && !(flags & PARSE_OPTIONAL)) {
- if (!PL_parser->error_count)
- qerror(Perl_mess(aTHX_ "Parse error"));
- exprop = newOP(OP_NULL, 0);
+ if (!PL_parser->error_count)
+ qerror(Perl_mess(aTHX_ "Parse error"));
+ exprop = newOP(OP_NULL, 0);
}
return exprop;
}
Perl_parse_block(pTHX_ U32 flags)
{
if (flags)
- Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
}
Perl_parse_barestmt(pTHX_ U32 flags)
{
if (flags)
- Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
}
Perl_parse_label(pTHX_ U32 flags)
{
if (flags & ~PARSE_OPTIONAL)
- Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
if (PL_nexttoke) {
- PL_parser->yychar = yylex();
- if (PL_parser->yychar == LABEL) {
- SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
- PL_parser->yychar = YYEMPTY;
- cSVOPx(pl_yylval.opval)->op_sv = NULL;
- op_free(pl_yylval.opval);
- return labelsv;
- } else {
- yyunlex();
- goto no_label;
- }
+ PL_parser->yychar = yylex();
+ if (PL_parser->yychar == LABEL) {
+ SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
+ PL_parser->yychar = YYEMPTY;
+ cSVOPx(pl_yylval.opval)->op_sv = NULL;
+ op_free(pl_yylval.opval);
+ return labelsv;
+ } else {
+ yyunlex();
+ goto no_label;
+ }
} else {
- char *s, *t;
- STRLEN wlen, bufptr_pos;
- lex_read_space(0);
- t = s = PL_bufptr;
+ char *s, *t;
+ STRLEN wlen, bufptr_pos;
+ lex_read_space(0);
+ t = s = PL_bufptr;
if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
- goto no_label;
- t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
- if (word_takes_any_delimiter(s, wlen))
- goto no_label;
- bufptr_pos = s - SvPVX(PL_linestr);
- PL_bufptr = t;
- lex_read_space(LEX_KEEP_PREVIOUS);
- t = PL_bufptr;
- s = SvPVX(PL_linestr) + bufptr_pos;
- if (t[0] == ':' && t[1] != ':') {
- PL_oldoldbufptr = PL_oldbufptr;
- PL_oldbufptr = s;
- PL_bufptr = t+1;
- return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
- } else {
- PL_bufptr = s;
- no_label:
- if (flags & PARSE_OPTIONAL) {
- return NULL;
- } else {
- qerror(Perl_mess(aTHX_ "Parse error"));
- return newSVpvs("x");
- }
- }
+ goto no_label;
+ t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
+ if (word_takes_any_delimiter(s, wlen))
+ goto no_label;
+ bufptr_pos = s - SvPVX(PL_linestr);
+ PL_bufptr = t;
+ lex_read_space(LEX_KEEP_PREVIOUS);
+ t = PL_bufptr;
+ s = SvPVX(PL_linestr) + bufptr_pos;
+ if (t[0] == ':' && t[1] != ':') {
+ PL_oldoldbufptr = PL_oldbufptr;
+ PL_oldbufptr = s;
+ PL_bufptr = t+1;
+ return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
+ } else {
+ PL_bufptr = s;
+ no_label:
+ if (flags & PARSE_OPTIONAL) {
+ return NULL;
+ } else {
+ qerror(Perl_mess(aTHX_ "Parse error"));
+ return newSVpvs("x");
+ }
+ }
}
}
Perl_parse_fullstmt(pTHX_ U32 flags)
{
if (flags)
- Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
}
OP *stmtseqop;
I32 c;
if (flags)
- Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
c = lex_peek_unichar(0);
if (c != -1 && c != /*{*/'}')
- qerror(Perl_mess(aTHX_ "Parse error"));
+ qerror(Perl_mess(aTHX_ "Parse error"));
return stmtseqop;
}