* LOOPX : loop exiting command (goto, last, dump, etc)
* FTST : file test operator
* FUN0 : zero-argument function
+ * FUN0OP : zero-argument function, with its op created in this file
* FUN1 : not used, except for not, which isn't a UNIOP
* BOop : bitwise or or xor
* BAop : bitwise and
#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
{ FORMAT, TOKENTYPE_NONE, "FORMAT" },
{ FUNC, TOKENTYPE_OPNUM, "FUNC" },
{ FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
+ { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
{ FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
{ FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
{ FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
and I<rsfp> supplies the remainder of the source.
The I<flags> parameter is reserved for future use, and must always
-be zero.
+be zero, except for one flag that is currently reserved for perl's internal
+use.
=cut
*/
+/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
+ can share filters with the current parser. */
+
void
Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
const char *s = NULL;
STRLEN len;
yy_parser *parser, *oparser;
- if (flags)
+ if (flags && flags != LEX_START_SAME_FILTER)
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
/* create and initialise a parser */
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->rsfp_filters = newAV();
+ parser->rsfp_filters =
+ !(flags & LEX_START_SAME_FILTER) || !oparser
+ ? newAV()
+ : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
if (c != -1) {
if (c == '\n')
CopLINE_inc(PL_curcop);
- PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+ if (UTF)
+ PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+ else
+ ++(PL_parser->bufptr);
}
return c;
}
const char *t;
const char *n;
const char *e;
+ line_t line_num;
PERL_ARGS_ASSERT_INCLINE;
if (*e != '\n' && *e != '\0')
return; /* false alarm */
+ line_num = atoi(n)-1;
+
if (t - s > 0) {
const STRLEN len = t - s;
-#ifndef USE_ITHREADS
SV *const temp_sv = CopFILESV(PL_curcop);
const char *cf;
STRLEN tmplen;
gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
/* adjust ${"::_<newfilename"} to store the new file name */
GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
- GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
- GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+ /* The line number may differ. If that is the case,
+ alias the saved lines that are in the array.
+ Otherwise alias the whole array. */
+ if (CopLINE(PL_curcop) == line_num) {
+ GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
+ GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+ }
+ else if (GvAV(*gvp)) {
+ AV * const av = GvAV(*gvp);
+ const I32 start = CopLINE(PL_curcop)+1;
+ I32 items = AvFILLp(av) - start;
+ if (items > 0) {
+ AV * const av2 = GvAVn(gv2);
+ SV **svp = AvARRAY(av) + start;
+ I32 l = (I32)line_num+1;
+ while (items--)
+ av_store(av2, l++, SvREFCNT_inc(*svp++));
+ }
+ }
}
if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
}
if (tmpbuf != smallbuf) Safefree(tmpbuf);
}
-#endif
CopFILE_free(PL_curcop);
CopFILE_setn(PL_curcop, s, len);
}
- CopLINE_set(PL_curcop, atoi(n)-1);
+ CopLINE_set(PL_curcop, line_num);
}
#ifdef PERL_MAD
if (PL_lex_inpat) {
- /* Pass through to the regex compiler unchanged. The
- * reason we evaluated the number above is to make sure
- * there wasn't a syntax error. */
+ /* On non-EBCDIC platforms, pass through to the regex
+ * compiler unchanged. The reason we evaluated the
+ * number above is to make sure there wasn't a syntax
+ * error. But on EBCDIC we convert to native so
+ * downstream code can continue to assume it's native
+ */
s -= 5; /* Include the '\N{U+' */
+#ifdef EBCDIC
+ d += my_snprintf(d, e - s + 1 + 1, /* includes the }
+ and the \0 */
+ "\\N{U+%X}",
+ (unsigned int) UNI_TO_NATIVE(uv));
+#else
Copy(s, d, e - s + 1, char); /* 1 = include the } */
d += e - s + 1;
+#endif
}
else { /* Not a pattern: convert the hex to string */
}
/* Convert first code point to hex, including the
- * boiler plate before it */
+ * boiler plate before it. For all these, we
+ * convert to native format so that downstream code
+ * can continue to assume the input is native */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X", (unsigned int) uv);
+ "\\N{U+%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- ".%X", (unsigned int) uv);
+ ".%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
d = off + SvGROW(sv, off
+ output_length
if (PL_lex_brackets > 100)
Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
PL_lex_brackstack[PL_lex_brackets++] =
- (next_type >> 16) & 0xff;
+ (char) ((next_type >> 16) & 0xff);
}
if (next_type & (2<<24))
PL_lex_allbrackets++;
*(U8*)s == 0xEF ||
*(U8*)s >= 0xFE ||
s[1] == 0)) {
- bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
+ Off_t offset = (IV)PerlIO_tell(PL_rsfp);
+ bof = (offset == (Off_t)SvCUR(PL_linestr));
+#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
+ /* offset may include swallowed CR */
+ if (!bof)
+ bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
+#endif
if (bof) {
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = swallow_bom((U8*)s);
PREREF('$');
}
- /* This kludge not intended to be bulletproof. */
- if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
- pl_yylval.opval = newSVOP(OP_CONST, 0,
- newSViv(CopARYBASE_get(&PL_compiling)));
- pl_yylval.opval->op_private = OPpCONST_ARYBASE;
- TERM(THING);
- }
-
d = s;
{
const char tmp = *s;
goto safe_bareword;
{
- OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+ OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
const_op->op_private = OPpCONST_BARE;
rv2cv_op = newCVREF(0, const_op);
}
}
case KEY___FILE__:
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
- newSVpv(CopFILE(PL_curcop),0));
- TERM(THING);
+ FUN0OP(
+ (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
+ );
case KEY___LINE__:
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
- Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
- TERM(THING);
+ FUN0OP(
+ (OP*)newSVOP(OP_CONST, 0,
+ Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
+ );
case KEY___PACKAGE__:
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ FUN0OP(
+ (OP*)newSVOP(OP_CONST, 0,
(PL_curstash
? newSVhek(HvNAME_HEK(PL_curstash))
- : &PL_sv_undef));
- TERM(THING);
+ : &PL_sv_undef))
+ );
case KEY___DATA__:
case KEY___END__: {
#else
if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
#endif /* NETWARE */
-#ifdef PERLIO_IS_STDIO /* really? */
-# if defined(__BORLANDC__)
- /* XXX see note in do_binmode() */
- ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
-# endif
-#endif
if (loc > 0)
PerlIO_seek(PL_rsfp, loc, 0);
}
s += 2;
d = s;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
- if (!(tmp = keyword(PL_tokenbuf, len, 0)))
+ if (!(tmp = keyword(PL_tokenbuf, len, 1)))
Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
if (tmp < 0)
tmp = -tmp;
UNI(OP_CHOP);
case KEY_continue:
- /* When 'use switch' is in effect, continue has a dual
- life as a control operator. */
- {
- if (!FEATURE_IS_ENABLED("switch"))
- PREBLOCK(CONTINUE);
- else {
/* We have to disambiguate the two senses of
"continue". If the next token is a '{' then
treat it as the start of a continue block;
PREBLOCK(CONTINUE);
else
FUN0(OP_CONTINUE);
- }
- }
case KEY_chdir:
/* may use HOME */
missingterm(NULL);
PL_expect = XOPERATOR;
if (SvCUR(PL_lex_stuff)) {
- int warned = 0;
+ int warned_comma = !ckWARN(WARN_QW);
+ int warned_comment = warned_comma;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
for (; isSPACE(*d) && len; --len, ++d)
if (len) {
SV *sv;
const char *b = d;
- if (!warned && ckWARN(WARN_QW)) {
+ if (!warned_comma || !warned_comment) {
for (; !isSPACE(*d) && len; --len, ++d) {
- if (*d == ',') {
+ if (!warned_comma && *d == ',') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to separate words with commas");
- ++warned;
+ ++warned_comma;
}
- else if (*d == '#') {
+ else if (!warned_comment && *d == '#') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to put comments in qw() list");
- ++warned;
+ ++warned_comment;
}
}
}
yyerror(Perl_form(aTHX_ "No package name allowed for "
"variable %s in \"our\"",
PL_tokenbuf));
- tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
}
else {
if (has_colon)
PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
pl_yylval.opval = newOP(OP_PADANY, 0);
- pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
return PRIVATEREF;
}
}
if (!has_colon) {
if (!PL_in_my)
- tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
const char * const brack =
(const char *)
((*s == '[') ? "[...]" : "{...}");
+ /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
"Ambiguous use of %c{%s%s} resolved to %c%s%s",
funny, dest, brack, funny, dest, brack);
return s;
}
-static U32
-S_pmflag(U32 pmfl, const char ch) {
- switch (ch) {
- CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
- case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
- case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
- case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
- case KEEPCOPY_PAT_MOD: pmfl |= RXf_PMf_KEEPCOPY; break;
- case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
- }
- return pmfl;
+static bool
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
+
+ /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
+ * the parse starting at 's', based on the subset that are valid in this
+ * context input to this routine in 'valid_flags'. Advances s. Returns
+ * TRUE if the input was a valid flag, so the next char may be as well;
+ * otherwise FALSE. 'charset' should point to a NUL upon first call on the
+ * current regex. This routine will set it to any charset modifier found.
+ * The caller shouldn't change it. This way, another charset modifier
+ * encountered in the parse can be detected as an error, as we have decided
+ * allow only one */
+
+ const char c = **s;
+
+ if (! strchr(valid_flags, c)) {
+ if (isALNUM(c)) {
+ goto deprecate;
+ }
+ return FALSE;
+ }
+
+ switch (c) {
+
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
+ case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
+ case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
+ case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
+ case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
+ case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
+ case LOCALE_PAT_MOD:
+
+ /* In 5.14, qr//lt is legal but deprecated; the 't' means they
+ * can't be regex modifiers.
+ * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
+ * much as easily done. s///lei, for example, has to mean regex
+ * modifiers if it's not an error (as does any word character
+ * following the 'e'). Otherwise, we resolve to the backwards-
+ * compatible, but less likely 's/// le ...', i.e. as meaning
+ * less-than-or-equal. The reason it's not likely is that s//
+ * returns a number for code in the field (/r returns a string, but
+ * that wasn't added until the 5.13 series), and so '<=' should be
+ * used for comparing, not 'le'. */
+ if (*((*s) + 1) == 't') {
+ goto deprecate;
+ }
+ else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
+
+ /* 'e' is valid only for substitutes, s///e. If it is not
+ * valid in the current context, then 'm//le' must mean the
+ * comparison operator, so use the regular deprecation message.
+ */
+ if (! strchr(valid_flags, 'e')) {
+ goto deprecate;
+ }
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
+ return FALSE;
+ }
+ if (*charset) {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
+ *charset = c;
+ break;
+ case UNICODE_PAT_MOD:
+ /* In 5.14, qr//unless and qr//until are legal but deprecated; the
+ * 'n' means they can't be regex modifiers */
+ if (*((*s) + 1) == 'n') {
+ goto deprecate;
+ }
+ if (*charset) {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
+ *charset = c;
+ break;
+ case ASCII_RESTRICT_PAT_MOD:
+ /* In 5.14, qr//and is legal but deprecated; the 'n' means they
+ * can't be regex modifiers */
+ if (*((*s) + 1) == 'n') {
+ goto deprecate;
+ }
+
+ if (! *charset) {
+ set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
+ }
+ else {
+
+ /* Error if previous modifier wasn't an 'a', but if it was, see
+ * if, and accept, a second occurrence (only) */
+ if (*charset != 'a'
+ || get_regex_charset(*pmfl)
+ != REGEX_ASCII_RESTRICTED_CHARSET)
+ {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
+ }
+ *charset = c;
+ break;
+ case DEPENDS_PAT_MOD:
+ if (*charset) {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
+ *charset = c;
+ break;
+ }
+
+ (*s)++;
+ return TRUE;
+
+ deprecate:
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
+ "Having no space between pattern and following word is deprecated");
+ return FALSE;
+
+ multiple_charsets:
+ if (*charset != c) {
+ yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
+ }
+ else if (c == 'a') {
+ yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
+ }
+ else {
+ yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
+ }
+
+ /* Pretend that it worked, so will continue processing before dieing */
+ (*s)++;
+ return TRUE;
}
STATIC char *
char *s = scan_str(start,!!PL_madskills,FALSE);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
+ char charset = '\0'; /* character set modifier */
#ifdef PERL_MAD
char *modstart;
#endif
#ifdef PERL_MAD
modstart = s;
#endif
- while (*s && strchr(valid_flags, *s))
- pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
-
- if (isALNUM(*s)) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
- "Having no space between pattern and following word is deprecated");
-
- }
+ while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
SV* tmptoken = newSVpvn(modstart, s - modstart);
S_scan_subst(pTHX_ char *start)
{
dVAR;
- register char *s;
+ char *s;
register PMOP *pm;
I32 first_start;
I32 es = 0;
+ char charset = '\0'; /* character set modifier */
#ifdef PERL_MAD
char *modstart;
#endif
s++;
es++;
}
- else if (strchr(S_PAT_MODS, *s))
- pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
- else {
- if (isALNUM(*s)) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
- "Having no space between pattern and following word is deprecated");
-
- }
+ else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+ {
break;
}
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
*(SvPVX(PL_linestr) + off ) = ' ';
+ lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy(d, len, 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);