and I<rsfp> supplies the remainder of the source.
The I<flags> parameter is reserved for future use, and must always
-be zero.
+be zero, except for one flag that is currently reserved for perl's internal
+use.
=cut
*/
+/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
+ can share filters with the current parser. */
+
void
Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
const char *s = NULL;
STRLEN len;
yy_parser *parser, *oparser;
- if (flags)
+ if (flags && flags != LEX_START_SAME_FILTER)
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
/* create and initialise a parser */
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->rsfp_filters = newAV();
+ parser->rsfp_filters =
+ !(flags & LEX_START_SAME_FILTER) || !oparser
+ ? newAV()
+ : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
if (PL_lex_inpat) {
- /* Pass through to the regex compiler unchanged. The
- * reason we evaluated the number above is to make sure
- * there wasn't a syntax error. */
+ /* On non-EBCDIC platforms, pass through to the regex
+ * compiler unchanged. The reason we evaluated the
+ * number above is to make sure there wasn't a syntax
+ * error. But on EBCDIC we convert to native so
+ * downstream code can continue to assume it's native
+ */
s -= 5; /* Include the '\N{U+' */
+#ifdef EBCDIC
+ d += my_snprintf(d, e - s + 1 + 1, /* includes the }
+ and the \0 */
+ "\\N{U+%X}",
+ (unsigned int) UNI_TO_NATIVE(uv));
+#else
Copy(s, d, e - s + 1, char); /* 1 = include the } */
d += e - s + 1;
+#endif
}
else { /* Not a pattern: convert the hex to string */
}
/* Convert first code point to hex, including the
- * boiler plate before it */
+ * boiler plate before it. For all these, we
+ * convert to native format so that downstream code
+ * can continue to assume the input is native */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X", (unsigned int) uv);
+ "\\N{U+%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- ".%X", (unsigned int) uv);
+ ".%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
d = off + SvGROW(sv, off
+ output_length
*(U8*)s == 0xEF ||
*(U8*)s >= 0xFE ||
s[1] == 0)) {
- bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
+ Off_t offset = (IV)PerlIO_tell(PL_rsfp);
+ bof = (offset == (Off_t)SvCUR(PL_linestr));
+#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
+ /* offset may include swallowed CR */
+ if (!bof)
+ bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
+#endif
if (bof) {
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = swallow_bom((U8*)s);
goto safe_bareword;
{
- OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+ OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
const_op->op_private = OPpCONST_BARE;
rv2cv_op = newCVREF(0, const_op);
}
s += 2;
d = s;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
- if (!(tmp = keyword(PL_tokenbuf, len, 0)))
+ if (!(tmp = keyword(PL_tokenbuf, len, 1)))
Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
if (tmp < 0)
tmp = -tmp;
UNI(OP_CHOP);
case KEY_continue:
- /* When 'use switch' is in effect, continue has a dual
- life as a control operator. */
- {
- if (!FEATURE_IS_ENABLED("switch"))
- PREBLOCK(CONTINUE);
- else {
/* We have to disambiguate the two senses of
"continue". If the next token is a '{' then
treat it as the start of a continue block;
PREBLOCK(CONTINUE);
else
FUN0(OP_CONTINUE);
- }
- }
case KEY_chdir:
/* may use HOME */
missingterm(NULL);
PL_expect = XOPERATOR;
if (SvCUR(PL_lex_stuff)) {
- int warned = 0;
+ int warned_comma = !ckWARN(WARN_QW);
+ int warned_comment = warned_comma;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
for (; isSPACE(*d) && len; --len, ++d)
if (len) {
SV *sv;
const char *b = d;
- if (!warned && ckWARN(WARN_QW)) {
+ if (!warned_comma || !warned_comment) {
for (; !isSPACE(*d) && len; --len, ++d) {
- if (*d == ',') {
+ if (!warned_comma && *d == ',') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to separate words with commas");
- ++warned;
+ ++warned_comma;
}
- else if (*d == '#') {
+ else if (!warned_comment && *d == '#') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to put comments in qw() list");
- ++warned;
+ ++warned_comment;
}
}
}
yyerror(Perl_form(aTHX_ "No package name allowed for "
"variable %s in \"our\"",
PL_tokenbuf));
- tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
}
else {
if (has_colon)
PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
pl_yylval.opval = newOP(OP_PADANY, 0);
- pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
return PRIVATEREF;
}
}
if (!has_colon) {
if (!PL_in_my)
- tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
if (*((*s) + 1) == 'n') {
goto deprecate;
}
- if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
- /* Doubled modifier implies more restricted */
- set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
- (*s)++;
- }
- else {
+
+ if (! *charset) {
set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
}
- if (*charset) { /* Do this after the increment of *s in /aa, so
- the return advances the ptr correctly */
- goto multiple_charsets;
+ else {
+
+ /* Error if previous modifier wasn't an 'a', but if it was, see
+ * if, and accept, a second occurrence (only) */
+ if (*charset != 'a'
+ || get_regex_charset(*pmfl)
+ != REGEX_ASCII_RESTRICTED_CHARSET)
+ {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
}
*charset = c;
break;
if (*charset != c) {
yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
}
+ else if (c == 'a') {
+ yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
+ }
else {
yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
*(SvPVX(PL_linestr) + off ) = ' ';
+ lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy(d, len, 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);