and I<rsfp> supplies the remainder of the source.
The I<flags> parameter is reserved for future use, and must always
-be zero.
+be zero, except for one flag that is currently reserved for perl's internal
+use.
=cut
*/
+/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
+ can share filters with the current parser. */
+
void
Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
const char *s = NULL;
STRLEN len;
yy_parser *parser, *oparser;
- if (flags)
+ if (flags && flags != LEX_START_SAME_FILTER)
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
/* create and initialise a parser */
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->rsfp_filters = newAV();
+ parser->rsfp_filters =
+ !(flags & LEX_START_SAME_FILTER) || !oparser
+ ? newAV()
+ : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
if (PL_lex_inpat) {
- /* Pass through to the regex compiler unchanged. The
- * reason we evaluated the number above is to make sure
- * there wasn't a syntax error. */
+ /* On non-EBCDIC platforms, pass through to the regex
+ * compiler unchanged. The reason we evaluated the
+ * number above is to make sure there wasn't a syntax
+ * error. But on EBCDIC we convert to native so
+ * downstream code can continue to assume it's native
+ */
s -= 5; /* Include the '\N{U+' */
+#ifdef EBCDIC
+ d += my_snprintf(d, e - s + 1 + 1, /* includes the }
+ and the \0 */
+ "\\N{U+%X}",
+ (unsigned int) UNI_TO_NATIVE(uv));
+#else
Copy(s, d, e - s + 1, char); /* 1 = include the } */
d += e - s + 1;
+#endif
}
else { /* Not a pattern: convert the hex to string */
}
/* Convert first code point to hex, including the
- * boiler plate before it */
+ * boiler plate before it. For all these, we
+ * convert to native format so that downstream code
+ * can continue to assume the input is native */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X", (unsigned int) uv);
+ "\\N{U+%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- ".%X", (unsigned int) uv);
+ ".%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
d = off + SvGROW(sv, off
+ output_length
*(U8*)s == 0xEF ||
*(U8*)s >= 0xFE ||
s[1] == 0)) {
- bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
+ Off_t offset = (IV)PerlIO_tell(PL_rsfp);
+ bof = (offset == (Off_t)SvCUR(PL_linestr));
+#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
+ /* offset may include swallowed CR */
+ if (!bof)
+ bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
+#endif
if (bof) {
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = swallow_bom((U8*)s);
goto safe_bareword;
{
- OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+ OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
const_op->op_private = OPpCONST_BARE;
rv2cv_op = newCVREF(0, const_op);
}
s += 2;
d = s;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
- if (!(tmp = keyword(PL_tokenbuf, len, 0)))
+ if (!(tmp = keyword(PL_tokenbuf, len, 1)))
Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
if (tmp < 0)
tmp = -tmp;
UNI(OP_CHOP);
case KEY_continue:
- /* When 'use switch' is in effect, continue has a dual
- life as a control operator. */
- {
- if (!FEATURE_IS_ENABLED("switch"))
- PREBLOCK(CONTINUE);
- else {
/* We have to disambiguate the two senses of
"continue". If the next token is a '{' then
treat it as the start of a continue block;
PREBLOCK(CONTINUE);
else
FUN0(OP_CONTINUE);
- }
- }
case KEY_chdir:
/* may use HOME */
missingterm(NULL);
PL_expect = XOPERATOR;
if (SvCUR(PL_lex_stuff)) {
- int warned = 0;
+ int warned_comma = !ckWARN(WARN_QW);
+ int warned_comment = warned_comma;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
for (; isSPACE(*d) && len; --len, ++d)
if (len) {
SV *sv;
const char *b = d;
- if (!warned && ckWARN(WARN_QW)) {
+ if (!warned_comma || !warned_comment) {
for (; !isSPACE(*d) && len; --len, ++d) {
- if (*d == ',') {
+ if (!warned_comma && *d == ',') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to separate words with commas");
- ++warned;
+ ++warned_comma;
}
- else if (*d == '#') {
+ else if (!warned_comment && *d == '#') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to put comments in qw() list");
- ++warned;
+ ++warned_comment;
}
}
}
yyerror(Perl_form(aTHX_ "No package name allowed for "
"variable %s in \"our\"",
PL_tokenbuf));
- tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
}
else {
if (has_colon)
PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
pl_yylval.opval = newOP(OP_PADANY, 0);
- pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
return PRIVATEREF;
}
}
if (!has_colon) {
if (!PL_in_my)
- tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
}
static bool
-S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
/* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
* the parse starting at 's', based on the subset that are valid in this
* context input to this routine in 'valid_flags'. Advances s. Returns
* TRUE if the input was a valid flag, so the next char may be as well;
- * otherwise FALSE */
+ * otherwise FALSE. 'charset' should point to a NUL upon first call on the
+ * current regex. This routine will set it to any charset modifier found.
+ * The caller shouldn't change it. This way, another charset modifier
+ * encountered in the parse can be detected as an error, as we have decided
+ * allow only one */
const char c = **s;
"Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
return FALSE;
}
+ if (*charset) {
+ goto multiple_charsets;
+ }
set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
+ *charset = c;
break;
case UNICODE_PAT_MOD:
/* In 5.14, qr//unless and qr//until are legal but deprecated; the
if (*((*s) + 1) == 'n') {
goto deprecate;
}
+ if (*charset) {
+ goto multiple_charsets;
+ }
set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
+ *charset = c;
break;
case ASCII_RESTRICT_PAT_MOD:
/* In 5.14, qr//and is legal but deprecated; the 'n' means they
if (*((*s) + 1) == 'n') {
goto deprecate;
}
- if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
- /* Doubled modifier implies more restricted */
- set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
- (*s)++;
+
+ if (! *charset) {
+ set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
}
else {
- set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
+
+ /* Error if previous modifier wasn't an 'a', but if it was, see
+ * if, and accept, a second occurrence (only) */
+ if (*charset != 'a'
+ || get_regex_charset(*pmfl)
+ != REGEX_ASCII_RESTRICTED_CHARSET)
+ {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
}
+ *charset = c;
break;
case DEPENDS_PAT_MOD:
+ if (*charset) {
+ goto multiple_charsets;
+ }
set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
+ *charset = c;
break;
}
Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
"Having no space between pattern and following word is deprecated");
return FALSE;
+
+ multiple_charsets:
+ if (*charset != c) {
+ yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
+ }
+ else if (c == 'a') {
+ yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
+ }
+ else {
+ yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
+ }
+
+ /* Pretend that it worked, so will continue processing before dieing */
+ (*s)++;
+ return TRUE;
}
STATIC char *
char *s = scan_str(start,!!PL_madskills,FALSE);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
+ char charset = '\0'; /* character set modifier */
#ifdef PERL_MAD
char *modstart;
#endif
#ifdef PERL_MAD
modstart = s;
#endif
- while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s)) {};
+ while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
SV* tmptoken = newSVpvn(modstart, s - modstart);
register PMOP *pm;
I32 first_start;
I32 es = 0;
+ char charset = '\0'; /* character set modifier */
#ifdef PERL_MAD
char *modstart;
#endif
s++;
es++;
}
- else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s)) {
+ else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+ {
break;
}
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
*(SvPVX(PL_linestr) + off ) = ' ';
+ lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy(d, len, 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);