else if (!rv)
sv_catpvs(report, "EOF");
else
- Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+ Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
switch (type) {
case TOKENTYPE_NONE:
break;
case TOKENTYPE_IVAL:
- Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
+ Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
break;
case TOKENTYPE_OPNUM:
Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
if (is_first)
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"\t(Missing semicolon on previous line?)\n");
- else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
+ else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
+ PL_bufend,
+ UTF))
+ {
const char *t;
- for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
- t += UTF ? UTF8SKIP(t) : 1)
+ for (t = PL_oldoldbufptr;
+ (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
+ t += UTF ? UTF8SKIP(t) : 1)
+ {
NOOP;
+ }
if (t < PL_bufptr && isSPACE(*t))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Do you need to predeclare %"UTF8f"?)\n",
+ "\t(Do you need to predeclare %" UTF8f "?)\n",
UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
}
else {
assert(s >= oldbp);
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Missing operator before %"UTF8f"?)\n",
+ "\t(Missing operator before %" UTF8f "?)\n",
UTF8fARG(UTF, s - oldbp, oldbp));
}
}
sv = sv_2mortal(newSVpv(s,0));
if (uni)
SvUTF8_on(sv);
- Perl_croak(aTHX_ "Can't find string terminator %c%"SVf
+ Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
"%c anywhere before EOF",q,SVfARG(sv),q);
}
PL_parser = parser;
parser->stack = NULL;
+ parser->stack_max1 = NULL;
parser->ps = NULL;
- parser->stack_size = 0;
/* on scope exit, free this parser and restore any outer one */
SAVEPARSER(parser);
char *buf;
STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
+ bool current;
+
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
if (len <= SvLEN(linestr))
return buf;
+
+ /* Is the lex_shared linestr SV the same as the current linestr SV?
+ * Only in this case does re_eval_start need adjusting, since it
+ * points within lex_shared->ls_linestr's buffer */
+ current = ( !PL_parser->lex_shared->ls_linestr
+ || linestr == PL_parser->lex_shared->ls_linestr);
+
bufend_pos = PL_parser->bufend - buf;
bufptr_pos = PL_parser->bufptr - buf;
oldbufptr_pos = PL_parser->oldbufptr - buf;
linestart_pos = PL_parser->linestart - buf;
last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
- re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+ re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
PL_parser->lex_shared->re_eval_start - buf : 0;
buf = sv_grow(linestr, len);
PL_parser->last_uni = buf + last_uni_pos;
if (PL_parser->last_lop)
PL_parser->last_lop = buf + last_lop_pos;
- if (PL_parser->lex_shared->re_eval_start)
+ if (current && PL_parser->lex_shared->re_eval_start)
PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
return buf;
}
} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
p++;
highhalf++;
- } else if (! UTF8_IS_INVARIANT(c)) {
- /* malformed UTF-8 */
- ENTER;
- SAVESPTR(PL_warnhook);
- PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
- LEAVE;
+ } else if (! UTF8_IS_INVARIANT(c)) {
+ _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
}
}
if (!highhalf)
STRLEN linestart_pos, last_uni_pos, last_lop_pos;
bool got_some_for_debugger = 0;
bool got_some;
+ const U8* first_bad_char_loc;
+
if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
new_bufend_pos = SvCUR(linestr);
PL_parser->bufend = buf + new_bufend_pos;
PL_parser->bufptr = buf + bufptr_pos;
+
+ if (UTF && ! is_utf8_string_loc((U8 *) PL_parser->bufptr,
+ PL_parser->bufend - PL_parser->bufptr,
+ &first_bad_char_loc))
+ {
+ _force_out_malformed_utf8_message(first_bad_char_loc,
+ (U8 *) PL_parser->bufend,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
+ }
+
PL_parser->oldbufptr = buf + oldbufptr_pos;
PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
PL_parser->linestart = buf + linestart_pos;
}
unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
if (retlen == (STRLEN)-1) {
- /* malformed UTF-8 */
- ENTER;
- SAVESPTR(PL_warnhook);
- PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
- LEAVE;
+ _force_out_malformed_utf8_message((U8 *) s,
+ (U8 *) bufend,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
}
return unichar;
} else {
if (proto_after_greedy_proto)
Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Prototype after '%c' for %"SVf" : %s",
+ "Prototype after '%c' for %" SVf " : %s",
greedy_proto, SVfARG(name), p);
if (in_brackets)
Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Missing ']' in prototype for %"SVf" : %s",
+ "Missing ']' in prototype for %" SVf " : %s",
SVfARG(name), p);
if (bad_proto)
Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Illegal character in prototype for %"SVf" : %s",
+ "Illegal character in prototype for %" SVf " : %s",
SVfARG(name), p);
if (bad_proto_after_underscore)
Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Illegal character after '_' in prototype for %"SVf" : %s",
+ "Illegal character after '_' in prototype for %" SVf " : %s",
SVfARG(name), p);
}
while (isSPACE(*PL_last_uni))
PL_last_uni++;
s = PL_last_uni;
- while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
+ while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
s += UTF ? UTF8SKIP(s) : 1;
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+ "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
}
start = skipspace(start);
s = start;
- if (isIDFIRST_lazy_if(s,UTF)
+ if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| (allow_pack && *s == ':' && s[1] == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
* Pattern matching will set PL_lex_op to the pattern-matching op to
* make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
*
- * OP_CONST and OP_READLINE are easy--just make the new op and return.
+ * OP_CONST is easy--just make the new op and return.
*
* Everything else becomes a FUNC.
*
- * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
- * had an OP_CONST or OP_READLINE). This just sets us up for a
+ * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
+ * had an OP_CONST. This just sets us up for a
* call to S_sublex_push().
*/
if (is_heredoc)
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
PL_copline = NOLINE;
-
+
Newxz(shared, 1, LEXSHARED);
shared->ls_prev = PL_parser->lex_shared;
PL_parser->lex_shared = shared;
e - backslash_ptr,
&first_bad_char_loc))
{
- /* If warnings are on, this will print a more detailed analysis of what
- * is wrong than the error message below */
- utf8n_to_uvchr(first_bad_char_loc,
- e - ((char *) first_bad_char_loc),
- NULL, 0);
-
- /* We deliberately don't try to print the malformed character, which
- * might not print very well; it also may be just the first of many
- * malformations, so don't print what comes after it */
+ _force_out_malformed_utf8_message(first_bad_char_loc,
+ (U8 *) PL_parser->bufend,
+ 0,
+ 0 /* 0 means don't die */ );
yyerror_pv(Perl_form(aTHX_
"Malformed UTF-8 character immediately after '%.*s'",
(int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
STRLEN len;
const char* const str = SvPV_const(res, len);
if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
- /* If warnings are on, this will print a more detailed analysis of
- * what is wrong than the error message below */
- utf8n_to_uvchr(first_bad_char_loc,
- (char *) first_bad_char_loc - str,
- NULL, 0);
-
- /* We deliberately don't try to print the malformed character,
- * which might not print very well; it also may be just the first
- * of many malformations, so don't print what comes after it */
+ _force_out_malformed_utf8_message(first_bad_char_loc,
+ (U8 *) PL_parser->bufend,
+ 0,
+ 0 /* 0 means don't die */ );
yyerror_pv(
Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
when the source isn't utf8, as for
example when it is entirely composed
of hex constants */
+ STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
+ number of characters found so far
+ that will expand (into 2 bytes)
+ should we have to convert to
+ UTF-8) */
SV *res; /* result from charnames */
STRLEN offset_to_max; /* The offset in the output to where the range
high-end character is temporarily placed */
* the needed size, SvGROW() is called. Its size parameter each time is
* based on the best guess estimate at the time, namely the length used so
* far, plus the length the current construct will occupy, plus room for
- * the trailing NUL, plus one byte for every input byte still unscanned */
+ * the trailing NUL, plus one byte for every input byte still unscanned */
UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
before set */
* Ranges entirely within Latin1 are expanded out entirely, in
* order to avoid the significant overhead of making a swash.
* Ranges that extend above Latin1 have to have a swash, so there
- * is no advantage to abbreviating them here, so they are stored
+ * is no advantage to expanding them here, so they are stored
* here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies
* a hyphen without any possible ambiguity. On EBCDIC machines, if
* the range is expressed as Unicode, the Latin1 portion is
* Unicode value (\N{...}), or if the range is a subset of
* [A-Z] or [a-z], and both ends are literal characters,
* like 'A', and not like \x{C1} */
- if ((convert_unicode
- = cBOOL(backslash_N) /* \N{} forces Unicode, hence
+ convert_unicode =
+ cBOOL(backslash_N) /* \N{} forces Unicode, hence
portable range */
|| ( ! non_portable_endpoint
&& (( isLOWER_A(range_min) && isLOWER_A(range_max))
- || (isUPPER_A(range_min) && isUPPER_A(range_max))))
- )) {
+ || (isUPPER_A(range_min) && isUPPER_A(range_max))));
+ if (convert_unicode) {
/* Special handling is needed for these portable ranges.
* They are defined to all be in Unicode terms, which
else if (convert_unicode) {
/* diag_listed_as: Invalid range "%s" in transliteration operator */
Perl_croak(aTHX_
- "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
+ "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
" in transliteration operator",
range_min, range_max);
}
else {
/* diag_listed_as: Invalid range "%s" in transliteration operator */
Perl_croak(aTHX_
- "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
+ "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
" in transliteration operator",
range_min, range_max);
}
if (!esc)
in_charclass = TRUE;
}
-
- else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
char *s1 = s-1;
int esc = 0;
while (s1 >= start && *s1-- == '\\')
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
else if (*s == '@' && s[1]) {
- if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
+ if (UTF
+ ? isIDFIRST_utf8_safe(s+1, send)
+ : isWORDCHAR_A(s[1]))
+ {
break;
+ }
if (strchr(":'{$", s[1]))
break;
if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
}
else {
if (!has_utf8 && uv > 255) {
- /* Might need to recode whatever we have accumulated so
- * far if it contains any chars variant in utf8 or
- * utf-ebcdic. */
-
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(
- sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
- /* Above-latin1 in string
- * implies no encoding */
- |SV_UTF8_NO_ENCODING,
- UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
- d = SvPVX(sv) + SvCUR(sv);
- has_utf8 = TRUE;
+
+ /* Here, 'uv' won't fit unless we convert to UTF-8.
+ * If we've only seen invariants so far, all we have to
+ * do is turn on the flag */
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ }
+ else {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
+
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+
+ /* Since we're having to grow here,
+ * make sure we have enough room for
+ * this escape and a NUL, so the
+ * code immediately below won't have
+ * to actually grow again */
+ UVCHR_SKIP(uv)
+ + (STRLEN)(send - s) + 1);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
+
+ has_utf8 = TRUE;
}
- if (has_utf8) {
+ if (! has_utf8) {
+ *d++ = (char)uv;
+ utf8_variant_count++;
+ }
+ else {
/* Usually, there will already be enough room in 'sv'
* since such escapes are likely longer than any UTF-8
* sequence they can end up as. This isn't the case on
* EBCDIC where \x{40000000} contains 12 bytes, and the
* UTF-8 for it contains 14. And, we have to allow for
* a trailing NUL. It probably can't happen on ASCII
- * platforms, but be safe */
- const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+ * platforms, but be safe. See Note on sizing above. */
+ const STRLEN needed = d - SvPVX(sv)
+ + UVCHR_SKIP(uv)
+ + (send - s)
+ 1;
if (UNLIKELY(needed > SvLEN(sv))) {
SvCUR_set(sv, d - SvPVX_const(sv));
- d = sv_grow(sv, needed) + SvCUR(sv);
+ d = SvCUR(sv) + SvGROW(sv, needed);
}
d = (char*)uvchr_to_utf8((U8*)d, uv);
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
- }
- else {
- *d++ = (char)uv;
}
}
#ifdef EBCDIC
* braces */
s++;
if (*s != '{') {
- yyerror("Missing braces on \\N{}");
+ yyerror("Missing braces on \\N{}");
continue;
}
s++;
if (! has_utf8 && ( uv > 0xFF
|| PL_lex_inwhat != OP_TRANS))
{
+ /* See Note on sizing above. */
+ const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
+
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(
- sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
- d = SvPVX(sv) + SvCUR(sv);
+
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ extra);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
+
has_utf8 = TRUE;
}
* \N{} implies Unicode semantics, and scalars have
* to be in utf8 to guarantee those semantics; but
* not needed in tr/// */
- sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
+ sv_utf8_upgrade_flags(res, 0);
str = SvPV_const(res, len);
}
/* Upgrade destination to be utf8 if this new
* component is */
if (! has_utf8 && SvUTF8(res)) {
+ /* See Note on sizing above. */
+ const STRLEN extra = len + (send - s) + 1;
+
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
+
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(sv,
SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- len + (STRLEN)(send - s) + 1);
- d = SvPVX(sv) + SvCUR(sv);
+ extra);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
has_utf8 = TRUE;
} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
/* See Note on sizing above. (NOTE: SvCUR() is not
* set correctly here). */
+ const STRLEN extra = len + (send - e) + 1;
const STRLEN off = d - SvPVX_const(sv);
- d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
+ d = off + SvGROW(sv, off + extra);
}
Copy(str, d, len, char);
d += len;
} /* end if (backslash) */
default_action:
- /* If we started with encoded form, or already know we want it,
- then encode the next character */
- if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
- STRLEN len = 1;
-
- /* One might think that it is wasted effort in the case of the
- * source being utf8 (this_utf8 == TRUE) to take the next character
- * in the source, convert it to an unsigned value, and then convert
- * it back again. But the source has not been validated here. The
- * routine that does the conversion checks for errors like
- * malformed utf8 */
+ /* Just copy the input to the output, though we may have to convert
+ * to/from UTF-8.
+ *
+ * If the input has the same representation in UTF-8 as not, it will be
+ * a single byte, and we don't care about UTF8ness; just copy the byte */
+ if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
+ *d++ = *s++;
+ }
+ else if (! this_utf8 && ! has_utf8) {
+ /* If neither source nor output is UTF-8, is also a single byte,
+ * just copy it; but this byte counts should we later have to
+ * convert to UTF-8 */
+ *d++ = *s++;
+ utf8_variant_count++;
+ }
+ else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */
+ const STRLEN len = UTF8SKIP(s);
+ /* We expect the source to have already been checked for
+ * malformedness */
+ assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
+
+ Copy(s, d, len, U8);
+ d += len;
+ s += len;
+ }
+ else { /* UTF8ness matters and doesn't match, need to convert */
+ STRLEN len = 1;
const UV nextuv = (this_utf8)
? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
: (UV) ((U8) *s);
- const STRLEN need = UVCHR_SKIP(nextuv);
+ STRLEN need = UVCHR_SKIP(nextuv);
+
if (!has_utf8) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- need + (STRLEN)(send - s) + 1);
- d = SvPVX(sv) + SvCUR(sv);
+
+ /* See Note on sizing above. */
+ need += (STRLEN)(send - s) + 1;
+
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ need);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
has_utf8 = TRUE;
} else if (need > len) {
/* encoded value larger than old, may need extra space (NOTE:
* SvCUR() is not set correctly here). See Note on sizing
* above. */
+ const STRLEN extra = need + (send - s) + 1;
const STRLEN off = d - SvPVX_const(sv);
- d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
+ d = off + SvGROW(sv, off + extra);
}
s += len;
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
}
- else {
- *d++ = *s++;
- }
} /* while loop to process each character */
/* terminate the string and set up the sv */
*d = '\0';
SvCUR_set(sv, d - SvPVX_const(sv));
if (SvCUR(sv) >= SvLEN(sv))
- Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
- " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
+ Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
+ " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
if (has_utf8) {
case '&':
case '$':
weight -= seen[un_char] * 10;
- if (isWORDCHAR_lazy_if(s+1,UTF)) {
+ if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
int len;
char *tmp = PL_bufend;
PL_bufend = (char*)send;
STRLEN const last_lop_pos =
PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
av_push(PL_rsfp_filters, linestr);
- PL_parser->linestr =
+ PL_parser->linestr =
newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
buf = SvPVX(PL_parser->linestr);
PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
S_check_scalar_slice(pTHX_ char *s)
{
s++;
- while (*s == ' ' || *s == '\t') s++;
- if (*s == 'q' && s[1] == 'w'
- && !isWORDCHAR_lazy_if(s+2,UTF))
+ while (SPACE_OR_TAB(*s)) s++;
+ if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
+ PL_bufend,
+ UTF))
+ {
return;
- while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
- s += UTF ? UTF8SKIP(s) : 1;
+ }
+ while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
+ || (*s && strchr(" \t$#+-'\"", *s)))
+ {
+ s += UTF ? UTF8SKIP(s) : 1;
+ }
if (*s == '}' || *s == ']')
pl_yylval.ival = OPpSLICEWARNING;
}
DEBUG_T( {
SV* tmp = newSVpvs("");
- PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
+ PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
(IV)CopLINE(PL_curcop),
lex_state_names[PL_lex_state],
exp_name[PL_expect],
if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
if ((*s == 'L' || *s == 'U' || *s == 'F')
- && (strchr(PL_lex_casestack, 'L')
- || strchr(PL_lex_casestack, 'U')
- || strchr(PL_lex_casestack, 'F')))
+ && (strpbrk(PL_lex_casestack, "LUF")))
{
PL_lex_casestack[--PL_lex_casemods] = '\0';
PL_lex_allbrackets--;
break;
}
s = skipspace(s);
- if (isIDFIRST_lazy_if(s, UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char *dest = PL_tokenbuf + 1;
/* read var name, including sigil, into PL_tokenbuf */
PL_tokenbuf[0] = sigil;
default:
if (UTF) {
if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
- ENTER;
- SAVESPTR(PL_warnhook);
- PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
- LEAVE;
+ _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
}
- if (isIDFIRST_utf8((U8*)s)) {
+ if (isIDFIRST_utf8_safe(s, PL_bufend)) {
goto keylookup;
}
}
} else {
d = PL_linestart;
}
- Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
+ Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
UTF8fARG(UTF, (s - d), d),
(int) len + 1);
}
PL_expect = XPOSTDEREF;
TOKEN(ARROW);
}
- if (isIDFIRST_lazy_if(s,UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
s = force_word(s,METHOD,FALSE,TRUE);
TOKEN(ARROW);
}
grabattrs:
s = skipspace(s);
attrs = NULL;
- while (isIDFIRST_lazy_if(s,UTF)) {
+ while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
I32 tmp;
SV *sv;
d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
while (d < PL_bufend && SPACE_OR_TAB(*d))
d++;
}
- if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
+ if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
FALSE, &len);
while (d < PL_bufend && SPACE_OR_TAB(*d))
}
else
/* skip plain q word */
- while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+ while ( t < PL_bufend
+ && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+ {
t += UTF ? UTF8SKIP(t) : 1;
+ }
}
- else if (isWORDCHAR_lazy_if(t,UTF)) {
+ else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
t += UTF ? UTF8SKIP(t) : 1;
- while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+ while ( t < PL_bufend
+ && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+ {
t += UTF ? UTF8SKIP(t) : 1;
+ }
}
while (t < PL_bufend && isSPACE(*t))
t++;
}
s--;
if (PL_expect == XOPERATOR) {
- if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
- && isIDFIRST_lazy_if(s,UTF))
+ if ( PL_bufptr == PL_linestart
+ && ckWARN(WARN_SEMICOLON)
+ && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
{
CopLINE_dec(PL_curcop);
Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
POSTDEREF('$');
}
- if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
+ if ( s[1] == '#'
+ && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
+ || strchr("{$:+-@", s[2])))
+ {
PL_tokenbuf[0] = '@';
s = scan_ident(s + 1, PL_tokenbuf + 1,
sizeof PL_tokenbuf - 1, FALSE);
if (ckWARN(WARN_SYNTAX)) {
char *t = s+1;
- while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
+ while ( isSPACE(*t)
+ || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
+ || *t == '$')
+ {
t += UTF ? UTF8SKIP(t) : 1;
+ }
if (*t++ == ',') {
PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
while (t < PL_bufend && *t != ']')
t++;
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Multidimensional syntax %"UTF8f" not supported",
+ "Multidimensional syntax %" UTF8f " not supported",
UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
}
}
do {
t++;
} while (isSPACE(*t));
- if (isIDFIRST_lazy_if(t,UTF)) {
+ if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
STRLEN len;
t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
&len);
while (isSPACE(*t))
t++;
- if (*t == ';'
- && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
+ if ( *t == ';'
+ && get_cvn_flags(tmpbuf, len, UTF
+ ? SVf_UTF8
+ : 0))
+ {
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "You need to quote \"%"UTF8f"\"",
+ "You need to quote \"%" UTF8f "\"",
UTF8fARG(UTF, len, tmpbuf));
+ }
}
}
}
PL_expect = XOPERATOR;
else if (strchr("$@\"'`q", *s))
PL_expect = XTERM; /* e.g. print $fh "foo" */
- else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
+ else if ( strchr("&*<%", *s)
+ && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+ {
PL_expect = XTERM; /* e.g. print $fh &sub */
- else if (isIDFIRST_lazy_if(s,UTF)) {
+ }
+ else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char tmpbuf[sizeof PL_tokenbuf];
int t2;
scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
}
else {
/* Disable warning on "study /blah/" */
- if (PL_oldoldbufptr == PL_last_uni
- && (*PL_last_uni != 's' || s - PL_last_uni < 5
- || memNE(PL_last_uni, "study", 5)
- || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
+ if ( PL_oldoldbufptr == PL_last_uni
+ && ( *PL_last_uni != 's' || s - PL_last_uni < 5
+ || memNE(PL_last_uni, "study", 5)
+ || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
))
check_uni();
s = scan_pat(s,OP_MATCH);
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
if (!morelen)
- Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+ Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
UTF8fARG(UTF, len, PL_tokenbuf),
*s == '\'' ? "'" : "::");
len += morelen;
if (ckWARN(WARN_BAREWORD)
&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
- "Bareword \"%"UTF8f"\" refers to nonexistent package",
- UTF8fARG(UTF, len, PL_tokenbuf));
+ "Bareword \"%" UTF8f
+ "\" refers to nonexistent package",
+ UTF8fARG(UTF, len, PL_tokenbuf));
len -= 2;
PL_tokenbuf[len] = '\0';
gv = NULL;
s = skipspace(s);
/* Two barewords in a row may indicate method call. */
-
- if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+ if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
+ || *s == '$')
&& (tmp = intuit_method(s, lex ? NULL : sv, cv)))
{
goto method;
/* If followed by a bareword, see if it looks like indir obj. */
- if (tmp == 1 && !orig_keyword
- && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
- && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+ if ( tmp == 1
+ && !orig_keyword
+ && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
+ && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+ {
method:
if (lex && !off) {
assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
&& saw_infix_sigil) {
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Operator or semicolon missing before %c%"UTF8f,
+ "Operator or semicolon missing before %c%" UTF8f,
lastchar,
UTF8fARG(UTF, strlen(PL_tokenbuf),
PL_tokenbuf));
case KEY___LINE__:
FUN0OP(
newSVOP(OP_CONST, 0,
- Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
+ Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
);
case KEY___PACKAGE__:
goto just_a_word;
}
if (!tmp)
- Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+ Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
UTF8fARG(UTF, len, PL_tokenbuf));
if (tmp < 0)
tmp = -tmp;
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
s = skipspace(s);
- if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
+ if ( PL_expect == XSTATE
+ && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
+ {
char *p = s;
if ((PL_bufend - p) >= 3
p += 3;
p = skipspace(p);
/* skip optional package name, as in "for my abc $x (..)" */
- if (isIDFIRST_lazy_if(p,UTF)) {
+ if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
p = skipspace(p);
}
}
PL_in_my = (U16)tmp;
s = skipspace(s);
- if (isIDFIRST_lazy_if(s,UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strEQs(PL_tokenbuf, "sub"))
goto really_sub;
case KEY_open:
s = skipspace(s);
- if (isIDFIRST_lazy_if(s,UTF)) {
- const char *t;
- d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
- &len);
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+ const char *t;
+ d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+ &len);
for (t=d; isSPACE(*t);)
t++;
if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
&& !keyword(s, d-s, 0)
) {
Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
- "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+ "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
}
}
{
*PL_tokenbuf = '\0';
s = force_word(s,BAREWORD,TRUE,TRUE);
- if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
+ if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
+ PL_tokenbuf + sizeof(PL_tokenbuf),
+ UTF))
+ {
gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
GV_ADD | (UTF ? SVf_UTF8 : 0));
+ }
else if (*s == '<')
yyerror("<> at require-statement should be quotes");
}
orig_keyword = 0;
pl_yylval.ival = 1;
}
- else
+ else
pl_yylval.ival = 0;
PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
PL_bufptr = s;
s = skipspace(s);
d = SvPVX(PL_linestr)+off;
- if (isIDFIRST_lazy_if(s,UTF)
+ if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| *s == '\''
|| (*s == ':' && s[1] == ':'))
{
if (!have_name)
Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
else if (*s != ';' && *s != '}')
- Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
+ Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
}
if (have_proto) {
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %"UTF8f
+ "Possible unintended interpolation of %" UTF8f
" in string",
UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
}
s++;
while (s < PL_bufend && isSPACE(*s))
s++;
- if (isIDFIRST_lazy_if(s,UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
const char * const w = s;
s += UTF ? UTF8SKIP(s) : 1;
- while (isWORDCHAR_lazy_if(s,UTF))
+ while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
s += UTF ? UTF8SKIP(s) : 1;
while (s < PL_bufend && isSPACE(*s))
s++;
PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
- bool is_utf8, bool check_dollar) {
+ bool is_utf8, bool check_dollar)
+{
PERL_ARGS_ASSERT_PARSE_IDENT;
- for (;;) {
+ while (*s < PL_bufend) {
if (*d >= e)
Perl_croak(aTHX_ "%s", ident_too_long);
- if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+ if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
/* The UTF-8 case must come first, otherwise things
* like c\N{COMBINING TILDE} would start failing, as the
* isWORDCHAR_A case below would gobble the 'c' up.
*/
char *t = *s + UTF8SKIP(*s);
- while (isIDCONT_utf8((U8*)t))
+ while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
t += UTF8SKIP(t);
+ }
if (*d + (t - *s) > e)
Perl_croak(aTHX_ "%s", ident_too_long);
Copy(*s, *d, t - *s, char);
*(*d)++ = *(*s)++;
} while (isWORDCHAR_A(**s) && *d < e);
}
- else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+ else if ( allow_package
+ && **s == '\''
+ && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
+ {
*(*d)++ = ':';
*(*d)++ = ':';
(*s)++;
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
* '{' without knowing if is UTF-8 or not. */
-#define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : (isGRAPH_L1(*s) \
+#define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
+ (isGRAPH_A(*(s)) || ((is_utf8) \
+ ? isIDFIRST_utf8_safe(s, e) \
+ : (isGRAPH_L1(*s) \
&& LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
STATIC char *
/* Here, it is not a run-of-the-mill identifier name */
if (*s == '$' && s[1]
- && (isIDFIRST_lazy_if(s+1,is_utf8)
+ && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
|| isDIGIT_A((U8)s[1])
|| s[1] == '$'
|| s[1] == '{'
if ((s <= PL_bufend - (is_utf8)
? UTF8SKIP(s)
: 1)
- && VALID_LEN_ONE_IDENT(s, is_utf8))
+ && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
{
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
bool skip;
char *s2;
/* If we were processing {...} notation then... */
- if (isIDFIRST_lazy_if(d,is_utf8)) {
+ if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
/* if it starts as a valid identifier, assume that it is one.
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
s2 = peekspace(s);
else
s2 = s;
-
+
/* Expect to find a closing } after consuming any trailing whitespace.
*/
if (*s2 == '}') {
orig_copline = CopLINE(PL_curcop);
CopLINE_set(PL_curcop, tmp_copline);
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
+ "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
funny, SVfARG(tmp), funny, SVfARG(tmp));
CopLINE_set(PL_curcop, orig_copline);
}
STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
if ( charlen != 1 || ! strchr(valid_flags, c) ) {
- if (isWORDCHAR_lazy_if(*s, UTF)) {
+ if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
UTF ? SVf_UTF8 : 0);
(*s) += charlen;
/* issue a warning if /c is specified,but /g is not */
if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
- Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
"Use of /c modifier is meaningless without /g" );
}
* spreads over */
sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
- ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = es;
+ ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
+ cBOOL(es);
}
PL_lex_op = (OP*)pm;
s++, term = '\'';
else
term = '"';
- if (!isWORDCHAR_lazy_if(s,UTF))
+ if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
deprecate("bare << to mean <<\"\"");
peek = s;
- while (isWORDCHAR_lazy_if(peek,UTF)) {
+ while (
+ isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
+ {
peek += UTF ? UTF8SKIP(peek) : 1;
}
len = (peek - s >= e - d) ? (e - d) : (peek - s);
/* Only valid if it's preceded by whitespace only */
while (backup != myolds && --backup >= myolds) {
- if (*backup != ' ' && *backup != '\t') {
+ if (! SPACE_OR_TAB(*backup)) {
break;
}
/* Only valid if it's preceded by whitespace only */
while (backup != s && --backup >= s) {
- if (*backup != ' ' && *backup != '\t') {
+ if (! SPACE_OR_TAB(*backup)) {
break;
}
indent_len++;
}
/* All whitespace or none! */
- if (backup == found || *backup == ' ' || *backup == '\t') {
+ if (backup == found || SPACE_OR_TAB(*backup)) {
Newxz(indent, indent_len + 1, char);
memcpy(indent, backup, indent_len);
SvREFCNT_dec(PL_linestr);
STRLEN herelen = SvCUR(tmpstr);
char *ss = SvPVX(tmpstr);
char *se = ss + herelen;
- SV *newstr = newSVpvs("");
- SvGROW(newstr, herelen);
+ SV *newstr = newSV(herelen+1);
+ SvPOK_on(newstr);
/* Trim leading whitespace */
while (ss < se) {
if (*ss == '\n') {
sv_catpv(newstr,"\n");
ss++;
+ linecount++;
/* Found our indentation? Strip it */
} else if (se - ss >= indent_len
(int)linecount
);
}
-
- linecount++;
}
-
- sv_setsv(tmpstr,newstr);
-
+ /* avoid sv_setsv() as we dont wan't to COW here */
+ sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
Safefree(indent);
SvREFCNT_dec_NN(newstr);
}
if (*d == '$' && d[1]) d++;
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
- while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+ while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
d += UTF ? UTF8SKIP(d) : 1;
+ }
/* If we've tried to read what we allow filehandles to look like, and
there's still text left, then it must be a glob() and not a getline.
STRLEN termlen; /* length of terminating string */
line_t herelines;
+ /* The delimiters that have a mirror-image closing one */
+ const char * opening_delims = "([{<";
+ const char * closing_delims = ")]}>";
+
+ const char * non_grapheme_msg = "Use of unassigned code point or"
+ " non-standalone grapheme for a delimiter"
+ " will be a fatal error starting in Perl"
+ " v5.30";
+ /* The only non-UTF character that isn't a stand alone grapheme is
+ * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */
+ bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
+
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
/* after skipping whitespace, the next character is the terminator */
term = *s;
- if (!UTF) {
+ if (!UTF || UTF8_IS_INVARIANT(term)) {
termcode = termstr[0] = term;
termlen = 1;
}
else {
termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
+ if (check_grapheme) {
+ if ( UNLIKELY(UNICODE_IS_SUPER(termcode))
+ || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
+ {
+ /* These are considered graphemes, and since the ending
+ * delimiter will be the same, we don't have to check the other
+ * end */
+ check_grapheme = FALSE;
+ }
+ else if (UNLIKELY(! _is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ termcode)))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
+
+ /* Don't have to check the other end, as have already warned at
+ * this one */
+ check_grapheme = FALSE;
+ }
+ }
+
Copy(s, termstr, termlen, U8);
- if (!UTF8_IS_INVARIANT(term))
- has_utf8 = TRUE;
}
/* mark where we are */
PL_multi_open = termcode;
herelines = PL_parser->herelines;
- /* find corresponding closing delimiter */
- if (term && (tmps = strchr("([{< )]}> )]}>",term)))
- termcode = termstr[0] = term = tmps[5];
+ /* If the delimiter has a mirror-image closing one, get it */
+ if (term && (tmps = strchr(opening_delims, term))) {
+ termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
+ }
PL_multi_close = termcode;
if (termlen == 1)
break;
if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
+ if ( check_grapheme
+ && UNLIKELY(! _is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ termcode)))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "%s", non_grapheme_msg);
+ }
break;
}
else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
return NULL;
}
- s = PL_bufptr;
+ s = start = PL_bufptr;
}
/* at this point, we have successfully read the delimited string */
PL_expect = XSTATE;
if (needargs) {
const char *s2 = s;
- while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
- || *s2 == '\v')
+ while (isSPACE(*s2) && *s2 != '\n')
s2++;
if (*s2 == '{') {
PL_expect = XTERMBLOCK;
Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
}
msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
- Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
+ Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
OutCopFILE(PL_curcop),
(IV)(PL_parser->preambling == NOLINE
? CopLINE(PL_curcop)
: PL_parser->preambling));
if (context)
- Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+ Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
UTF8fARG(UTF, contlen, context));
else
- Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
+ Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
Perl_sv_catpvf(aTHX_ msg,
- " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
+ " (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
(int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
PL_in_eval &= ~EVAL_WARNONLY;
- Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
}
else
qerror(msg);
if (PL_error_count >= 10) {
SV * errsv;
if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
- Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
+ Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
SVfARG(errsv), OutCopFILE(PL_curcop));
else
Perl_croak(aTHX_ "%s has too many errors.\n",
Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
}
if (status < 0) {
- Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
+ Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
}
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
FPTR2DPTR(void *, S_utf16_textfilter),
reverse ? 'l' : 'b', idx, maxlen, status,
(UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
status = FILTER_READ(idx + 1, utf16_buffer,
160 + (SvCUR(utf16_buffer) & 1));
- DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
if (status < 0) {
/* Error */
}
}
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
status,
(UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
STRLEN wlen, bufptr_pos;
lex_read_space(0);
t = s = PL_bufptr;
- if (!isIDFIRST_lazy_if(s, UTF))
+ if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
goto no_label;
t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
if (word_takes_any_delimiter(s, wlen))