#include "dquote_inline.h"
#include "invlist_inline.h"
-#define new_constant(a,b,c,d,e,f,g) \
- S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
#define pl_yylval (PL_parser->yylval)
}
else if (GvAV(cfgv)) {
AV * const av = GvAV(cfgv);
- const I32 start = CopLINE(PL_curcop)+1;
- I32 items = AvFILLp(av) - start;
+ const line_t start = CopLINE(PL_curcop)+1;
+ SSize_t items = AvFILLp(av) - start;
if (items > 0) {
AV * const av2 = GvAVn(gv2);
SV **svp = AvARRAY(av) + start;
- I32 l = (I32)line_num+1;
- while (items--)
- av_store(av2, l++, SvREFCNT_inc(*svp++));
+ Size_t l = line_num+1;
+ while (items-- && l < SSize_t_MAX && l == (line_t)l)
+ av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
}
}
}
SV * const sv = newSVpvn_utf8(start, len,
! IN_BYTES
&& UTF
+ && len != 0
&& is_utf8_non_invariant_string((const U8*)start, len));
return sv;
}
SvCUR_set(sv, d - SvPVX_const(sv));
finish:
if ( PL_hints & HINT_NEW_STRING )
- return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+ return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
return sv;
}
const line_t l = CopLINE(PL_curcop);
LEAVE;
if (PL_parser->sub_error_count != PL_error_count) {
- const char * const name = OutCopFILE(PL_curcop);
if (PL_parser->sub_no_recover) {
- const char * msg = "";
- if (PL_in_eval) {
- SV *errsv = ERRSV;
- if (SvCUR(ERRSV)) {
- msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
- }
- }
- abort_execution(msg, name);
+ yyquit();
NOT_REACHED;
}
}
}
STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+ /* This justs wraps get_and_check_backslash_N_name() to output any error
+ * message it returns. */
+
+ const char * error_msg = NULL;
+ SV * result;
+
+ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+ /* charnames doesn't work well if there have been errors found */
+ if (PL_error_count > 0) {
+ return NULL;
+ }
+
+ result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+ if (error_msg) {
+ yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+ }
+
+ return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+ const char* const e,
+ const bool is_utf8,
+ const char ** error_msg)
{
/* <s> points to first character of interior of \N{}, <e> to one beyond the
* interior, hence to the "}". Finds what the name resolves to, returning
- * an SV* containing it; NULL if no valid one found */
-
- SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+ * an SV* containing it; NULL if no valid one found.
+ *
+ * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+ * doesn't have to be. */
+ SV* res;
HV * table;
SV **cvp;
SV *cv;
SV *rv;
HV *stash;
const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+ dVAR;
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ assert(e >= s);
+ assert(s > (char *) 3);
+
+ res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
if (!SvCUR(res)) {
SvREFCNT_dec_NN(res);
/* diag_listed_as: Unknown charname '%s' */
- yyerror("Unknown charname ''");
+ *error_msg = Perl_form(aTHX_ "Unknown charname ''");
return NULL;
}
res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
/* include the <}> */
- e - backslash_ptr + 1);
+ e - backslash_ptr + 1, error_msg);
if (! SvPOK(res)) {
SvREFCNT_dec_NN(res);
return NULL;
* characters that begin a character name alias are alphabetic, otherwise
* would have to create a isCHARNAME_BEGIN macro */
- if (! UTF) {
+ if (! is_utf8) {
if (! isALPHAU(*s)) {
goto bad_charname;
}
/* diag_listed_as: charnames alias definitions may not contain
trailing white-space; marked by <-- HERE in %s
*/
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain trailing "
"white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
- if (SvUTF8(res)) { /* Don't accept malformed input */
+ if (SvUTF8(res)) { /* Don't accept malformed charname value */
const U8* first_bad_char_loc;
STRLEN len;
const char* const str = SvPV_const(res, len);
0 /* 0 means don't die */ );
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
(int) (e - backslash_ptr + 1), backslash_ptr,
- (int) ((char *) first_bad_char_loc - str), str
- ),
- SVf_UTF8);
+ (int) ((char *) first_bad_char_loc - str), str);
return NULL;
}
}
* that this print won't run off the end of the string */
/* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
in \N{%s} */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
/* diag_listed_as: charnames alias definitions may not contain a
sequence of multiple spaces; marked by <-- HERE
in %s */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain a sequence of "
"multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
- if ((res = get_and_check_backslash_N_name(s, e)))
+ if ((res = get_and_check_backslash_N_name_wrapper(s, e)))
{
STRLEN len;
const char *str = SvPV_const(res, len);
}
sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
- type, typelen);
+ type, typelen, NULL);
}
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
return yylex();
case LEX_FORMLINE:
+ if (PL_parser->sub_error_count != PL_error_count) {
+ /* There was an error parsing a formline, which tends to
+ mess up the parser.
+ Unlike interpolated sub-parsing, we can't treat any of
+ these as recoverable, so no need to check sub_no_recover.
+ */
+ yyquit();
+ }
assert(PL_lex_formbrack);
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
SAVEI32(PL_lex_formbrack);
PL_parser->form_lex_state = PL_lex_state;
PL_lex_formbrack = PL_lex_brackets + 1;
+ PL_parser->sub_error_count = PL_error_count;
goto leftbracket;
}
}
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump) {
- Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
- "dump() better written as CORE::dump(). "
- "dump() will no longer be available "
- "in Perl 5.30");
+ Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
}
gv = NULL;
gvp = 0;
/* Look for a prototype */
if (*s == '(' && !is_sigsub) {
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
- COPLINE_SET_FROM_MULTI_END;
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
+ COPLINE_SET_FROM_MULTI_END;
(void)validate_proto(PL_subname, PL_lex_stuff,
ckWARN(WARN_ILLEGALPROTO), 0);
have_proto = TRUE;
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
- <type> is assumed to be well formed UTF-8 */
+ <type> is assumed to be well formed UTF-8.
+
+ If error_msg is not NULL, *error_msg will be set to any error encountered.
+ Otherwise yyerror() will be used to output it */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
- SV *sv, SV *pv, const char *type, STRLEN typelen)
+ SV *sv, SV *pv, const char *type, STRLEN typelen,
+ const char ** error_msg)
{
dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
if (*key == 'c') { assert (strEQ(key, "charnames")); }
assert(type || s);
- /* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && *key == 'c')
- {
- SvREFCNT_dec_NN(sv);
- return &PL_sv_undef;
- }
-
sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
(type ? type: s), why1, why2, why3);
}
}
- yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ if (error_msg) {
+ *error_msg = msg;
+ }
+ else {
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ }
return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
while (ss < se) {
/* newline only? Copy and move on */
if (*ss == '\n') {
- sv_catpv(newstr,"\n");
+ sv_catpvs(newstr,"\n");
ss++;
linecount++;
const char * opening_delims = "([{<";
const char * closing_delims = ")]}>";
+ /* The only non-UTF character that isn't a stand alone grapheme is
+ * white-space, hence can't be a delimiter. */
const char * non_grapheme_msg = "Use of unassigned code point or"
" non-standalone grapheme for a delimiter"
- " will be a fatal error starting in Perl"
- " 5.30";
- /* The only non-UTF character that isn't a stand alone grapheme is
- * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */
- bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+ " is not allowed";
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
}
else {
termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
- if (check_grapheme) {
- if ( UNLIKELY(UNICODE_IS_SUPER(termcode))
- || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
- {
- /* These are considered graphemes, and since the ending
- * delimiter will be the same, we don't have to check the other
- * end */
- check_grapheme = FALSE;
- }
- else if (UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- termcode)))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
- /* Don't have to check the other end, as have already warned at
- * this one */
- check_grapheme = FALSE;
- }
+ if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ termcode)))
+ {
+ yyerror(non_grapheme_msg);
}
Copy(s, termstr, termlen, U8);
if ( s + termlen <= PL_bufend
&& memEQ(s + 1, (char*)termstr + 1, termlen - 1))
{
- if ( check_grapheme
+ if ( UTF
&& UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
+ (U8 *) s,
+ (U8 *) PL_bufend,
termcode)))
{
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "%s", non_grapheme_msg);
+ yyerror(non_grapheme_msg);
}
break;
}
}
if (just_zero && (PL_hints & HINT_NEW_INTEGER))
sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL, 0);
+ sv, NULL, NULL, 0, NULL);
else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+ sv = new_constant(start, s - start, "binary",
+ sv, NULL, NULL, 0, NULL);
}
break;
const char *const key = floatit ? "float" : "integer";
const STRLEN keylen = floatit ? 5 : 7;
sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
- key, keylen, sv, NULL, NULL, 0);
+ key, keylen, sv, NULL, NULL, 0, NULL);
}
break;