/* The here-doc parser needs to be able to peek into outer lexing
scopes to find the body of the here-doc. So we put PL_linestr and
- PL_bufptr into lex_shared, to ‘share’ those values.
+ PL_bufptr into lex_shared, to 'share' those values.
*/
PL_parser->lex_shared->ls_linestr = PL_linestr;
PL_parser->lex_shared->ls_bufptr = PL_bufptr;
return(datasv);
}
+/*
+=for apidoc_section $filters
+=for apidoc filter_del
+
+Delete most recently added instance of the filter function argument
+
+=cut
+*/
-/* Delete most recently added instance of this filter function. */
void
Perl_filter_del(pTHX_ filter_t funcp)
{
PL_lex_repl = NULL;
}
/* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
- re_eval_str. If the here-doc body’s length equals the previous
+ re_eval_str. If the here-doc body's length equals the previous
value of re_eval_start, re_eval_start will now be null. So
check re_eval_str as well. */
if (PL_parser->lex_shared->re_eval_start
does not matter what PL_linestr points to, since we are
about to croak; but in a quote-like op, linestr_save
will have been prospectively freed already, via
- SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
restore PL_linestr. */
SvREFCNT_dec_NN(PL_linestr);
PL_linestr = linestr_save;
)
{
SV *sv; /* scalar value: string */
- const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
char *to; /* current position in the sv's data */
int brackets = 1; /* bracket nesting level */
bool d_is_utf8 = FALSE; /* is there any utf8 content? */
UV open_delim_code; /* code point */
- UV close_delim_code; /* code point */
char open_delim_str[UTF8_MAXBYTES+1];
- char close_delim_str[UTF8_MAXBYTES+1];
- char close_delim_byte0;
STRLEN delim_byte_len; /* each delimiter currently is the same number
of bytes */
line_t herelines;
- /* The delimiters that have a mirror-image closing one */
- const char * opening_delims = "([{<";
- const char * closing_delims = ")]}>";
-
/* The only non-UTF character that isn't a stand alone grapheme is
* white-space, hence can't be a delimiter. */
const char * non_grapheme_msg = "Use of unassigned code point or"
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
- if (isSPACE(*s)) {
- s = skipspace(s);
+ if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so
+ 'start' also has to change */
+ s = start = skipspace(s);
}
/* mark where we are, in case we need to report errors */
CLINE;
/* after skipping whitespace, the next character is the delimiter */
- close_delim_byte0 = *s;
- if (!UTF || UTF8_IS_INVARIANT(close_delim_byte0)) {
- close_delim_str[0] = close_delim_byte0;
- open_delim_str[0] = close_delim_str[0];
-
- close_delim_code = (U8) close_delim_str[0];
- open_delim_code = close_delim_code;
+ if (! UTF || UTF8_IS_INVARIANT(*s)) {
+ open_delim_code = (U8) *s;
+ open_delim_str[0] = *s;
delim_byte_len = 1;
}
else {
- open_delim_code = close_delim_code =
- utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &delim_byte_len);
- if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- open_delim_code)))
+ open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
+ &delim_byte_len);
+ if (UNLIKELY(! is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ open_delim_code)))
{
yyerror(non_grapheme_msg);
}
- Copy(s, open_delim_str, delim_byte_len, char);
- Copy(s, close_delim_str, delim_byte_len, char);
+ Copy(s, open_delim_str, delim_byte_len, char);
}
+ open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
+
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
PL_multi_open = open_delim_code;
herelines = PL_parser->herelines;
+ const char * legal_paired_opening_delims;
+ const char * legal_paired_closing_delims;
+ const char * deprecated_opening_delims;
+ if (FEATURE_MORE_DELIMS_IS_ENABLED) {
+ if (UTF) {
+ legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
+ legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
+
+ /* We are deprecating using a closing delimiter as the opening, in
+ * case we want in the future to accept them reversed. The string
+ * may include ones that are legal, but the code below won't look
+ * at this string unless it didn't find a legal opening one */
+ deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
+ }
+ else {
+ legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
+ legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
+ deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
+ }
+ }
+ else {
+ legal_paired_opening_delims = "([{<";
+ legal_paired_closing_delims = ")]}>";
+ deprecated_opening_delims = (UTF)
+ ? DEPRECATED_OPENING_UTF8_BRACKETS
+ : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
+ }
+
+ const char * legal_paired_opening_delims_end = legal_paired_opening_delims
+ + strlen(legal_paired_opening_delims);
+ const char * deprecated_delims_end = deprecated_opening_delims
+ + strlen(deprecated_opening_delims);
+
+ const char * close_delim_str = open_delim_str;
+ UV close_delim_code = open_delim_code;
+
/* If the delimiter has a mirror-image closing one, get it */
- if (close_delim_byte0 && (tmps = strchr(opening_delims, close_delim_byte0))) {
- close_delim_str[0] = close_delim_byte0 = closing_delims[tmps - opening_delims];
- close_delim_code = (U8) close_delim_str[0];
+ const char *tmps = ninstr(legal_paired_opening_delims,
+ legal_paired_opening_delims_end,
+ open_delim_str, open_delim_str + delim_byte_len);
+ if (tmps) {
+ /* Here, there is a paired delimiter, and tmps points to its position
+ in the string of the accepted opening paired delimiters. The
+ corresponding position in the string of closing ones is the
+ beginning of the paired mate. Both contain the same number of
+ bytes. */
+ close_delim_str = legal_paired_closing_delims
+ + (tmps - legal_paired_opening_delims);
+
+ /* The list of paired delimiters contains all the ASCII ones that have
+ * always been legal, and no other ASCIIs. Don't raise a message if
+ * using one of these */
+ if (! isASCII(open_delim_code)) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
+ "Use of '%" UTF8f "' is experimental as a string delimiter",
+ UTF8fARG(UTF, delim_byte_len, open_delim_str));
+ }
+
+ close_delim_code = (UTF)
+ ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
+ : * (U8 *) close_delim_str;
+ }
+ else { /* Here, the delimiter isn't paired, hence the close is the same as
+ the open; and has aready been set up. But make sure it isn't
+ deprecated to use this particular delimiter, as we plan
+ eventually to make it paired. */
+ if (ninstr(deprecated_opening_delims, deprecated_delims_end,
+ open_delim_str, open_delim_str + delim_byte_len))
+ {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+ "Use of '%" UTF8f "' is deprecated as a string delimiter",
+ UTF8fARG(UTF, delim_byte_len, open_delim_str));
+ }
+
+ /* Note that a NUL may be used as a delimiter, and this happens when
+ * delimitting an empty string, and no special handling for it is
+ * needed, as ninstr() calls are used */
}
PL_multi_close = close_delim_code;
/* create a new SV to hold the contents. 79 is the SV's initial length.
What a random number. */
sv = newSV_type(SVt_PVIV);
- SvGROW(sv, 80);
+ SvGROW(sv, 79);
SvIV_set(sv, close_delim_code);
(void)SvPOK_only(sv); /* validate pointer */
if ( *s == '\\' && s < PL_bufend - delim_byte_len
/* ... but not if the delimiter itself is a backslash */
- && close_delim_byte0 != '\\')
+ && close_delim_code != '\\')
{
/* Here, we have an escaping backslash. If we're supposed to
* discard those that escape the closing delimiter, just
* Only grapheme delimiters are legal. */
if ( UTF /* All Non-UTF-8's are graphemes */
&& UNLIKELY(! is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- close_delim_code)))
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ close_delim_code)))
{
yyerror(non_grapheme_msg);
}
brackets++;
}
- if (UTF && ! UTF8_IS_INVARIANT((U8) *s)) {
+ /* Here, still in the middle of the string; copy this character */
+ if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
+ *to++ = *s++;
+ }
+ else {
size_t this_char_len = UTF8SKIP(s);
Copy(s, to, this_char_len, char);
s += this_char_len;
d_is_utf8 = TRUE;
}
- else {
- *to++ = *s++;
- }
- }
+ } /* End of loop through buffer */
- /* terminate the copied string and update the sv's end-of-string */
+ /* Here, found end of the string, OR ran out of buffer: terminate the
+ * copied string and update the sv's end-of-string */
*to = '\0';
SvCUR_set(sv, to - SvPVX_const(sv));
return NULL;
}
s = start = PL_bufptr;
- }
+ } /* End of infinite loop */
/* at this point, we have successfully read the delimited string */
#endif
/*
+=for apidoc scan_vstring
+
Returns a pointer to the next character after the parsed
vstring, as well as updating the passed in sv.
a leak). Make sure to do SvREFCNT_inc afterwards if you use
sv_2mortal.
+=cut
*/
char *
}
/*
+=for apidoc_section $lexer
=for apidoc wrap_keyword_plugin
Puts a C function into the chain of keyword plugins. This is the