return(datasv);
}
+/*
+=for apidoc_section $filters
+=for apidoc filter_del
+
+Delete most recently added instance of the filter function argument
+
+=cut
+*/
-/* Delete most recently added instance of this filter function. */
void
Perl_filter_del(pTHX_ filter_t funcp)
{
)
{
SV *sv; /* scalar value: string */
- const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
char *to; /* current position in the sv's data */
int brackets = 1; /* bracket nesting level */
bool d_is_utf8 = FALSE; /* is there any utf8 content? */
UV open_delim_code; /* code point */
- UV close_delim_code; /* code point */
char open_delim_str[UTF8_MAXBYTES+1];
- char close_delim_str[UTF8_MAXBYTES+1];
- char close_delim_byte0;
STRLEN delim_byte_len; /* each delimiter currently is the same number
of bytes */
line_t herelines;
CLINE;
/* after skipping whitespace, the next character is the delimiter */
- close_delim_byte0 = *s;
- if (! UTF || UTF8_IS_INVARIANT(close_delim_byte0)) {
- close_delim_str[0] = close_delim_byte0;
- open_delim_str[0] = close_delim_str[0];
-
- close_delim_code = (U8) close_delim_str[0];
- open_delim_code = close_delim_code;
+ if (! UTF || UTF8_IS_INVARIANT(*s)) {
+ open_delim_code = (U8) *s;
+ open_delim_str[0] = *s;
delim_byte_len = 1;
}
else {
- open_delim_code = close_delim_code =
- utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &delim_byte_len);
- if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- open_delim_code)))
+ open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
+ &delim_byte_len);
+ if (UNLIKELY(! is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ open_delim_code)))
{
yyerror(non_grapheme_msg);
}
Copy(s, open_delim_str, delim_byte_len, char);
- open_delim_str[delim_byte_len] = '\0';
- Copy(s, close_delim_str, delim_byte_len, char);
- close_delim_str[delim_byte_len] = '\0';
}
+ open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
+
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
herelines = PL_parser->herelines;
const char * legal_paired_opening_delims;
- const char * legal_paired_opening_delims_end;
const char * legal_paired_closing_delims;
- const char * deprecated_opening_delims = "";
- const char * deprecated_delims_end = deprecated_opening_delims;
+ const char * deprecated_opening_delims;
if (FEATURE_MORE_DELIMS_IS_ENABLED) {
if (UTF) {
legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
- legal_paired_opening_delims_end =
- C_ARRAY_END(EXTRA_OPENING_UTF8_BRACKETS);
legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
/* We are deprecating using a closing delimiter as the opening, in
* may include ones that are legal, but the code below won't look
* at this string unless it didn't find a legal opening one */
deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
- deprecated_delims_end =
- C_ARRAY_END(DEPRECATED_OPENING_UTF8_BRACKETS);
}
else {
legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
- legal_paired_opening_delims_end =
- C_ARRAY_END(EXTRA_OPENING_NON_UTF8_BRACKETS);
legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
-
deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
- deprecated_delims_end =
- C_ARRAY_END(DEPRECATED_OPENING_NON_UTF8_BRACKETS);
}
}
else {
legal_paired_opening_delims = "([{<";
legal_paired_closing_delims = ")]}>";
- legal_paired_opening_delims_end = legal_paired_opening_delims + 4;
-
- if (UTF) {
- deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
- deprecated_delims_end =
- C_ARRAY_END(DEPRECATED_OPENING_UTF8_BRACKETS);
- }
- else {
- deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
- deprecated_delims_end =
- C_ARRAY_END(DEPRECATED_OPENING_NON_UTF8_BRACKETS);
- }
+ deprecated_opening_delims = (UTF)
+ ? DEPRECATED_OPENING_UTF8_BRACKETS
+ : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
}
+ const char * legal_paired_opening_delims_end = legal_paired_opening_delims
+ + strlen(legal_paired_opening_delims);
+ const char * deprecated_delims_end = deprecated_opening_delims
+ + strlen(deprecated_opening_delims);
+
+ const char * close_delim_str = open_delim_str;
+ UV close_delim_code = open_delim_code;
+
/* If the delimiter has a mirror-image closing one, get it */
- if (close_delim_byte0 == '\0') { /* We, *shudder*, accept NUL as a
- delimiter, but it makes empty string
- processing just fall out */
- close_delim_code = close_delim_str[0] = close_delim_byte0 = '\0';
- }
- else if ((tmps = ninstr(legal_paired_opening_delims,
- legal_paired_opening_delims_end,
- open_delim_str, open_delim_str + delim_byte_len)))
- { /* Here, there is a paired delimiter, and tmps points to its position
+ const char *tmps = ninstr(legal_paired_opening_delims,
+ legal_paired_opening_delims_end,
+ open_delim_str, open_delim_str + delim_byte_len);
+ if (tmps) {
+ /* Here, there is a paired delimiter, and tmps points to its position
in the string of the accepted opening paired delimiters. The
corresponding position in the string of closing ones is the
beginning of the paired mate. Both contain the same number of
bytes. */
- Copy(legal_paired_closing_delims
- + (tmps - legal_paired_opening_delims),
- close_delim_str, delim_byte_len, char);
- close_delim_str[delim_byte_len] = '\0';
- close_delim_byte0 = close_delim_str[0];
- close_delim_code = valid_utf8_to_uvchr((U8 *) close_delim_str, NULL);
+ close_delim_str = legal_paired_closing_delims
+ + (tmps - legal_paired_opening_delims);
/* The list of paired delimiters contains all the ASCII ones that have
* always been legal, and no other ASCIIs. Don't raise a message if
* using one of these */
- if (! isASCII(close_delim_byte0)) {
+ if (! isASCII(open_delim_code)) {
Perl_ck_warner_d(aTHX_
packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
"Use of '%" UTF8f "' is experimental as a string delimiter",
UTF8fARG(UTF, delim_byte_len, open_delim_str));
}
+
+ close_delim_code = (UTF)
+ ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
+ : * (U8 *) close_delim_str;
}
else { /* Here, the delimiter isn't paired, hence the close is the same as
the open; and has aready been set up. But make sure it isn't
"Use of '%" UTF8f "' is deprecated as a string delimiter",
UTF8fARG(UTF, delim_byte_len, open_delim_str));
}
+
+ /* Note that a NUL may be used as a delimiter, and this happens when
+ * delimitting an empty string, and no special handling for it is
+ * needed, as ninstr() calls are used */
}
PL_multi_close = close_delim_code;
/* create a new SV to hold the contents. 79 is the SV's initial length.
What a random number. */
sv = newSV_type(SVt_PVIV);
- SvGROW(sv, 80);
+ SvGROW(sv, 79);
SvIV_set(sv, close_delim_code);
(void)SvPOK_only(sv); /* validate pointer */
if ( *s == '\\' && s < PL_bufend - delim_byte_len
/* ... but not if the delimiter itself is a backslash */
- && close_delim_byte0 != '\\')
+ && close_delim_code != '\\')
{
/* Here, we have an escaping backslash. If we're supposed to
* discard those that escape the closing delimiter, just
}
/* Here, still in the middle of the string; copy this character */
- if (UTF && ! UTF8_IS_INVARIANT((U8) *s)) {
+ if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
+ *to++ = *s++;
+ }
+ else {
size_t this_char_len = UTF8SKIP(s);
Copy(s, to, this_char_len, char);
s += this_char_len;
d_is_utf8 = TRUE;
}
- else {
- *to++ = *s++;
- }
} /* End of loop through buffer */
/* Here, found end of the string, OR ran out of buffer: terminate the
#endif
/*
+=for apidoc scan_vstring
+
Returns a pointer to the next character after the parsed
vstring, as well as updating the passed in sv.
a leak). Make sure to do SvREFCNT_inc afterwards if you use
sv_2mortal.
+=cut
*/
char *
}
/*
+=for apidoc_section $lexer
=for apidoc wrap_keyword_plugin
Puts a C function into the chain of keyword plugins. This is the