This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perldelta: Regex sets are no longer experimental
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 1c71573..ea99050 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -4742,8 +4742,15 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
     return(datasv);
 }
 
+/*
+=for apidoc_section $filters
+=for apidoc filter_del
+
+Delete most recently added instance of the filter function argument
+
+=cut
+*/
 
-/* Delete most recently added instance of this filter function.        */
 void
 Perl_filter_del(pTHX_ filter_t funcp)
 {
@@ -11317,13 +11324,11 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
     )
 {
     SV *sv;                    /* scalar value: string */
-    const char *tmps;          /* temp string, used for delimiter matching */
     char *s = start;           /* current position in the buffer */
     char *to;                  /* current position in the sv's data */
     int brackets = 1;          /* bracket nesting level */
     bool d_is_utf8 = FALSE;    /* is there any utf8 content? */
     UV open_delim_code;         /* code point */
-    UV close_delim_code;        /* code point */
     char open_delim_str[UTF8_MAXBYTES+1];
     STRLEN delim_byte_len;      /* each delimiter currently is the same number
                                    of bytes */
@@ -11347,14 +11352,13 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
 
     /* after skipping whitespace, the next character is the delimiter */
     if (! UTF || UTF8_IS_INVARIANT(*s)) {
-        close_delim_code = (U8) *s;
-        open_delim_code  = close_delim_code;
-        open_delim_str[0] =     *s;
+        open_delim_code   = (U8) *s;
+        open_delim_str[0] =      *s;
         delim_byte_len = 1;
     }
     else {
-        open_delim_code = close_delim_code =
-                    utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &delim_byte_len);
+        open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
+                                            &delim_byte_len);
         if (UNLIKELY(! is_grapheme((U8 *) start,
                                    (U8 *) s,
                                    (U8 *) PL_bufend,
@@ -11364,8 +11368,9 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
         }
 
         Copy(s, open_delim_str, delim_byte_len, char);
-        open_delim_str[delim_byte_len] = '\0';
     }
+    open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
+
 
     /* mark where we are */
     PL_multi_start = CopLINE(PL_curcop);
@@ -11373,15 +11378,11 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
     herelines = PL_parser->herelines;
 
     const char * legal_paired_opening_delims;
-    const char * legal_paired_opening_delims_end;
     const char * legal_paired_closing_delims;
-    const char * deprecated_opening_delims = "";
-    const char * deprecated_delims_end = deprecated_opening_delims;
+    const char * deprecated_opening_delims;
     if (FEATURE_MORE_DELIMS_IS_ENABLED) {
         if (UTF) {
             legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
-            legal_paired_opening_delims_end =
-                              C_ARRAY_END(EXTRA_OPENING_UTF8_BRACKETS);
             legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
 
             /* We are deprecating using a closing delimiter as the opening, in
@@ -11389,56 +11390,41 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
              * may include ones that are legal, but the code below won't look
              * at this string unless it didn't find a legal opening one */
             deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_UTF8_BRACKETS);
         }
         else {
             legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
-            legal_paired_opening_delims_end =
-                              C_ARRAY_END(EXTRA_OPENING_NON_UTF8_BRACKETS);
             legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
-
             deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_NON_UTF8_BRACKETS);
         }
     }
     else {
         legal_paired_opening_delims = "([{<";
         legal_paired_closing_delims = ")]}>";
-        legal_paired_opening_delims_end = legal_paired_opening_delims + 4;
-
-        if (UTF) {
-            deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_UTF8_BRACKETS);
-        }
-        else {
-            deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_NON_UTF8_BRACKETS);
-        }
+        deprecated_opening_delims = (UTF)
+                                    ? DEPRECATED_OPENING_UTF8_BRACKETS
+                                    : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
     }
 
+    const char * legal_paired_opening_delims_end = legal_paired_opening_delims
+                                          + strlen(legal_paired_opening_delims);
+    const char * deprecated_delims_end = deprecated_opening_delims
+                                + strlen(deprecated_opening_delims);
+
     const char * close_delim_str = open_delim_str;
+    UV close_delim_code = open_delim_code;
 
     /* If the delimiter has a mirror-image closing one, get it */
-    if (close_delim_code == '\0') {    /* We, *shudder*, accept NUL as a
-                                           delimiter, but it makes empty string
-                                           processing just fall out */
-        close_delim_code = '\0';
-    }
-    else if ((tmps = ninstr(legal_paired_opening_delims,
-                            legal_paired_opening_delims_end,
-                            open_delim_str, open_delim_str + delim_byte_len)))
-    {   /* Here, there is a paired delimiter, and tmps points to its position
+    const char *tmps = ninstr(legal_paired_opening_delims,
+                              legal_paired_opening_delims_end,
+                              open_delim_str, open_delim_str + delim_byte_len);
+    if (tmps) {
+        /* Here, there is a paired delimiter, and tmps points to its position
            in the string of the accepted opening paired delimiters.  The
            corresponding position in the string of closing ones is the
            beginning of the paired mate.  Both contain the same number of
            bytes. */
         close_delim_str = legal_paired_closing_delims
                         + (tmps - legal_paired_opening_delims);
-        close_delim_code = valid_utf8_to_uvchr((U8 *) close_delim_str, NULL);
 
         /* The list of paired delimiters contains all the ASCII ones that have
          * always been legal, and no other ASCIIs.  Don't raise a message if
@@ -11449,6 +11435,10 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
                              "Use of '%" UTF8f "' is experimental as a string delimiter",
                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
         }
+
+        close_delim_code = (UTF)
+                           ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
+                           : * (U8 *) close_delim_str;
     }
     else {  /* Here, the delimiter isn't paired, hence the close is the same as
                the open; and has aready been set up.  But make sure it isn't
@@ -11461,6 +11451,10 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
                              "Use of '%" UTF8f "' is deprecated as a string delimiter",
                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
         }
+
+        /* Note that a NUL may be used as a delimiter, and this happens when
+         * delimitting an empty string, and no special handling for it is
+         * needed, as ninstr() calls are used */
     }
 
     PL_multi_close = close_delim_code;
@@ -11548,7 +11542,10 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
             }
 
             /* Here, still in the middle of the string; copy this character */
-            if (UTF && ! UTF8_IS_INVARIANT((U8) *s)) {
+            if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
+                *to++ = *s++;
+            }
+            else {
                 size_t this_char_len = UTF8SKIP(s);
                 Copy(s, to, this_char_len, char);
                 s  += this_char_len;
@@ -11556,9 +11553,6 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
 
                 d_is_utf8 = TRUE;
             }
-            else {
-                *to++ = *s++;
-            }
         } /* End of loop through buffer */
 
         /* Here, found end of the string, OR ran out of buffer: terminate the
@@ -12885,6 +12879,8 @@ S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
 #endif
 
 /*
+=for apidoc scan_vstring
+
 Returns a pointer to the next character after the parsed
 vstring, as well as updating the passed in sv.
 
@@ -12902,6 +12898,7 @@ calling scope, hence the sv_2mortal in the example (to prevent
 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
 sv_2mortal.
 
+=cut
 */
 
 char *
@@ -12987,6 +12984,7 @@ Perl_keyword_plugin_standard(pTHX_
 }
 
 /*
+=for apidoc_section $lexer
 =for apidoc wrap_keyword_plugin
 
 Puts a C function into the chain of keyword plugins.  This is the