perldelta: Regex sets are no longer experimental

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index b4b4786..ea99050 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -2570,7 +2570,7 @@ S_sublex_push(pTHX)
  
      /* The here-doc parser needs to be able to peek into outer lexing
         scopes to find the body of the here-doc.  So we put PL_linestr and
-       PL_bufptr into lex_shared, to ‘share’ those values.
+       PL_bufptr into lex_shared, to 'share' those values.
       */
      PL_parser->lex_shared->ls_linestr = PL_linestr;
      PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
@@ -4742,8 +4742,15 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
      return(datasv);
  }
  
+/*
+=for apidoc_section $filters
+=for apidoc filter_del
+
+Delete most recently added instance of the filter function argument
+
+=cut
+*/
  
-/* Delete most recently added instance of this filter function.        */
  void
  Perl_filter_del(pTHX_ filter_t funcp)
  {
@@ -9495,7 +9502,7 @@ Perl_yylex(pTHX)
              PL_lex_repl = NULL;
          }
          /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
-           re_eval_str.  If the here-doc body’s length equals the previous
+           re_eval_str.  If the here-doc body's length equals the previous
             value of re_eval_start, re_eval_start will now be null.  So
             check re_eval_str as well. */
          if (PL_parser->lex_shared->re_eval_start
@@ -10930,7 +10937,7 @@ S_scan_heredoc(pTHX_ char *s)
                     does not matter what PL_linestr points to, since we are
                     about to croak; but in a quote-like op, linestr_save
                     will have been prospectively freed already, via
-                   SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+                   SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
                     restore PL_linestr. */
                  SvREFCNT_dec_NN(PL_linestr);
                  PL_linestr = linestr_save;
@@ -11317,16 +11324,12 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
      )
  {
      SV *sv;                    /* scalar value: string */
-    const char *tmps;          /* temp string, used for delimiter matching */
      char *s = start;           /* current position in the buffer */
      char *to;                  /* current position in the sv's data */
      int brackets = 1;          /* bracket nesting level */
      bool d_is_utf8 = FALSE;    /* is there any utf8 content? */
      UV open_delim_code;         /* code point */
-    UV close_delim_code;        /* code point */
      char open_delim_str[UTF8_MAXBYTES+1];
-    char close_delim_str[UTF8_MAXBYTES+1];
-    char close_delim_byte0;
      STRLEN delim_byte_len;      /* each delimiter currently is the same number
                                     of bytes */
      line_t herelines;
@@ -11339,39 +11342,35 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
      PERL_ARGS_ASSERT_SCAN_STR;
  
      /* skip space before the delimiter */
-    if (isSPACE(*s)) {
-        s = skipspace(s);
+    if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
+                           'start' also has to change */
+        s = start = skipspace(s);
      }
  
      /* mark where we are, in case we need to report errors */
      CLINE;
  
      /* after skipping whitespace, the next character is the delimiter */
-    close_delim_byte0 = *s;
-    if (!UTF || UTF8_IS_INVARIANT(close_delim_byte0)) {
-        close_delim_str[0] = close_delim_byte0;
-        open_delim_str[0] = close_delim_str[0];
-
-        close_delim_code = (U8) close_delim_str[0];
-        open_delim_code  = close_delim_code;
+    if (! UTF || UTF8_IS_INVARIANT(*s)) {
+        open_delim_code   = (U8) *s;
+        open_delim_str[0] =      *s;
          delim_byte_len = 1;
      }
      else {
-        open_delim_code = close_delim_code =
-                    utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &delim_byte_len);
-        if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
-                                          (U8 *) s,
-                                          (U8 *) PL_bufend,
-                                                 open_delim_code)))
+        open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
+                                            &delim_byte_len);
+        if (UNLIKELY(! is_grapheme((U8 *) start,
+                                   (U8 *) s,
+                                   (U8 *) PL_bufend,
+                                   open_delim_code)))
          {
              yyerror(non_grapheme_msg);
          }
  
-        Copy(s,  open_delim_str, delim_byte_len, char);
-        open_delim_str[delim_byte_len] = '\0';
-        Copy(s, close_delim_str, delim_byte_len, char);
-        close_delim_str[delim_byte_len] = '\0';
+        Copy(s, open_delim_str, delim_byte_len, char);
      }
+    open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
+
  
      /* mark where we are */
      PL_multi_start = CopLINE(PL_curcop);
@@ -11379,15 +11378,11 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
      herelines = PL_parser->herelines;
  
      const char * legal_paired_opening_delims;
-    const char * legal_paired_opening_delims_end;
      const char * legal_paired_closing_delims;
-    const char * deprecated_opening_delims = "";
-    const char * deprecated_delims_end = deprecated_opening_delims;
+    const char * deprecated_opening_delims;
      if (FEATURE_MORE_DELIMS_IS_ENABLED) {
          if (UTF) {
              legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
-            legal_paired_opening_delims_end =
-                              C_ARRAY_END(EXTRA_OPENING_UTF8_BRACKETS);
              legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
  
              /* We are deprecating using a closing delimiter as the opening, in
@@ -11395,79 +11390,71 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
               * may include ones that are legal, but the code below won't look
               * at this string unless it didn't find a legal opening one */
              deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_UTF8_BRACKETS);
          }
          else {
              legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
-            legal_paired_opening_delims_end =
-                              C_ARRAY_END(EXTRA_OPENING_NON_UTF8_BRACKETS);
              legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
-
              deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_NON_UTF8_BRACKETS);
          }
      }
      else {
          legal_paired_opening_delims = "([{<";
          legal_paired_closing_delims = ")]}>";
-        legal_paired_opening_delims_end = legal_paired_opening_delims + 4;
-
-        if (UTF) {
-            deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_UTF8_BRACKETS);
-        }
-        else {
-            deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
-            deprecated_delims_end =
-                            C_ARRAY_END(DEPRECATED_OPENING_NON_UTF8_BRACKETS);
-        }
+        deprecated_opening_delims = (UTF)
+                                    ? DEPRECATED_OPENING_UTF8_BRACKETS
+                                    : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
      }
  
+    const char * legal_paired_opening_delims_end = legal_paired_opening_delims
+                                          + strlen(legal_paired_opening_delims);
+    const char * deprecated_delims_end = deprecated_opening_delims
+                                + strlen(deprecated_opening_delims);
+
+    const char * close_delim_str = open_delim_str;
+    UV close_delim_code = open_delim_code;
+
      /* If the delimiter has a mirror-image closing one, get it */
-    if (close_delim_byte0 == '\0') {    /* We, *shudder*, accept NUL as a
-                                           delimiter, but it makes empty string
-                                           processing just falls out */
-        close_delim_code = close_delim_str[0] = close_delim_byte0 = '\0';
-    }
-    else if ((tmps = ninstr(legal_paired_opening_delims,
-                            legal_paired_opening_delims_end,
-                            open_delim_str, open_delim_str + delim_byte_len)))
-    {   /* Here, there is a paired delimiter, and tmps points to its position
+    const char *tmps = ninstr(legal_paired_opening_delims,
+                              legal_paired_opening_delims_end,
+                              open_delim_str, open_delim_str + delim_byte_len);
+    if (tmps) {
+        /* Here, there is a paired delimiter, and tmps points to its position
             in the string of the accepted opening paired delimiters.  The
             corresponding position in the string of closing ones is the
-           beginning of the paired mate.  Both contain the same number of bytes
-         */
-        Copy(legal_paired_closing_delims
-                                        + (tmps - legal_paired_opening_delims),
-             close_delim_str, delim_byte_len, char);
-        close_delim_str[delim_byte_len] = '\0';
-        close_delim_byte0 = close_delim_str[0];
-        close_delim_code = valid_utf8_to_uvchr((U8 *) close_delim_str, NULL);
+           beginning of the paired mate.  Both contain the same number of
+           bytes. */
+        close_delim_str = legal_paired_closing_delims
+                        + (tmps - legal_paired_opening_delims);
  
          /* The list of paired delimiters contains all the ASCII ones that have
           * always been legal, and no other ASCIIs.  Don't raise a message if
           * using one of these */
-        if (! isASCII(close_delim_byte0)) {
+        if (! isASCII(open_delim_code)) {
              Perl_ck_warner_d(aTHX_
                               packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
                               "Use of '%" UTF8f "' is experimental as a string delimiter",
                               UTF8fARG(UTF, delim_byte_len, open_delim_str));
          }
+
+        close_delim_code = (UTF)
+                           ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
+                           : * (U8 *) close_delim_str;
      }
      else {  /* Here, the delimiter isn't paired, hence the close is the same as
                 the open; and has aready been set up.  But make sure it isn't
                 deprecated to use this particular delimiter, as we plan
                 eventually to make it paired. */
          if (ninstr(deprecated_opening_delims, deprecated_delims_end,
-                    open_delim_str, open_delim_str + delim_byte_len))
+                   open_delim_str, open_delim_str + delim_byte_len))
          {
              Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
                               "Use of '%" UTF8f "' is deprecated as a string delimiter",
                               UTF8fARG(UTF, delim_byte_len, open_delim_str));
          }
+
+        /* Note that a NUL may be used as a delimiter, and this happens when
+         * delimitting an empty string, and no special handling for it is
+         * needed, as ninstr() calls are used */
      }
  
      PL_multi_close = close_delim_code;
@@ -11479,7 +11466,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
      /* create a new SV to hold the contents.  79 is the SV's initial length.
         What a random number. */
      sv = newSV_type(SVt_PVIV);
-    SvGROW(sv, 80);
+    SvGROW(sv, 79);
      SvIV_set(sv, close_delim_code);
      (void)SvPOK_only(sv);              /* validate pointer */
  
@@ -11503,7 +11490,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
              if (   *s == '\\' && s < PL_bufend - delim_byte_len
  
                     /* ... but not if the delimiter itself is a backslash */
-                && close_delim_byte0 != '\\')
+                && close_delim_code != '\\')
              {
                  /* Here, we have an escaping backslash.  If we're supposed to
                   * discard those that escape the closing delimiter, just
@@ -11537,9 +11524,9 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
                   * Only grapheme delimiters are legal. */
                  if (   UTF  /* All Non-UTF-8's are graphemes */
                      && UNLIKELY(! is_grapheme((U8 *) start,
-                                               (U8 *) s,
-                                               (U8 *) PL_bufend,
-                                                      close_delim_code)))
+                                              (U8 *) s,
+                                              (U8 *) PL_bufend,
+                                              close_delim_code)))
                  {
                      yyerror(non_grapheme_msg);
                  }
@@ -11554,7 +11541,11 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
                  brackets++;
              }
  
-            if (UTF && ! UTF8_IS_INVARIANT((U8) *s)) {
+            /* Here, still in the middle of the string; copy this character */
+            if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
+                *to++ = *s++;
+            }
+            else {
                  size_t this_char_len = UTF8SKIP(s);
                  Copy(s, to, this_char_len, char);
                  s  += this_char_len;
@@ -11562,12 +11553,10 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
  
                  d_is_utf8 = TRUE;
              }
-            else {
-                *to++ = *s++;
-            }
-        }
+        } /* End of loop through buffer */
  
-        /* terminate the copied string and update the sv's end-of-string */
+        /* Here, found end of the string, OR ran out of buffer: terminate the
+         * copied string and update the sv's end-of-string */
          *to = '\0';
          SvCUR_set(sv, to - SvPVX_const(sv));
  
@@ -11605,7 +11594,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
              return NULL;
          }
          s = start = PL_bufptr;
-    }
+    } /* End of infinite loop */
  
      /* at this point, we have successfully read the delimited string */
  
@@ -12890,6 +12879,8 @@ S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
  #endif
  
  /*
+=for apidoc scan_vstring
+
  Returns a pointer to the next character after the parsed
  vstring, as well as updating the passed in sv.
  
@@ -12907,6 +12898,7 @@ calling scope, hence the sv_2mortal in the example (to prevent
  a leak).  Make sure to do SvREFCNT_inc afterwards if you use
  sv_2mortal.
  
+=cut
  */
  
  char *
@@ -12992,6 +12984,7 @@ Perl_keyword_plugin_standard(pTHX_
  }
  
  /*
+=for apidoc_section $lexer
  =for apidoc wrap_keyword_plugin
  
  Puts a C function into the chain of keyword plugins.  This is the