ExtUtils::ParseXS/lib/perlxs.pod: Nits

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 79df0bc..9f37f53 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -456,9 +456,9 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s)
  
      PERL_ARGS_ASSERT_PRINTBUF;
  
-    GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
+    GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
      PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
-    GCC_DIAG_RESTORE;
+    GCC_DIAG_RESTORE_STMT;
      SvREFCNT_dec(tmp);
  }
  
@@ -1035,13 +1035,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 SvCUR(PL_parser->linestr) + len+highhalf);
             PL_parser->bufend += len+highhalf;
             for (p = pv; p != e; p++) {
-               U8 c = (U8)*p;
-               if (! UTF8_IS_INVARIANT(c)) {
-                   *bufptr++ = UTF8_TWO_BYTE_HI(c);
-                   *bufptr++ = UTF8_TWO_BYTE_LO(c);
-               } else {
-                   *bufptr++ = (char)c;
-               }
+                append_utf8_from_native_byte(*p, (U8 **) &bufptr);
             }
         }
      } else {
@@ -1935,7 +1929,7 @@ S_check_uni(pTHX)
      s = PL_last_uni;
      while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
         s += UTF ? UTF8SKIP(s) : 1;
-    if (memchr(s, '(', PL_bufptr - s))
+    if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
         return;
  
      Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
@@ -2014,7 +2008,7 @@ S_force_next(pTHX_ I32 type)
   * S_postderef
   *
   * This subroutine handles postfix deref syntax after the arrow has already
- * been emitted.  @* $* etc. are emitted as two separate token right here.
+ * been emitted.  @* $* etc. are emitted as two separate tokens right here.
   * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
   * only the first, leaving yylex to find the next.
   */
@@ -2069,10 +2063,9 @@ STATIC SV *
  S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
  {
      SV * const sv = newSVpvn_utf8(start, len,
-                          !IN_BYTES
-                          && UTF
-                          && !is_utf8_invariant_string((const U8*)start, len)
-                          && is_utf8_string((const U8*)start, len));
+                    ! IN_BYTES
+                  &&  UTF
+                  &&  is_utf8_non_invariant_string((const U8*)start, len));
      return sv;
  }
  
@@ -2397,6 +2390,8 @@ S_sublex_start(pTHX)
      PL_parser->lex_super_state = PL_lex_state;
      PL_parser->lex_sub_inwhat = (U16)op_type;
      PL_parser->lex_sub_op = PL_lex_op;
+    PL_parser->sub_no_recover = FALSE;
+    PL_parser->sub_error_count = PL_error_count;
      PL_lex_state = LEX_INTERPPUSH;
  
      PL_expect = XTERM;
@@ -2576,6 +2571,20 @@ S_sublex_done(pTHX)
      else {
         const line_t l = CopLINE(PL_curcop);
         LEAVE;
+        if (PL_parser->sub_error_count != PL_error_count) {
+            const char * const name = OutCopFILE(PL_curcop);
+            if (PL_parser->sub_no_recover) {
+                const char * msg = "";
+                if (PL_in_eval) {
+                    SV *errsv = ERRSV;
+                    if (SvCUR(ERRSV)) {
+                        msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
+                    }
+                }
+                abort_execution(msg, name);
+                NOT_REACHED;
+            }
+        }
         if (PL_multi_close == '<')
             PL_parser->herelines += l - PL_multi_end;
         PL_bufend = SvPVX(PL_linestr);
@@ -2902,8 +2911,8 @@ S_scan_const(pTHX_ char *start)
                                             should we have to convert to
                                             UTF-8) */
      SV *res;                           /* result from charnames */
-    STRLEN offset_to_max;   /* The offset in the output to where the range
-                               high-end character is temporarily placed */
+    STRLEN offset_to_max = 0;   /* The offset in the output to where the range
+                                   high-end character is temporarily placed */
  
      /* Does something require special handling in tr/// ?  This avoids extra
       * work in a less likely case.  As such, khw didn't feel it was worth
@@ -4164,6 +4173,7 @@ S_intuit_more(pTHX_ char *s, char *e)
         return TRUE;
      if (*s != '{' && *s != '[')
         return FALSE;
+    PL_parser->sub_no_recover = TRUE;
      if (!PL_lex_inpat)
         return TRUE;
  
@@ -4499,6 +4509,7 @@ I32
  Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
  {
      filter_t funcp;
+    I32 ret;
      SV *datasv = NULL;
      /* This API is bad. It should have been using unsigned int for maxlen.
         Not sure if we want to change the API, but if not we should sanity
@@ -4581,7 +4592,11 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
      /* Call function. The function is expected to      */
      /* call "FILTER_READ(idx+1, buf_sv)" first.                */
      /* Return: <0:error, =0:eof, >0:not eof            */
-    return (*funcp)(aTHX_ idx, buf_sv, correct_length);
+    ENTER;
+    save_scalar(PL_errgv);
+    ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
+    LEAVE;
+    return ret;
  }
  
  STATIC char *
@@ -5138,7 +5153,7 @@ Perl_yylex(pTHX)
                  /* read var name, including sigil, into PL_tokenbuf */
                  PL_tokenbuf[0] = sigil;
                  parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
-                    0, cBOOL(UTF), FALSE);
+                    0, cBOOL(UTF), FALSE, FALSE);
                  *dest = '\0';
                  assert(PL_tokenbuf[1]); /* we have a variable name */
              }
@@ -6887,7 +6902,7 @@ Perl_yylex(pTHX)
         }
         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
             s += 3;
-           TERM(YADAYADA);
+           OPERATOR(YADAYADA);
         }
         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
             char tmp = *s++;
@@ -7268,7 +7283,20 @@ Perl_yylex(pTHX)
                 int pkgname = 0;
                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
                 bool safebw;
+               bool no_op_error = FALSE;
  
+               if (PL_expect == XOPERATOR) {
+                   if (PL_bufptr == PL_linestart) {
+                       CopLINE_dec(PL_curcop);
+                       Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
+                       CopLINE_inc(PL_curcop);
+                   }
+                   else
+                       /* We want to call no_op with s pointing after the
+                          bareword, so defer it.  But we want it to come
+                          before the Bad name croak.  */
+                       no_op_error = TRUE;
+               }
  
                 /* Get the rest if it looks like a package qualifier */
  
@@ -7276,6 +7304,10 @@ Perl_yylex(pTHX)
                     STRLEN morelen;
                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                   TRUE, &morelen);
+                   if (no_op_error) {
+                       no_op("Bareword",s);
+                       no_op_error = FALSE;
+                   }
                     if (!morelen)
                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
                                 UTF8fARG(UTF, len, PL_tokenbuf),
@@ -7284,15 +7316,8 @@ Perl_yylex(pTHX)
                     pkgname = 1;
                 }
  
-               if (PL_expect == XOPERATOR) {
-                   if (PL_bufptr == PL_linestart) {
-                       CopLINE_dec(PL_curcop);
-                       Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
-                       CopLINE_inc(PL_curcop);
-                   }
-                   else
+               if (no_op_error)
                         no_op("Bareword",s);
-               }
  
                 /* See if the name is "Foo::",
                    in which case Foo is a bareword
@@ -7603,10 +7628,10 @@ Perl_yylex(pTHX)
                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
                              {
                                  /* PL_warn_reserved is constant */
-                                GCC_DIAG_IGNORE(-Wformat-nonliteral);
+                                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
                                        PL_tokenbuf);
-                                GCC_DIAG_RESTORE;
+                                GCC_DIAG_RESTORE_STMT;
                              }
                         }
                     }
@@ -7661,14 +7686,6 @@ Perl_yylex(pTHX)
                 if (!GvIO(gv))
                     GvIOp(gv) = newIO();
                 IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
-               {
-                   const int fd = PerlIO_fileno(PL_rsfp);
-                    if (fd >= 3) {
-                        fcntl(fd,F_SETFD, FD_CLOEXEC);
-                    }
-               }
-#endif
                 /* Mark this internal pseudo-handle as clean */
                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
@@ -8935,6 +8952,7 @@ S_pending_ident(pTHX)
  
      DEBUG_T({ PerlIO_printf(Perl_debug_log,
            "### Pending identifier '%s'\n", PL_tokenbuf); });
+    assert(tokenbuf_len >= 2);
  
      /* if we're in a my(), we can't allow dynamics here.
         $foo'bar has already been turned into $foo::bar, so
@@ -8958,13 +8976,13 @@ S_pending_ident(pTHX)
              if (has_colon) {
                  /* "my" variable %s can't be in a package */
                  /* PL_no_myglob is constant */
-                GCC_DIAG_IGNORE(-Wformat-nonliteral);
+                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
                  yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
                              PL_in_my == KEY_my ? "my" : "state",
                              *PL_tokenbuf == '&' ? "subroutin" : "variabl",
                              PL_tokenbuf),
                              UTF ? SVf_UTF8 : 0);
-                GCC_DIAG_RESTORE;
+                GCC_DIAG_RESTORE_STMT;
              }
  
              if (PL_in_my == KEY_sigvar) {
@@ -9276,8 +9294,10 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
  
  PERL_STATIC_INLINE void
  S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
-                    bool is_utf8, bool check_dollar)
+                    bool is_utf8, bool check_dollar, bool tick_warn)
  {
+    int saw_tick = 0;
+    const char *olds = *s;
      PERL_ARGS_ASSERT_PARSE_IDENT;
  
      while (*s < PL_bufend) {
@@ -9311,6 +9331,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
              *(*d)++ = ':';
              *(*d)++ = ':';
              (*s)++;
+            saw_tick++;
          }
          else if (allow_package && **s == ':' && (*s)[1] == ':'
             /* Disallow things like Foo::$bar. For the curious, this is
@@ -9324,6 +9345,30 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
          else
              break;
      }
+    if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
+              && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
+        char *d;
+       char *d2;
+        Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+        d2 = d;
+        SAVEFREEPV(d);
+        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                         "Old package separator used in string");
+        if (olds[-1] == '#')
+            *d2++ = olds[-2];
+        *d2++ = olds[-1];
+        while (olds < *s) {
+            if (*olds == '\'') {
+                *d2++ = '\\';
+                *d2++ = *olds++;
+            }
+           else
+                *d2++ = *olds++;
+        }
+        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                         "\t(Did you mean \"%" UTF8f "\" instead?)\n",
+                          UTF8fARG(is_utf8, d2-d, d));
+    }
      return;
  }
  
@@ -9339,7 +9384,7 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
  
      PERL_ARGS_ASSERT_SCAN_WORD;
  
-    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
+    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
      *d = '\0';
      *slp = d - dest;
      return s;
@@ -9387,7 +9432,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         }
      }
      else {  /* See if it is a "normal" identifier */
-        parse_ident(&s, &d, e, 1, is_utf8, FALSE);
+        parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
      }
      *d = '\0';
      d = dest;
@@ -9465,7 +9510,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                     (the later check for } being at the expected point will trap
                     cases where this doesn't pan out.)  */
                  d += is_utf8 ? UTF8SKIP(d) : 1;
-                parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+                parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
                  *d = '\0';
              }
              else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
@@ -9552,6 +9597,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
              CopLINE_set(PL_curcop, orig_copline);
              PL_parser->herelines = herelines;
             *dest = '\0';
+            PL_parser->sub_no_recover = TRUE;
         }
      }
      else if (   PL_lex_state == LEX_INTERPNORMAL
@@ -10078,8 +10124,9 @@ S_scan_heredoc(pTHX_ char *s)
  
                     /* No whitespace or all! */
                     if (backup == s || *backup == '\n') {
-                       Newxz(indent, indent_len + 1, char);
+                       Newx(indent, indent_len + 1, char);
                         memcpy(indent, backup + 1, indent_len);
+                       indent[indent_len] = 0;
                         s--; /* before our delimiter */
                         PL_parser->herelines--; /* this line doesn't count */
                         break;
@@ -10213,8 +10260,9 @@ S_scan_heredoc(pTHX_ char *s)
  
                 /* All whitespace or none! */
                 if (backup == found || SPACE_OR_TAB(*backup)) {
-                   Newxz(indent, indent_len + 1, char);
+                   Newx(indent, indent_len + 1, char);
                     memcpy(indent, backup, indent_len);
+                   indent[indent_len] = 0;
                     SvREFCNT_dec(PL_linestr);
                     PL_linestr = linestr_save;
                     PL_linestart = SvPVX(linestr_save);
@@ -10533,7 +10581,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      I32 brackets = 1;          /* bracket nesting level */
      bool has_utf8 = FALSE;     /* is there any utf8 content? */
      IV termcode;               /* terminating char. code */
-    U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+    U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
      STRLEN termlen;            /* length of terminating string */
      line_t herelines;
  
@@ -10960,6 +11008,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                   digit:
                     just_zero = FALSE;
                     if (!overflowed) {
+                       assert(shift >= 0);
                         x = u << shift; /* make room for the digit */
  
                          total_bits += shift;
@@ -11040,19 +11089,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                      NV nv_mult = 1.0;
  #endif
                      bool accumulate = TRUE;
-                    for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
+                    U8 b;
+                    int lim = 1 << shift;
+                    for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
+                               *h == '_'); h++) {
                          if (isXDIGIT(*h)) {
-                            U8 b = XDIGIT_VALUE(*h);
                              significant_bits += shift;
  #ifdef HEXFP_UQUAD
                              if (accumulate) {
                                  if (significant_bits < NV_MANT_DIG) {
                                      /* We are in the long "run" of xdigits,
                                       * accumulate the full four bits. */
+                                   assert(shift >= 0);
                                      hexfp_uquad <<= shift;
                                      hexfp_uquad |= b;
                                      hexfp_frac_bits += shift;
-                                } else {
+                                } else if (significant_bits - shift < NV_MANT_DIG) {
                                      /* We are at a hexdigit either at,
                                       * or straddling, the edge of mantissa.
                                       * We will try grabbing as many as
@@ -11061,7 +11113,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                        significant_bits - NV_MANT_DIG;
                                      if (tail <= 0)
                                         tail += shift;
+                                   assert(tail >= 0);
                                      hexfp_uquad <<= tail;
+                                   assert((shift - tail) >= 0);
                                      hexfp_uquad |= b >> (shift - tail);
                                      hexfp_frac_bits += tail;
  
@@ -11100,7 +11154,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                              }
  #else /* HEXFP_NV */
                              if (accumulate) {
-                                nv_mult /= 16.0;
+                                nv_mult /= nvshift[shift];
                                  if (nv_mult > 0.0)
                                      hexfp_nv += b * nv_mult;
                                  else
@@ -11369,7 +11423,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                floatit = TRUE;
          }
         if (floatit) {
-            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
             /* terminate the string */
             *d = '\0';
              if (UNLIKELY(hexfp)) {
@@ -11386,7 +11439,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
              } else {
                  nv = Atof(PL_tokenbuf);
              }
-            RESTORE_LC_NUMERIC_UNDERLYING();
              sv = newSVnv(nv);
         }
  
@@ -12072,6 +12124,79 @@ Perl_keyword_plugin_standard(pTHX_
      return KEYWORD_PLUGIN_DECLINE;
  }
  
+/*
+=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
+
+Puts a C function into the chain of keyword plugins.  This is the
+preferred way to manipulate the L</PL_keyword_plugin> variable.
+C<new_plugin> is a pointer to the C function that is to be added to the
+keyword plugin chain, and C<old_plugin_p> points to the storage location
+where a pointer to the next function in the chain will be stored.  The
+value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
+while the value previously stored there is written to C<*old_plugin_p>.
+
+L</PL_keyword_plugin> is global to an entire process, and a module wishing
+to hook keyword parsing may find itself invoked more than once per
+process, typically in different threads.  To handle that situation, this
+function is idempotent.  The location C<*old_plugin_p> must initially
+(once per process) contain a null pointer.  A C variable of static
+duration (declared at file scope, typically also marked C<static> to give
+it internal linkage) will be implicitly initialised appropriately, if it
+does not have an explicit initialiser.  This function will only actually
+modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
+function is also thread safe on the small scale.  It uses appropriate
+locking to avoid race conditions in accessing L</PL_keyword_plugin>.
+
+When this function is called, the function referenced by C<new_plugin>
+must be ready to be called, except for C<*old_plugin_p> being unfilled.
+In a threading situation, C<new_plugin> may be called immediately, even
+before this function has returned.  C<*old_plugin_p> will always be
+appropriately set before C<new_plugin> is called.  If C<new_plugin>
+decides not to do anything special with the identifier that it is given
+(which is the usual case for most calls to a keyword plugin), it must
+chain the plugin function referenced by C<*old_plugin_p>.
+
+Taken all together, XS code to install a keyword plugin should typically
+look something like this:
+
+    static Perl_keyword_plugin_t next_keyword_plugin;
+    static OP *my_keyword_plugin(pTHX_
+        char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
+    {
+        if (memEQs(keyword_ptr, keyword_len,
+                   "my_new_keyword")) {
+            ...
+        } else {
+            return next_keyword_plugin(aTHX_
+                keyword_ptr, keyword_len, op_ptr);
+        }
+    }
+    BOOT:
+        wrap_keyword_plugin(my_keyword_plugin,
+                            &next_keyword_plugin);
+
+Direct access to L</PL_keyword_plugin> should be avoided.
+
+=cut
+*/
+
+void
+Perl_wrap_keyword_plugin(pTHX_
+    Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
+{
+    dVAR;
+
+    PERL_UNUSED_CONTEXT;
+    PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
+    if (*old_plugin_p) return; /* non-null: already wrapped, nothing to do */
+    KEYWORD_PLUGIN_MUTEX_LOCK;
+    if (!*old_plugin_p) { /* test again now that the mutex is held */
+        *old_plugin_p = PL_keyword_plugin; /* previous plugin is next in chain */
+        PL_keyword_plugin = new_plugin; /* new_plugin becomes the current one */
+    }
+    KEYWORD_PLUGIN_MUTEX_UNLOCK;
+}
+
  #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
  static void
  S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)