Remove trailing '/' from prefix

[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c

index ff5d4ad..07e4df7 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -53,6 +53,19 @@ within non-zero characters.
  =cut
  */
  
+/* helper for Perl__force_out_malformed_utf8_message(). Like
+ * SAVECOMPILEWARNINGS(), but works with PL_curcop rather than
+ * PL_compiling */
+
+static void
+S_restore_cop_warnings(pTHX_ void *p)
+{
+    if (!specialWARN(PL_curcop->cop_warnings))
+        PerlMemShared_free(PL_curcop->cop_warnings);
+    PL_curcop->cop_warnings = (STRLEN*)p;
+}
+
+
  void
  Perl__force_out_malformed_utf8_message(pTHX_
              const U8 *const p,      /* First byte in UTF-8 sequence */
@@ -84,6 +97,10 @@ Perl__force_out_malformed_utf8_message(pTHX_
  
      PL_dowarn = G_WARN_ALL_ON|G_WARN_ON;
      if (PL_curcop) {
+        /* this is like SAVECOMPILEWARNINGS() except with PL_curcop rather
+         * than PL_compiling */
+        SAVEDESTRUCTOR_X(S_restore_cop_warnings,
+                (void*)PL_curcop->cop_warnings);
          PL_curcop->cop_warnings = pWARN_ALL;
      }
  
@@ -2254,10 +2271,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
  {
      PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
  
-    assert(s < send);
-
-    return utf8n_to_uvchr(s, send - s, retlen,
-                     ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+    return _utf8_to_uvchr_buf(s, send, retlen);
  }
  
  /* This is marked as deprecated
@@ -2320,14 +2334,14 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
       * the bitops (especially ~) can create illegal UTF-8.
       * In other words: in Perl UTF-8 is not just for Unicode. */
  
-    if (e < s)
+    if (UNLIKELY(e < s))
         goto warn_and_return;
      while (s < e) {
          s += UTF8SKIP(s);
         len++;
      }
  
-    if (e != s) {
+    if (UNLIKELY(e != s)) {
         len--;
          warn_and_return:
         if (PL_op)
@@ -2778,6 +2792,7 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
  bool
  Perl__is_uni_FOO(pTHX_ const U8 classnum, const UV c)
  {
+    dVAR;
      return _invlist_contains_cp(PL_XPosix_ptrs[classnum], c);
  }
  
@@ -2787,6 +2802,8 @@ Perl__is_uni_FOO(pTHX_ const U8 classnum, const UV c)
  bool
  Perl__is_utf8_idstart(pTHX_ const U8 *p)
  {
+    dVAR;
+
      PERL_ARGS_ASSERT__IS_UTF8_IDSTART;
  
      if (*p == '_')
@@ -2797,12 +2814,14 @@ Perl__is_utf8_idstart(pTHX_ const U8 *p)
  bool
  Perl__is_uni_perl_idcont(pTHX_ UV c)
  {
+    dVAR;
      return _invlist_contains_cp(PL_utf8_perl_idcont, c);
  }
  
  bool
  Perl__is_uni_perl_idstart(pTHX_ UV c)
  {
+    dVAR;
      return _invlist_contains_cp(PL_utf8_perl_idstart, c);
  }
  
@@ -2942,6 +2961,7 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
       * The ordinal of the first character of the changed version is returned
       * (but note, as explained above, that there may be more.) */
  
+    dVAR;
      PERL_ARGS_ASSERT_TO_UNI_UPPER;
  
      if (c < 256) {
@@ -2954,6 +2974,7 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
  UV
  Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
  {
+    dVAR;
      PERL_ARGS_ASSERT_TO_UNI_TITLE;
  
      if (c < 256) {
@@ -2993,6 +3014,7 @@ S_to_lower_latin1(const U8 c, U8* p, STRLEN *lenp, const char dummy)
  UV
  Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
  {
+    dVAR;
      PERL_ARGS_ASSERT_TO_UNI_LOWER;
  
      if (c < 256) {
@@ -3074,6 +3096,7 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
       *     FOLD_FLAGS_NOMIX_ASCII iff non-ASCII to ASCII folds are prohibited
       */
  
+    dVAR;
      PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS;
  
      if (flags & FOLD_FLAGS_LOCALE) {
@@ -3210,6 +3233,7 @@ Perl__is_utf8_FOO(pTHX_       U8   classnum,
                          const char * const file,
                          const unsigned line)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_FOO;
  
      warn_on_first_deprecated_use(name, alternative, use_locale, file, line);
@@ -3282,6 +3306,7 @@ bool
  Perl__is_utf8_FOO_with_len(pTHX_ const U8 classnum, const U8 *p,
                                                              const U8 * const e)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_FOO_WITH_LEN;
  
      return is_utf8_common_with_len(p, e, PL_XPosix_ptrs[classnum]);
@@ -3290,6 +3315,7 @@ Perl__is_utf8_FOO_with_len(pTHX_ const U8 classnum, const U8 *p,
  bool
  Perl__is_utf8_perl_idstart_with_len(pTHX_ const U8 *p, const U8 * const e)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART_WITH_LEN;
  
      return is_utf8_common_with_len(p, e, PL_utf8_perl_idstart);
@@ -3298,6 +3324,7 @@ Perl__is_utf8_perl_idstart_with_len(pTHX_ const U8 *p, const U8 * const e)
  bool
  Perl__is_utf8_xidstart(pTHX_ const U8 *p)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_XIDSTART;
  
      if (*p == '_')
@@ -3308,6 +3335,7 @@ Perl__is_utf8_xidstart(pTHX_ const U8 *p)
  bool
  Perl__is_utf8_perl_idcont_with_len(pTHX_ const U8 *p, const U8 * const e)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT_WITH_LEN;
  
      return is_utf8_common_with_len(p, e, PL_utf8_perl_idcont);
@@ -3316,6 +3344,7 @@ Perl__is_utf8_perl_idcont_with_len(pTHX_ const U8 *p, const U8 * const e)
  bool
  Perl__is_utf8_idcont(pTHX_ const U8 *p)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_IDCONT;
  
      return is_utf8_common(p, PL_utf8_idcont);
@@ -3324,6 +3353,7 @@ Perl__is_utf8_idcont(pTHX_ const U8 *p)
  bool
  Perl__is_utf8_xidcont(pTHX_ const U8 *p)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_XIDCONT;
  
      return is_utf8_common(p, PL_utf8_xidcont);
@@ -3332,6 +3362,7 @@ Perl__is_utf8_xidcont(pTHX_ const U8 *p)
  bool
  Perl__is_utf8_mark(pTHX_ const U8 *p)
  {
+    dVAR;
      PERL_ARGS_ASSERT__IS_UTF8_MARK;
  
      return is_utf8_common(p, PL_utf8_mark);
@@ -3535,6 +3566,7 @@ Perl__inverse_folds(pTHX_ const UV cp, unsigned int * first_folds_to,
       * the return can point to them, but single code points aren't, so would
       * need to be constructed if we didn't employ something like this API */
  
+    dVAR;
      /* 'index' is guaranteed to be non-negative, as this is an inversion map
       * that covers all possible inputs.  See [perl #133365] */
      SSize_t index = _invlist_search(PL_utf8_foldclosures, cp);
@@ -3761,6 +3793,7 @@ S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e,
       * sequence, and the entire sequence will be stored in *ustrp.  ustrp will
       * contain *lenp bytes */
  
+    dVAR;
      PERL_ARGS_ASSERT_TURKIC_LC;
      assert(e > p0);
  
@@ -3944,6 +3977,7 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p,
                                  const char * const file,
                                  const int line)
  {
+    dVAR;
      UV result;
      const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_UPPER,
                                                  cBOOL(flags), file, line);
@@ -3979,6 +4013,7 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p,
                                  const char * const file,
                                  const int line)
  {
+    dVAR;
      UV result;
      const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_TITLE,
                                                  cBOOL(flags), file, line);
@@ -4012,6 +4047,7 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p,
                                  const char * const file,
                                  const int line)
  {
+    dVAR;
      UV result;
      const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_LOWER,
                                                  cBOOL(flags), file, line);
@@ -4049,6 +4085,7 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
                                 const char * const file,
                                 const int line)
  {
+    dVAR;
      UV result;
      const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_FOLD,
                                                  cBOOL(flags), file, line);
@@ -4298,8 +4335,7 @@ Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv,
             SAVEBOOL(TAINT_get);
             TAINT_NOT;
  #endif
-           Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvn(pkg,pkg_len),
-                            NULL);
+            require_pv("utf8_heavy.pl");
             {
                 /* Not ERRSV, as there is no need to vivify a scalar we are
                    about to discard. */
@@ -5459,6 +5495,9 @@ Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
       * handled the same way, speeding up this common case */
  
      if (UTF8_IS_INVARIANT(*s)) {  /* Assumes 's' contains at least 1 byte */
+        if (retlen) {
+            *retlen = 1;
+        }
          return (UV) *s;
      }