Fix uninitialized error in my_atof3()

[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c

index 8920982..345d810 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1275,10 +1275,10 @@ Also implemented as a macro in utf8.h
  */
  
  UV
-Perl_utf8n_to_uvchr(pTHX_ const U8 *s,
-                          STRLEN curlen,
-                          STRLEN *retlen,
-                          const U32 flags)
+Perl_utf8n_to_uvchr(const U8 *s,
+                    STRLEN curlen,
+                    STRLEN *retlen,
+                    const U32 flags)
  {
      PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
  
@@ -1404,7 +1404,7 @@ Also implemented as a macro in utf8.h
  */
  
  UV
-Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
+Perl_utf8n_to_uvchr_error(const U8 *s,
                            STRLEN curlen,
                            STRLEN *retlen,
                            const U32 flags,
@@ -1468,7 +1468,7 @@ The caller, of course, is responsible for freeing any returned AV.
  */
  
  UV
-Perl_utf8n_to_uvchr_msgs(pTHX_ const U8 *s,
+Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
                                 STRLEN curlen,
                                 STRLEN *retlen,
                                 const U32 flags,
@@ -1492,39 +1492,9 @@ Perl_utf8n_to_uvchr_msgs(pTHX_ const U8 *s,
      U8 temp_char_buf[UTF8_MAXBYTES + 1]; /* Used to avoid a Newx in this
                                              routine; see [perl #130921] */
      UV uv_so_far;
-    UV state = 0;
-
-    PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
-
-    /* Measurements show that this dfa is somewhat faster than the regular code
-     * below, so use it first, dropping down for the non-normal cases. */
-
-#define PERL_UTF8_DECODE_REJECT 1
-
-    while (s < send && LIKELY(state != PERL_UTF8_DECODE_REJECT)) {
-        UV type = strict_utf8_dfa_tab[*s];
-
-        uv = (state == 0)
-             ?  ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
-             : UTF8_ACCUMULATE(uv, *s);
-        state = strict_utf8_dfa_tab[256 + state + type];
-
-        if (state == 0) {
-            if (retlen) {
-                *retlen = s - s0 + 1;
-            }
-            if (errors) {
-                *errors = 0;
-            }
-            if (msgs) {
-                *msgs = NULL;
-            }
+    dTHX;
  
-            return uv;
-        }
-
-        s++;
-    }
+    PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER;
  
      /* Here, is one of: a) malformed; b) a problematic code point (surrogate,
       * non-unicode, or nonchar); or c) on ASCII platforms, one of the Hangul
@@ -2956,8 +2926,7 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
         return _to_upper_title_latin1((U8) c, p, lenp, 'S');
      }
  
-    uvchr_to_utf8(p, c);
-    return CALL_UPPER_CASE(c, p, p, lenp);
+    return CALL_UPPER_CASE(c, NULL, p, lenp);
  }
  
  UV
@@ -2969,8 +2938,7 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
         return _to_upper_title_latin1((U8) c, p, lenp, 's');
      }
  
-    uvchr_to_utf8(p, c);
-    return CALL_TITLE_CASE(c, p, p, lenp);
+    return CALL_TITLE_CASE(c, NULL, p, lenp);
  }
  
  STATIC U8
@@ -3009,8 +2977,7 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
         return to_lower_latin1((U8) c, p, lenp, 0 /* 0 is a dummy arg */ );
      }
  
-    uvchr_to_utf8(p, c);
-    return CALL_LOWER_CASE(c, p, p, lenp);
+    return CALL_LOWER_CASE(c, NULL, p, lenp);
  }
  
  UV
@@ -3106,8 +3073,7 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
  
      /* Here, above 255.  If no special needs, just use the macro */
      if ( ! (flags & (FOLD_FLAGS_LOCALE|FOLD_FLAGS_NOMIX_ASCII))) {
-       uvchr_to_utf8(p, c);
-       return CALL_FOLD_CASE(c, p, p, lenp, flags & FOLD_FLAGS_FULL);
+       return CALL_FOLD_CASE(c, NULL, p, lenp, flags & FOLD_FLAGS_FULL);
      }
      else {  /* Otherwise, _toFOLD_utf8_flags has the intelligence to deal with
                the special flags. */
@@ -3401,7 +3367,7 @@ Perl__is_utf8_mark(pTHX_ const U8 *p)
  {
      PERL_ARGS_ASSERT__IS_UTF8_MARK;
  
-    return is_utf8_common(p, &PL_utf8_mark, "IsM", NULL);
+    return is_utf8_common(p, NULL, "IsM", PL_utf8_mark);
  }
  
  STATIC UV
@@ -3562,13 +3528,16 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p,
      /* Here, there was no mapping defined, which means that the code point maps
       * to itself.  Return the inputs */
    cases_to_self:
-    len = UTF8SKIP(p);
-    if (p != ustrp) {   /* Don't copy onto itself */
-        Copy(p, ustrp, len, U8);
+    if (p) {
+        len = UTF8SKIP(p);
+        if (p != ustrp) {   /* Don't copy onto itself */
+            Copy(p, ustrp, len, U8);
+        }
+        *lenp = len;
+    }
+    else {
+       *lenp = uvchr_to_utf8(ustrp, uv1) - ustrp;
      }
-
-    if (lenp)
-        *lenp = len;
  
      return uv1;
  
@@ -5860,6 +5829,7 @@ Perl_init_uniprops(pTHX)
      PL_utf8_tofold = _new_invlist_C_array(Case_Folding_invlist);
      PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
      PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
+    PL_utf8_mark = _new_invlist_C_array(PL_uni_prop_ptrs[PL_M]);
  }
  
  SV *