Clarification on the behaviour of qw// and x :

[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c

index 20f94df..02e202d 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -238,13 +238,13 @@ Perl_is_utf8_string(pTHX_ const U8 *s, STRLEN len)
  {
      const U8* x = s;
      const U8* send;
-    STRLEN c;
  
      if (!len && s)
         len = strlen((const char *)s);
      send = s + len;
  
      while (x < send) {
+       STRLEN c;
          /* Inline the easy bits of is_utf8_char() here for speed... */
          if (UTF8_IS_INVARIANT(*x))
               c = 1;
@@ -521,7 +521,7 @@ malformed:
         }
         
         if (warning) {
-           char *s = SvPVX(sv);
+           const char *s = SvPVX_const(sv);
  
             if (PL_op)
                 Perl_warner(aTHX_ packWARN(WARN_UTF8),
@@ -600,24 +600,16 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
       * the bitops (especially ~) can create illegal UTF-8.
       * In other words: in Perl UTF-8 is not just for Unicode. */
  
-    if (e < s) {
-        if (ckWARN_d(WARN_UTF8)) {
-           if (PL_op)
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
-                           "%s in %s", unees, OP_DESC(PL_op));
-           else
-               Perl_warner(aTHX_ packWARN(WARN_UTF8), unees);
-       }
-       return 0;
-    }
+    if (e < s)
+       goto warn_and_return;
      while (s < e) {
-       U8 t = UTF8SKIP(s);
-
+       const U8 t = UTF8SKIP(s);
         if (e - s < t) {
+           warn_and_return:
             if (ckWARN_d(WARN_UTF8)) {
                 if (PL_op)
                     Perl_warner(aTHX_ packWARN(WARN_UTF8),
-                               unees, OP_DESC(PL_op));
+                           "%s in %s", unees, OP_DESC(PL_op));
                 else
                     Perl_warner(aTHX_ packWARN(WARN_UTF8), unees);
             }
@@ -654,26 +646,18 @@ Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
      if (a < b) {
         while (a < b) {
             const U8 c = UTF8SKIP(a);
-
-           if (b - a < c) {
-               if (ckWARN_d(WARN_UTF8)) {
-                   if (PL_op)
-                       Perl_warner(aTHX_ packWARN(WARN_UTF8),
-                                   "%s in %s", unees, OP_DESC(PL_op));
-                   else
-                       Perl_warner(aTHX_ packWARN(WARN_UTF8), unees);
-               }
-               return off;
-           }
+           if (b - a < c)
+               goto warn_and_return;
             a += c;
             off--;
         }
      }
      else {
         while (b < a) {
-           U8 c = UTF8SKIP(b);
+           const U8 c = UTF8SKIP(b);
  
             if (a - b < c) {
+               warn_and_return:
                 if (ckWARN_d(WARN_UTF8)) {
                     if (PL_op)
                         Perl_warner(aTHX_ packWARN(WARN_UTF8),
@@ -705,7 +689,7 @@ on the first byte of character or just after the last byte of a character.
  */
  
  U8 *
-Perl_utf8_hop(pTHX_ U8 *s, I32 off)
+Perl_utf8_hop(pTHX_ const U8 *s, I32 off)
  {
      /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
       * the bitops (especially ~) can create illegal UTF-8.
@@ -722,7 +706,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off)
                 s--;
         }
      }
-    return s;
+    return (U8 *)s;
  }
  
  /*
@@ -1400,7 +1384,6 @@ The "normal" is a string like "ToLower" which means the swash
  UV
  Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, const char *normal, const char *special)
  {
-    UV uv1;
      U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
      STRLEN len = 0;
  
@@ -1408,7 +1391,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, const
      /* The NATIVE_TO_UNI() and UNI_TO_NATIVE() mappings
       * are necessary in EBCDIC, they are redundant no-ops
       * in ASCII-ish platforms, and hopefully optimized away. */
-    uv1 = NATIVE_TO_UNI(uv0);
+    const UV uv1 = NATIVE_TO_UNI(uv0);
      uvuni_to_utf8(tmpbuf, uv1);
  
      if (!*swashp) /* load on-demand */
@@ -1866,7 +1849,7 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim, UV f
          }
          u = utf8_to_uvchr((U8*)s, 0);
          if (u < 256) {
-            unsigned char c = (unsigned char)u & 0xFF;
+            const unsigned char c = (unsigned char)u & 0xFF;
              if (!ok && (flags & UNI_DISPLAY_BACKSLASH)) {
                  switch (c) {
                  case '\n':
@@ -2038,5 +2021,5 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const
   * indent-tabs-mode: t
   * End:
   *
- * vim: shiftwidth=4:
-*/
+ * ex: set ts=8 sts=4 sw=4 noet:
+ */