utf8.h: Add a #define synonym

author Karl Williamson <khw@cpan.org>

Mon, 14 Jun 2021 10:03:32 +0000 (04:03 -0600)

committer Karl Williamson <khw@cpan.org>

Sat, 7 Aug 2021 11:14:43 +0000 (05:14 -0600)
author Karl Williamson <khw@cpan.org>
Mon, 14 Jun 2021 10:03:32 +0000 (04:03 -0600)
committer Karl Williamson <khw@cpan.org>
Sat, 7 Aug 2021 11:14:43 +0000 (05:14 -0600)
diff --git a/utf8.h b/utf8.h

index ff24011..1b376fd 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -252,7 +252,7 @@ are in the character. */
   * sequence contributes to the specification of the code point.  In the bit
   * maps above, you see that the first 2 bits are a constant '10', leaving 6 of
   * real information */
-#define UTF_ACCUMULATION_SHIFT         6
+#  define UTF_CONTINUATION_BYTE_INFO_BITS 6
  
  /* ^? is defined to be DEL on ASCII systems.  See the definition of toCTRL()
   * for more */
@@ -269,10 +269,16 @@ are in the character. */
  
  #endif /* EBCDIC vs ASCII */
  
-/* 2**UTF_ACCUMULATION_SHIFT - 1.  This masks out all but the bits that carry
+/* Since the significant bits in a continuation byte are stored in the
+ * least-significant positions, we often find ourselves shifting by that
+ * amount.  This is a clearer name in such situations */
+#define UTF_ACCUMULATION_SHIFT  UTF_CONTINUATION_BYTE_INFO_BITS
+
+/* 2**info_bits - 1.  This masks out all but the bits that carry
   * real information in a continuation byte.  This turns out to be 0x3F in
   * UTF-8, 0x1F in UTF-EBCDIC. */
-#define UTF_CONTINUATION_MASK  ((U8) (nBIT_MASK(UTF_ACCUMULATION_SHIFT)))
+#define UTF_CONTINUATION_MASK                                               \
+                        ((U8) nBIT_MASK(UTF_CONTINUATION_BYTE_INFO_BITS))
  
  /* For use in UTF8_IS_CONTINUATION().  This turns out to be 0xC0 in UTF-8,
   * E0 in UTF-EBCDIC */
diff --git a/utfebcdic.h b/utfebcdic.h

index 4ae440c..56e268f 100644 (file)
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -207,8 +207,7 @@ explicitly forbidden, and the shortest possible encoding should always be used
   * there are 6 bits of real information in a UTF-8 continuation byte vs. 5 bits
   * in a UTF-EBCDIC one. */
  
-#define UTF_ACCUMULATION_SHIFT         5
-
+#define UTF_CONTINUATION_BYTE_INFO_BITS  5
  /* Also needed is how perl handles a start byte of 8 one bits.  The decision
   * was made to just append the minimal number of bytes after that so that code
   * points up to 64 bits wide could be represented.  In UTF-8, that was an extra
author	Karl Williamson <khw@cpan.org>
	Mon, 14 Jun 2021 10:03:32 +0000 (04:03 -0600)
committer	Karl Williamson <khw@cpan.org>
	Sat, 7 Aug 2021 11:14:43 +0000 (05:14 -0600)
utf8.h		patch | blob | blame | history
utfebcdic.h		patch | blob | blame | history