/* utfebcdic.h
*
- * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 by Larry Wall,
- * Nick Ing-Simmons, and others
+ * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009,
+ * 2010, 2011 by Larry Wall, Nick Ing-Simmons, and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
* Macros to implement UTF-EBCDIC as perl's internal encoding
- * Taken from version 7.1 of Unicode Techical Report #16:
+ * Taken from version 7.1 of Unicode Technical Report #16:
* http://www.unicode.org/unicode/reports/tr16
*
* To summarize, the way it works is:
* ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC.
* Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts
* it back to 193. As an example of how a variant character works, take
- * LATIN SMALL LETTER Y WITH DIAERESIS, which is typicially 0xDF in
+ * LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
* EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2
* converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3
* converts those to 0x8B 0x73. The table is constructed so that the
#endif /* 037 */
/* These tables moved from perl.h and converted to hex.
- They map platfrom code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
+ They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
*/
#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
/* Native (EBCDIC) byte values of a few Latin-1 characters.  The file
 * header's worked example gives 0xDF as the EBCDIC value of LATIN SMALL
 * LETTER Y WITH DIAERESIS (Unicode 0xFF).
 * NOTE(review): judging by the enclosing #if and the PL_e2a comment that
 * follows, this is the IBM-1047 set -- confirm. */
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
#define LATIN_SMALL_LETTER_SHARP_S 0x59
#define MICRO_SIGN 0xA0
+#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x0067
+#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x0047
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
/* Native (EBCDIC) byte values of a few Latin-1 characters.
 * NOTE(review): judging by the PL_e2a comment that follows, this is the
 * POSIX-BC set; the controlling #if is not visible here -- confirm. */
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
#define LATIN_SMALL_LETTER_SHARP_S 0x59
#define MICRO_SIGN 0xA0
+#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x0067
+#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x0047
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
/* Native (EBCDIC) byte values of a few Latin-1 characters.
 * NOTE(review): judging by the PL_e2a comment that follows, this is the
 * IBM-037 set; the controlling #if is not visible here -- confirm. */
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
#define LATIN_SMALL_LETTER_SHARP_S 0x59
#define MICRO_SIGN 0xA0
+#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x0067
+#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x0047
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
/* Single table lookup in PL_a2e; the argument is cast to U8 before it is
 * used as the index.  NOTE(review): PL_a2e is presumably the inverse of
 * PL_e2a -- its definition is not visible here. */
#define ASCII_TO_NATIVE(ch) PL_a2e[(U8)(ch)]
/* Transform after encoding, essentially converts to/from I8 */
/* Both directions are table lookups (PL_e2utf, PL_utf2e) indexed by
 * (U8)(ch); the *_I8 names simply expand to the *_UTF ones. */
#define NATIVE_TO_UTF(ch) PL_e2utf[(U8)(ch)] /* to I8 */
+#define NATIVE_TO_I8(ch) NATIVE_TO_UTF(ch) /* synonym */
#define UTF_TO_NATIVE(ch) PL_utf2e[(U8)(ch)] /* from I8 */
+#define I8_TO_NATIVE(ch) UTF_TO_NATIVE(ch) /* synonym */
/* Transform in wide UV char space */
/* Values above 255 are returned unchanged; anything else goes through
 * NATIVE_TO_ASCII / ASCII_TO_NATIVE respectively.  The argument appears
 * twice in each expansion (once in the test, once in the selected arm),
 * so do not pass an expression with side effects. */
#define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch))
#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
/* True when c is below 0xA0. */
#define UNI_IS_INVARIANT(c) ((c) < 0xA0)
/* UTF-EBCDIC semantic macros - transform back into I8 and then compare */
/* Each test maps the native byte c to I8 with NATIVE_TO_UTF and checks the
 * result.  Where this listing shows both an old ('-') and a new ('+')
 * definition, the summary describes the new one:
 *
 *   UTF8_IS_START                I8 is 0xC5 or above, but not 0xE0
 *   UTF8_IS_CONTINUATION         top three bits of I8 are 101 (0xA0..0xBF)
 *   UTF8_IS_CONTINUED            I8 is 0xA0 or above
 *   UTF8_IS_DOWNGRADEABLE_START  I8 is 0xC5..0xC7 inclusive
 *
 * A continuation byte carries five payload bits and UTF_START_MASK(2) is
 * 0x1F, so a two-byte sequence starting with 0xC5 encodes 0xA0..0xBF and
 * one starting with 0xC7 encodes 0xE0..0xFF (the file header's example
 * encodes 0xFF as C7 BF).  NOTE(review): 0xC0..0xC4 and 0xE0 are
 * presumably excluded because they could only begin encodings of values
 * that already fit a shorter form -- confirm against TR16.
 *
 * NATIVE_TO_UTF(c) appears twice in the two-sided tests, so c is expanded
 * more than once there.
 */
-#define UTF8_IS_START(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) != 0xA0)
+
+#define UTF8_IS_START(c) (NATIVE_TO_UTF(c) >= 0xC5 && NATIVE_TO_UTF(c) != 0xE0)
#define UTF8_IS_CONTINUATION(c) ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
#define UTF8_IS_CONTINUED(c) (NATIVE_TO_UTF(c) >= 0xA0)
-#define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xF8) == 0xC0)
+#define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_TO_UTF(c) >= 0xC5 && NATIVE_TO_UTF(c) <= 0xC7)
/* UTF_START_MARK(len): 0xFF when len > 7, otherwise 0xFE << (7-len) cast
 * to U8 -- 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE for len 2..7 (assuming U8 is
 * an 8-bit unsigned type; its definition is not visible here).
 * UTF_START_MASK(len): 0x01 when len >= 6, otherwise 0x1F >> (len-2) --
 * 0x1F, 0x0F, 0x07, 0x03 for len 2..5.  A len below 2 would give a
 * negative shift count, which is undefined in C; callers presumably never
 * pass one -- TODO confirm. */
#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : ((U8)(0xFE << (7-(len)))))
#define UTF_START_MASK(len) (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
* End:
*
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
*/