END
-# The data are at the end of this file. Each line represents one #define.
-# Each line begins with either a Unicode character name with the blanks in it
-# squeezed out or replaced by underscores; or it may be a hexadecimal code
-# point. In the latter case, the name will be looked-up to use as the name
-# of the macro. In either case, the macro name will have suffixes as
-# listed above, and all blanks will be replaced by underscores.
+# The data are at the end of this file. A line that is empty or contains only
+# white space is output as an empty line. Otherwise, each line represents one
+# #define, and begins with either a Unicode character name with the blanks in
+# it squeezed out or replaced by underscores; or it may be a hexadecimal
+# Unicode code point. In the latter case, the character's name will be looked
+# up and used as the name of the macro. In either case, the macro name will
+# have suffixes as listed above, and all blanks will be replaced by
+# underscores.
#
# Each line may optionally have one of the following flags on it, separated by
# white space from the initial token.
# having to figure things out.
while ( <DATA> ) {
+ if ($_ !~ /\S/) {
+ print $out_fh "\n";
+ next;
+ }
+
chomp;
unless ($_ =~ m/ ^ ( [^\ ]* ) # Name or code point token
(?: [\ ]+ ( .* ) )? # optional flag
0300
0301
0308
-03B9 tail
-03C5 tail
+
03B9 first
+03B9 tail
+
03C5 first
+03C5 tail
+
1100
1160
11A8
#define COMBINING_GRAVE_ACCENT_UTF8 "\xCC\x80" /* U+0300 */
#define COMBINING_ACUTE_ACCENT_UTF8 "\xCC\x81" /* U+0301 */
#define COMBINING_DIAERESIS_UTF8 "\xCC\x88" /* U+0308 */
-#define GREEK_SMALL_LETTER_IOTA_UTF8_TAIL "\xB9" /* U+03B9 */
-#define GREEK_SMALL_LETTER_UPSILON_UTF8_TAIL "\x85" /* U+03C5 */
+
#define GREEK_SMALL_LETTER_IOTA_UTF8_FIRST_BYTE 0xCE /* U+03B9 */
+#define GREEK_SMALL_LETTER_IOTA_UTF8_TAIL "\xB9" /* U+03B9 */
+
#define GREEK_SMALL_LETTER_UPSILON_UTF8_FIRST_BYTE 0xCF /* U+03C5 */
+#define GREEK_SMALL_LETTER_UPSILON_UTF8_TAIL "\x85" /* U+03C5 */
+
#define HANGUL_CHOSEONG_KIYEOK_UTF8 "\xE1\x84\x80" /* U+1100 */
#define HANGUL_JUNGSEONG_FILLER_UTF8 "\xE1\x85\xA0" /* U+1160 */
#define HANGUL_JONGSEONG_KIYEOK_UTF8 "\xE1\x86\xA8" /* U+11A8 */