-/* -*- buffer-read-only: t -*-
+/* -*- buffer-read-only: t -*-
*
* regcharclass.h
*
- * Copyright (C) 2007, by Larry Wall and others
+ * Copyright (C) 2007, 2011 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
* !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
- * This file is built by Porting/regcharclass.pl.
- *
+ * This file is built by regen/regcharclass.pl.
* Any changes made here will be lost!
- *
*/
+
+#ifndef H_REGCHARCLASS /* Guard against nested #includes */
+#define H_REGCHARCLASS 1
+
/*
LNBREAK: Line Break: \R
"\x0D\x0A" # CRLF - Network (Windows) line ending
- 0x0A # LF | LINE FEED
- 0x0B # VT | VERTICAL TAB
- 0x0C # FF | FORM FEED
- 0x0D # CR | CARRIAGE RETURN
- 0x85 # NEL | NEXT LINE
- 0x2028 # LINE SEPARATOR
- 0x2029 # PARAGRAPH SEPARATOR
+ \p{VertSpace}
*/
/*** GENERATED CODE ***/
#define is_LNBREAK(s,is_utf8) \
/*
HORIZWS: Horizontal Whitespace: \h \H
- 0x09 # HT
- 0x20 # SPACE
- 0xa0 # NBSP
- 0x1680 # OGHAM SPACE MARK
- 0x180e # MONGOLIAN VOWEL SEPARATOR
- 0x2000 # EN QUAD
- 0x2001 # EM QUAD
- 0x2002 # EN SPACE
- 0x2003 # EM SPACE
- 0x2004 # THREE-PER-EM SPACE
- 0x2005 # FOUR-PER-EM SPACE
- 0x2006 # SIX-PER-EM SPACE
- 0x2007 # FIGURE SPACE
- 0x2008 # PUNCTUATION SPACE
- 0x2009 # THIN SPACE
- 0x200A # HAIR SPACE
- 0x202f # NARROW NO-BREAK SPACE
- 0x205f # MEDIUM MATHEMATICAL SPACE
- 0x3000 # IDEOGRAPHIC SPACE
+ \p{HorizSpace}
*/
/*** GENERATED CODE ***/
#define is_HORIZWS(s,is_utf8) \
/*
VERTWS: Vertical Whitespace: \v \V
- 0x0A # LF
- 0x0B # VT
- 0x0C # FF
- 0x0D # CR
- 0x85 # NEL
- 0x2028 # LINE SEPARATOR
- 0x2029 # PARAGRAPH SEPARATOR
+ \p{VertSpace}
*/
/*** GENERATED CODE ***/
#define is_VERTWS(s,is_utf8) \
0x2029 == cp ) ) ) ) ) )
/*
- TRICKYFOLD: Problematic fold case letters.
+ GCB_L: Grapheme_Cluster_Break=L
- 0x00DF # LATIN1 SMALL LETTER SHARP S
- 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
- 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ \p{_X_GCB_L}
+
+ is_GCB_L_utf8(s) evaluates to 3 when the three bytes starting at s are
+ E1 84 80..BF, E1 85 80..9F or EA A5 A0..BC (the UTF-8 encodings of
+ U+1100..U+115F and U+A960..U+A97C), and to 0 otherwise.
+
+ The macro takes no end-of-buffer argument: s[1] is read whenever s[0]
+ is E1 or EA, and s[2] whenever s[1] also matches. s is expanded
+ several times and is not parenthesized inside the (U8*) cast, so pass
+ a simple pointer expression without side effects.
*/
/*** GENERATED CODE ***/
-#define is_TRICKYFOLD(s,is_utf8) \
-( ( is_utf8 ) ? \
- ( ( 0xC3 == ((U8*)s)[0] ) ? \
- ( ( 0x9F == ((U8*)s)[1] ) ? 2 : 0 ) \
- : ( 0xCE == ((U8*)s)[0] ) ? \
- ( ( 0x90 == ((U8*)s)[1] || 0xB0 == ((U8*)s)[1] ) ? 2 : 0 ) \
+#define is_GCB_L_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x84 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x9F ) ? 3 : 0 ) \
: 0 ) \
-: ( 0xDF == ((U8*)s)[0] ) )
+: ( 0xEA == ((U8*)s)[0] ) ? \
+ ( ( ( 0xA5 == ((U8*)s)[1] ) && ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBC ) ) ? 3 : 0 )\
+: 0 )
+/*
+ GCB_LV_LVT_V: Grapheme_Cluster_Break=(LV or LVT or V)
+
+ \p{_X_LV_LVT_V}
+
+ is_GCB_LV_LVT_V_utf8(s) evaluates to 3 when the three bytes starting
+ at s encode a code point in U+1160..U+11A7, U+AC00..U+D7A3 or
+ U+D7B0..U+D7C6, and to 0 otherwise. The accepted byte patterns are:
+ E1 85 A0..BF, E1 86 80..A7,
+ EA B0..BF 80..BF, EB|EC 80..BF 80..BF,
+ ED 80..9D 80..BF, ED 9E 80..A3|B0..BF, ED 9F 80..86.
+
+ The macro takes no end-of-buffer argument; later bytes are read only
+ after the earlier ones have matched. s is expanded several times and
+ is not parenthesized inside the (U8*) cast, so pass a simple pointer
+ expression without side effects.
+*/
/*** GENERATED CODE ***/
-#define is_TRICKYFOLD_safe(s,e,is_utf8) \
-( ((e)-(s) > 1) ? \
- ( ( is_utf8 ) ? \
- ( ( 0xC3 == ((U8*)s)[0] ) ? \
- ( ( 0x9F == ((U8*)s)[1] ) ? 2 : 0 ) \
- : ( 0xCE == ((U8*)s)[0] ) ? \
- ( ( 0x90 == ((U8*)s)[1] || 0xB0 == ((U8*)s)[1] ) ? 2 : 0 ) \
- : 0 ) \
- : ( 0xDF == ((U8*)s)[0] ) ) \
-: ((e)-(s) > 0) ? \
- ( ( !( is_utf8 ) ) ? \
- ( 0xDF == ((U8*)s)[0] ) \
+#define is_GCB_LV_LVT_V_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA7 ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEA == ((U8*)s)[0] ) ? \
+ ( ( ( 0xB0 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+: ( 0xEB == ((U8*)s)[0] || 0xEC == ((U8*)s)[0] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9D ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA3 ) || ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+ : ( 0x9F == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ? 3 : 0 ) \
: 0 ) \
: 0 )
+/*
+ GCB_Prepend: Grapheme_Cluster_Break=Prepend
+
+ \p{_X_GCB_Prepend}
+
+ is_GCB_Prepend_utf8(s) is the constant 0: the expansion never looks at
+ s, so no byte sequence is reported as matching.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_Prepend_utf8(s) \
+( 0 )
+
+/*
+ GCB_RI: Grapheme_Cluster_Break=RI
+
+ \p{_X_RI}
+
+ is_GCB_RI_utf8(s) evaluates to 4 when the four bytes starting at s are
+ F0 9F 87 A6..BF (the UTF-8 encodings of U+1F1E6..U+1F1FF), and to 0
+ otherwise.
+
+ The macro takes no end-of-buffer argument; the && chain reads each
+ later byte only after the earlier ones have matched. s is expanded
+ several times and is not parenthesized inside the (U8*) cast, so pass
+ a simple pointer expression without side effects.
+*/
/*** GENERATED CODE ***/
-#define is_TRICKYFOLD_cp(cp) \
-( 0xDF == cp || ( 0xDF < cp && \
-( 0x390 == cp || ( 0x390 < cp && \
-0x3B0 == cp ) ) ) )
+#define is_GCB_RI_utf8(s) \
+( ( ( ( ( 0xF0 == ((U8*)s)[0] ) && ( 0x9F == ((U8*)s)[1] ) ) && ( 0x87 == ((U8*)s)[2] ) ) && ( 0xA6 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBF ) ) ? 4 : 0 )
+
+/*
+ GCB_SPECIAL_BEGIN: Grapheme_Cluster_Break=special_begins
+ \p{_X_Special_Begin}
+
+ is_GCB_SPECIAL_BEGIN_utf8(s) evaluates to the length in bytes (3 or 4)
+ of the UTF-8 sequence at s when it encodes a code point in
+ U+1100..U+11FF, U+A960..U+A97C, U+AC00..U+D7A3, U+D7B0..U+D7C6,
+ U+D7CB..U+D7FB (all 3 bytes) or U+1F1E6..U+1F1FF (4 bytes), and to 0
+ otherwise. The accepted byte patterns are:
+ E1 84..87 80..BF,
+ EA A5 A0..BC, EA B0..BF 80..BF,
+ EB|EC 80..BF 80..BF,
+ ED 80..9D 80..BF, ED 9E 80..A3|B0..BF, ED 9F 80..86|8B..BB,
+ F0 9F 87 A6..BF.
+
+ The macro takes no end-of-buffer argument; later bytes are read only
+ after the earlier ones have matched. s is expanded several times and
+ is not parenthesized inside the (U8*) cast, so pass a simple pointer
+ expression without side effects.
+*/
/*** GENERATED CODE ***/
-#define what_TRICKYFOLD(s,is_utf8) \
-( ( is_utf8 ) ? \
- ( ( 0xC3 == ((U8*)s)[0] ) ? \
- ( ( 0x9F == ((U8*)s)[1] ) ? 0xDF : 0 ) \
- : ( 0xCE == ((U8*)s)[0] ) ? \
- ( ( 0x90 == ((U8*)s)[1] ) ? 0x390 \
- : ( 0xB0 == ((U8*)s)[1] ) ? 0x3B0 : 0 ) \
+#define is_GCB_SPECIAL_BEGIN_utf8(s) \
+( ( ( 0xE1 == ((U8*)s)[0] ) && ( 0x84 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x87 ) ) ? ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 )\
+: ( 0xEA == ((U8*)s)[0] ) ? \
+ ( ( 0xA5 == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBC ) ? 3 : 0 ) \
+ : ( 0xB0 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEB == ((U8*)s)[0] || 0xEC == ((U8*)s)[0] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9D ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA3 ) || ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+ : ( 0x9F == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) || ( 0x8B <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBB ) ) ? 3 : 0 )\
: 0 ) \
-: ( 0xDF == ((U8*)s)[0] ) ? 0xDF : 0 )
+: ( 0xF0 == ((U8*)s)[0] ) ? \
+ ( ( ( ( 0x9F == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) && ( 0xA6 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBF ) ) ? 4 : 0 )\
+: 0 )
+/*
+ GCB_T: Grapheme_Cluster_Break=T
+
+ \p{_X_GCB_T}
+
+ is_GCB_T_utf8(s) evaluates to 3 when the three bytes starting at s are
+ E1 86 A8..BF, E1 87 80..BF or ED 9F 8B..BB (the UTF-8 encodings of
+ U+11A8..U+11FF and U+D7CB..U+D7FB), and to 0 otherwise.
+
+ The macro takes no end-of-buffer argument: s[1] is read whenever s[0]
+ is E1 or ED, and s[2] whenever s[1] also matches. s is expanded
+ several times and is not parenthesized inside the (U8*) cast, so pass
+ a simple pointer expression without side effects.
+*/
/*** GENERATED CODE ***/
-#define what_TRICKYFOLD_safe(s,e,is_utf8) \
-( ((e)-(s) > 1) ? \
- ( ( is_utf8 ) ? \
- ( ( 0xC3 == ((U8*)s)[0] ) ? \
- ( ( 0x9F == ((U8*)s)[1] ) ? 0xDF : 0 ) \
- : ( 0xCE == ((U8*)s)[0] ) ? \
- ( ( 0x90 == ((U8*)s)[1] ) ? 0x390 \
- : ( 0xB0 == ((U8*)s)[1] ) ? 0x3B0 : 0 ) \
- : 0 ) \
- : ( 0xDF == ((U8*)s)[0] ) ? 0xDF : 0 ) \
-: ((e)-(s) > 0) ? \
- ( ( ( !( is_utf8 ) ) && ( 0xDF == ((U8*)s)[0] ) ) ? 0xDF : 0 ) \
+#define is_GCB_T_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0xA8 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x87 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( ( 0x9F == ((U8*)s)[1] ) && ( 0x8B <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBB ) ) ? 3 : 0 )\
: 0 )
+/*
+ GCB_V: Grapheme_Cluster_Break=V
+
+ \p{_X_GCB_V}
+
+ is_GCB_V_utf8(s) evaluates to 3 when the three bytes starting at s are
+ E1 85 A0..BF, E1 86 80..A7, ED 9E B0..BF or ED 9F 80..86 (the UTF-8
+ encodings of U+1160..U+11A7 and U+D7B0..U+D7C6), and to 0 otherwise.
+
+ The macro takes no end-of-buffer argument: s[1] is read whenever s[0]
+ is E1 or ED, and s[2] whenever s[1] also matches. s is expanded
+ several times and is not parenthesized inside the (U8*) cast, so pass
+ a simple pointer expression without side effects.
+*/
/*** GENERATED CODE ***/
-#define what_len_TRICKYFOLD(s,is_utf8,len) \
-( ( is_utf8 ) ? \
- ( ( 0xC3 == ((U8*)s)[0] ) ? \
- ( ( 0x9F == ((U8*)s)[1] ) ? len=2, 0xDF : 0 ) \
- : ( 0xCE == ((U8*)s)[0] ) ? \
- ( ( 0x90 == ((U8*)s)[1] ) ? len=2, 0x390 \
- : ( 0xB0 == ((U8*)s)[1] ) ? len=2, 0x3B0 : 0 ) \
+#define is_GCB_V_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA7 ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9F == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ? 3 : 0 ) \
: 0 ) \
-: ( 0xDF == ((U8*)s)[0] ) ? len=1, 0xDF : 0 )
+: 0 )
+
+/*
+ QUOTEMETA: Meta-characters that \Q should quote
+ \p{_Perl_Quotemeta}
+
+ is_QUOTEMETA_high(s) evaluates to the length in bytes (2, 3 or 4) of
+ the UTF-8 sequence at s when it is one of the sequences tested below,
+ and to 0 otherwise. No single-byte sequence matches.
+ 2 bytes: CD 8F (U+034F) only.
+ 3 bytes: selected sequences with lead byte E1, E2, E3 or EF.
+ 4 bytes: F0 9D 85 B3..BA (U+1D173..U+1D17A) and
+ F3 A0 80..BF 80..BF (U+E0000..U+E0FFF).
+
+ The macro takes no end-of-buffer argument; later bytes are read only
+ after the earlier ones have matched. s is expanded several times and
+ is not parenthesized inside the (U8*) cast, so pass a simple pointer
+ expression without side effects.
+*/
/*** GENERATED CODE ***/
-#define what_len_TRICKYFOLD_safe(s,e,is_utf8,len) \
-( ((e)-(s) > 1) ? \
- ( ( is_utf8 ) ? \
- ( ( 0xC3 == ((U8*)s)[0] ) ? \
- ( ( 0x9F == ((U8*)s)[1] ) ? len=2, 0xDF : 0 ) \
- : ( 0xCE == ((U8*)s)[0] ) ? \
- ( ( 0x90 == ((U8*)s)[1] ) ? len=2, 0x390 \
- : ( 0xB0 == ((U8*)s)[1] ) ? len=2, 0x3B0 : 0 ) \
- : 0 ) \
- : ( 0xDF == ((U8*)s)[0] ) ? len=1, 0xDF : 0 ) \
-: ((e)-(s) > 0) ? \
- ( ( ( !( is_utf8 ) ) && ( 0xDF == ((U8*)s)[0] ) ) ? len=1, 0xDF : 0 ) \
+#define is_QUOTEMETA_high(s) \
+( ( 0xCD == ((U8*)s)[0] ) ? \
+ ( ( 0x8F == ((U8*)s)[1] ) ? 2 : 0 ) \
+: ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0x9F == ((U8*)s)[2] || 0xA0 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0x9A == ((U8*)s)[1] ) ? \
+ ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( 0xB4 == ((U8*)s)[2] || 0xB5 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xA0 == ((U8*)s)[1] ) ? \
+ ( ( 0x8B <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8E ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xE2 == ((U8*)s)[0] ) ? \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBE ) ? 3 : 0 ) \
+ : ( 0x81 == ((U8*)s)[1] ) ? \
+ ( ( ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x93 ) || ( 0x95 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xAF ) ) ? 3 : 0 )\
+ : ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0x90 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x87 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x90 ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x91 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x9F ) ? 3 : 0 ) \
+ : ( 0x94 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9C ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9D == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xB5 ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( 0x94 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( ( 0x9F <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xAF ) || 0xB8 == ((U8*)s)[1] || 0xB9 == ((U8*)s)[1] ) ?\
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xE3 == ((U8*)s)[0] ) ? \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x83 ) || ( 0x88 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA0 ) || 0xB0 == ((U8*)s)[2] ) ? 3 : 0 )\
+ : ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0xA4 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEF == ((U8*)s)[0] ) ? \
+ ( ( 0xB4 == ((U8*)s)[1] ) ? \
+ ( ( 0xBE == ((U8*)s)[2] || 0xBF == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xB8 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8F ) ? 3 : 0 ) \
+ : ( 0xB9 == ((U8*)s)[1] ) ? \
+ ( ( 0x85 == ((U8*)s)[2] || 0x86 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBB == ((U8*)s)[1] ) ? \
+ ( ( 0xBF == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBE == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xB8 ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xF0 == ((U8*)s)[0] ) ? \
+ ( ( ( ( 0x9D == ((U8*)s)[1] ) && ( 0x85 == ((U8*)s)[2] ) ) && ( 0xB3 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBA ) ) ? 4 : 0 )\
+: ( 0xF3 == ((U8*)s)[0] ) ? \
+ ( ( ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) && ( 0x80 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBF ) ) ? 4 : 0 )\
: 0 )
+
+#endif /* H_REGCHARCLASS */
+
/* ex: set ro: */