#define dump_trie_interim_table(a,b,c,d,e) S_dump_trie_interim_table(aTHX_ a,b,c,d,e)
#define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
#define put_byte(a,b) S_put_byte(aTHX_ a,b)
-#define put_latin1_charclass_innards(a,b) S_put_latin1_charclass_innards(aTHX_ a,b)
+#define put_charclass_bitmap_innards(a,b) S_put_charclass_bitmap_innards(aTHX_ a,b)
#define put_range(a,b,c) S_put_range(aTHX_ a,b,c)
#define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b)
#define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b)
}
}
- /* An ANYOF node contains a bitmap for the first 256 code points, and an
- * inversion list for the others, but if there are code points that should
- * match only conditionally on the target string being UTF-8, those are
- * placed in the inversion list, and not the bitmap. Since there are
- * circumstances under which they could match, they are included in the
- * SSC. But if the ANYOF node is to be inverted, we have to exclude them
- * here, so that when we invert below, the end result actually does include
- * them. (Think about "\xe0" =~ /[^\xc0]/di;). We have to do this here
- * before we add the unconditionally matched code points */
+ /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
+ * code points, and an inversion list for the others, but if there are code
+ * points that should match only conditionally on the target string being
+ * UTF-8, those are placed in the inversion list, and not the bitmap.
+ * Since there are circumstances under which they could match, they are
+ * included in the SSC. But if the ANYOF node is to be inverted, we have
+ * to exclude them here, so that when we invert below, the end result
+ * actually does include them. (Think about "\xe0" =~ /[^\xc0]/di;). We
+ * have to do this here before we add the unconditionally matched code
+ * points */
if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
_invlist_intersection_complement_2nd(invlist,
PL_UpperLatin1,
}
/* Add in the points from the bit map */
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
if (ANYOF_BITMAP_TEST(node, i)) {
invlist = add_cp_to_invlist(invlist, i);
new_node_has_latin1 = TRUE;
{
/* The inversion list in the SSC is marked mortal; now we need a more
* permanent copy, which is stored the same way that is done in a regular
- * ANYOF node, with the first 256 code points in a bit map */
+ * ANYOF node, with the first NUM_ANYOF_CODE_POINTS code points in a bit
+ * map */
SV* invlist = invlist_clone(ssc->invlist);
}
/* Quit if we are above what we should change */
- if (start > 255) {
+ if (start >= NUM_ANYOF_CODE_POINTS) {
break;
}
change_invlist = TRUE;
/* Set all the bits in the range, up to the max that we are doing */
- high = (end < 255) ? end : 255;
+ high = (end < NUM_ANYOF_CODE_POINTS - 1)
+ ? end
+ : NUM_ANYOF_CODE_POINTS - 1;
for (i = start; i <= (int) high; i++) {
if (! ANYOF_BITMAP_TEST(node, i)) {
ANYOF_BITMAP_SET(node, i);
* ignored in the recursion by means of a flag:
* <RExC_in_multi_char_class>.)
*
- * ANYOF nodes contain a bit map for the first 256 characters, with the
- * corresponding bit set if that character is in the list. For characters
- * above 255, a range list or swash is used. There are extra bits for \w,
- * etc. in locale ANYOFs, as what these match is not determinable at
- * compile time
+ * ANYOF nodes contain a bit map for the first NUM_ANYOF_CODE_POINTS
+ * characters, with the corresponding bit set if that character is in the
+ * list. For characters above this, a range list or swash is used. There
+ * are extra bits for \w, etc. in locale ANYOFs, as what these match is not
+ * determinable at compile time
*
* Returns NULL, setting *flagp to RESTART_UTF8 if the sizing scan needs
* to be restarted. This can only happen if ret_invlist is non-NULL.
);
if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
sv_catpvs(sv, "[");
- (void) put_latin1_charclass_innards(sv, IS_ANYOF_TRIE(op)
+ (void) put_charclass_bitmap_innards(sv, IS_ANYOF_TRIE(op)
? ANYOF_BITMAP(o)
: TRIE_BITMAP(trie));
sv_catpvs(sv, "]");
if (flags & ANYOF_INVERT)
sv_catpvs(sv, "^");
- /* output what the standard cp 0-255 bitmap matches */
- do_sep = put_latin1_charclass_innards(sv, ANYOF_BITMAP(o));
+ /* output what the bitmap matches, i.e., code points 0 through
+ * NUM_ANYOF_CODE_POINTS - 1 */
+ do_sep = put_charclass_bitmap_innards(sv, ANYOF_BITMAP(o));
/* output any special charclass tests (used entirely under use
* locale) */
/* Appends to 'sv' a displayable version of the range of code points from
* 'start' to 'end'. It assumes that only ASCII printables are displayable
- * as-is (though some of these will be escaped by put_byte()). For the
- * time being, this subroutine only works for latin1 (< 256) code points */
+ * as-is (though some of these will be escaped by put_byte()). */
assert(start <= end);
* hex. */
Perl_sv_catpvf(aTHX_ sv, "\\x{%02" UVXf "}-\\x{%02" UVXf "}",
start,
- (end < 256) ? end : 255);
+ (end < NUM_ANYOF_CODE_POINTS)
+ ? end
+ : NUM_ANYOF_CODE_POINTS - 1);
break;
}
}
STATIC bool
-S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
+S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap)
{
/* Appends to 'sv' a displayable version of the innards of the bracketed
* character class whose bitmap is 'bitmap'; Returns 'TRUE' if it actually
int i;
bool has_output_anything = FALSE;
- PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS;
+ PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
if (BITMAP_TEST((U8 *) bitmap,i)) {
/* The character at index i should be output. Find the next
* character that should NOT be output */
int j;
- for (j = i + 1; j < 256; j++) {
+ for (j = i + 1; j < NUM_ANYOF_CODE_POINTS; j++) {
if (! BITMAP_TEST((U8 *) bitmap, j)) {
break;
}
U16 arg2;
};
+#define NUM_ANYOF_CODE_POINTS 256 /* Number of code points (starting at 0) that an ANYOF node's bitmap covers */
-#define ANYOF_BITMAP_SIZE (256 / 8) /* 8 bits/Byte */
+#define ANYOF_BITMAP_SIZE (NUM_ANYOF_CODE_POINTS / 8) /* 8 bits/Byte */
/* Note that these form structs which are supersets of the next smaller one, by
* appending fields. Alignment problems can occur if one of those optional
memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE)
#define ANYOF_BITMAP_CLEARALL(p) \
Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
-/* Check that all 256 bits are all set. Used in S_cl_is_anything() */
-#define ANYOF_BITMAP_TESTALLSET(p) /* Assumes sizeof(p) == 32 */ \
+#if ANYOF_BITMAP_SIZE == 32
+/* Check that all 256 bits are set. */
+# define ANYOF_BITMAP_TESTALLSET(p) /* Assumes sizeof(p) == 32 */ \
memEQ (ANYOF_BITMAP(p), "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377", ANYOF_BITMAP_SIZE)
+#else
+# error Need to fix this if raise bitmap size. (As of this writing this macro is unused in the core)
+#endif
#define ANYOF_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode))
#define ANYOF_POSIXL_SKIP ((ANYOF_POSIXL_SIZE - 1)/sizeof(regnode))