*/
#include "regcharclass.h"
-typedef OP OP_4tree; /* Will be redefined later. */
-
-
/* Convert branch sequences to more efficient trie ops? */
#define PERL_ENABLE_TRIE_OPTIMISATION 1
/* Be really aggressive about optimising patterns with trie sequences? */
#define PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION 1
-/* Use old style unicode mappings for perl and posix character classes
- *
- * NOTE: Enabling this essentially breaks character class matching against unicode
- * strings, so that POSIX char classes match when they shouldn't, and \d matches
- * way more than 10 characters, and sometimes a charclass and its complement either
- * both match or neither match.
- * NOTE: Disabling this will cause various backwards compatibility issues to rear
- * their head, and tests to fail. However it will make the charclass behaviour
- * consistent regardless of internal string type, and make character class inversions
- * consistent. The tests that fail in the regex engine are basically broken tests.
- *
- * Personally I think 5.12 should disable this for sure. Its a bit more debatable for
- * 5.10, so for now im leaving it enabled.
- * XXX: It is now enabled for 5.11/5.12
- *
- * -demerphq
- */
-#define PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS 1
-
/* Should the optimiser take positive assertions into account? */
#define PERL_ENABLE_POSITIVE_ASSERTION_STUDY 0
#define RE_TRACK_PATTERN_OFFSETS
#endif
-/* Unless the next line is uncommented it is illegal to combine lazy
- matching with possessive matching. Frankly it doesn't make much sense
- to allow it as X*?+ matches nothing, X+?+ matches a single char only,
- and X{min,max}?+ matches min times only.
- */
-/* #define REG_ALLOW_MINMOD_SUSPEND */
-
/*
* The "internal use only" fields in regexp.h are present to pass info from
* compile to execute that permits the execute phase to run lots faster on
Used to make it easier to clone and free arbitrary
data that the regops need. Often the ARG field of
a regop is an index into this structure */
+ struct reg_code_block *code_blocks;/* positions of literal (?{}) */
+ int num_code_blocks; /* size of code_blocks[] */
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp_internal;
*
* See regexp.h for flags used externally to the regexp engine
*/
+#define RXp_INTFLAGS(rx) ((rx)->intflags)
+#define RX_INTFLAGS(prog) RXp_INTFLAGS(ReANY(prog))
+
#define PREGf_SKIP 0x00000001
#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */
#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? */
#define PREGf_VERBARG_SEEN 0x00000008
#define PREGf_CUTGROUP_SEEN 0x00000010
+#define PREGf_USE_RE_EVAL 0x00000020 /* compiled with "use re 'eval'" */
+/* these used to be extflags, but are now intflags */
+#define PREGf_NOSCAN 0x00000040
+#define PREGf_CANY_SEEN 0x00000080
+#define PREGf_GPOS_SEEN 0x00000100
+#define PREGf_GPOS_FLOAT 0x00000200
+#define PREGf_ANCH_BOL 0x00000400
+#define PREGf_ANCH_MBOL 0x00000800
+#define PREGf_ANCH_SBOL 0x00001000
+#define PREGf_ANCH_GPOS 0x00002000
+
+#define PREGf_ANCH (PREGf_ANCH_SBOL | PREGf_ANCH_GPOS | \
+ PREGf_ANCH_MBOL | PREGf_ANCH_BOL )
/* this is where the old regcomp.h started */
U16 arg2;
};
+#define NUM_ANYOF_CODE_POINTS 256
+
+#define ANYOF_BITMAP_SIZE (NUM_ANYOF_CODE_POINTS / 8) /* 8 bits/Byte */
-#define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */
-#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */
+/* Note that these form structs which are supersets of the next smaller one, by
+ * appending fields. Alignment problems can occur if one of those optional
+ * fields requires stricter alignment than the base struct. And formal
+ * parameters that can really be two or more of the structs should be
+ * declared as the smallest one it could be. See commit message for
+ * 7dcac5f6a5195002b55c935ee1d67f67e1df280b. Regnode allocation is done
+ * without regard to alignment, and changing it to do so would also require changing
+ * the code that inserts and deletes regnodes. The basic single-argument
+ * regnode has a U32, which is what reganode() allocates as a unit. Therefore
+ * no field can require stricter alignment than U32. */
/* also used by trie */
struct regnode_charclass {
U8 flags;
U8 type;
U16 next_off;
- U32 arg1; /* used as ptr in S_regclass */
+ U32 arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */
};
/* has runtime (locale) \d, \w, ..., [:posix:] classes */
struct regnode_charclass_class {
- U8 flags; /* ANYOF_CLASS bit must go here */
+ U8 flags; /* ANYOF_POSIXL bit must go here */
U8 type;
U16 next_off;
- U32 arg1; /* used as ptr in S_regclass */
+ U32 arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */
- char classflags[ANYOF_CLASSBITMAP_SIZE]; /* and run-time */
+ U32 classflags; /* and run-time */
+};
+
+/* A synthetic start class (SSC) is a regnode_charclass_posixl_fold, plus an
+ * extra SV*, used only during its construction and which is not used by
+ * regexec.c. Note that the 'next_off' field is unused, as the SSC stands
+ * alone, so there is never a next node. Also, there is no alignment issue,
+ * because these are declared or allocated as a complete unit so the compiler
+ * takes care of alignment. This is unlike the other regnodes which are
+ * allocated in terms of multiples of a single-argument regnode. SSC nodes can
+ * have a pointer field because there is no alignment issue, and because it is
+ * set to NULL after construction, before any cloning of the pattern */
+struct regnode_ssc {
+ U8 flags; /* ANYOF_POSIXL bit must go here */
+ U8 type; /* what OP() reads; set_ANYOF_SYNTHETIC() stores ANYOF here */
+ U16 next_off; /* not a real offset in an SSC: set_ANYOF_SYNTHETIC() stores 1
+                 here as the marker that is_ANYOF_SYNTHETIC() tests */
+ U32 arg1;
+ char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */
+ U32 classflags; /* and run-time */
+
+ /* Auxiliary, only used during construction; NULL afterwards: list of code
+ * points matched */
+ SV* invlist;
};
+/* We take advantage of 'next_off' not otherwise being used in the SSC by
+ * actually using it: by setting it to 1. This allows us to test and
+ * distinguish between an SSC and other ANYOF node types, as 'next_off' cannot
+ * otherwise be 1, because it is the offset to the next regnode expressed in
+ * units of regnodes. Since an ANYOF node contains extra fields, it adds up
+ * to 12 regnode units on 32-bit systems (hence the minimum this can be, if
+ * not 0, is 11 there). Even if things get tightly packed on a 64-bit system,
+ * it still would be more than 1.
+ *
+ * Both macros expand 'n' twice (once for OP(), once for NEXT_OFF()), so the
+ * argument must be free of side effects. */
+#define set_ANYOF_SYNTHETIC(n) STMT_START{ OP(n) = ANYOF; \
+ NEXT_OFF(n) = 1; /* the SSC marker */ \
+ } STMT_END
+#define is_ANYOF_SYNTHETIC(n) (OP(n) == ANYOF && NEXT_OFF(n) == 1) /* is 'n' an ANYOF node carrying the marker stored by set_ANYOF_SYNTHETIC()? */
+
/* XXX fix this description.
Impose a limit of REG_INFTY on various pattern matching operations
to limit stack growth and to avoid "infinite" recursions.
#undef STRING
#define OP(p) ((p)->type)
-#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
- regnode types */
+#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
+ regnode types. For some, it's the \
+ character set of the regnode */
#define OPERAND(p) (((struct regnode_string *)p)->string)
#define MASK(p) ((char*)OPERAND(p))
#define STR_LEN(p) (((struct regnode_string *)p)->str_len)
#define REG_MAGIC 0234
-#define SIZE_ONLY (RExC_emit == &PL_regdummy)
+#define SIZE_ONLY (RExC_emit == (regnode *) & RExC_emit_dummy)
+#define PASS1 SIZE_ONLY
+#define PASS2 (! SIZE_ONLY)
+
+/* If the bitmap fully represents what this ANYOF node can match, the
+ * ARG is set to this special value (since 0, 1, ... are legal, but will never
+ * reach this high). */
+#define ANYOF_NONBITMAP_EMPTY ((U32) -1)
+
+/* Flags for node->flags of ANYOF. These are in short supply, with none
+ * currently available. The ABOVE_LATIN1_ALL bit could be freed up
+ * by resorting to creating a swash containing everything above 255. This
+ * introduces a performance penalty. An option that wouldn't slow things down
+ * would be to split one of the two LOC flags out into a separate
+ * node, like what was done with ANYOF_NON_UTF8_NON_ASCII_ALL in commit
+ * 34fdef848b1687b91892ba55e9e0c3430e0770f6 (but which was reverted because it
+ * wasn't the best option available at the time), and using a LOC flag is
+ * probably better than that commit anyway. But it could be reinstated if we
+ * need a bit. The LOC flags are only for /l nodes; the reverted commit was
+ * only for /d, so there are no combinatorial issues. The LOC flag to use is
+ * probably the POSIXL one.
+ * Several flags are not used in synthetic start class (SSC) nodes, so could be
+ * shared should new flags be needed for SSCs, like ANYOF_EMPTY_STRING now. */
+
+/* regexec.c is expecting this to be in the low bit */
+#define ANYOF_INVERT 0x01
+
+/* For the SSC node only, which cannot be inverted, so is shared with that bit.
+ * This means "Does this SSC match an empty string?" This is used only during
+ * regex compilation. */
+#define ANYOF_EMPTY_STRING ANYOF_INVERT
+
+/* Are there things that will match only if the target string is encoded in
+ * UTF-8? (This is not set if ANYOF_ABOVE_LATIN1_ALL is set) */
+#define ANYOF_UTF8 0x02
-/* Flags for node->flags of several of the node types */
-#define USE_UNI 0x01
+/* The fold is calculated and stored in the bitmap where possible at compile
+ * time. However under locale, the actual folding varies depending on
+ * what the locale is at the time of execution, so it has to be deferred until
+ * then */
+#define ANYOF_LOC_FOLD 0x04
+
+/* Set if this is a regnode_charclass_posixl vs a regnode_charclass. This
+ * is used for runtime \d, \w, [:posix:], ..., which are used only in locale
+ * and the optimizer's synthetic start class. Non-locale \d, etc are resolved
+ * at compile-time */
+#define ANYOF_POSIXL 0x08
+#define ANYOF_CLASS ANYOF_POSIXL
+#define ANYOF_LARGE ANYOF_POSIXL
+
+/* Should we raise a warning if matching against an above-Unicode code point?
+ * */
+#define ANYOF_WARN_SUPER 0x10
-/* Flags for node->flags of ANYOF. These are in short supply, so some games
- * are done to share them, as described below. If necessary, the ANYOF_LOCALE
- * and ANYOF_CLASS bits could be shared with a space penalty for locale nodes
- * (and the code at the time this comment was written, is written so that all
- * that is necessary to make the change would be to redefine the ANYOF_CLASS
- * define). Once the planned change to compile all the above-latin1 code points
- * is done, then the UNICODE_ALL bit can be freed up. If flags need to be
- * added that are applicable to the synthetic start class only, with some work,
- * they could be put in the next-node field, or in an unused bit of the
- * classflags field. */
+/* Can match something outside the bitmap that isn't in utf8 */
+#define ANYOF_NONBITMAP_NON_UTF8 0x20
-#define ANYOF_LOCALE 0x01
+/* Matches every code point 0x100 and above */
+#define ANYOF_ABOVE_LATIN1_ALL 0x40
+#define ANYOF_UNICODE_ALL ANYOF_ABOVE_LATIN1_ALL
-/* The fold is calculated and stored in the bitmap where possible at compile
- * time. However there are two cases where it isn't possible. These share
- * this bit: 1) under locale, where the actual folding varies depending on
- * what the locale is at the time of execution; and 2) where the folding is
- * specified in a swash, not the bitmap, such as characters which aren't
- * specified in the bitmap, or properties that aren't looked at at compile time
- */
-#define ANYOF_LOC_NONBITMAP_FOLD 0x02
+/* Match all Latin1 characters that aren't ASCII when the target string is not
+ * in utf8. */
+#define ANYOF_NON_UTF8_NON_ASCII_ALL 0x80
-#define ANYOF_INVERT 0x04
+#define ANYOF_FLAGS_ALL (0xff)
-/* EOS, meaning that it can match an empty string too, is used for the
- * synthetic start class (ssc) only. It can share the INVERT bit, as the ssc
- * is never inverted. The bit just needs to be turned off before regexec.c
- * gets a hold of it so that regexec.c doesn't think it's inverted, but this
- * happens automatically, as if the ssc can match an EOS, the ssc is discarded,
- * and never passed to regexec.c */
-#define ANYOF_EOS ANYOF_INVERT
+#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_POSIXL)
-/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ...
- * The non-locale ones are resolved at compile-time */
-#define ANYOF_CLASS 0x08
-#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
+/* These are the flags that apply to both regular ANYOF nodes and synthetic
+ * start class nodes during construction of the SSC. During finalization of
+ * the SSC, other of the flags could be added to it */
+#define ANYOF_COMMON_FLAGS (ANYOF_WARN_SUPER|ANYOF_UTF8)
-/* Can match something outside the bitmap that is expressible only in utf8 */
-#define ANYOF_UTF8 0x10
+/* Character classes for node->classflags of ANYOF */
+/* Should be synchronized with a table in regprop() */
+/* 2n should be the normal one, paired with its complement at 2n+1 */
+
+#define ANYOF_ALPHA ((_CC_ALPHA) * 2)
+#define ANYOF_NALPHA ((ANYOF_ALPHA) + 1)
+#define ANYOF_ALPHANUMERIC ((_CC_ALPHANUMERIC) * 2) /* [[:alnum:]] isalnum(3), utf8::IsAlnum */
+#define ANYOF_NALPHANUMERIC ((ANYOF_ALPHANUMERIC) + 1)
+#define ANYOF_ASCII ((_CC_ASCII) * 2)
+#define ANYOF_NASCII ((ANYOF_ASCII) + 1)
+#define ANYOF_BLANK ((_CC_BLANK) * 2) /* GNU extension: space and tab: non-vertical space */
+#define ANYOF_NBLANK ((ANYOF_BLANK) + 1)
+#define ANYOF_CASED ((_CC_CASED) * 2) /* Pseudo class for [:lower:] or
+ [:upper:] under /i */
+#define ANYOF_NCASED ((ANYOF_CASED) + 1)
+#define ANYOF_CNTRL ((_CC_CNTRL) * 2)
+#define ANYOF_NCNTRL ((ANYOF_CNTRL) + 1)
+#define ANYOF_DIGIT ((_CC_DIGIT) * 2) /* \d */
+#define ANYOF_NDIGIT ((ANYOF_DIGIT) + 1)
+#define ANYOF_GRAPH ((_CC_GRAPH) * 2)
+#define ANYOF_NGRAPH ((ANYOF_GRAPH) + 1)
+#define ANYOF_LOWER ((_CC_LOWER) * 2)
+#define ANYOF_NLOWER ((ANYOF_LOWER) + 1)
+#define ANYOF_PRINT ((_CC_PRINT) * 2)
+#define ANYOF_NPRINT ((ANYOF_PRINT) + 1)
+#define ANYOF_PSXSPC ((_CC_PSXSPC) * 2) /* POSIX space: \s plus the vertical tab */
+#define ANYOF_NPSXSPC ((ANYOF_PSXSPC) + 1)
+#define ANYOF_PUNCT ((_CC_PUNCT) * 2)
+#define ANYOF_NPUNCT ((ANYOF_PUNCT) + 1)
+#define ANYOF_SPACE ((_CC_SPACE) * 2) /* \s */
+#define ANYOF_NSPACE ((ANYOF_SPACE) + 1)
+#define ANYOF_UPPER ((_CC_UPPER) * 2)
+#define ANYOF_NUPPER ((ANYOF_UPPER) + 1)
+#define ANYOF_WORDCHAR ((_CC_WORDCHAR) * 2) /* \w, PL_utf8_alnum, utf8::IsWord, ALNUM */
+#define ANYOF_NWORDCHAR ((ANYOF_WORDCHAR) + 1)
+#define ANYOF_XDIGIT ((_CC_XDIGIT) * 2)
+#define ANYOF_NXDIGIT ((ANYOF_XDIGIT) + 1)
+
+/* pseudo classes below this, not stored in the class bitmap, but used as flags
+ during compilation of char classes */
-/* Can match something outside the bitmap that isn't in utf8 */
-#define ANYOF_NONBITMAP_NON_UTF8 0x20
+#define ANYOF_VERTWS ((_CC_VERTSPACE) * 2)
+#define ANYOF_NVERTWS ((ANYOF_VERTWS)+1)
-/* Set if the bitmap doesn't fully represent what this node can match */
-#define ANYOF_NONBITMAP (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8)
-#define ANYOF_UNICODE ANYOF_NONBITMAP /* old name, for back compat */
+/* It is best if this is the last one, as all above it are stored as bits in a
+ * bitmap, and it isn't part of that bitmap */
+#if _CC_VERTSPACE != _HIGHEST_REGCOMP_DOT_H_SYNC
+# error Problem with handy.h _HIGHEST_REGCOMP_DOT_H_SYNC #define
+#endif
-/* Matches every code point 0x100 and above*/
-#define ANYOF_UNICODE_ALL 0x40
+#define ANYOF_POSIXL_MAX (ANYOF_VERTWS) /* So upper loop limit is written:
+ * '< ANYOF_MAX'
+ * Hence doesn't include VERTWS, as that
+ * is a pseudo class */
+#define ANYOF_MAX ANYOF_POSIXL_MAX
-#define ANYOF_FLAGS_ALL 0xff
+#if (ANYOF_POSIXL_MAX > 32) /* Must fit in 32-bit word */
+# error Problem with handy.h _CC_foo #defines
+#endif
-/* Character classes for node->classflags of ANYOF */
-/* Should be synchronized with a table in regprop() */
-/* 2n should pair with 2n+1 */
-
-#define ANYOF_ALNUM 0 /* \w, PL_utf8_alnum, utf8::IsWord, ALNUM */
-#define ANYOF_NALNUM 1
-#define ANYOF_SPACE 2 /* \s */
-#define ANYOF_NSPACE 3
-#define ANYOF_DIGIT 4 /* \d */
-#define ANYOF_NDIGIT 5
-#define ANYOF_ALNUMC 6 /* [[:alnum:]] isalnum(3), utf8::IsAlnum, ALNUMC */
-#define ANYOF_NALNUMC 7
-#define ANYOF_ALPHA 8
-#define ANYOF_NALPHA 9
-#define ANYOF_ASCII 10
-#define ANYOF_NASCII 11
-#define ANYOF_CNTRL 12
-#define ANYOF_NCNTRL 13
-#define ANYOF_GRAPH 14
-#define ANYOF_NGRAPH 15
-#define ANYOF_LOWER 16
-#define ANYOF_NLOWER 17
-#define ANYOF_PRINT 18
-#define ANYOF_NPRINT 19
-#define ANYOF_PUNCT 20
-#define ANYOF_NPUNCT 21
-#define ANYOF_UPPER 22
-#define ANYOF_NUPPER 23
-#define ANYOF_XDIGIT 24
-#define ANYOF_NXDIGIT 25
-#define ANYOF_PSXSPC 26 /* POSIX space: \s plus the vertical tab */
-#define ANYOF_NPSXSPC 27
-#define ANYOF_BLANK 28 /* GNU extension: space and tab: non-vertical space */
-#define ANYOF_NBLANK 29
-
-#define ANYOF_MAX 32
-
-/* pseudo classes, not stored in the class bitmap, but used as flags
- during compilation of char classes */
+#define ANYOF_HORIZWS ((ANYOF_POSIXL_MAX)+2) /* = (ANYOF_NVERTWS + 1) */
+#define ANYOF_NHORIZWS ((ANYOF_POSIXL_MAX)+3)
-#define ANYOF_VERTWS (ANYOF_MAX+1)
-#define ANYOF_NVERTWS (ANYOF_MAX+2)
-#define ANYOF_HORIZWS (ANYOF_MAX+3)
-#define ANYOF_NHORIZWS (ANYOF_MAX+4)
+#define ANYOF_UNIPROP ((ANYOF_POSIXL_MAX)+4) /* Used to indicate a Unicode
+ property: \p{} or \P{} */
/* Backward source code compatibility. */
#define ANYOF_NALNUML ANYOF_NALNUM
#define ANYOF_SPACEL ANYOF_SPACE
#define ANYOF_NSPACEL ANYOF_NSPACE
+#define ANYOF_ALNUM ANYOF_WORDCHAR
+#define ANYOF_NALNUM ANYOF_NWORDCHAR
/* Utility macros for the bitmap and classes of ANYOF */
#define ANYOF_SIZE (sizeof(struct regnode_charclass))
-#define ANYOF_CLASS_SIZE (sizeof(struct regnode_charclass_class))
+#define ANYOF_POSIXL_SIZE (sizeof(regnode_charclass_posixl))
+#define ANYOF_CLASS_SIZE ANYOF_POSIXL_SIZE
#define ANYOF_FLAGS(p) ((p)->flags)
-#define ANYOF_BIT(c) (1 << ((c) & 7))
+#define ANYOF_BIT(c) (1U << ((c) & 7))
-#define ANYOF_CLASS_BYTE(p, c) (((struct regnode_charclass_class*)(p))->classflags[((c) >> 3) & 3])
-#define ANYOF_CLASS_SET(p, c) (ANYOF_CLASS_BYTE(p, c) |= ANYOF_BIT(c))
-#define ANYOF_CLASS_CLEAR(p, c) (ANYOF_CLASS_BYTE(p, c) &= ~ANYOF_BIT(c))
-#define ANYOF_CLASS_TEST(p, c) (ANYOF_CLASS_BYTE(p, c) & ANYOF_BIT(c))
+#define ANYOF_POSIXL_SET(p, c) (((regnode_charclass_posixl*) (p))->classflags |= (1U << (c)))
+#define ANYOF_CLASS_SET(p, c) ANYOF_POSIXL_SET((p), (c))
-#define ANYOF_CLASS_ZERO(ret) Zero(((struct regnode_charclass_class*)(ret))->classflags, ANYOF_CLASSBITMAP_SIZE, char)
-#define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
+#define ANYOF_POSIXL_CLEAR(p, c) (((regnode_charclass_posixl*) (p))->classflags &= ~ (1U <<(c)))
+#define ANYOF_CLASS_CLEAR(p, c) ANYOF_POSIXL_CLEAR((p), (c))
+#define ANYOF_POSIXL_TEST(p, c) (((regnode_charclass_posixl*) (p))->classflags & (1U << (c)))
+#define ANYOF_CLASS_TEST(p, c) ANYOF_POSIXL_TEST((p), (c))
+
+#define ANYOF_POSIXL_ZERO(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = 0; } STMT_END
+#define ANYOF_CLASS_ZERO(ret) ANYOF_POSIXL_ZERO(ret)
+
+/* Mask with one bit set for every class that can be stored in 'classflags',
+ * i.e. bits 0 .. ANYOF_POSIXL_MAX-1 (eg. 0x3FFF_FFFF when ANYOF_POSIXL_MAX is
+ * 30). ANYOF_POSIXL_SET() stores class 'c' at bit (1U << c) and the highest
+ * class is ANYOF_POSIXL_MAX-1, so all ANYOF_POSIXL_MAX low bits are needed.
+ * The earlier form, (1U << (ANYOF_POSIXL_MAX - 1)) - 1, left the topmost class
+ * bit out. This is written as a right shift of all-ones rather than as
+ * (1U << ANYOF_POSIXL_MAX) - 1 because ANYOF_POSIXL_MAX is allowed to be 32,
+ * and shifting a 32-bit value by 32 bits is undefined behavior. */
+#define ANYOF_POSIXL_ALL_BITS (((U32) 0xFFFFFFFF) >> (32 - (ANYOF_POSIXL_MAX)))
+
+/* Turn on every class bit in the node's 'classflags' */
+#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = ANYOF_POSIXL_ALL_BITS; } STMT_END
+#define ANYOF_CLASS_SETALL(ret) ANYOF_POSIXL_SETALL(ret)
+
+/* Is any class bit set? The node only has a 'classflags' field when its
+ * ANYOF_POSIXL flag is on, so that is checked first */
+#define ANYOF_POSIXL_TEST_ANY_SET(p) \
+ ((ANYOF_FLAGS(p) & ANYOF_POSIXL) \
+ && (((regnode_charclass_posixl*)(p))->classflags))
+#define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p)
+
+/* Since an SSC always has this field, we don't have to test for that; nor do
+ * we want to because the bit isn't set for SSC during its construction */
+#define ANYOF_POSIXL_SSC_TEST_ANY_SET(p) \
+ cBOOL(((regnode_ssc*)(p))->classflags)
+#define ANYOF_POSIXL_SSC_TEST_ALL_SET(p) /* Are all bits set? */ \
+ (((regnode_ssc*) (p))->classflags \
+ == ANYOF_POSIXL_ALL_BITS)
+
+/* Are all class bits set? Must use the same mask as ANYOF_POSIXL_SETALL() so
+ * that a node filled by it tests true here */
+#define ANYOF_POSIXL_TEST_ALL_SET(p) \
+ ((ANYOF_FLAGS(p) & ANYOF_POSIXL) \
+ && ((regnode_charclass_posixl*) (p))->classflags \
+ == ANYOF_POSIXL_ALL_BITS)
+
+#define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END
+#define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest))
+
+#define ANYOF_POSIXL_AND(source, dest) STMT_START { (dest)->classflags &= (source)->classflags ; } STMT_END
+
+#define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
#define ANYOF_BITMAP(p) (((struct regnode_charclass*)(p))->bitmap)
#define ANYOF_BITMAP_BYTE(p, c) (ANYOF_BITMAP(p)[(((U8)(c)) >> 3) & 31])
#define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |= ANYOF_BIT(c))
#define ANYOF_BITMAP_CLEAR(p,c) (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c))
-#define ANYOF_BITMAP_TEST(p, c) (ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c))
+#define ANYOF_BITMAP_TEST(p, c) cBOOL(ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c))
#define ANYOF_BITMAP_SETALL(p) \
memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE)
#define ANYOF_BITMAP_CLEARALL(p) \
Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
-/* Check that all 256 bits are all set. Used in S_cl_is_anything() */
-#define ANYOF_BITMAP_TESTALLSET(p) /* Assumes sizeof(p) == 32 */ \
+#if ANYOF_BITMAP_SIZE == 32
+/* Check that all 256 bits are all set. */
+# define ANYOF_BITMAP_TESTALLSET(p) /* Assumes sizeof(p) == 32 */ \
memEQ (ANYOF_BITMAP(p), "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377", ANYOF_BITMAP_SIZE)
-
-#define ANYOF_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode))
-#define ANYOF_CLASS_SKIP ((ANYOF_CLASS_SIZE - 1)/sizeof(regnode))
-
-/* The class bit can be set to the locale one if necessary to save bits at the
- * expense of having locale ANYOF nodes always have a class bit map, and hence
- * take up extra space. This allows convenient changing it as development
- * proceeds on this */
-#if ANYOF_CLASS == ANYOF_LOCALE
-# undef ANYOF_CLASS_ADD_SKIP
-# define ANYOF_ADD_LOC_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP)
-
- /* Quicker way to see if there are actually any tests. This is because
- * currently the set of tests can be empty even when the class bitmap is
- * allocated */
-# if ANYOF_CLASSBITMAP_SIZE != 4
-# error ANYOF_CLASSBITMAP_SIZE is expected to be 4
-# endif
-# define ANYOF_CLASS_TEST_ANY_SET(p) /* assumes sizeof(p) = 4 */ \
- memNE (((struct regnode_charclass_class*)(p))->classflags, \
- "\0\0\0\0", ANYOF_CLASSBITMAP_SIZE)
#else
-# define ANYOF_CLASS_ADD_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP)
-# undef ANYOF_ADD_LOC_SKIP
-# define ANYOF_CLASS_TEST_ANY_SET(p) (ANYOF_FLAGS(p) & ANYOF_CLASS)
+# error Need to fix this if raise bitmap size. (As of this writing this macro is unused in the core)
#endif
+#define ANYOF_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode))
+#define ANYOF_POSIXL_SKIP ((ANYOF_POSIXL_SIZE - 1)/sizeof(regnode))
+#define ANYOF_CLASS_SKIP ANYOF_POSIXL_SKIP
/*
* Utility definitions.
#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode))
-#define REG_SEEN_ZERO_LEN 0x00000001
-#define REG_SEEN_LOOKBEHIND 0x00000002
-#define REG_SEEN_GPOS 0x00000004
-#define REG_SEEN_EVAL 0x00000008
-#define REG_SEEN_CANY 0x00000010
-#define REG_SEEN_SANY REG_SEEN_CANY /* src bckwrd cmpt */
-#define REG_SEEN_RECURSE 0x00000020
-#define REG_TOP_LEVEL_BRANCHES 0x00000040
-#define REG_SEEN_VERBARG 0x00000080
-#define REG_SEEN_CUTGROUP 0x00000100
-#define REG_SEEN_RUN_ON_COMMENT 0x00000200
+#define REG_ZERO_LEN_SEEN 0x00000001
+#define REG_LOOKBEHIND_SEEN 0x00000002
+#define REG_GPOS_SEEN 0x00000004
+/* spare */
+#define REG_CANY_SEEN 0x00000010
+#define REG_RECURSE_SEEN 0x00000020
+#define REG_TOP_LEVEL_BRANCHES_SEEN 0x00000040
+#define REG_VERBARG_SEEN 0x00000080
+#define REG_CUTGROUP_SEEN 0x00000100
+#define REG_RUN_ON_COMMENT_SEEN 0x00000200
+#define REG_UNFOLDED_MULTI_SEEN 0x00000400
+#define REG_GOSTART_SEEN 0x00000800
+#define REG_UNBOUNDED_QUANTIFIER_SEEN 0x00001000
+
START_EXTERN_C
Perl_reg_named_buff_iter,
Perl_reg_qr_package,
#if defined(USE_ITHREADS)
- Perl_regdupe_internal
+ Perl_regdupe_internal,
#endif
+ Perl_re_op_compile
};
#endif /* DOINIT */
#endif /* PLUGGABLE_RE_EXTENSION */
* The character describes the function of the corresponding .data item:
* a - AV for paren_name_list under DEBUGGING
* f - start-class data for regstclass optimization
- * n - Root of op tree for (?{EVAL}) item
- * o - Start op for (?{EVAL}) item
- * p - Pad for (?{EVAL}) item
+ * l - start op for literal (?{EVAL}) item
+ * L - start op for literal (?{EVAL}) item, with separate CV (qr//)
+ * r - pointer to an embedded code-containing qr, e.g. /ab$qr/
* s - swash for Unicode-style character class, and the multicharacter
* strings resulting from casefolding the single-character entries
* in the character class
#define check_offset_max substrs->data[2].max_offset
#define check_end_shift substrs->data[2].end_shift
-#define RX_ANCHORED_SUBSTR(rx) (((struct regexp *)SvANY(rx))->anchored_substr)
-#define RX_ANCHORED_UTF8(rx) (((struct regexp *)SvANY(rx))->anchored_utf8)
-#define RX_FLOAT_SUBSTR(rx) (((struct regexp *)SvANY(rx))->float_substr)
-#define RX_FLOAT_UTF8(rx) (((struct regexp *)SvANY(rx))->float_utf8)
+#define RX_ANCHORED_SUBSTR(rx) (ReANY(rx)->anchored_substr)
+#define RX_ANCHORED_UTF8(rx) (ReANY(rx)->anchored_utf8)
+#define RX_FLOAT_SUBSTR(rx) (ReANY(rx)->float_substr)
+#define RX_FLOAT_UTF8(rx) (ReANY(rx)->float_utf8)
/* trie related stuff */
if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x )
/* initialization */
-/* get_sv() can return NULL during global destruction. re_debug_flags can get
- * clobbered by a longjmp, so must be initialized */
+/* get_sv() can return NULL during global destruction. */
#define GET_RE_DEBUG_FLAGS DEBUG_r({ \
SV * re_debug_flags_sv = NULL; \
- re_debug_flags = 0; \
re_debug_flags_sv = get_sv(RE_DEBUG_FLAGS, 1); \
if (re_debug_flags_sv) { \
if (!SvIOK(re_debug_flags_sv)) \
#ifdef DEBUGGING
-#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS;
+#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; \
+ PERL_UNUSED_VAR(re_debug_flags); GET_RE_DEBUG_FLAGS;
#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
const char * const rpv = \
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
* End:
*
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
*/