*/
#include "regcharclass.h"
-typedef OP OP_4tree; /* Will be redefined later. */
-
-
/* Convert branch sequences to more efficient trie ops? */
#define PERL_ENABLE_TRIE_OPTIMISATION 1
-/* Be really agressive about optimising patterns with trie sequences? */
+/* Be really aggressive about optimising patterns with trie sequences? */
#define PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION 1
-/* Use old style unicode mappings for perl and posix character classes
- *
- * NOTE: Enabling this essentially breaks character class matching against unicode
- * strings, so that POSIX char classes match when they shouldn't, and \d matches
- * way more than 10 characters, and sometimes a charclass and its complement either
- * both match or neither match.
- * NOTE: Disabling this will cause various backwards compatibility issues to rear
- * their head, and tests to fail. However it will make the charclass behaviour
- * consistant regardless of internal string type, and make character class inversions
- * consistant. The tests that fail in the regex engine are basically broken tests.
- *
- * Personally I think 5.12 should disable this for sure. Its a bit more debatable for
- * 5.10, so for now im leaving it enabled.
- * XXX: It is now enabled for 5.11/5.12
- *
- * -demerphq
- */
-#define PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS 1
-
/* Should the optimiser take positive assertions into account? */
#define PERL_ENABLE_POSITIVE_ASSERTION_STUDY 0
#define RE_TRACK_PATTERN_OFFSETS
#endif
-/* Unless the next line is uncommented it is illegal to combine lazy
- matching with possessive matching. Frankly it doesn't make much sense
- to allow it as X*?+ matches nothing, X+?+ matches a single char only,
- and X{min,max}?+ matches min times only.
- */
-/* #define REG_ALLOW_MINMOD_SUSPEND */
-
/*
* The "internal use only" fields in regexp.h are present to pass info from
* compile to execute that permits the execute phase to run lots faster on
Used to make it easier to clone and free arbitrary
data that the regops need. Often the ARG field of
a regop is an index into this structure */
+ struct reg_code_block *code_blocks;/* positions of literal (?{}) */
+ int num_code_blocks; /* size of code_blocks[] */
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp_internal;
*
* See regexp.h for flags used externally to the regexp engine
*/
+#define RXp_INTFLAGS(rx) ((rx)->intflags)
+#define RX_INTFLAGS(prog) RXp_INTFLAGS(ReANY(prog))
+
#define PREGf_SKIP 0x00000001
#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */
#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? */
#define PREGf_VERBARG_SEEN 0x00000008
#define PREGf_CUTGROUP_SEEN 0x00000010
+#define PREGf_USE_RE_EVAL 0x00000020 /* compiled with "use re 'eval'" */
+/* these used to be extflags, but are now intflags */
+#define PREGf_NOSCAN 0x00000040
+#define PREGf_CANY_SEEN 0x00000080
+#define PREGf_GPOS_SEEN 0x00000100
+#define PREGf_GPOS_FLOAT 0x00000200
+
+#define PREGf_ANCH_BOL 0x00000400
+#define PREGf_ANCH_MBOL 0x00000800
+#define PREGf_ANCH_SBOL 0x00001000
+#define PREGf_ANCH_GPOS 0x00002000
+#define PREGf_ANCH (PREGf_ANCH_SBOL | PREGf_ANCH_GPOS | \
+ PREGf_ANCH_MBOL | PREGf_ANCH_BOL )
/* this is where the old regcomp.h started */
#define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */
-#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */
/* also used by trie */
struct regnode_charclass {
U8 flags;
U8 type;
U16 next_off;
- U32 arg1; /* used as ptr in S_regclass */
+ U32 arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */
};
/* has runtime (locale) \d, \w, ..., [:posix:] classes */
struct regnode_charclass_class {
- U8 flags; /* ANYOF_CLASS bit must go here */
+ U8 flags; /* ANYOF_POSIXL bit must go here */
U8 type;
U16 next_off;
- U32 arg1; /* used as ptr in S_regclass */
+ U32 arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */
- char classflags[ANYOF_CLASSBITMAP_SIZE]; /* and run-time */
+ U32 classflags; /* and run-time */
+};
+
+/* like above, but also has folds that are used only if the runtime locale is
+ * UTF-8. The leading fields (flags through classflags) are declared in the
+ * same order as in the struct above; only utf8_locale_list is new. */
+struct regnode_charclass_posixl_fold {
+ U8 flags; /* ANYOF_POSIXL bit must go here */
+ U8 type; /* the field that OP() reads */
+ U16 next_off; /* offset to the next regnode, in units of regnodes */
+ U32 arg1; /* NOTE(review): presumably what ARG() reads, i.e. ANYOF_NONBITMAP_EMPTY when nothing lies outside the bitmap -- confirm */
+ char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */
+ U32 classflags; /* and run-time */
+ SV* utf8_locale_list; /* list of code points matched by folds
+ in a UTF-8 locale */
+};
+
+/* A synthetic start class; is a regnode_charclass_posixl_fold, plus an extra
+ * SV*, used only during its construction and which is not used by regexec.c.
+ * Note that the 'next_off' field is unused, as the SSC stands alone, so there
+ * is never a next node. The fields up to and including utf8_locale_list are
+ * declared in the same order as in regnode_charclass_posixl_fold. */
+struct regnode_ssc {
+ U8 flags; /* ANYOF_POSIXL bit must go here */
+ U8 type; /* the field that OP() reads */
+ U16 next_off; /* not a real offset here; set_ANYOF_SYNTHETIC() stores 1 in it as a marker */
+ U32 arg1; /* NOTE(review): presumably what ARG() reads -- confirm */
+ char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */
+ U32 classflags; /* and run-time */
+ SV* utf8_locale_list; /* list of code points matched by folds
+ in a UTF-8 locale */
+ SV* invlist; /* list of code points matched */
};
+/* We take advantage of 'next_off' not otherwise being used in the SSC by
+ * actually using it: by setting it to 1. This allows us to test and
+ * distinguish between an SSC and other ANYOF node types, as 'next_off' cannot
+ * otherwise be 1, because it is the offset to the next regnode expressed in
+ * units of regnodes. Since an ANYOF node contains extra fields, it adds up
+ * to 12 regnode units on 32-bit systems (hence the minimum this can be, if
+ * not 0, is 11 there). Even if things get tightly packed on a 64-bit system,
+ * it still would be more than 1. */
+#define set_ANYOF_SYNTHETIC(n) STMT_START{ OP(n) = ANYOF; \
+ NEXT_OFF(n) = 1; \
+ } STMT_END
+#define is_ANYOF_SYNTHETIC(n) (OP(n) == ANYOF && NEXT_OFF(n) == 1)
+
/* XXX fix this description.
Impose a limit of REG_INFTY on various pattern matching operations
to limit stack growth and to avoid "infinite" recursions.
#undef STRING
#define OP(p) ((p)->type)
-#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
- regnode types */
+#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
+ regnode types. For some, it's the \
+ character set of the regnode */
#define OPERAND(p) (((struct regnode_string *)p)->string)
#define MASK(p) ((char*)OPERAND(p))
#define STR_LEN(p) (((struct regnode_string *)p)->str_len)
#define REG_MAGIC 0234
-#define SIZE_ONLY (RExC_emit == &PL_regdummy)
-
-/* Flags for node->flags of several of the node types */
-#define USE_UNI 0x01
+#define SIZE_ONLY (RExC_emit == (regnode *) & RExC_emit_dummy)
+#define PASS1 SIZE_ONLY
+#define PASS2 (! SIZE_ONLY)
+
+/* If the bitmap fully represents what this ANYOF node can match, the ARG is
+ * set to this special value (since 0, 1, ... are legal, but will never reach
+ * this high). */
+#define ANYOF_NONBITMAP_EMPTY ((U32) -1)
+
+/* The information used to be stored as a combination of the ANYOF_UTF8 and
+ * ANYOF_NONBITMAP_NON_UTF8 bits in the flags field, but was moved out of there
+ * to free up a bit for other uses. This tries to hide the change from
+ * existing code as much as possible. Now, the data structure that goes in ARG
+ * is not allocated unless it is needed, and that is what is used to determine
+ * if there is something outside the bitmap. The code now assumes that if
+ * that structure exists, that any UTF-8 encoded string should be tried against
+ * it, but a non-UTF8-encoded string will be tried only if the
+ * ANYOF_NONBITMAP_NON_UTF8 bit is also set. */
+#define ANYOF_NONBITMAP(node) (ARG(node) != ANYOF_NONBITMAP_EMPTY)
+
+/* Flags for node->flags of ANYOF. These are in short supply, with none
+ * currently available. If more are needed, the ANYOF_LOCALE and
+ * ANYOF_POSIXL bits could be shared, making a space penalty for all locale
+ * nodes. Also, the ABOVE_LATIN1_ALL bit could be freed up by resorting to
+ * creating a swash containing everything above 255. This introduces a
+ * performance penalty. Better would be to split it off into a separate node,
+ * which actually would improve performance a bit by allowing regexec.c to test
+ * for a UTF-8 character being above 255 without having to call a function nor
+ * calculate its code point value. Several flags are not used in synthetic
+ * start class (SSC) nodes, so could be shared should new flags be needed for
+ * SSCs. */
+
+/* regexec.c is expecting this to be in the low bit */
+#define ANYOF_INVERT 0x01
+
+/* For the SSC node only, which cannot be inverted, so is shared with that bit.
+ * This means "Does this SSC match an empty string?" This is used only during
+ * regex compilation. */
+#define ANYOF_EMPTY_STRING ANYOF_INVERT
+
+#define ANYOF_LOCALE 0x02 /* /l modifier */
+
+/* The fold is calculated and stored in the bitmap where possible at compile
+ * time. However under locale, the actual folding varies depending on
+ * what the locale is at the time of execution, so it has to be deferred until
+ * then */
+#define ANYOF_LOC_FOLD 0x04
+
+/* Set if this is a regnode_charclass_posixl vs a regnode_charclass. This
+ * is used for runtime \d, \w, [:posix:], ..., which are used only in locale
+ * and the optimizer's synthetic start class. Non-locale \d, etc are resolved
+ * at compile-time. Could be shared with ANYOF_LOCALE, forcing all locale
+ * nodes to be large */
+#define ANYOF_POSIXL 0x08
+#define ANYOF_CLASS ANYOF_POSIXL
+#define ANYOF_LARGE ANYOF_POSIXL
+
+/* Should we raise a warning if matching against an above-Unicode code point?
+ * */
+#define ANYOF_WARN_SUPER 0x10
+
+/* Can match something outside the bitmap that isn't in utf8 */
+#define ANYOF_NONBITMAP_NON_UTF8 0x20
+
+/* Matches every code point 0x100 and above */
+#define ANYOF_ABOVE_LATIN1_ALL 0x40
+#define ANYOF_UNICODE_ALL ANYOF_ABOVE_LATIN1_ALL
+
+/* Match all Latin1 characters that aren't ASCII when the target string is not
+ * in utf8. */
+#define ANYOF_NON_UTF8_NON_ASCII_ALL 0x80
+
+#define ANYOF_FLAGS_ALL (0xff)
+
+#define ANYOF_LOCALE_FLAGS (ANYOF_LOCALE \
+ |ANYOF_LOC_FOLD \
+ |ANYOF_POSIXL)
+
+/* These are the flags that apply to both regular ANYOF nodes and synthetic
+ * start class nodes during construction of the SSC. During finalization of
+ * the SSC, other of the flags could be added to it */
+#define ANYOF_COMMON_FLAGS (ANYOF_LOCALE_FLAGS | ANYOF_WARN_SUPER)
-/* Flags for node->flags of ANYOF */
-
-#define ANYOF_LOCALE 0x01
-#define ANYOF_FOLD 0x02
-#define ANYOF_INVERT 0x04
-
-/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... */
-/* For now, set it always when LOCALE is set, to save a bit for other uses. */
-#define ANYOF_CLASS ANYOF_LOCALE
-#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
+/* Character classes for node->classflags of ANYOF */
+/* Should be synchronized with a table in regprop() */
+/* 2n should be the normal one, paired with its complement at 2n+1 */
+
+#define ANYOF_ALPHA ((_CC_ALPHA) * 2)
+#define ANYOF_NALPHA ((ANYOF_ALPHA) + 1)
+#define ANYOF_ALPHANUMERIC ((_CC_ALPHANUMERIC) * 2) /* [[:alnum:]] isalnum(3), utf8::IsAlnum */
+#define ANYOF_NALPHANUMERIC ((ANYOF_ALPHANUMERIC) + 1)
+#define ANYOF_ASCII ((_CC_ASCII) * 2)
+#define ANYOF_NASCII ((ANYOF_ASCII) + 1)
+#define ANYOF_BLANK ((_CC_BLANK) * 2) /* GNU extension: space and tab: non-vertical space */
+#define ANYOF_NBLANK ((ANYOF_BLANK) + 1)
+#define ANYOF_CASED ((_CC_CASED) * 2) /* Pseudo class for [:lower:] or
+ [:upper:] under /i */
+#define ANYOF_NCASED ((ANYOF_CASED) + 1)
+#define ANYOF_CNTRL ((_CC_CNTRL) * 2)
+#define ANYOF_NCNTRL ((ANYOF_CNTRL) + 1)
+#define ANYOF_DIGIT ((_CC_DIGIT) * 2) /* \d */
+#define ANYOF_NDIGIT ((ANYOF_DIGIT) + 1)
+#define ANYOF_GRAPH ((_CC_GRAPH) * 2)
+#define ANYOF_NGRAPH ((ANYOF_GRAPH) + 1)
+#define ANYOF_LOWER ((_CC_LOWER) * 2)
+#define ANYOF_NLOWER ((ANYOF_LOWER) + 1)
+#define ANYOF_PRINT ((_CC_PRINT) * 2)
+#define ANYOF_NPRINT ((ANYOF_PRINT) + 1)
+#define ANYOF_PSXSPC ((_CC_PSXSPC) * 2) /* POSIX space: \s plus the vertical tab */
+#define ANYOF_NPSXSPC ((ANYOF_PSXSPC) + 1)
+#define ANYOF_PUNCT ((_CC_PUNCT) * 2)
+#define ANYOF_NPUNCT ((ANYOF_PUNCT) + 1)
+#define ANYOF_SPACE ((_CC_SPACE) * 2) /* \s */
+#define ANYOF_NSPACE ((ANYOF_SPACE) + 1)
+#define ANYOF_UPPER ((_CC_UPPER) * 2)
+#define ANYOF_NUPPER ((ANYOF_UPPER) + 1)
+#define ANYOF_WORDCHAR ((_CC_WORDCHAR) * 2) /* \w, PL_utf8_alnum, utf8::IsWord, ALNUM */
+#define ANYOF_NWORDCHAR ((ANYOF_WORDCHAR) + 1)
+#define ANYOF_XDIGIT ((_CC_XDIGIT) * 2)
+#define ANYOF_NXDIGIT ((ANYOF_XDIGIT) + 1)
+
+/* pseudo classes below this, not stored in the class bitmap, but used as flags
+ during compilation of char classes */
-/* EOS used for regstclass only */
-#define ANYOF_EOS 0x10 /* Can match an empty string too */
+#define ANYOF_VERTWS ((_CC_VERTSPACE) * 2)
+#define ANYOF_NVERTWS ((ANYOF_VERTWS)+1)
-/* Set if the bitmap doesn't fully represent what this node can match */
-#define ANYOF_NONBITMAP 0x20
-#define ANYOF_UNICODE ANYOF_NONBITMAP /* old name, for back compat */
+/* It is best if this is the last one, as all above it are stored as bits in a
+ * bitmap, and it isn't part of that bitmap */
+#if _CC_VERTSPACE != _HIGHEST_REGCOMP_DOT_H_SYNC
+# error Problem with handy.h _HIGHEST_REGCOMP_DOT_H_SYNC #define
+#endif
-#define ANYOF_UNICODE_ALL 0x40 /* Matches 0x100 - infinity */
+#define ANYOF_POSIXL_MAX (ANYOF_VERTWS) /* So upper loop limit is written:
+ * '< ANYOF_MAX'
+ * Hence doesn't include VERTWS, as that
+ * is a pseudo class */
+#define ANYOF_MAX ANYOF_POSIXL_MAX
-#define ANYOF_FLAGS_ALL 0xff
+#if (ANYOF_POSIXL_MAX > 32) /* Must fit in 32-bit word */
+# error Problem with handy.h _CC_foo #defines
+#endif
-/* Character classes for node->classflags of ANYOF */
-/* Should be synchronized with a table in regprop() */
-/* 2n should pair with 2n+1 */
-
-#define ANYOF_ALNUM 0 /* \w, PL_utf8_alnum, utf8::IsWord, ALNUM */
-#define ANYOF_NALNUM 1
-#define ANYOF_SPACE 2 /* \s */
-#define ANYOF_NSPACE 3
-#define ANYOF_DIGIT 4 /* \d */
-#define ANYOF_NDIGIT 5
-#define ANYOF_ALNUMC 6 /* [[:alnum:]] isalnum(3), utf8::IsAlnum, ALNUMC */
-#define ANYOF_NALNUMC 7
-#define ANYOF_ALPHA 8
-#define ANYOF_NALPHA 9
-#define ANYOF_ASCII 10
-#define ANYOF_NASCII 11
-#define ANYOF_CNTRL 12
-#define ANYOF_NCNTRL 13
-#define ANYOF_GRAPH 14
-#define ANYOF_NGRAPH 15
-#define ANYOF_LOWER 16
-#define ANYOF_NLOWER 17
-#define ANYOF_PRINT 18
-#define ANYOF_NPRINT 19
-#define ANYOF_PUNCT 20
-#define ANYOF_NPUNCT 21
-#define ANYOF_UPPER 22
-#define ANYOF_NUPPER 23
-#define ANYOF_XDIGIT 24
-#define ANYOF_NXDIGIT 25
-#define ANYOF_PSXSPC 26 /* POSIX space: \s plus the vertical tab */
-#define ANYOF_NPSXSPC 27
-#define ANYOF_BLANK 28 /* GNU extension: space and tab: non-vertical space */
-#define ANYOF_NBLANK 29
-
-#define ANYOF_MAX 32
-
-/* pseudo classes, not stored in the class bitmap, but used as flags
- during compilation of char classes */
+#define ANYOF_HORIZWS ((ANYOF_POSIXL_MAX)+2) /* = (ANYOF_NVERTWS + 1) */
+#define ANYOF_NHORIZWS ((ANYOF_POSIXL_MAX)+3)
-#define ANYOF_VERTWS (ANYOF_MAX+1)
-#define ANYOF_NVERTWS (ANYOF_MAX+2)
-#define ANYOF_HORIZWS (ANYOF_MAX+3)
-#define ANYOF_NHORIZWS (ANYOF_MAX+4)
+#define ANYOF_UNIPROP ((ANYOF_POSIXL_MAX)+4) /* Used to indicate a Unicode
+ property: \p{} or \P{} */
/* Backward source code compatibility. */
#define ANYOF_NALNUML ANYOF_NALNUM
#define ANYOF_SPACEL ANYOF_SPACE
#define ANYOF_NSPACEL ANYOF_NSPACE
+#define ANYOF_ALNUM ANYOF_WORDCHAR
+#define ANYOF_NALNUM ANYOF_NWORDCHAR
/* Utility macros for the bitmap and classes of ANYOF */
#define ANYOF_SIZE (sizeof(struct regnode_charclass))
-#define ANYOF_CLASS_SIZE (sizeof(struct regnode_charclass_class))
+#define ANYOF_POSIXL_SIZE (sizeof(regnode_charclass_posixl))
+#define ANYOF_CLASS_SIZE ANYOF_POSIXL_SIZE
+#define ANYOF_POSIXL_FOLD_SIZE (sizeof(regnode_charclass_posixl_fold))
#define ANYOF_FLAGS(p) ((p)->flags)
#define ANYOF_BIT(c) (1 << ((c) & 7))
-#define ANYOF_CLASS_BYTE(p, c) (((struct regnode_charclass_class*)(p))->classflags[((c) >> 3) & 3])
-#define ANYOF_CLASS_SET(p, c) (ANYOF_CLASS_BYTE(p, c) |= ANYOF_BIT(c))
-#define ANYOF_CLASS_CLEAR(p, c) (ANYOF_CLASS_BYTE(p, c) &= ~ANYOF_BIT(c))
-#define ANYOF_CLASS_TEST(p, c) (ANYOF_CLASS_BYTE(p, c) & ANYOF_BIT(c))
+#define ANYOF_POSIXL_SET(p, c) (((regnode_charclass_posixl*) (p))->classflags |= (1U << (c)))
+#define ANYOF_CLASS_SET(p, c) ANYOF_POSIXL_SET((p), (c))
-/* Quicker way to see if there are actually any tests. This is because
- * currently the set of tests can be empty even when the class bitmap is
- * allocated */
-#define ANYOF_CLASS_TEST_ANY_SET(p) /* assumes sizeof(p) = 4 */ \
- memNE (((struct regnode_charclass_class*)(p))->classflags, "0000", ANYOF_CLASS_SIZE)
+#define ANYOF_POSIXL_CLEAR(p, c) (((regnode_charclass_posixl*) (p))->classflags &= ~ (1U <<(c)))
+#define ANYOF_CLASS_CLEAR(p, c) ANYOF_POSIXL_CLEAR((p), (c))
-#define ANYOF_CLASS_ZERO(ret) Zero(((struct regnode_charclass_class*)(ret))->classflags, ANYOF_CLASSBITMAP_SIZE, char)
-#define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
+#define ANYOF_POSIXL_TEST(p, c) (((regnode_charclass_posixl*) (p))->classflags & (1U << (c)))
+#define ANYOF_CLASS_TEST(p, c) ANYOF_POSIXL_TEST((p), (c))
+
+#define ANYOF_POSIXL_ZERO(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = 0; } STMT_END
+#define ANYOF_CLASS_ZERO(ret) ANYOF_POSIXL_ZERO(ret)
+
+/* Shifts a bit to get, eg. 0x4000_0000, then subtracts 1 to get 0x3FFF_FFFF */
+#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1; } STMT_END
+#define ANYOF_CLASS_SETALL(ret) ANYOF_POSIXL_SETALL(ret)
+
+#define ANYOF_POSIXL_TEST_ANY_SET(p) \
+ ((ANYOF_FLAGS(p) & ANYOF_POSIXL) \
+ && (((regnode_charclass_posixl*)(p))->classflags))
+#define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p)
+#define ANYOF_POSIXL_TEST_ALL_SET(p) \
+ ((ANYOF_FLAGS(p) & ANYOF_POSIXL) \
+ && ((regnode_charclass_posixl*) (p))->classflags == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
+
+#define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END
+#define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest))
+
+#define ANYOF_POSIXL_AND(source, dest) STMT_START { (dest)->classflags &= (source)->classflags ; } STMT_END
+
+#define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
#define ANYOF_BITMAP(p) (((struct regnode_charclass*)(p))->bitmap)
#define ANYOF_BITMAP_BYTE(p, c) (ANYOF_BITMAP(p)[(((U8)(c)) >> 3) & 31])
#define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |= ANYOF_BIT(c))
memEQ (ANYOF_BITMAP(p), "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377", ANYOF_BITMAP_SIZE)
#define ANYOF_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode))
-#define ANYOF_CLASS_SKIP ((ANYOF_CLASS_SIZE - 1)/sizeof(regnode))
-#define ANYOF_CLASS_ADD_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP)
+#define ANYOF_POSIXL_SKIP ((ANYOF_POSIXL_SIZE - 1)/sizeof(regnode))
+#define ANYOF_POSIXL_FOLD_SKIP ((ANYOF_POSIXL_FOLD_SIZE - 1)/sizeof(regnode))
+#define ANYOF_CLASS_SKIP ANYOF_POSIXL_SKIP
+#define ANYOF_UTF8_LOCALE_INVLIST(node) (((regnode_charclass_posixl_fold*) (node))->utf8_locale_list)
/*
* Utility definitions.
#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode))
-#define REG_SEEN_ZERO_LEN 0x00000001
-#define REG_SEEN_LOOKBEHIND 0x00000002
-#define REG_SEEN_GPOS 0x00000004
-#define REG_SEEN_EVAL 0x00000008
-#define REG_SEEN_CANY 0x00000010
-#define REG_SEEN_SANY REG_SEEN_CANY /* src bckwrd cmpt */
-#define REG_SEEN_RECURSE 0x00000020
-#define REG_TOP_LEVEL_BRANCHES 0x00000040
-#define REG_SEEN_VERBARG 0x00000080
-#define REG_SEEN_CUTGROUP 0x00000100
-#define REG_SEEN_RUN_ON_COMMENT 0x00000200
+#define REG_ZERO_LEN_SEEN 0x00000001
+#define REG_LOOKBEHIND_SEEN 0x00000002
+#define REG_GPOS_SEEN 0x00000004
+/* spare */
+#define REG_CANY_SEEN 0x00000010
+#define REG_RECURSE_SEEN 0x00000020
+#define REG_TOP_LEVEL_BRANCHES_SEEN 0x00000040
+#define REG_VERBARG_SEEN 0x00000080
+#define REG_CUTGROUP_SEEN 0x00000100
+#define REG_RUN_ON_COMMENT_SEEN 0x00000200
+#define REG_UNFOLDED_MULTI_SEEN 0x00000400
+#define REG_GOSTART_SEEN 0x00000800
+#define REG_UNBOUNDED_QUANTIFIER_SEEN 0x00001000
+
START_EXTERN_C
Perl_reg_named_buff_iter,
Perl_reg_qr_package,
#if defined(USE_ITHREADS)
- Perl_regdupe_internal
+ Perl_regdupe_internal,
#endif
+ Perl_re_op_compile
};
#endif /* DOINIT */
#endif /* PLUGGABLE_RE_EXTENSION */
* The character describes the function of the corresponding .data item:
* a - AV for paren_name_list under DEBUGGING
* f - start-class data for regstclass optimization
- * n - Root of op tree for (?{EVAL}) item
- * o - Start op for (?{EVAL}) item
- * p - Pad for (?{EVAL}) item
+ * l - start op for literal (?{EVAL}) item
+ * L - start op for literal (?{EVAL}) item, with separate CV (qr//)
+ * r - pointer to an embedded code-containing qr, e.g. /ab$qr/
* s - swash for Unicode-style character class, and the multicharacter
* strings resulting from casefolding the single-character entries
* in the character class
#define check_offset_max substrs->data[2].max_offset
#define check_end_shift substrs->data[2].end_shift
-#define RX_ANCHORED_SUBSTR(rx) (((struct regexp *)SvANY(rx))->anchored_substr)
-#define RX_ANCHORED_UTF8(rx) (((struct regexp *)SvANY(rx))->anchored_utf8)
-#define RX_FLOAT_SUBSTR(rx) (((struct regexp *)SvANY(rx))->float_substr)
-#define RX_FLOAT_UTF8(rx) (((struct regexp *)SvANY(rx))->float_utf8)
+#define RX_ANCHORED_SUBSTR(rx) (ReANY(rx)->anchored_substr)
+#define RX_ANCHORED_UTF8(rx) (ReANY(rx)->anchored_utf8)
+#define RX_FLOAT_SUBSTR(rx) (ReANY(rx)->float_substr)
+#define RX_FLOAT_UTF8(rx) (ReANY(rx)->float_utf8)
/* trie related stuff */
};
typedef struct _reg_ac_data reg_ac_data;
-/* ANY_BIT doesnt use the structure, so we can borrow it here.
+/* ANY_BIT doesn't use the structure, so we can borrow it here.
This is simpler than refactoring all of it as wed end up with
three different sets... */
if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x )
/* initialization */
-/* get_sv() can return NULL during global destruction. re_debug_flags can get
- * clobbered by a longjmp, so must be initialized */
+/* get_sv() can return NULL during global destruction. */
#define GET_RE_DEBUG_FLAGS DEBUG_r({ \
SV * re_debug_flags_sv = NULL; \
- re_debug_flags = 0; \
re_debug_flags_sv = get_sv(RE_DEBUG_FLAGS, 1); \
if (re_debug_flags_sv) { \
if (!SvIOK(re_debug_flags_sv)) \
#ifdef DEBUGGING
-#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS;
+#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; \
+ PERL_UNUSED_VAR(re_debug_flags); GET_RE_DEBUG_FLAGS;
#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
const char * const rpv = \
pv_pretty((dsv), (pv), (l), (m), \
PL_colors[(c1)],PL_colors[(c2)], \
- PERL_PV_ESCAPE_RE |((isuni) ? PERL_PV_ESCAPE_UNI : 0) ); \
+ PERL_PV_ESCAPE_RE|PERL_PV_ESCAPE_NONASCII |((isuni) ? PERL_PV_ESCAPE_UNI : 0) ); \
const int rlen = SvCUR(dsv)
#define RE_SV_ESCAPE(rpv,isuni,dsv,sv,m) \
const char * const rpv = \
pv_pretty((dsv), (SvPV_nolen_const(sv)), (SvCUR(sv)), (m), \
PL_colors[(c1)],PL_colors[(c2)], \
- PERL_PV_ESCAPE_RE |((isuni) ? PERL_PV_ESCAPE_UNI : 0) )
+ PERL_PV_ESCAPE_RE|PERL_PV_ESCAPE_NONASCII |((isuni) ? PERL_PV_ESCAPE_UNI : 0) )
#define RE_PV_QUOTED_DECL(rpv,isuni,dsv,pv,l,m) \
const char * const rpv = \
pv_pretty((dsv), (pv), (l), (m), \
PL_colors[0], PL_colors[1], \
- ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELLIPSES | \
+ ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_ESCAPE_NONASCII | PERL_PV_PRETTY_ELLIPSES | \
((isuni) ? PERL_PV_ESCAPE_UNI : 0)) \
)
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
* End:
*
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
*/