This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
regcomp.sym: Add REFFU and NREFFU nodes
author: Karl Williamson <public@khwilliamson.com>
Wed, 1 Dec 2010 04:39:16 +0000 (21:39 -0700)
committer: Father Chrysostomos <sprout@cpan.org>
Thu, 2 Dec 2010 02:10:21 +0000 (18:10 -0800)
These will be used for matching capture buffers case-insensitively using
Unicode semantics.

Running "make regen" will regenerate the delivered regnodes.h.

regcomp.sym
regnodes.h

index ab57929..4e787a7 100644 (file)
@@ -194,6 +194,13 @@ NHORIZWS    NHORIZWS,   none 0 S  ; not horizontal whitespace   (Perl 6)
 FOLDCHAR    FOLDCHAR,   codepoint 1 ; codepoint with tricky case folding properties.
 EXACTFU     EXACT,      str        ; Match this string, folded, Unicode semantics for non-utf8 (prec. by length).
 
+# These could have been implemented using the FLAGS field of the regnode, but
+# by having a separate node type, we can use the existing switch statement to
+# avoid some tests
+REFFU       REF,        num 1 V   ; Match already matched string, folded using unicode semantics for non-utf8
+NREFFU       REF,        num 1 V   ; Match already matched string, folded using unicode semantics for non-utf8
+
+
 # NEW STUFF ABOVE THIS LINE  
 
 ################################################################################
index 97ac607..09ab661 100644 (file)
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX            91
-#define REGMATCH_STATE_MAX     131
+#define REGNODE_MAX            93
+#define REGMATCH_STATE_MAX     133
 
 #define        END                     0       /* 0000 End of program. */
 #define        SUCCEED                 1       /* 0x01 Return from a subroutine, basically. */
@@ -70,7 +70,7 @@
 #define        MINMOD                  58      /* 0x3a Next operator is not greedy. */
 #define        LOGICAL                 59      /* 0x3b Next opcode should set the flag only. */
 #define        RENUM                   60      /* 0x3c Group with independently numbered parens. */
-#define        TRIE                    61      /* 0x3d Match many EXACT(FL?)? at once. flags==type */
+#define        TRIE                    61      /* 0x3d Match many EXACT(F[LU]?)? at once. flags==type */
 #define        TRIEC                   62      /* 0x3e Same as TRIE, but with embedded charclass data */
 #define        AHOCORASICK             63      /* 0x3f Aho Corasick stclass. flags==type */
 #define        AHOCORASICKC            64      /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
 #define        NHORIZWS                87      /* 0x57 not horizontal whitespace   (Perl 6) */
 #define        FOLDCHAR                88      /* 0x58 codepoint with tricky case folding properties. */
 #define        EXACTFU                 89      /* 0x59 Match this string, folded, Unicode semantics for non-utf8 (prec. by length). */
-#define        OPTIMIZED               90      /* 0x5a Placeholder for dump. */
-#define        PSEUDO                  91      /* 0x5b Pseudo opcode for internal use. */
+#define        REFFU                   90      /* 0x5a Match already matched string, folded using unicode semantics for non-utf8 */
+#define        NREFFU                  91      /* 0x5b Match already matched string, folded using unicode semantics for non-utf8 */
+#define        OPTIMIZED               92      /* 0x5c Placeholder for dump. */
+#define        PSEUDO                  93      /* 0x5d Pseudo opcode for internal use. */
        /* ------------ States ------------- */
 #define        TRIE_next               (REGNODE_MAX + 1)       /* state for TRIE */
 #define        TRIE_next_fail          (REGNODE_MAX + 2)       /* state for TRIE */
@@ -239,6 +241,8 @@ EXTCONST U8 PL_regkind[] = {
        NHORIZWS,       /* NHORIZWS               */
        FOLDCHAR,       /* FOLDCHAR               */
        EXACT,          /* EXACTFU                */
+       REF,            /* REFFU                  */
+       REF,            /* NREFFU                 */
        NOTHING,        /* OPTIMIZED              */
        PSEUDO,         /* PSEUDO                 */
        /* ------------ States ------------- */
@@ -379,6 +383,8 @@ static const U8 regarglen[] = {
        0,                                      /* NHORIZWS     */
        EXTRA_SIZE(struct regnode_1),           /* FOLDCHAR     */
        0,                                      /* EXACTFU      */
+       EXTRA_SIZE(struct regnode_1),           /* REFFU        */
+       EXTRA_SIZE(struct regnode_1),           /* NREFFU       */
        0,                                      /* OPTIMIZED    */
        0,                                      /* PSEUDO       */
 };
@@ -476,6 +482,8 @@ static const char reg_off_by_arg[] = {
        0,      /* NHORIZWS     */
        0,      /* FOLDCHAR     */
        0,      /* EXACTFU      */
+       0,      /* REFFU        */
+       0,      /* NREFFU       */
        0,      /* OPTIMIZED    */
        0,      /* PSEUDO       */
 };
@@ -578,8 +586,10 @@ EXTCONST char * const PL_reg_name[] = {
        "NHORIZWS",                     /* 0x57 */
        "FOLDCHAR",                     /* 0x58 */
        "EXACTFU",                      /* 0x59 */
-       "OPTIMIZED",                    /* 0x5a */
-       "PSEUDO",                       /* 0x5b */
+       "REFFU",                        /* 0x5a */
+       "NREFFU",                       /* 0x5b */
+       "OPTIMIZED",                    /* 0x5c */
+       "PSEUDO",                       /* 0x5d */
        /* ------------ States ------------- */
        "TRIE_next",                    /* REGNODE_MAX +0x01 */
        "TRIE_next_fail",               /* REGNODE_MAX +0x02 */
@@ -674,7 +684,8 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__;
 #else
 EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
     CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM,
-    REF, REFF, REFFL, SUSPEND, IFTHEN, BRANCHJ, NREF, NREFF, NREFFL,
+    REF, REFF, REFFL, SUSPEND, IFTHEN, BRANCHJ, NREF, NREFF, NREFFL, REFFU,
+    NREFFU,
     0
 };
 #endif /* DOINIT */
@@ -683,7 +694,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
 EXTCONST U8 PL_varies_bitmask[];
 #else
 EXTCONST U8 PL_varies_bitmask[] = {
-    0x00, 0x00, 0x00, 0xC0, 0xC1, 0x9F, 0x33, 0x01, 0x38, 0x00, 0x00, 0x00
+    0x00, 0x00, 0x00, 0xC0, 0xC1, 0x9F, 0x33, 0x01, 0x38, 0x00, 0x00, 0x0C
 };
 #endif /* DOINIT */