regcomp.c: Refactor \b{} parsing code
author Karl Williamson <khw@cpan.org>
Thu, 27 Dec 2018 20:12:29 +0000 (13:12 -0700)
committer Karl Williamson <khw@cpan.org>
Fri, 28 Dec 2018 03:22:15 +0000 (20:22 -0700)
This just moves things around so that the information is kept in local
variables and the regnode not created until all that info has been
completely determined.  I believe it is clearer to read, but the impetus
came from the fact that prior to this commit, use of \b{} always
restarted the parse unnecessarily because the order of things made it
appear that a real /d op had appeared, whereas it was just the one
currently being constructed.

regcomp.c

index 034033b..46b6913 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -13311,25 +13311,17 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             /* FALLTHROUGH */
        case 'b':
           {
+            U8 flags = 0;
            regex_charset charset = get_regex_charset(RExC_flags);
 
            RExC_seen_zerolen++;
             RExC_seen |= REG_LOOKBEHIND_SEEN;
            op = BOUND + charset;
 
-            if (op == BOUND) {
-                RExC_seen_d_op = TRUE;
-            }
-            else if (op == BOUNDL) {
-                RExC_contains_locale = 1;
-            }
-
-           ret = reg_node(pRExC_state, op);
-           *flagp |= SIMPLE;
            if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
-                FLAGS(REGNODE_p(ret)) = TRADITIONAL_BOUND;
+                flags = TRADITIONAL_BOUND;
                 if (op > BOUNDA) {  /* /aa is same as /a */
-                    OP(REGNODE_p(ret)) = BOUNDA;
+                    op = BOUNDA;
                 }
             }
             else {
@@ -13365,25 +13357,25 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         {
                             goto bad_bound_type;
                         }
-                        FLAGS(REGNODE_p(ret)) = GCB_BOUND;
+                        flags = GCB_BOUND;
                         break;
                     case 'l':
                         if (length != 2 || *(RExC_parse + 1) != 'b') {
                             goto bad_bound_type;
                         }
-                        FLAGS(REGNODE_p(ret)) = LB_BOUND;
+                        flags = LB_BOUND;
                         break;
                     case 's':
                         if (length != 2 || *(RExC_parse + 1) != 'b') {
                             goto bad_bound_type;
                         }
-                        FLAGS(REGNODE_p(ret)) = SB_BOUND;
+                        flags = SB_BOUND;
                         break;
                     case 'w':
                         if (length != 2 || *(RExC_parse + 1) != 'b') {
                             goto bad_bound_type;
                         }
-                        FLAGS(REGNODE_p(ret)) = WB_BOUND;
+                        flags = WB_BOUND;
                         break;
                     default:
                       bad_bound_type:
@@ -13396,8 +13388,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 RExC_parse = endbrace;
                 REQUIRE_UNI_RULES(flagp, 0);
 
-                if (op >= BOUNDA) {  /* /aa is same as /a */
-                    OP(REGNODE_p(ret)) = BOUNDU;
+                if (op == BOUND) {
+                    op = BOUNDU;
+                }
+                else if (op >= BOUNDA) {  /* /aa is same as /a */
+                    op = BOUNDU;
                     length += 4;
 
                     /* Don't have to worry about UTF-8, in this message because
@@ -13412,9 +13407,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 }
            }
 
+            if (op == BOUND) {
+                RExC_seen_d_op = TRUE;
+            }
+            else if (op == BOUNDL) {
+                RExC_contains_locale = 1;
+            }
+
             if (invert) {
-                OP(REGNODE_p(ret)) += NBOUND - BOUND;
+                op += NBOUND - BOUND;
             }
+
+           ret = reg_node(pRExC_state, op);
+            FLAGS(REGNODE_p(ret)) = flags;
+
+           *flagp |= SIMPLE;
+
            goto finish_meta_pat;
           }