This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Refactor so that duplicate code can be removed
author Karl Williamson <public@khwilliamson.com>
Fri, 25 May 2012 04:14:04 +0000 (22:14 -0600)
committer Karl Williamson <public@khwilliamson.com>
Thu, 2 Aug 2012 15:24:51 +0000 (09:24 -0600)
This commit prepares the way for a later commit to remove a chunk of
essentially duplicate code.  It does this at the cost of an extra
test of a boolean each time through the loop.  But it avoids calculating
the fold, a potentially expensive operation, unless necessary.  Previously,
when the next input was a quantifier, the calculated fold was discarded,
unused.  This commit avoids doing that calculation when the next input is
a quantifier.

regcomp.c

index 9e16aa0..9484642 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -10399,6 +10399,7 @@ tryagain:
            STRLEN foldlen;
            U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
             U8 node_type;
+            bool next_is_quantifier;
 
            /* Is this a LATIN LOWER CASE SHARP S in an EXACTFU node?  If so,
             * it is folded to 'ss' even if not utf8 */
@@ -10634,6 +10635,20 @@ tryagain:
                                      && ender == LATIN_SMALL_LETTER_SHARP_S);
                if ( RExC_flags & RXf_PMf_EXTENDED)
                    p = regwhite( pRExC_state, p );
+
+                /* If the next thing is a quantifier, it applies to this
+                 * character only, which means that this character has to be in
+                 * its own node and can't just be appended to the string in an
+                 * existing node, so if there are already other characters in
+                 * the node, close the node with just them, and set up to do
+                 * this character again next time through, when it will be the
+                 * only thing in its new node */
+                if ((next_is_quantifier = (p < RExC_end && ISMULT2(p))) && len)
+               {
+                    p = oldp;
+                    goto loopdone;
+                }
+
                if ((UTF && FOLD) || is_exactfu_sharp_s) {
                    /* Prime the casefolded buffer.  Locale rules, which apply
                     * only to code points < 256, aren't known until execution,
@@ -10695,10 +10710,8 @@ tryagain:
                        }
                    }
                }
-               if (p < RExC_end && ISMULT2(p)) { /* Back off on ?+*. */
-                   if (len)
-                       p = oldp;
-                   else if (UTF || is_exactfu_sharp_s) {
+               if (next_is_quantifier) {
+                   if (UTF || is_exactfu_sharp_s) {
                         if (FOLD) {
                              /* Emit all the Unicode characters. */
                              STRLEN numlen;