Correct INSTALL to warn about editing cflags, not cflags.SH

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index e3da6e9..b106cc1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3325,11 +3325,23 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              if ( noper_trietype
                                    &&
                                    (
-                                        ( noper_trietype == NOTHING )
-                                        ||
-                                        ( trietype == NOTHING )
-                                        ||
-                                        ( trietype == noper_trietype )
+                                        /* XXX: Currently we cannot allow a NOTHING node to be the first element
+                                         * of a TRIEABLE sequence; otherwise we will overwrite the regop following
+                                         * the NOTHING with the TRIE regop later on. This is because a NOTHING node
+                                         * is only one regnode wide, and a TRIE is two regnodes. An example of a
+                                         * problematic pattern is: "x" =~ /\A(?>(?:(?:)A|B|C?x))\z/
+                                         * At a later point of time we can somewhat work around this by handling
+                                         * NOTHING -> EXACT sequences as generated by /(?:)A|(?:)B/ type patterns,
+                                         * as we can effectively ignore the NOTHING regop in that case.
+                                         * This clause, which allows NOTHING to start a sequence, is left commented
+                                         * out as a reference.
+                                         * - Yves
+
+                                           ( noper_trietype == NOTHING)
+                                           || ( trietype == NOTHING )
+                                        */
+                                        ( noper_trietype == NOTHING && trietype )
+                                        || ( trietype == noper_trietype )
                                    )
  #ifdef NOJUMPTRIE
                                    && noper_next == tail
@@ -3354,13 +3366,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  /* handle unmergable node -
                                   * noper may either be a triable node which can not be tried
                                   * together with the current trie, or a non triable node */
-                                if ( last && trietype != NOTHING ) {
-                                    /* if last is set then we have found at least two triable branch
-                                     * sequences in a row of a similar trietype so we can turn them
-                                     * into a trie */
-                                    make_trie( pRExC_state, 
-                                            startbranch, first, cur, tail, count, 
-                                            trietype, depth+1 );
+                                if ( last ) {
+                                    /* If last is set and trietype is not NOTHING then we have found
+                                     * at least two triable branch sequences in a row of a similar
+                                     * trietype so we can turn them into a trie. If/when we
+                                     * allow NOTHING to start a trie sequence this condition will be
+                                     * required, and it isn't expensive so we leave it in for now. */
+                                    if ( trietype != NOTHING )
+                                        make_trie( pRExC_state,
+                                                startbranch, first, cur, tail, count,
+                                                trietype, depth+1 );
                                      last = NULL; /* note: we clear/update first, trietype etc below, so we dont do it here */
                                  }
                                  if ( noper_trietype
@@ -3480,8 +3495,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             UV uc;
             if (UTF) {
                 const U8 * const s = (U8*)STRING(scan);
+               uc = utf8_to_uvchr_buf(s, s + l, NULL);
                 l = utf8_length(s, s + l);
-               uc = utf8_to_uvchr(s, NULL);
             } else {
                 uc = *((U8*)STRING(scan));
             }
@@ -3575,8 +3590,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             }
             if (UTF) {
                 const U8 * const s = (U8 *)STRING(scan);
+               uc = utf8_to_uvchr_buf(s, s + l, NULL);
                 l = utf8_length(s, s + l);
-               uc = utf8_to_uvchr(s, NULL);
             }
             else if (has_exactf_sharp_s) {
                 RExC_seen |= REG_SEEN_EXACTF_SHARP_S;
@@ -9822,7 +9837,10 @@ tryagain:
                               for (foldbuf = tmpbuf;
                                    foldlen;
                                    foldlen -= numlen) {
-                                  ender = utf8_to_uvchr(foldbuf, &numlen);
+
+                                  /* tmpbuf has been constructed by us, so we
+                                   * know it is valid utf8 */
+                                  ender = valid_utf8_to_uvchr(foldbuf, &numlen);
                                    if (numlen > 0) {
                                         const STRLEN unilen = reguni(pRExC_state, ender, s);
                                         s       += unilen;
@@ -9858,7 +9876,7 @@ tryagain:
                           for (foldbuf = tmpbuf;
                                foldlen;
                                foldlen -= numlen) {
-                              ender = utf8_to_uvchr(foldbuf, &numlen);
+                              ender = valid_utf8_to_uvchr(foldbuf, &numlen);
                                if (numlen > 0) {
                                     const STRLEN unilen = reguni(pRExC_state, ender, s);
                                     len     += unilen;